In [3]:
import numpy as np
from scipy.sparse import issparse, csc_matrix
import time
from data_loader import load_svm_file
from sklearn.model_selection import train_test_split
import datetime

def load_svm_file(file_path, zero_based=True):
    labels = []
    rows = []
    cols = []
    data = []

    with open(file_path, 'r') as f:
        for i, line in enumerate(f):
            parts = line.strip().split()
            labels.append(float(parts[0]))

            for feat in parts[1:]:
                idx, val = feat.split(':')
                idx = int(idx) - (0 if zero_based else 1)
                rows.append(i)
                cols.append(idx)
                data.append(float(val))

    # Jawna konwersja do CSC
    from scipy.sparse import coo_matrix
    X = coo_matrix((data, (rows, cols))).tocsc()
    y = np.array(labels)

    return X, y 
X,y =load_svm_file('../data/paper_data/news20.binary')

In [6]:
import numpy as np
from scipy.sparse import issparse, csc_matrix
import time
from data_loader import load_svm_file
from sklearn.model_selection import train_test_split
import datetime

class CDPER_L2SVM:
    def __init__(self, C=1.0, sigma=0.01, beta=0.5, max_iter=1000, tol=1e-4, random_state=42, exact_hessian=True):
        self.C = C
        self.sigma = sigma
        self.beta = beta
        self.max_iter = max_iter
        self.tol = tol
        self.random_state = random_state
        self.exact_hessian = exact_hessian
        self.w = None
        self.z = None
        self.H = None
        self.lambdas = {}
        np.random.seed(random_state)

    def _precompute_H(self, X):
        """
            Precompute diagonal elements of Hessian matrix:
            H_i = 1 + 2C * sum_j x_ji^2
            Used for second derivatives during coordinate updates.
        """
        self.H = 1 + 2 * self.C * (X.power(2).sum(axis=0)).A1

    def _get_active_mask(self, y):
        """
            Identify samples violating margin (i.e., 1 - y*z > 0).
            Only these contribute to gradient updates.
        """
        return (1 - y * self.z) > 0

    def _d_prime_i(self, X, y, i):
        """
            Compute gradient (first derivative) w.r.t. w[i]
        """
        col_start = X.indptr[i]
        col_end = X.indptr[i + 1]
        indices = X.indices[col_start:col_end]
        data = X.data[col_start:col_end]

        active_mask = self._get_active_mask(y)[indices]
        if not np.any(active_mask):
            return self.w[i]

        y_active = y[indices[active_mask]]
        z_active = self.z[indices[active_mask]]
        data_active = data[active_mask]

        margins = 1 - y_active * z_active
        return self.w[i] - 2 * self.C * np.sum(data_active * y_active * margins)

    def _d_double_prime_i(self, X, y, i, exact=False):
        """
        Return the second derivative (Hessian diagonal) for feature i.

        exact:  If True, compute using only active set (violating examples)
                If False (default), return precomputed H[i] as upper bound
        """
        if not exact:
            return self.H[i]

        col_start = X.indptr[i]
        col_end = X.indptr[i + 1]
        indices = X.indices[col_start:col_end]
        data = X.data[col_start:col_end]

        active_mask = self._get_active_mask(y)[indices]
        if not np.any(active_mask):
            return 1.0

        data_active = data[active_mask]
        return 1.0 + 2 * self.C * np.sum(data_active ** 2)

    def _newton_direction(self, X, y, i):
        """
            Compute Newton direction for coordinate i:
            direction = -gradient / hessian
        """
        d_prime = self._d_prime_i(X, y, i)
        d_double_prime = self._d_double_prime_i(X, y, i, exact=self.exact_hessian)
        return -d_prime / d_double_prime if d_double_prime != 0 else 0.0

    def _line_search(self, X, y, i, d):
        """
            Perform backtracking line search to ensure sufficient objective decrease:
            D(w + λd) - D(w) <= -σ * (λd)^2
        """
        col_start = X.indptr[i]
        col_end = X.indptr[i + 1]
        indices = X.indices[col_start:col_end]
        data = X.data[col_start:col_end]

        D0 = self._objective(y=y)

        lam = 1.0
        while True:
            delta = lam * d

            z_new = self.z.copy()
            z_new[indices] += delta * data

            D_new = self._objective(w=self.w, z=z_new, y=y)

            if (D_new - D0) <= -self.sigma * (delta ** 2):
                return lam
            lam *= self.beta
            if lam < 1e-10:
                return lam

    def _objective(self, w=None, z=None, y=None):
        """
            Compute the primal objective:
            0.5 * ||w||^2 + C * sum((1 - y*z)_+^2)
        """
        if w is None:
            w = self.w
        if z is None:
            z = self.z
        if y is None:
            raise ValueError("y must be provided")

        margins = 1 - y * z
        loss = np.sum(margins[margins > 0] ** 2)
        return 0.5 * np.dot(w, w) + self.C * loss

    def _compute_constant_lambda(self, X, y, i):
        """
            Precompute λ̄ upper bound from the line search theorem:
            λ̄ = d'' / (0.5 * H_i + σ)
        """
        dii = self._d_double_prime_i(X, y, i, exact=self.exact_hessian)

        col_start = X.indptr[i]
        col_end = X.indptr[i + 1]
        data = X.data[col_start:col_end]
        Xi_squared_sum = np.sum(data ** 2)

        Hi = 1 + 2 * self.C * Xi_squared_sum
        return dii / (0.5 * Hi + self.sigma), dii

    def fit(self, X, y):
        print(f"start  time = {datetime.datetime.now()}")
        if not issparse(X) or not isinstance(X, csc_matrix):
            X = csc_matrix(X)

        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.z = X @ self.w
        self._precompute_H(X)

        for k in range(self.max_iter):
            perm = np.random.permutation(n_features)

            start_time = time.time()
            inner_iter = 0
            
            for i in perm:
                inner_iter += 1

                d = self._newton_direction(X, y, i)
                if abs(d) < 1e-10:
                    continue

                lam = self._line_search(X, y, i, d)
                delta = lam * d
                self.w[i] += delta

                col_start = X.indptr[i]
                col_end = X.indptr[i + 1]
                indices = X.indices[col_start:col_end]
                data = X.data[col_start:col_end]
                self.z[indices] += delta * data

                if inner_iter % 1000 == 0:
                    print(f"Inner iteration {inner_iter}, error = {1 - self.score(X, y):.6f}, time = {datetime.datetime.now()}")

            if k % 2 == 0 or k == self.max_iter - 1:
                print(f"Iter {k}, error = {1 - self.score(X, y):.6f}")

        print(f"finish  time = {datetime.datetime.now()}")
        return self

    def predict(self, X):
        if issparse(X):
            return np.sign(X @ self.w)
        return np.sign(np.dot(X, self.w))

    def score(self, X, y):
        return np.mean(self.predict(X) == y)


In [7]:
model = CDPER_L2SVM(C=1.0, max_iter=1, random_state=42, exact_hessian=False)
model.fit(X,y)

start  time = 2025-06-07 18:49:08.066658
Inner iteration 1000, error = 0.810612, time = 2025-06-07 18:49:09.567918
Inner iteration 2000, error = 0.713793, time = 2025-06-07 18:49:10.480074
Inner iteration 3000, error = 0.621724, time = 2025-06-07 18:49:11.244630
Inner iteration 4000, error = 0.563913, time = 2025-06-07 18:49:12.019720
Inner iteration 5000, error = 0.518904, time = 2025-06-07 18:49:12.821910
Inner iteration 6000, error = 0.479796, time = 2025-06-07 18:49:13.596911
Inner iteration 7000, error = 0.448790, time = 2025-06-07 18:49:14.389988
Inner iteration 8000, error = 0.424935, time = 2025-06-07 18:49:15.167596
Inner iteration 9000, error = 0.392478, time = 2025-06-07 18:49:15.948263
Inner iteration 10000, error = 0.373025, time = 2025-06-07 18:49:16.925391
Inner iteration 11000, error = 0.355221, time = 2025-06-07 18:49:17.947633
Inner iteration 12000, error = 0.347319, time = 2025-06-07 18:49:18.888593
Inner iteration 13000, error = 0.326815, time = 2025-06-07 18:49:19.

<__main__.CDPER_L2SVM at 0x21eff7483d0>

In [11]:
X.shape

(19996, 1355192)