In [3]:
import numpy as np
from scipy.sparse import issparse, csc_matrix
import time
from sklearn.model_selection import train_test_split
import datetime

def load_svm_file(file_path, zero_based=True):
    labels = []
    rows = []
    cols = []
    data = []

    with open(file_path, 'r') as f:
        for i, line in enumerate(f):
            parts = line.strip().split()
            labels.append(float(parts[0]))

            for feat in parts[1:]:
                idx, val = feat.split(':')
                idx = int(idx) - (0 if zero_based else 1)
                rows.append(i)
                cols.append(idx)
                data.append(float(val))

    # Jawna konwersja do CSC
    from scipy.sparse import coo_matrix
    X = coo_matrix((data, (rows, cols))).tocsc()
    y = np.array(labels)

    return X, y 
X,y =load_svm_file('../data/paper_data/news20.binary')
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2)

In [20]:
import numpy as np
import time
from scipy.sparse import issparse, csc_matrix
from sklearn.model_selection import train_test_split
import datetime
from data_loader import load_svm_file




class SparseCoordinateDescentSVM_2:
    def __init__(self, C=1.0, max_iter=10000, tol=1e-8, sigma=0.01, beta=0.5, verbose=True, maxtime = np.inf):
        self.C = C
        self.max_iter = max_iter
        self.tol = tol
        self.sigma = sigma
        self.beta = beta
        self.verbose = verbose
        self.w = None
        self.z = None
        self.H = None
        self.lambdas = {}
        self.times = []
        self.relative_diffs = []
        self.obj_values =[]
        self.time_start = time.time()
        self.max_time = maxtime


        self.objective_values = []
        self.gradient_values = []
        self.gradient_norm_values = []
        self.accuracies = []

    def _precompute_H(self, X):
        """
            Precompute diagonal elements of Hessian matrix:
            H_i = 1 + 2C * sum_j x_ji^2
            Used for second derivatives during coordinate updates.
        """
        return  1 + 2 * self.C * (X.power(2).sum(axis=0)).A1
    
    def _d_prime_i_0(self, X, y, i):
        margins = 1 - y * self.z
        active = margins > 0

        # Indeksy kolumny i w formacie CSC
        col_start = X.indptr[i]
        col_end = X.indptr[i + 1]
        row_indices = X.indices[col_start:col_end]
        data = X.data[col_start:col_end]

        # Maska aktywnych przykładów w tych wierszach
        mask = active[row_indices]

        if not np.any(mask):
            return self.w[i]  # brak aktywnych przykładów, gradient to tylko w[i]

        filtered_y = y[row_indices][mask]
        filtered_margins = margins[row_indices][mask]
        filtered_data = data[mask]

        gradient_sum = np.sum(filtered_data * filtered_y * filtered_margins)

        d_prime_i = self.w[i] - 2 * self.C * gradient_sum
        self.gradient_values[i] = d_prime_i

        return d_prime_i


    def _d_double_prime_i_0(self, X, y, i):
        # Wyciągamy niezerowe elementy kolumny i z csc_matrix X
        col_start = X.indptr[i]
        col_end = X.indptr[i + 1]
        indices = X.indices[col_start:col_end]  # wiersze niezerowych elementów kolumny i
        data = X.data[col_start:col_end]       # wartości tych elementów

        # Obliczamy marginesy
        margins = 1 - y * self.z
        # Tworzymy maskę aktywnych przykładów (tych, które łamią warunek margin > 0)
        active_mask = margins[indices] > 0

        if not np.any(active_mask):
            return 1.0  # tylko regularizacja, brak strat

        data_active = data[active_mask]


        return 1.0 + 2 * self.C * np.sum(data_active ** 2)


    def _newton_direction(self, X, y, i,  denominator = None):

        numerator = self._d_prime_i_0(X, y, i)

        if (denominator == None):
            denominator = self._d_double_prime_i_0(X, y, i)
        return -numerator / denominator if denominator != 0 else 0.0

    def _d_i_z(self, X, y, i, z):
        x_col = X[:, i]
        indices = x_col.indices
        delta = z * x_col.data
        z_new_part = self.z[indices] + delta
    
        margins_part = 1 - y[indices] * z_new_part
        active = margins_part > 0
        loss_term = np.sum(margins_part[active] ** 2)
    
        w_norm_sq = np.dot(self.w, self.w) + 2 * z * self.w[i] + z**2
        return 0.5 * w_norm_sq + self.C * loss_term


    def _compute_threshhold_lambda(self, X, y, i):
  
        dii = self._d_double_prime_i_0(X, y, i)

        return dii / (0.5 * self.H[i]+ self.sigma), dii



    def _compute_lambda(self, X, y, i, d):
        lambda_bar = self.lambdas[i]
        if abs(d) < 1e-12:
            return 0.0
        if 1.0 <= lambda_bar:
            return 1.0

        # Otherwise, perform line search
        D0 = self._d_i_z(X, y, i, 0)
        k = 0
        while True:
            lam = self.beta ** k
            z = lam * d
            Dz = self._d_i_z(X, y, i, z)
            if Dz - D0 <= -self.sigma * (z ** 2):
                return lam
            k += 1
            if k > 20:  # Prevent infinite loop
                return lam

    def _coordinate_update(self, X, y, i, z, dii=None):
        d = self._newton_direction(X, y, i, dii)
        if abs(d) < 1e-12:
            return

        lam = 1.0
        if self.lambdas[i] < 1.0:  # Only do line search if needed
            lam = self._compute_lambda(X, y, i, d)

        delta = lam * d
        self.w[i] += delta

        # Efficient update of self.z for sparse column
        col_start = X.indptr[i]
        col_end = X.indptr[i + 1]
        indices = X.indices[col_start:col_end]
        data = X.data[col_start:col_end]

        self.z[indices] += delta * data



    def fit(self, X, y, X_test, y_test):
        print(f"start  time = {datetime.datetime.now()}")
        if not issparse(X) or not isinstance(X, csc_matrix):
            raise ValueError("X must be a CSC (Compressed Sparse Column) matrix")
        #start with initial w0
        
        self.w = np.zeros(X.shape[1],dtype=np.float64)
        self.H = self._precompute_H(X)
        self.z = X.dot(self.w)

        self.gradient_values = np.ones(X.shape[1])

        start_even_before = time.time()
        for iteration in range(self.max_iter):
            i = np.random.randint(0, X.shape[1])
            if i not in self.lambdas:
                self.lambdas[i], dii = self._compute_threshhold_lambda(X, y, i)
            else:
                dii = None
            self._coordinate_update(X, y, i, dii)
            
            elapsed = time.time() - start_even_before
            if iteration % 10000 == 9999:
                    f_w = self._objective(y=y)
                    self.objective_values.append((elapsed, f_w))
                    print(f"Inner iteration {iteration}, error = {1 - self.score(X, y):.6f}. Time elapsed: {elapsed:.2f} s.")

                    final_grad_norm = np.linalg.norm(self.gradient_values)
                    self.gradient_norm_values.append((elapsed, final_grad_norm))
                    if X_test is not None and y_test is not None:
                        accuracy = self.score(X_test, y_test)
                        print(f"Accuracy on test set at iter {iteration+1}: {accuracy:.4f}")
                        self.accuracies.append((elapsed, accuracy))

            if elapsed > self.max_time:
                return self

            elapsed = time.time() - start_even_before
            final_grad_norm = np.linalg.norm(self.gradient_values)
            self.gradient_norm_values.append((elapsed, final_grad_norm))


    

        print(f"finish  time = {datetime.datetime.now()}")

        print(f"Exited outer iteration loop number {iteration+1}, grad_norm = {final_grad_norm:.4f}, time: {elapsed:.2f} s.")
        print(f"And moreover, objective value function: {self.objective_values[-1][1]}")
        return self


    def predict(self, X):
        return np.sign(X @self.w)

    def score(self, X, y):
        return np.mean(self.predict(X) == y)
        
    def _objective(self, y):
        margins = 1 - y * self.z
        loss = np.sum((margins[margins > 0]) ** 2)
        return 0.5 * np.dot(self.w, self.w) + self.C * loss

model = SparseCoordinateDescentSVM_2(C=1.0, max_iter=11355192,maxtime = 400)
#X,y = load_svm_file('../data/paper_data/news20.binary')
#X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)
#X_train = X_train.tocsc()

model.fit(X_train, y_train, X_test, y_test)
score_model1 = model.score(X_test, y_test)
print(f"Accuracy: {score_model1}")
print(model.objective_values)
print(model.gradient_values)
data = 'news20'
np.save(f'model4_{data}_objective_values.npy', model.objective_values)
np.save(f'model4_{data}_gradient_values.npy', model.gradient_norm_values)
np.save(f'model4_{data}_accuracy_values.npy', model.accuracies)


start  time = 2025-06-11 23:15:39.440477
Inner iteration 9999, error = 0.369217. Time elapsed: 20.40 s.
Accuracy on test set at iter 10000: 0.5827
Inner iteration 19999, error = 0.272818. Time elapsed: 36.94 s.
Accuracy on test set at iter 20000: 0.6887
Inner iteration 29999, error = 0.222618. Time elapsed: 54.75 s.
Accuracy on test set at iter 30000: 0.7282
Inner iteration 39999, error = 0.206927. Time elapsed: 72.97 s.
Accuracy on test set at iter 40000: 0.7440
Inner iteration 49999, error = 0.173418. Time elapsed: 90.66 s.
Accuracy on test set at iter 50000: 0.7768
Inner iteration 59999, error = 0.147099. Time elapsed: 108.44 s.
Accuracy on test set at iter 60000: 0.8020
Inner iteration 69999, error = 0.128345. Time elapsed: 126.21 s.
Accuracy on test set at iter 70000: 0.8225
Inner iteration 79999, error = 0.119717. Time elapsed: 143.47 s.
Accuracy on test set at iter 80000: 0.8300
Inner iteration 89999, error = 0.107964. Time elapsed: 162.62 s.
Accuracy on test set at iter 90000: 

In [7]:
model.w.shape

(1355192,)

In [8]:
X_test.shape

(4000, 1355192)

In [12]:
X_test

<4000x1355192 sparse matrix of type '<class 'numpy.float64'>'
	with 1786585 stored elements in Compressed Sparse Column format>

In [15]:
X_test[:100] @ model.w

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [25]:
import numpy as np
import time
from scipy.sparse import issparse, csc_matrix
import datetime


class SparseCoordinateDescentSVM_3:
    def __init__(self, C=1.0, max_iter=10000, tol=1e-8, sigma=0.01, beta=0.5, verbose=True, max_time = 400):
        self.C = C
        self.max_iter = max_iter
        self.tol = tol
        self.sigma = sigma
        self.beta = beta
        self.verbose = verbose
        self.w = None
        self.z = None
        self.H = None
        self.lambdas = {}
        self.times = []
        self.relative_diffs = []
        self.obj_values =[]
        self.time_start = time.time()

        self.objective_values = []
        self.gradient_values = []
        self.gradient_norm_values = []
        self.accuracies = []
        self.max_time = max_time


    def _precompute_H(self, X):
        """
            Precompute diagonal elements of Hessian matrix:
            H_i = 1 + 2C * sum_j x_ji^2
            Used for second derivatives during coordinate updates.
        """
        return  1 + 2 * self.C * (X.power(2).sum(axis=0)).A1
    def _d_prime_i_0(self, X, y, i):
        margins = 1 - y * self.z
        active = margins > 0

        # Indeksy kolumny i w formacie CSC
        col_start = X.indptr[i]
        col_end = X.indptr[i + 1]
        row_indices = X.indices[col_start:col_end]
        data = X.data[col_start:col_end]

        # Maska aktywnych przykładów w tych wierszach
        mask = active[row_indices]

        if not np.any(mask):
            return self.w[i]  # brak aktywnych przykładów, gradient to tylko w[i]

        filtered_y = y[row_indices][mask]
        filtered_margins = margins[row_indices][mask]
        filtered_data = data[mask]

        gradient_sum = np.sum(filtered_data * filtered_y * filtered_margins)
        d_prime_i = self.w[i] - 2 * self.C * gradient_sum
        self.gradient_values[i] = d_prime_i
        return d_prime_i


    def _d_double_prime_i_0(self, X, y, i):
        # Wyciągamy niezerowe elementy kolumny i z csc_matrix X
        col_start = X.indptr[i]
        col_end = X.indptr[i + 1]
        indices = X.indices[col_start:col_end]  # wiersze niezerowych elementów kolumny i
        data = X.data[col_start:col_end]       # wartości tych elementów

        # Obliczamy marginesy
        margins = 1 - y * self.z
        # Tworzymy maskę aktywnych przykładów (tych, które łamią warunek margin > 0)
        active_mask = margins[indices] > 0

        if not np.any(active_mask):
            return 1.0  # tylko regularizacja, brak strat

        data_active = data[active_mask]

        return 1.0 + 2 * self.C * np.sum(data_active ** 2)


    def _newton_direction(self, X, y, i,  denominator = None):

        numerator = self._d_prime_i_0(X, y, i)

        if (denominator == None):
            denominator = self._d_double_prime_i_0(X, y, i)
        return -numerator / denominator if denominator != 0 else 0.0

    def _d_i_z(self, X, y, i, z):
        x_col = X[:, i]
        indices = x_col.indices
        delta = z * x_col.data
        z_new_part = self.z[indices] + delta
    
        margins_part = 1 - y[indices] * z_new_part
        active = margins_part > 0
        loss_term = np.sum(margins_part[active] ** 2)
    
        w_norm_sq = np.dot(self.w, self.w) + 2 * z * self.w[i] + z**2
        return 0.5 * w_norm_sq + self.C * loss_term


    def _compute_threshhold_lambda(self, X, y, i):
  
        dii = self._d_double_prime_i_0(X, y, i)

        return dii / (0.5 * self.H[i]+ self.sigma), dii



    def _compute_lambda(self, X, y, i, d):
        lambda_bar = self.lambdas[i]
        if abs(d) < 1e-12:
            return 0.0
        if 1.0 <= lambda_bar:
            return 1.0

        # Otherwise, perform line search
        D0 = self._d_i_z(X, y, i, 0)
        k = 0
        while True:
            lam = self.beta ** k
            z = lam * d
            Dz = self._d_i_z(X, y, i, z)
            if Dz - D0 <= -self.sigma * (z ** 2):
                return lam
            k += 1
            if k > 20:  # Prevent infinite loop
                return lam

    def _coordinate_update(self, X, y, i, z, dii=None):
        d = self._newton_direction(X, y, i, dii)
        if abs(d) < 1e-12:
            return

        lam = 1.0
        if self.lambdas[i] < 1.0:  # Only do line search if needed
            lam = self._compute_lambda(X, y, i, d)

        delta = lam * d
        self.w[i] += delta

        # Efficient update of self.z for sparse column
        col_start = X.indptr[i]
        col_end = X.indptr[i + 1]
        indices = X.indices[col_start:col_end]
        data = X.data[col_start:col_end]

        self.z[indices] += delta * data



    def fit(self, X, y, X_test, y_test):
        print(f"start  time = {datetime.datetime.now()}")
        if not issparse(X) or not isinstance(X, csc_matrix):
            raise ValueError("X must be a CSC (Compressed Sparse Column) matrix")
        #start with initial w0
        
        self.w = np.zeros(X.shape[1],dtype=np.float64)
        self.H = self._precompute_H(X)
        self.z = X.dot(self.w)
        self.gradient_values = np.ones(X.shape[1])

        start_even_before =time.time()

        for iteration in range(self.max_iter):
            perm = np.random.permutation(X.shape[1])
            counter = 0
            for i in perm:
                counter +=1
                if i not in self.lambdas:
                    self.lambdas[i], dii = self._compute_threshhold_lambda(X, y, i)
                else:
                    dii = None
                self._coordinate_update(X, y, i, dii)
                
                elapsed = time.time() - start_even_before
                if counter % 10000 == 9999:
                        f_w = self._objective(y=y)
                        self.objective_values.append((elapsed, f_w))
                        print(f"Inner iteration {counter}, error = {1 - self.score(X, y):.6f}. Time elapsed: {elapsed:.2f} s.")

                        final_grad_norm = np.linalg.norm(self.gradient_values)
                        self.gradient_norm_values.append((elapsed, final_grad_norm))
                        if X_test is not None and y_test is not None:
                            accuracy = self.score(X_test, y_test)
                            print(f"Accuracy on test set at iter {counter+1}: {accuracy:.4f}")
                            self.accuracies.append((elapsed, accuracy))

                if elapsed > self.max_time:
                    return self

                elapsed = time.time() - start_even_before
                final_grad_norm = np.linalg.norm(self.gradient_values)
                self.gradient_norm_values.append((elapsed, final_grad_norm))
    

        print(f"finish  time = {datetime.datetime.now()}")
        return self


    def predict(self, X):
        return np.sign(X @self.w)

    def score(self, X, y):
        return np.mean(self.predict(X) == y)
        
    def _objective(self, y):
        margins = 1 - y * self.z
        loss = np.sum((margins[margins > 0]) ** 2)
        return 0.5 * np.dot(self.w, self.w) + self.C * loss

model = SparseCoordinateDescentSVM_3(C=1.0, max_iter=1
                                     )


#X,y = load_svm_file('../data/paper_data/news20.binary')
#X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)
#X_train = X_train.tocsc()

model.fit(X_train, y_train, X_test, y_test)
score_model1 = model.score(X_test, y_test)
print(f"Accuracy: {score_model1}")
print(model.objective_values)
print(model.gradient_values)
data = 'news20'
np.save(f'model5_{data}_objective_values.npy', model.objective_values)
np.save(f'model5_{data}_gradient_values.npy', model.gradient_norm_values)
np.save(f'model5_{data}_accuracy_values.npy', model.accuracies)


start  time = 2025-06-12 00:15:45.340815
Inner iteration 9999, error = 0.348587. Time elapsed: 12.94 s.
Accuracy on test set at iter 10000: 0.6160
Inner iteration 19999, error = 0.238497. Time elapsed: 23.20 s.
Accuracy on test set at iter 20000: 0.7232
Inner iteration 29999, error = 0.192923. Time elapsed: 34.03 s.
Accuracy on test set at iter 30000: 0.7682
Inner iteration 39999, error = 0.168917. Time elapsed: 45.31 s.
Accuracy on test set at iter 40000: 0.7887
Inner iteration 49999, error = 0.141035. Time elapsed: 56.67 s.
Accuracy on test set at iter 50000: 0.8130
Inner iteration 59999, error = 0.124719. Time elapsed: 69.02 s.
Accuracy on test set at iter 60000: 0.8315
Inner iteration 69999, error = 0.115466. Time elapsed: 81.16 s.
Accuracy on test set at iter 70000: 0.8433
Inner iteration 79999, error = 0.103713. Time elapsed: 93.41 s.
Accuracy on test set at iter 80000: 0.8540
Inner iteration 89999, error = 0.092961. Time elapsed: 105.76 s.
Accuracy on test set at iter 90000: 0.8