In [12]:
import numpy as np

class SDCAClassifier:
    def __init__(self, loss="squared", lam=1e-3, max_iter=1500, tol=1e-5, verbose=False, use_preSGD=True):
        self.loss = loss
        self.lam = lam
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose
        self.use_preSGD = use_preSGD
        self.w = None
        self.alpha = None
        self.losses_ = []

    def fit(self, X, y):
        n_samples, n_features = X.shape
        y_internal = np.where(y <= 0, -1, 1) if self.loss == "log" else y.copy()

        self.alpha = np.zeros(n_samples)
        self.w = np.zeros(n_features)
        alpha_x_sum = np.zeros(n_features)
        start_epoch = 0

        # --- FIRST EPOCH: MODIFIED-SGD (PER SCREENSHOT) ---
        if self.use_preSGD:
            if self.verbose: print("Running Modified-SGD (Epoch 1)...")
            
            for t_idx in range(n_samples):
                t = t_idx + 1  # Using t normalization as per ss
                xi = X[t_idx]
                yi = y_internal[t_idx]
                norm_sq = np.dot(xi, xi)
                
                if norm_sq == 0: 
                    # If feature is zero, w remains unchanged, alpha_t is effectively 0
                    continue

                # Prediction using previous w: w^(t-1)
                pred = np.dot(self.w, xi)
                
                # Denominator for solving the maximization in the box:
                # Based on the screenshot: Find alpha_t to maximize -phi*(-alpha) - (lam*t/2) ||w + (1/lam*t) alpha*x||^2
                # This leads to a denominator using (lam * t)
                denom = norm_sq / (self.lam * t) + 1
                
                if self.loss == "squared":
                    # alpha_t = (y_t - x_t @ w^(t-1)) / (||x_t||^2 / (lambda * t) + 1)
                    alpha_t = (yi - pred) / denom
                elif self.loss == "log":
                    exp_term = np.exp(np.clip(yi * pred, -50, 50))
                    grad = yi / (1 + exp_term)
                    alpha_t = np.clip(grad / denom, min(0, yi), max(0, yi))

                self.alpha[t_idx] = alpha_t
                alpha_x_sum += alpha_t * xi
                
                # Update w based on the screenshot: w = (1 / lam*t) * sum_{i=1 to t} alpha_i*xi
                self.w = alpha_x_sum / (self.lam * t)
            print("Finnished Modified-SGD (Epoch 1)")

            start_epoch = 1
            
        # --- REMAINING EPOCHS: CLASSIC SDCA (NORMALIZED BY n) ---
        for it in range(start_epoch, self.max_iter):
            indices = np.random.permutation(n_samples)
            for i in indices:
                xi = X[i]
                yi = y_internal[i]
                norm_sq = np.dot(xi, xi)
                if norm_sq == 0: continue

                pred = np.dot(self.w, xi)
                # Classic SDCA uses full n normalization
                denom = norm_sq / (self.lam * n_samples) + 1
                
                if self.loss == "squared":
                    delta = (yi - pred - self.alpha[i]) / denom
                elif self.loss == "log":
                    exp_term = np.exp(np.clip(yi * pred, -50, 50))
                    grad = yi / (1 + exp_term) - self.alpha[i]
                    delta = grad / denom
                    new_alpha = np.clip(self.alpha[i] + delta, min(0, yi), max(0, yi))
                    delta = new_alpha - self.alpha[i]

                self.alpha[i] += delta
                self.w += (delta / (self.lam * n_samples)) * xi
            
            primal_obj, dual_obj = self._compute_objectives(X, y_internal)
            gap = primal_obj - dual_obj
            self.losses_.append(primal_obj)

            if self.verbose and (it + 1) % 10 == 0:
                print(f"Iter {it+1:3d} | Gap: {gap:.2e} | Primal Loss: {primal_obj:.6f}")
            
            if gap < self.tol:
                break
                
        return self

    def _compute_objectives(self, X, y):
        n = X.shape[0]
        preds = X @ self.w
        if self.loss == "squared":
            loss_term = np.mean(0.5 * (preds - y)**2)
            dual_res = np.mean(0.5 * self.alpha**2 - y * self.alpha)
        else:
            loss_term = np.mean(np.log(1 + np.exp(-y * preds)))
            v = np.clip(y * self.alpha, 1e-12, 1 - 1e-12)
            dual_res = np.mean(v * np.log(v) + (1 - v) * np.log(1 - v))
            
        primal_obj = loss_term + 0.5 * self.lam * np.dot(self.w, self.w)
        dual_obj = -dual_res - 0.5 * self.lam * np.dot(self.w, self.w)
        return primal_obj, dual_obj
 
    def _compute_loss(self, X, y):
        pred = X @ self.w 
        if self.loss == "log":
            z = y * pred
            return np.mean(np.log(1 + np.exp(-z))) + 0.5 * self.lam * np.sum(self.w**2)
        else:
            return np.mean((y - pred) ** 2) + 0.5 * self.lam * np.sum(self.w**2)
        
    def predict(self, X):
        return (X @ self.w > 0).astype(int)

In [None]:
import numpy as np

class SDCAClassifier1:
    def __init__(self, loss="squared", lam=1e-3, max_iter=10000, tol=1e-5, verbose=False, use_preSGD=True):
        self.loss = loss
        self.lam = lam
        self.max_iter = max_iter  # Now total number of individual updates
        self.tol = tol
        self.verbose = verbose
        self.use_preSGD = use_preSGD
        self.w = None
        self.alpha = None
        self.losses_ = []

    def fit(self, X, y):
        n_samples, n_features = X.shape
        y_internal = np.where(y <= 0, -1, 1) if self.loss == "log" else y.copy()

        # Initialize: w^(0) = 0
        self.alpha = np.zeros(n_samples)
        self.w = np.zeros(n_features)
        alpha_x_sum = np.zeros(n_features)
        
        current_step = 0

        # --- PRE-STEP: Sequential Modified-SGD (1 update per step) ---
        if self.use_preSGD:
            if self.verbose: print("Running Modified-SGD Phase...")
            for t_idx in range(n_samples):
                t = t_idx + 1
                xi, yi = X[t_idx], y_internal[t_idx]
                norm_sq = np.dot(xi, xi)
                
                if norm_sq > 0:
                    pred = np.dot(self.w, xi)
                    # Denominator uses lambda * t
                    denom = norm_sq / (self.lam * t) + 1
                    
                    if self.loss == "squared":
                        alpha_t = (yi - pred) / denom
                    elif self.loss == "log":
                        exp_term = np.exp(np.clip(yi * pred, -50, 50))
                        grad = yi / (1 + exp_term)
                        alpha_t = np.clip(grad / denom, min(0, yi), max(0, yi))

                    self.alpha[t_idx] = alpha_t
                    alpha_x_sum += alpha_t * xi
                    # Update w = (1/lambda*t) * sum
                    self.w = alpha_x_sum / (self.lam * t)
                
                current_step += 1
                
        # --- MAIN PHASE: Random SDCA (1 update per step) ---
        # We continue from current_step until max_iter
        if self.verbose: print(f"Starting Main SDCA Phase from step {current_step}...")
        
        for step in range(current_step, self.max_iter):
            # Select exactly one random index
            i = np.random.randint(n_samples)
            xi, yi = X[i], y_internal[i]
            norm_sq = np.dot(xi, xi)
            
            if norm_sq > 0:
                pred = np.dot(self.w, xi)
                # Standard SDCA uses lambda * n
                denom = norm_sq / (self.lam * n_samples) + 1
                
                if self.loss == "squared":
                    delta = (yi - pred - self.alpha[i]) / denom
                elif self.loss == "log":
                    exp_term = np.exp(np.clip(yi * pred, -50, 50))
                    grad = yi / (1 + exp_term) - self.alpha[i]
                    delta = grad / denom
                    new_alpha = np.clip(self.alpha[i] + delta, min(0, yi), max(0, yi))
                    delta = new_alpha - self.alpha[i]

                self.alpha[i] += delta
                # Incrementally update w
                self.w += (delta / (self.lam * n_samples)) * xi

            # Periodic convergence check (calculating objectives is expensive)
            if step % n_samples == 0:
                primal_obj, dual_obj = self._compute_objectives(X, y_internal)
                gap = primal_obj - dual_obj
                self.losses_.append(primal_obj)
                
                if self.verbose:
                    print(f"Step {step:6d} | Gap: {gap:.2e} | Primal: {primal_obj:.6f}")
                
                if gap < self.tol:
                    if self.verbose: print(f"Converged at step {step}")
                    break
                    
        return self

    def _compute_objectives(self, X, y):
        n = X.shape[0]
        preds = X @ self.w
        if self.loss == "squared":
            loss_term = np.mean(0.5 * (preds - y)**2)
            dual_res = np.mean(0.5 * self.alpha**2 - y * self.alpha)
        else:
            loss_term = np.mean(np.log(1 + np.exp(-y * preds)))
            v = np.clip(y * self.alpha, 1e-12, 1 - 1e-12)
            dual_res = np.mean(v * np.log(v) + (1 - v) * np.log(1 - v))
            
        primal_obj = loss_term + 0.5 * self.lam * np.dot(self.w, self.w)
        dual_obj = -dual_res - 0.5 * self.lam * np.dot(self.w, self.w)
        return primal_obj, dual_obj
    

    def _compute_loss(self, X, y):
        pred = X @ self.w 
        if self.loss == "log":
            z = y * pred
            return np.mean(np.log(1 + np.exp(-z))) + 0.5 * self.lam * np.sum(self.w**2)
        else:
            return np.mean((y - pred) ** 2) + 0.5 * self.lam * np.sum(self.w**2)
        
        
    def predict(self, X):
        return (X @ self.w > 0).astype(int)

In [17]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from ucimlrepo import fetch_ucirepo

# Fetch dataset
bank_marketing = fetch_ucirepo(id=222)
X = bank_marketing.data.features
y = (bank_marketing.data.targets == "yes").astype(int)  # convert yes/no -> 1/0

# One-hot encode categorical features
X = pd.get_dummies(X)

# Scale numeric features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y.values.ravel(), test_size=0.2, random_state=42
)

In [18]:
from sklearn.datasets import make_classification, make_regression
from sklearn.metrics import accuracy_score, mean_squared_error
import matplotlib.pyplot as plt

In [None]:
import numpy as np

lr_set = [1]
lams = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
lams.reverse()

best_loss = float('inf')
best_params = None

for lr in lr_set:
    for lam in lams:
        classifier = SDCAClassifier(loss="squared", lam=lam, verbose=0, tol=1e-6)
        classifier.fit(X_train, y_train)
        test_loss = classifier._compute_loss(X_test, y_test)
        y_pred = classifier.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f"LR {lr:.1e} - Lambda {lam:.1e} - Test Loss: {test_loss:.6f}- Steps {len(classifier.losses_)} - Accuracy {accuracy}")
        
        if test_loss < best_loss:
            best_loss = test_loss
            best_params = (lr, lam) 

print("\nBest Parameters:")
print("LR:", best_params[0], "Lambda:", best_params[1], "Test Loss:", best_loss)

In [None]:
classifier = SDCAClassifier(loss="log", lam=1e-3, max_iter=5000000, verbose=1, tol=1e-5)
classifier.fit(X_train, y_train)

Running Modified-SGD (Epoch 1)...
Finnished Modified-SGD (Epoch 1)
Iter  10 | Gap: 3.12e-03 | Primal Loss: 0.616186
Iter  20 | Gap: 4.27e-04 | Primal Loss: 0.615847
Iter  30 | Gap: 1.93e-04 | Primal Loss: 0.615823
Iter  40 | Gap: 1.47e-04 | Primal Loss: 0.615818
Iter  50 | Gap: 1.30e-04 | Primal Loss: 0.615815


In [None]:
plt.figure(figsize=(6,4))
plt.plot(classifier.losses_)
plt.xlabel("Iteration (every 10 steps)")
plt.ylabel("Training Loss")
plt.title("SCDA Training Loss Curve in Log Scale")
plt.yscale('log')
plt.grid(True)
plt.show()

In [None]:
plt.figure(figsize=(6,4))
plt.plot(classifier.losses_)
plt.xlabel("Iteration (every 10 steps)")
plt.ylabel("Training Loss")
plt.title("SCDA Training Loss Curve")
#plt.yscale('log')
plt.grid(True)
plt.show()

In [None]:
classifier.losses_

In [38]:
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.7887869069998894


In [11]:
test_loss = classifier._compute_loss(X_test, y_test)
print("Test Loss:", test_loss)

AttributeError: 'SDCAClassifier' object has no attribute 'bias'