In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from tabulate import tabulate

In [2]:
data = load_breast_cancer()
X, y = data.data, data.target

In [3]:
column_labels = np.append(data.feature_names, ['target'])
df = pd.DataFrame(np.column_stack((X, y)), columns=column_labels)
print("First 4 rows of the labeled dataset:")
print(tabulate(df.head(4), headers='keys', tablefmt='psql', showindex=False))

First 4 rows of the labeled dataset:
+---------------+----------------+------------------+-------------+-------------------+--------------------+------------------+-----------------------+-----------------+--------------------------+----------------+-----------------+-------------------+--------------+--------------------+---------------------+-------------------+------------------------+------------------+---------------------------+----------------+-----------------+-------------------+--------------+--------------------+---------------------+-------------------+------------------------+------------------+---------------------------+----------+
|   mean radius |   mean texture |   mean perimeter |   mean area |   mean smoothness |   mean compactness |   mean concavity |   mean concave points |   mean symmetry |   mean fractal dimension |   radius error |   texture error |   perimeter error |   area error |   smoothness error |   compactness error |   concavity error |   concave point

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
def proximal_svm(X, y, lambda_val, max_iter=100, alpha=0.01):
    m, n = X.shape
    theta = np.zeros(n)
    for _ in range(max_iter):
        margin = y * np.dot(X, theta)
        hinge_loss = np.maximum(0, 1 - margin)
        gradient = -np.dot(X.T, y * (margin < 1))
        theta -= alpha * (gradient + lambda_val * np.sign(theta))
    return theta

In [7]:
lambda_val = 0.1
theta = proximal_svm(X_train, y_train, lambda_val)

In [8]:
y_pred = np.sign(np.dot(X_test, theta))
y_pred

array([ 1., -1., -1.,  1.,  1., -1., -1., -1., -1.,  1.,  1., -1.,  1.,
       -1.,  1., -1.,  1.,  1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,
        1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1., -1.,  1., -1.,
        1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1., -1., -1.,
       -1.,  1.,  1.,  1.,  1., -1., -1.,  1.,  1., -1., -1.,  1.,  1.,
        1., -1., -1.,  1.,  1., -1., -1.,  1., -1.,  1.,  1.,  1., -1.,
        1.,  1., -1.,  1., -1., -1., -1., -1., -1., -1.,  1.,  1.,  1.,
       -1.,  1.,  1.,  1.,  1., -1., -1.,  1., -1., -1.,  1., -1., -1.,
        1.,  1.,  1., -1.,  1.,  1., -1.,  1.,  1., -1.])

In [9]:
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy:", accuracy)


Accuracy: 0.5964912280701754
