In [37]:
import cvxpy as cp
import numpy as np
import pandas as pd

In [38]:
def load_and_split_data(train_path="train.csv", test_path="test.csv"):
    """
    Load data from CSV files and split into training, validation, and test sets.
    
    Parameters:
    - train_path: Path to the training CSV file.
    - test_path: Path to the testing CSV file.
    
    Returns:
    - train_data, train_labels: Training data and labels.
    - val_data, val_labels: Validation data and labels.
    - test_data, test_labels: Test data and labels.
    """
    
    # Load train.csv and split
    train_df = pd.read_csv(train_path, header=None)

    train_data = train_df.iloc[:4000, 1:].values
    train_labels = train_df.iloc[:4000, 0].values

    # print("The first 10 labels in the training set are: ", train_labels[:10])
    # print("The first 10 features in the training set are: ", train_data[:10])    
    
    val_data = train_df.iloc[4000:, 1:].values
    val_labels = train_df.iloc[4000:, 0].values
    
    # Load test.csv
    test_df = pd.read_csv(test_path, header=None)
    test_data = test_df.iloc[:, 1:].values
    test_labels = test_df.iloc[:, 0].values

    test_labels = test_labels * 2 - 1
    val_labels = val_labels * 2 - 1
    train_labels = train_labels * 2 - 1
    
    return train_data, train_labels, val_data, val_labels, test_data, test_labels

# Load data
train_data, train_labels, val_data, val_labels, test_data, test_labels = load_and_split_data()



# Q2

In [39]:
def svm_train_primal(data_train, label_train, regularisation_para_C):    
    N, d = data_train.shape
    w = cp.Variable(d)
    b = cp.Variable()
    xi = cp.Variable(N)
    
    # Objective function
    objective = cp.Minimize(0.5 * cp.norm(w,2)**2 + (regularisation_para_C/N) * cp.sum(xi))
    
    # Constraints
    constraints = [cp.multiply(label_train, data_train @ w + b) >= 1 - xi, xi >= 0]
    
    # Solve the problem
    problem = cp.Problem(objective, constraints)
    problem.solve()

    svm_model = {'w': w.value, 'b': b.value}
    
    return svm_model


In [40]:
def svm_predict_primal(data_test, label_test, svm_model):
    # Extract w and b from the model
    w = svm_model['w']
    b = svm_model['b']

    # Predict
    preds = np.sign(np.dot(data_test, w) + b)
    
    # Calculate accuracy
    test_accuracy = np.mean(preds == label_test)
    return test_accuracy


In [41]:
svm_model = svm_train_primal(train_data, train_labels, 100)
#print b and sum of w
print(f"b: {svm_model['b']}")
print(f"sum of w: {np.sum(svm_model['w'])}")
#test for the accuraccy
test_accuracy = svm_predict_primal(test_data, test_labels, svm_model)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

b: 1.779813717087077
sum of w: -0.1452156803361282
Test Accuracy: 96.80%


# Q3

In [None]:
def svm_train_dual(data_train, label_train, regularisation_para_C):
    # regularization_term = 0
    N,_ = data_train.shape
    alpha = cp.Variable((N,1))

    # Reshape label_train to be a column vector
    label_train = label_train.reshape(-1, 1)
    mult = label_train * data_train

    mult_transpose = mult.T

    # Construct the kernel (gram matrix) with regularization\
    K = cp.sum_squares(mult_transpose @ alpha)
    
    # Objective
    objective = cp.Minimize(-1 * cp.sum(alpha) + K)
    
    # Constraints
    constraints = [
        cp.sum(cp.multiply(alpha, label_train)) == 0,
        alpha >= 0,
        alpha <= regularisation_para_C / N
    ]
    
    # Solve the problem
    problem = cp.Problem(objective, constraints)
    problem.solve()
    
    return alpha.value


# Q4

In [None]:
def compute_primal_from_dual(alpha, data_train, label_train, regularisation_para_C):
    """
    Compute the primal SVM solution (w*, b*) from the dual solution alpha*.

    Parameters:
    - alpha: Dual solution (N,)
    - data_train: Training data matrix (N x D)
    - label_train: Training labels (N,)
    - regularisation_para_C: Regularization parameter C

    Returns:
    - w: Weight vector (D,)
    - b: Bias scalar
    """
    # Compute w* from alpha*
    w = np.sum((alpha * label_train)[:, None] * data_train, axis=0)

    # Compute b* using a support vector
    # Find a support vector index (any example where 0 < alpha < C/N)
    support_vector_indices = np.where((alpha > 1e-5) & (alpha < (regularisation_para_C / data_train.shape[0])))[0]
    if len(support_vector_indices) > 0:
        sv_index = support_vector_indices[0]
        b = label_train[sv_index] - np.dot(w, data_train[sv_index])
    else:
        b = 0

    return w, b

# Q5

In [None]:
def find_support_vectors_from_primal(data_train, label_train, svm_model):
    """
    Find the support vectors based on the primal SVM model.

    Parameters:
    - data_train: Training data matrix (N x D)
    - label_train: Training labels (N,)
    - svm_model: Trained SVM model with 'w' and 'b' as keys

    Returns:
    - support_vectors: Matrix of support vectors (S x D)
    - support_vector_labels: Labels of the support vectors (S,)
    - support_vector_indices: Indices of the support vectors in the training data
    """
    # Extract w and b from the model
    w = svm_model['w']
    b = svm_model['b']

    # Compute decision values
    decision_values = label_train * (np.dot(data_train, w) + b)
    
    # Identify support vectors
    support_vector_indices = np.where(decision_values <= 1)[0]
    support_vectors = data_train[support_vector_indices]
    support_vector_labels = label_train[support_vector_indices]

    return support_vectors, support_vector_labels, support_vector_indices

# Test the function using the provided SVM model (assuming it's available in the notebook)
# This step assumes that the variables train_data, train_labels, and svm_model are already loaded.
# svm_model = {"w": w, "b": b}
# support_vectors, support_vector_labels, support_vector_indices = find_support_vectors_from_primal(train_data, train_labels, svm_model)
# len(support_vector_indices)  # This will display the number of support vectors found


# Q6

In [None]:
def find_support_vectors_from_dual(data_train, label_train, alpha_values, threshold=1e-5):
    """
    Find the support vectors based on the dual SVM solution.

    Parameters:
    - data_train: Training data matrix (N x D)
    - label_train: Training labels (N,)
    - alpha_values: Values of the Lagrange multipliers from the dual solution (N,)
    - threshold: Threshold for considering a data point as a support vector

    Returns:
    - support_vectors: Matrix of support vectors (S x D)
    - support_vector_labels: Labels of the support vectors (S,)
    - support_vector_indices: Indices of the support vectors in the training data
    - support_vector_alphas: Values of the Lagrange multipliers for the support vectors
    """
    # Identify support vectors based on alpha values
    support_vector_indices = np.where(alpha_values > threshold)[0]
    support_vectors = data_train[support_vector_indices]
    support_vector_labels = label_train[support_vector_indices]
    support_vector_alphas = alpha_values[support_vector_indices]

    return support_vectors, support_vector_labels, support_vector_indices, support_vector_alphas


## Q7 (BEST C IS 4)

In [None]:
def select_best_C(train_data, train_labels, val_data, val_labels):
    """
    Choose the best value of C using the validation set.

    Parameters:
    - train_data: Training data matrix
    - train_labels: Training labels
    - val_data: Validation data matrix
    - val_labels: Validation labels

    Returns:
    - best_C: Optimal value of C
    - best_accuracy: Accuracy on the validation set using the best C
    """
    C_values = [2**i for i in range(-2, 11)]
    best_accuracy = -0.1
    best_C = None
    
    for C in C_values:
        # print("Training for C={}".format(C))
        # Train SVM using the current value of C
        svm_model = svm_train_primal(train_data, train_labels, C)
        # print(svm_model)

        val_preds = svm_predict_primal(val_data, val_labels, svm_model)
        # print(f"PREDICTIONS: {val_preds[:30]}")
        
        # Compute accuracy on the validation set
        accuracy = np.mean(val_preds == val_labels)
        # print("Accuracy for C={}: {}".format(C, accuracy))
        
        if accuracy > best_accuracy:
            # print("Found better value of C: {} with accuracy: {}".format(C, accuracy))
            best_accuracy = accuracy
            best_C = C
            
    return best_C, best_accuracy

# Using the above function
best_C, best_val_accuracy = select_best_C(train_data, train_labels, val_data, val_labels)
print(f"Best C: {best_C}, Validation Accuracy: {best_val_accuracy * 100:.2f}%")

# Train the SVM using the best C and report test accuracy
svm_model = svm_train_primal(train_data, train_labels, best_C)

test_preds = svm_predict_primal(test_data, test_labels, svm_model)
test_accuracy = np.mean(test_preds == test_labels)
print(f"Test Accuracy using best C: {test_accuracy * 100:.2f}%")

# Q8

In [None]:
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
def sklearn_svm():

    #CREAT A SVC model, with C= 4
    svc_model = Pipeline([('svc', SVC(C=4, kernel='linear'))])

    #fit the model using the pipeline
    svc_model.fit(train_data, train_labels)

    #predict the model
    svc_preds = svc_model.predict(test_data)

    #compute the accuracy
    svc_accuracy = np.mean(svc_preds == test_labels)
    print(f"Test Accuracy using best C: {svc_accuracy * 100:.2f}%")
    return svc_accuracy

# Q8

In [None]:
sklearn_svm()

# Q7

In [None]:
# Using the above function
best_C, best_val_accuracy = select_best_C(train_data, train_labels, val_data, val_labels)
print(f"Best C: {best_C}, Validation Accuracy: {best_val_accuracy * 100:.2f}%")

# Train the SVM using the best C and report test accuracy
svm_model = svm_train_primal(train_data, train_labels, best_C)

test_preds = svm_predict_primal(test_data, test_labels, svm_model)
test_accuracy = np.mean(test_preds == test_labels)
print(f"Test Accuracy using best C: {test_accuracy * 100:.2f}%")

# Q3

In [33]:
alpha = svm_train_dual(train_data, train_labels, 100)
print("Dual: sum of alpha: ", np.sum(alpha))

Dual: sum of alpha:  6.469633954792531




# Q4

In [None]:
w, b = compute_primal_from_dual(alpha, train_data, train_labels, 100)
print("Primal from Dual:\tsum of w: ", np.sum(w))
print("Primal from Dual:\tb: ", b)

# Q2

In [None]:
svm_model = svm_train_primal(train_data, train_labels, 100)
print("Primal straight away:\tsum of w: ", np.sum(svm_model['w']))
print("Primal straight away:\tb: ", svm_model['b'])
#test for the accuraccy
test_preds = svm_predict_primal(test_data, test_labels, svm_model)
print("Primal straight away:\ttest accuracy: ", np.mean(test_preds == test_labels))

# Q5

In [None]:
support_vectors, support_vector_labels, support_vector_indices = find_support_vectors_from_primal(train_data, train_labels, svm_model)
print("Number of support vectors: ", len(support_vector_indices))
print("Support vector indices: ", support_vector_indices)

# Q6

In [None]:
support_vectors, support_vector_labels, support_vector_indices, support_vector_alphas = find_support_vectors_from_dual(train_data, train_labels, alpha)
print("Number of support vectors: ", len(support_vector_indices))
print("Support vector indices: ", support_vector_indices)