In [1]:
import numpy as np
import cupy as cp


def svm_inference(X, w, b):
    """Classify feature vectors with a linear SVM.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        feature matrix (one row per feature vector).
    w : ndarray, shape (n,)
        weight vector.
    b : float
        scalar bias.

    Returns
    -------
    ndarray, shape (m,)
        predicted labels: 1 where the score is strictly positive, 0 otherwise.
    ndarray, shape (m,)
        classification scores, i.e. ``X @ w + b``.
    """
    scores = cp.matmul(X, w) + b
    # A score of exactly zero is assigned to class 0.
    predictions = cp.greater(scores, 0).astype(int)
    return predictions, scores


def hinge_loss(labels, logits):
    """Mean hinge loss of a set of scores against binary labels.

    Parameters
    ----------
    labels : ndarray, shape (m,)
        binary target labels (0 or 1).
    logits : ndarray, shape (m,)
        classification scores (logits).

    Returns
    -------
    float
        hinge loss averaged over the m samples.
    """
    # Map the {0, 1} labels to {-1, +1} so that a correct, confident
    # prediction yields a margin of at least 1.
    signs = 2 * labels - 1
    margins = signs * logits
    per_sample = cp.maximum(0, 1 - margins)
    return per_sample.mean()


def svm_train(X, Y, lambda_, lr=1e-3, steps=1000, init_w=None, init_b=0, lr0=1):
    """Train a binary SVM classifier with (sub)gradient descent.

    The step size follows the schedule ``lr0 / sqrt(step + 1)``.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        training features.
    Y : ndarray, shape (m,)
        binary training labels (0 or 1).
    lambda_ : float
        regularization coefficient.
    lr : float
        unused; kept only for backward compatibility. The step size is
        derived from ``lr0`` instead.
    steps : int
        number of training steps
    init_w : ndarray, shape (n,)
        initial weights (None for zero initialization). The array is copied,
        so the caller's array is left untouched.
    init_b : float
        initial bias
    lr0 : float
        base learning rate of the ``lr0 / sqrt(step + 1)`` schedule.

    Returns
    -------
    w : ndarray, shape (n,)
        learned weight vector.
    b : float
        learned bias.
    """
    m, n = X.shape
    # Work on a private floating-point copy: the in-place update below must
    # not modify the array the caller passed as init_w.
    w = cp.zeros(n) if init_w is None else cp.array(init_w, dtype=float)
    b = init_b
    C = 2 * Y - 1  # labels mapped from {0, 1} to {-1, +1}
    inv_m = 1 / m
    for step in range(steps):
        step_lr = lr0 / (step + 1) ** 0.5
        labels, logits = svm_inference(X, w, b)
        # Subgradient of the hinge loss w.r.t. the logits: -C where the
        # margin is violated (C * logit < 1), 0 elsewhere.
        hinge_diff = -C * ((C * logits) < 1)
        grad_w = cp.multiply(cp.matmul(hinge_diff, X), inv_m) + lambda_ * w
        grad_b = hinge_diff.mean()
        w -= cp.multiply(step_lr, grad_w)
        # Rebind instead of updating in place so that an array passed as
        # init_b is not modified either.
        b = b - cp.multiply(step_lr, grad_b)
        if (step + 1) % 100 == 0:
            # Accuracy of the predictions made before this step's update.
            print("Step: ", step+1, ", Accuracy: ", (labels==Y).mean()*100)
    return w, b



In [2]:
import pandas as pd
# Size tag that appears in the data file names, and number of training steps.
n = 5000
steps = 2000

# File name stems of the three splits; the files themselves are "<stem>.gz".
train_name = "train_" + str(n)
test_name = "test_" + str(n)
val_name = "val_" + str(n)

# Each file is a gzip-compressed, space-separated table of integers without a
# header row. Later cells use the last column as the label and the other
# columns as features.
train_data = pd.read_csv(
    train_name + ".gz",
    sep=" ",
    header=None,
    dtype=np.int32,
    compression="gzip",
).to_numpy()
val_data = pd.read_csv(
    val_name + ".gz",
    sep=" ",
    header=None,
    dtype=np.int32,
    compression="gzip",
).to_numpy()
test_data = pd.read_csv(
    test_name + ".gz",
    sep=" ",
    header=None,
    dtype=np.int32,
    compression="gzip",
).to_numpy()

In [3]:
# Training split: the last column is the label, the rest are features.
X, Y = cp.array(train_data[:, :-1]), cp.array(train_data[:, -1])

w, b = svm_train(X, Y, lambda_=0.0001, steps=steps, lr0=1.)
labels, scores = svm_inference(X, w, b)
accuracy = cp.mean(labels == Y) * 100
print("Training Accuracy: ", accuracy)

# Validation split, evaluated with the weights learned above.
X, Y = cp.array(val_data[:, :-1]), cp.array(val_data[:, -1])
labels, logits = svm_inference(X, w, b)
accuracy = cp.mean(labels == Y) * 100
print("Validation Accuracy: ", accuracy)

# Test split. Note that X and Y hold the test data after this cell.
X, Y = cp.array(test_data[:, :-1]), cp.array(test_data[:, -1])
labels, scores = svm_inference(X, w, b)
accuracy = cp.mean(labels == Y) * 100
print("Test Accuracy: ", accuracy)

Step:  100 , Accuracy:  86.688
Step:  200 , Accuracy:  87.69200000000001
Step:  300 , Accuracy:  88.112
Step:  400 , Accuracy:  88.424
Step:  500 , Accuracy:  88.612
Step:  600 , Accuracy:  88.828
Step:  700 , Accuracy:  88.952
Step:  800 , Accuracy:  89.092
Step:  900 , Accuracy:  89.22
Step:  1000 , Accuracy:  89.312
Step:  1100 , Accuracy:  89.4
Step:  1200 , Accuracy:  89.492
Step:  1300 , Accuracy:  89.524
Step:  1400 , Accuracy:  89.584
Step:  1500 , Accuracy:  89.604
Step:  1600 , Accuracy:  89.656
Step:  1700 , Accuracy:  89.69200000000001
Step:  1800 , Accuracy:  89.748
Step:  1900 , Accuracy:  89.832
Step:  2000 , Accuracy:  89.86800000000001
Training Accuracy:  89.86
Validation Accuracy:  86.976
Test Accuracy:  87.088


In [4]:
# Persist the trained SVM: weights as a text column, bias as a single number.
np.savetxt("svm_"+ str(n)+ "_"+ str(steps)+ "_weights.txt", cp.asnumpy(w))
# The context manager closes the file even if the write raises.
with open("svm_"+ str(n)+ "_bias.txt", "w") as f:
    f.write(str(cp.asnumpy(b)))

In [5]:
def logreg_inference(X, w, b):
    """Logistic-regression probability estimates.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        feature matrix (one row per feature vector).
    w : ndarray, shape (n,)
        weight vector.
    b : float
        scalar bias.

    Returns
    -------
    ndarray, shape (m,)
        sigmoid of ``X @ w + b``, one probability per feature vector.
    """
    z = cp.matmul(X, w) + b
    return 1 / (cp.exp(-z) + 1)


def binary_cross_entropy(Y, P, eps=1e-3):
    """Average cross entropy.

    Parameters
    ----------
    Y : ndarray, shape (m,)
        binary target labels (0 or 1).
    P : ndarray, shape (m,)
        probability estimates.
    eps : float
        probabilities are clipped to ``[eps, 1 - eps]`` before taking the
        logarithm.

    Returns
    -------
    float
        average cross entropy.
    """
    # Clipping keeps both logarithms finite when an estimate is exactly 0 or 1.
    # cp.clip is used (rather than np.clip) to match the other array calls in
    # this function.
    P = cp.clip(P, eps, 1 - eps)
    return -(Y * cp.log(P) + (1 - Y) * cp.log(1 - P)).mean()


def logreg_train(X, Y, lr=1e-3, steps=1000, init_w=None, init_b=0):
    """Train a binary classifier based on logistic regression.

    Plain gradient descent with a constant learning rate.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        training features.
    Y : ndarray, shape (m,)
        binary training labels.
    lr : float
        learning rate
    steps : int
        number of training steps
    init_w : ndarray, shape (n,)
        initial weights (None for zero initialization). The array is copied,
        so the caller's array is left untouched.
    init_b : float
        initial bias

    Returns
    -------
    w : ndarray, shape (n,)
        learned weight vector.
    b : float
        learned bias.
    """
    m, n = X.shape
    # Work on a private floating-point copy: the in-place update below must
    # not modify the array the caller passed as init_w.
    w = cp.zeros(n) if init_w is None else cp.array(init_w, dtype=float)
    b = init_b
    inv_m = 1 / m
    for step in range(steps):
        P = logreg_inference(X, w, b)
        residual = P - Y
        grad_w = cp.multiply(cp.matmul(residual, X), inv_m)
        grad_b = residual.mean()
        w -= cp.multiply(lr, grad_w)
        # Rebind instead of updating in place so that an array passed as
        # init_b is not modified either.
        b = b - lr * grad_b
        if step % 100 == 0:
            # Loss of the estimates computed before this step's update.
            print("Step: ", step, ", Loss: ", binary_cross_entropy(Y,P))
    return w, b


def logreg_l2_train(X, Y, lambda_, lr=1e-3, steps=1000, init_w=None,
                    init_b=0, lr0=1):
    """Train a binary classifier based on L2-regularized logistic regression.

    The step size follows the schedule ``lr0 / sqrt(step + 1)``.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        training features.
    Y : ndarray, shape (m,)
        binary training labels.
    lambda_ : float
        regularization coefficient (the penalty gradient is
        ``2 * lambda_ * w``).
    lr : float
        unused; kept only for backward compatibility. The step size is
        derived from ``lr0`` instead.
    steps : int
        number of training steps.
    init_w : ndarray, shape (n,)
        initial weights (None for zero initialization). The array is copied,
        so the caller's array is left untouched.
    init_b : float
        initial bias
    lr0 : float
        base learning rate of the ``lr0 / sqrt(step + 1)`` schedule.

    Returns
    -------
    w : ndarray, shape (n,)
        learned weight vector.
    b : float
        learned bias.
    """
    m, n = X.shape
    # Work on a private floating-point copy: the in-place update below must
    # not modify the array the caller passed as init_w.
    w = cp.zeros(n) if init_w is None else cp.array(init_w, dtype=float)
    b = init_b
    inv_m = 1 / m
    for step in range(steps):
        step_lr = lr0 / (step + 1) ** 0.5
        P = logreg_inference(X, w, b)
        residual = P - Y
        grad_w = cp.multiply(cp.matmul(residual, X), inv_m) + 2 * lambda_ * w
        grad_b = residual.mean()
        w -= cp.multiply(step_lr, grad_w)
        # Rebind instead of updating in place so that an array passed as
        # init_b is not modified either.
        b = b - step_lr * grad_b
        if step % 1000 == 0:
            # Loss of the estimates computed before this step's update.
            print("Step: ", step, ", Loss: ", binary_cross_entropy(Y,P))
    return w, b


def logreg_l1_train(X, Y, lambda_, lr=1e-3, steps=1000, init_w=None, init_b=0, lr0=1):
    """Train a binary classifier based on L1-regularized logistic regression.

    The step size follows the schedule ``lr0 / sqrt(step + 1)``.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        training features.
    Y : ndarray, shape (m,)
        binary training labels.
    lambda_ : float
        regularization coefficient (the penalty subgradient is
        ``lambda_ * sign(w)``).
    lr : float
        unused; kept only for backward compatibility. The step size is
        derived from ``lr0`` instead.
    steps : int
        number of training steps.
    init_w : ndarray, shape (n,)
        initial weights (None for zero initialization). The array is copied,
        so the caller's array is left untouched.
    init_b : float
        initial bias
    lr0 : float
        base learning rate of the ``lr0 / sqrt(step + 1)`` schedule.

    Returns
    -------
    w : ndarray, shape (n,)
        learned weight vector.
    b : float
        learned bias.
    """
    m, n = X.shape
    # Work on a private floating-point copy: the in-place update below must
    # not modify the array the caller passed as init_w.
    w = cp.zeros(n) if init_w is None else cp.array(init_w, dtype=float)
    b = init_b
    inv_m = 1 / m
    for step in range(steps):
        step_lr = lr0 / (step + 1) ** 0.5
        P = logreg_inference(X, w, b)
        residual = P - Y
        grad_w = cp.multiply(cp.matmul(residual, X), inv_m) + lambda_ * cp.sign(w)
        grad_b = residual.mean()
        w -= cp.multiply(step_lr, grad_w)
        # Rebind instead of updating in place so that an array passed as
        # init_b is not modified either.
        b = b - step_lr * grad_b
        if step % 100 == 0:
            # Accuracy is measured with the parameters after this step's update.
            prob = logreg_inference(X, w, b)
            pred = prob > 0.5
            accuracy = (pred == Y).mean() * 100
            print("Step: ", step, ", Accuracy: ", accuracy)
    return w, b

In [6]:
#from pvml import logistic_regression as lg

# Training split: the last column is the label, the rest are features.
X, Y = cp.array(train_data[:, :-1]), cp.array(train_data[:, -1])
# lambda_ = 0 switches the L1 penalty off, i.e. plain logistic regression.
w, b = logreg_l1_train(X, Y, lambda_=0., steps=steps, lr0=1)

Step:  0 , Accuracy:  67.43599999999999
Step:  100 , Accuracy:  84.664
Step:  200 , Accuracy:  85.656
Step:  300 , Accuracy:  86.16
Step:  400 , Accuracy:  86.44
Step:  500 , Accuracy:  86.712
Step:  600 , Accuracy:  86.932
Step:  700 , Accuracy:  87.092
Step:  800 , Accuracy:  87.252
Step:  900 , Accuracy:  87.38
Step:  1000 , Accuracy:  87.424
Step:  1100 , Accuracy:  87.556
Step:  1200 , Accuracy:  87.66000000000001
Step:  1300 , Accuracy:  87.76
Step:  1400 , Accuracy:  87.824
Step:  1500 , Accuracy:  87.884
Step:  1600 , Accuracy:  87.94800000000001
Step:  1700 , Accuracy:  88.0
Step:  1800 , Accuracy:  88.06
Step:  1900 , Accuracy:  88.116


In [7]:
# Persist the trained logistic-regression model: weights as a text column,
# bias as a single number.
np.savetxt("logreg_"+ str(n)+ "_"+ str(steps)+ "_weights.txt", cp.asnumpy(w))
# The context manager closes the file even if the write raises.
with open("logreg_"+ str(n)+ "_bias.txt", "w") as f:
    f.write(str(cp.asnumpy(b)))

In [8]:
# Rebuild the training split explicitly so that "Training Accuracy" does not
# depend on whichever X/Y were left in the kernel by an earlier cell.
X=cp.array(train_data[:,:-1])
Y=cp.array(train_data[:,-1])
train_prob=logreg_inference(X,w,b)
# A sample is assigned to class 1 when its estimated probability exceeds 0.5.
train_pred=train_prob>0.5
accuracy=(train_pred==Y).mean()*100
print("Training Accuracy: ", accuracy)

X=cp.array(val_data[:,:-1])
Y=cp.array(val_data[:,-1])
val_prob=logreg_inference(X,w,b)
val_pred=val_prob>0.5
accuracy=(val_pred==Y).mean()*100
print("Validation Accuracy: ", accuracy)

X=cp.array(test_data[:,:-1])
Y=cp.array(test_data[:,-1])
test_prob=logreg_inference(X,w,b)
test_pred=test_prob>0.5
accuracy=(test_pred==Y).mean()*100
print("Test Accuracy: ", accuracy)

Training Accuracy:  88.188
Validation Accuracy:  86.24000000000001
Test Accuracy:  86.304
