In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.datasets import load_breast_cancer
from sklearn import preprocessing

device = torch.device("mps")

In [6]:
def hingeLoss(x, y):
    return torch.max(torch.zeros_like(y), 1-y*x).mean()

### Create Dataset

In [33]:
data = load_breast_cancer()
X, Y = data.data, data.target
X = preprocessing.normalize(X)
print(X.shape)

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=100, random_state=2)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=100, random_state=1)

(569, 30)


### SVM

In [34]:
class SVM(torch.nn.Module):
    def __init__(self, X, kernel='linear', gamma=1.0, gammaGrad=True, p=2):
        super().__init__()
        assert kernel in ['linear', 'rbf', 'poly']
        self.X = torch.FloatTensor(X)
        
        if kernel == 'linear':
            self._kernel = self.linear
            self._num_c = self.X.shape[1]
        
        elif kernel == 'rbf':
            self._kernel = self.rbf
            self._num_c = self.X.shape[0]
            self._gamma = torch.nn.Parameter(torch.FloatTensor([gamma]),
                                             requires_grad=gammaGrad)
        
        elif kernel == 'poly':
            self._p=p
            self._kernel = self.poly
            self._num_c = self.X.shape[0]
           
        else:
            assert False
            
        self._w = torch.nn.Linear(in_features=self._num_c, out_features=1)

    def rbf(self, x):
        y = self.X.repeat(x.size(0), 1, 1)
        return torch.exp(-self._gamma*((x[:,None]-y)**2).sum(dim=2))

    def poly(self, x, c=1):
        y = self.X.repeat(x.size(0), 1, 1)
        return ((x@(self.X.T))+c) ** self._p
    
    @staticmethod
    def linear(x):
        return x

    def forward(self, x):
        y = self._kernel(x)
        y = self._w(y)
        return y
    
    def train(self, x, y, X_val, y_val, epochs=200, lambda_reg=0):
        x = torch.FloatTensor(x)
        y = 2*y-1
        y = torch.FloatTensor(y)
        X_val = torch.FloatTensor(X_val)
        y_val = 2*y_val-1
        valat=epochs/10
        optim=torch.optim.SGD(self.parameters(), lr=0.01)
        for i in range(epochs):
            optim.zero_grad()
            pred=self(x)
            loss=hingeLoss(pred,y.unsqueeze(1))+lambda_reg*torch.norm(self._w.weight,1)
            loss.backward()
            optim.step()
            ypred=self(X_val).detach()
            ypred=np.sign(ypred.transpose(1,0).numpy().reshape(y_val.shape))
            if i%valat==0:
                print("Validation Accuracy at epoch",i,":",accuracy_score(y_val, ypred))
        print("Training Loss is :",loss.item())
    
    def test(self, x_test, y_test):
        x_test = torch.FloatTensor(x_test)
        y_test = 2*y_test-1
        ypred=self(x_test).detach()
        ypred=np.sign(ypred.transpose(1,0).numpy().reshape(y_test.shape))
        print("Test Accuracy:", accuracy_score(y_test,ypred))

In [35]:
linSVM=SVM(X, kernel='linear')
linSVM.train(X_train, y_train, X_val, y_val, epochs=1000)
linSVM.test(X_test, y_test)

Validation Accuracy at epoch 0 : 0.56
Validation Accuracy at epoch 100 : 0.56
Validation Accuracy at epoch 200 : 0.56
Validation Accuracy at epoch 300 : 0.56
Validation Accuracy at epoch 400 : 0.56
Validation Accuracy at epoch 500 : 0.56
Validation Accuracy at epoch 600 : 0.56
Validation Accuracy at epoch 700 : 0.56
Validation Accuracy at epoch 800 : 0.56
Validation Accuracy at epoch 900 : 0.56
Training Loss is : 0.6992371678352356
Test Accuracy: 0.62


In [43]:
polySVM=SVM(X_train, kernel='poly', p=5)
polySVM.train(X_train, y_train, X_val, y_val, epochs=10000)
polySVM.test(X_test, y_test)

Validation Accuracy at epoch 0 : 0.44
Validation Accuracy at epoch 1000 : 0.44
Validation Accuracy at epoch 2000 : 0.56
Validation Accuracy at epoch 3000 : 0.64
Validation Accuracy at epoch 4000 : 0.85
Validation Accuracy at epoch 5000 : 0.69
Validation Accuracy at epoch 6000 : 0.69
Validation Accuracy at epoch 7000 : 0.71
Validation Accuracy at epoch 8000 : 0.74
Validation Accuracy at epoch 9000 : 0.79
Training Loss is : 72.92414855957031
Test Accuracy: 0.76


In [37]:
rbfSVM=SVM(X_train, kernel='rbf')
rbfSVM.train(X_train , y_train, X_val, y_val, epochs=10000)
rbfSVM.test(X_test, y_test)

Validation Accuracy at epoch 0 : 0.56
Validation Accuracy at epoch 1000 : 0.63
Validation Accuracy at epoch 2000 : 0.88
Validation Accuracy at epoch 3000 : 0.91
Validation Accuracy at epoch 4000 : 0.9
Validation Accuracy at epoch 5000 : 0.88
Validation Accuracy at epoch 6000 : 0.88
Validation Accuracy at epoch 7000 : 0.89
Validation Accuracy at epoch 8000 : 0.89
Validation Accuracy at epoch 9000 : 0.9
Training Loss is : 0.23805198073387146
Test Accuracy: 0.89


### Logistic Regression

In [38]:
class LogReg(torch.nn.Module):
    def __init__(self, X,kernel='linear', gamma=1.0, gammaGrad=True, p=2):
        super().__init__()
        assert kernel in ['linear', 'rbf', 'poly']
        self.X = torch.FloatTensor(X)
        
        if kernel == 'linear':
            self._kernel = self.linear
            self._num_c = self.X.shape[1]

        elif kernel == 'rbf':
            self._kernel = self.rbf
            self._num_c = self.X.shape[0]
            self._gamma = torch.nn.Parameter(torch.FloatTensor([gamma]),
                                             requires_grad=gammaGrad)
        
        elif kernel == 'poly':
            self._p=p
            self._kernel = self.poly
            self._num_c = self.X.shape[0] 
           
        else:
            assert False
            
        self._w = torch.nn.Linear(in_features=self._num_c, out_features=1)

    def rbf(self, x):
        y = self.X.repeat(x.size(0), 1, 1)
        return torch.exp(-self._gamma*((x[:,None]-y)**2).sum(dim=2))

    def poly(self, x, c=1):
        y = self.X.repeat(x.size(0), 1, 1)
        return ((x@(self.X.T))+c) ** self._p     
    
    @staticmethod
    def linear(x):
        return x

    def forward(self, x):
        y = self._kernel(x)
        y = self._w(y)
        #For Logistic we use Sigmoid
        return torch.sigmoid(y)
    
    def train(self, x, y, X_val, y_val, epochs=200):
        x = torch.FloatTensor(x)
        y = torch.FloatTensor(y)
        X_val = torch.FloatTensor(X_val)

        valat=epochs/10
        optim=torch.optim.SGD(self.parameters(),lr=0.01)
        criterion = torch.nn.BCELoss()
        
        for i in range(epochs):
            optim.zero_grad()
            pred=self(x)
            loss=criterion(pred,y.unsqueeze(1))
            loss.backward()
            optim.step()
            if i%valat==0:
                ypred=self(X_val).detach()
                ypred=ypred.transpose(1,0).reshape(y_val.shape).numpy()
                ypred=np.asarray(ypred>=0.5,dtype=np.int64)
                print("Validation Accuracy at epoch",i,":",accuracy_score(y_val,ypred))
        print("Training Loss is :",loss.item())
        
    def test(self, x_test, y_test):
        x_test = torch.FloatTensor(x_test)
        ypred=self(x_test).detach()
        ypred=ypred.transpose(1,0).reshape(y_test.shape).numpy()
        ypred=np.asarray(ypred>=0.5,dtype=np.int64)
        print("Test Accuracy:",accuracy_score(y_test,ypred))


In [39]:
linLog=LogReg(X_train,kernel='linear')
linLog.train(X_train, y_train, X_val, y_val, epochs=1000)
linLog.test(X_test, y_test)

Validation Accuracy at epoch 0 : 0.44
Validation Accuracy at epoch 100 : 0.56
Validation Accuracy at epoch 200 : 0.56
Validation Accuracy at epoch 300 : 0.56
Validation Accuracy at epoch 400 : 0.56
Validation Accuracy at epoch 500 : 0.56
Validation Accuracy at epoch 600 : 0.56
Validation Accuracy at epoch 700 : 0.56
Validation Accuracy at epoch 800 : 0.56
Validation Accuracy at epoch 900 : 0.56
Training Loss is : 0.6455172300338745
Test Accuracy: 0.62


In [40]:
polyLog=LogReg(X_train, kernel='poly', p=10)
polyLog.train(X_train, y_train, X_val, y_val, epochs=1000)
polyLog.test(X_test, y_test)

Validation Accuracy at epoch 0 : 0.56
Validation Accuracy at epoch 100 : 0.56
Validation Accuracy at epoch 200 : 0.56
Validation Accuracy at epoch 300 : 0.56
Validation Accuracy at epoch 400 : 0.56
Validation Accuracy at epoch 500 : 0.56
Validation Accuracy at epoch 600 : 0.56
Validation Accuracy at epoch 700 : 0.56
Validation Accuracy at epoch 800 : 0.56
Validation Accuracy at epoch 900 : 0.56
Training Loss is : 35.230350494384766
Test Accuracy: 0.62


In [41]:
rbfLog=LogReg(X_train, kernel='rbf')
rbfLog.train(X_train, y_train, X_val, y_val, epochs=1000)
rbfLog.test(X_test, y_test)

Validation Accuracy at epoch 0 : 0.56
Validation Accuracy at epoch 100 : 0.56
Validation Accuracy at epoch 200 : 0.56
Validation Accuracy at epoch 300 : 0.56
Validation Accuracy at epoch 400 : 0.56
Validation Accuracy at epoch 500 : 0.56
Validation Accuracy at epoch 600 : 0.56
Validation Accuracy at epoch 700 : 0.56
Validation Accuracy at epoch 800 : 0.56
Validation Accuracy at epoch 900 : 0.56
Training Loss is : 0.6233896017074585
Test Accuracy: 0.62


### KNN

In [44]:
for i in range(1, 10):
    knn = KNeighborsRegressor(n_neighbors=i, weights='distance')
    knn.fit(X_train, y_train)
    pred_i = np.round(knn.predict(X_val),decimals=0)
    mae = accuracy_score(y_val, pred_i)
    print(f'Accuracy for K={i} is {round(mae,5)}')
knn = KNeighborsRegressor(n_neighbors=6, weights='distance')
knn.fit(X_train, y_train)
pred_i = np.round(knn.predict(X_test),decimals=0)
mae = accuracy_score(y_test, pred_i)
print(f'Test Accuracy for K=6 is {round(mae,5)}')

Accuracy for K=1 is 0.96
Accuracy for K=2 is 0.96
Accuracy for K=3 is 0.95
Accuracy for K=4 is 0.97
Accuracy for K=5 is 0.96
Accuracy for K=6 is 0.97
Accuracy for K=7 is 0.95
Accuracy for K=8 is 0.94
Accuracy for K=9 is 0.95
Test Accuracy for K=6 is 0.9


### Neural Network

In [58]:

train_dataset = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
val_dataset = TensorDataset(torch.FloatTensor(X_val), torch.FloatTensor(y_val))
test_dataset = TensorDataset(torch.FloatTensor(X_test), torch.FloatTensor(y_test))
train_dataloader = DataLoader(train_dataset, batch_size=32)
val_dataloader = DataLoader(val_dataset, batch_size=32)
test_dataloader = DataLoader(test_dataset, batch_size=32)

def train(model, epochs):
    losses = []
    val_losses = []
    max_epochs = epochs
    loss_function = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters())
    for epoch in range(max_epochs):
        epochloss=0
        val_epochloss=0
        model.train()
        for X_batch, y_batch in train_dataloader:
            optimizer.zero_grad()
            outp = model(X_batch)
            loss = loss_function(outp.flatten(), y_batch)
            loss.backward()
            epochloss = epochloss+loss.detach().flatten()[0]/len(X_batch)
            optimizer.step()
        losses.append(epochloss/len(train_dataloader))
        with torch.no_grad():
            for X_val_batch, y_val_batch in val_dataloader:
                outp_val = model(X_val_batch).detach()
                loss_val = loss_function(outp_val.flatten(), y_val_batch)
                val_epochloss = val_epochloss+loss_val.detach().flatten()[0]/len(X_val_batch)
            val_losses.append(val_epochloss/len(val_dataloader))
        print(f"Epoch: {epoch+1}, Train Loss: {epochloss}, Val Loss: {val_epochloss}")
    return model, losses, val_losses

def predict(dataloader, model):
    model.eval()
    predictions = np.array([])
    for x_batch, _ in dataloader:
        outp = model(x_batch)
        probs = torch.sigmoid(outp)
        preds = (probs > 0.5).type(torch.long)
        predictions = np.hstack((predictions, preds.numpy().flatten()))
    predictions = predictions
    return predictions.flatten()

In [59]:
model = nn.Sequential(
    nn.Linear(30, 256),
    nn.ReLU(),
    nn.Linear(256, 64),
    nn.ReLU(),
    nn.Linear(64, 32),
    nn.ReLU(),
    nn.Linear(32, 8),
    nn.ReLU(),
    nn.Linear(8, 1)
)

model, losses, val_losses = train(model, 500)
accuracy = accuracy_score(y_test, predict(test_dataloader, model))

print("Test Accuracy:",accuracy)

Epoch: 1, Train Loss: 0.2793208062648773, Val Loss: 0.23421035706996918
Epoch: 2, Train Loss: 0.27313175797462463, Val Loss: 0.22713787853717804
Epoch: 3, Train Loss: 0.26694124937057495, Val Loss: 0.21735507249832153
Epoch: 4, Train Loss: 0.26197466254234314, Val Loss: 0.21129542589187622
Epoch: 5, Train Loss: 0.26066654920578003, Val Loss: 0.21037116646766663
Epoch: 6, Train Loss: 0.2595447599887848, Val Loss: 0.21050210297107697
Epoch: 7, Train Loss: 0.2584787607192993, Val Loss: 0.210190549492836
Epoch: 8, Train Loss: 0.25733107328414917, Val Loss: 0.20938587188720703
Epoch: 9, Train Loss: 0.2559090852737427, Val Loss: 0.2084861844778061
Epoch: 10, Train Loss: 0.2540963888168335, Val Loss: 0.20754404366016388
Epoch: 11, Train Loss: 0.2517347037792206, Val Loss: 0.2063717544078827
Epoch: 12, Train Loss: 0.24851948022842407, Val Loss: 0.2047843039035797
Epoch: 13, Train Loss: 0.2439887672662735, Val Loss: 0.20258092880249023
Epoch: 14, Train Loss: 0.23741744458675385, Val Loss: 0.199

### Regulazised SVM

In [60]:
linSVM=SVM(X, kernel='linear')
linSVM.train(X_train, y_train, X_val, y_val, epochs=1000, lambda_reg=0.03)
linSVM.test(X_test, y_test)

Validation Accuracy at epoch 0 : 0.44
Validation Accuracy at epoch 100 : 0.56
Validation Accuracy at epoch 200 : 0.56
Validation Accuracy at epoch 300 : 0.56
Validation Accuracy at epoch 400 : 0.56
Validation Accuracy at epoch 500 : 0.56
Validation Accuracy at epoch 600 : 0.56
Validation Accuracy at epoch 700 : 0.56
Validation Accuracy at epoch 800 : 0.56
Validation Accuracy at epoch 900 : 0.56
Training Loss is : 0.7200688719749451
Test Accuracy: 0.62


In [None]:
w=(linSVM._w.weight)/0.01
w=w.detach().numpy()
print(w[0][np.where(w>0.01)[1]])
data.feature_names[np.where(w>0.01)[1]]

[1.7347666e-02 4.1757957e+01 1.7208124e-02 1.2365642e-02 1.3283963e-02
 1.6796706e-02 2.8829953e-02 1.9389622e-02 1.8899956e+01 2.1611920e-02
 2.1470062e-02]


array(['mean perimeter', 'mean area', 'mean smoothness', 'mean concavity',
       'texture error', 'concavity error', 'concave points error',
       'worst perimeter', 'worst area', 'worst concavity',
       'worst fractal dimension'], dtype='<U23')

In [70]:
polySVM=SVM(X_train, kernel='poly', p=7)
polySVM.train(X_train, y_train, X_val, y_val, epochs=5000, lambda_reg=0.03)
polySVM.test(X_test, y_test)

Validation Accuracy at epoch 0 : 0.44
Validation Accuracy at epoch 500 : 0.56
Validation Accuracy at epoch 1000 : 0.44
Validation Accuracy at epoch 1500 : 0.63
Validation Accuracy at epoch 2000 : 0.56
Validation Accuracy at epoch 2500 : 0.9
Validation Accuracy at epoch 3000 : 0.81
Validation Accuracy at epoch 3500 : 0.78
Validation Accuracy at epoch 4000 : 0.79
Validation Accuracy at epoch 4500 : 0.81
Training Loss is : 734.1683349609375
Test Accuracy: 0.78


In [68]:
rbfSVM=SVM(X_train, kernel='rbf', gamma=15)
rbfSVM.train(X_train , y_train, X_val, y_val, epochs=1000, lambda_reg=0.03)
rbfSVM.test(X_test, y_test)

Validation Accuracy at epoch 0 : 0.56
Validation Accuracy at epoch 100 : 0.69
Validation Accuracy at epoch 200 : 0.77
Validation Accuracy at epoch 300 : 0.8
Validation Accuracy at epoch 400 : 0.82
Validation Accuracy at epoch 500 : 0.82
Validation Accuracy at epoch 600 : 0.82
Validation Accuracy at epoch 700 : 0.85
Validation Accuracy at epoch 800 : 0.85
Validation Accuracy at epoch 900 : 0.85
Training Loss is : 0.6085423231124878
Test Accuracy: 0.91
