In [53]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [54]:
# Read the raw table: the file is ';'-delimited and has no header row, so
# pandas labels the columns with consecutive integers starting at 0.
data = pd.read_csv(
    './biodeg.csv',
    sep=';',
    header=None,
)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,3.919,2.6909,0,0,0,0,0,31.4,2,0,...,0,0,0,2.949,1.591,0,7.253,0,0,RB
1,4.170,2.1144,0,0,0,0,0,30.8,1,1,...,0,0,0,3.315,1.967,0,7.257,0,0,RB
2,3.932,3.2512,0,0,0,0,0,26.7,2,4,...,0,0,1,3.076,2.417,0,7.601,0,0,RB
3,3.000,2.7098,0,0,0,0,0,20.0,0,2,...,0,0,1,3.046,5.000,0,6.690,0,0,RB
4,4.236,3.3944,0,0,0,0,0,29.4,2,4,...,0,0,0,3.351,2.405,0,8.003,0,0,RB
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1050,5.431,2.8955,0,0,0,2,0,32.1,4,1,...,0,6,1,3.573,2.242,1,8.088,0,0,NRB
1051,5.287,3.3732,0,0,9,0,0,35.3,0,9,...,0,3,0,3.787,3.083,3,9.278,0,0,NRB
1052,4.869,1.7670,0,1,9,0,5,44.4,0,4,...,4,13,0,3.848,2.576,5,9.537,1,0,NRB
1053,5.158,1.6914,2,0,36,0,9,56.1,0,0,...,1,16,0,5.808,2.055,8,11.055,0,1,NRB


In [55]:
# Features: every column except the last one.
X = data.iloc[:, :-1]
# Target: column labelled 41, taken from the one-column slice holding the last column.
y = data.iloc[:, -1:][41]

In [56]:
# 80/20 split with a fixed seed; every returned piece gets a fresh 0..n-1 index
# so positional and label-based access agree afterwards.
X_train, X_test, y_train, y_test = (
    piece.reset_index(drop=True)
    for piece in train_test_split(X, y, test_size=0.2, random_state=42)
)

In [180]:
# Sweep the SVM penalty C for three kernels and record the accuracy that each
# fitted classifier reaches on (X_test, y_test).
#
# NOTE(review): the accuracies are measured on the held-out test split, so
# picking the best C from these lists makes the reported score optimistic.
# Cross-validating on the training split (GridSearchCV is already imported)
# would avoid that.
C_values = [0.001,0.01,0.1,1,10,100,1000]
lin_acc = []
rbf_acc = []
poly_acc = []

# kernel name -> (extra SVC keyword arguments, list that collects its accuracies)
kernel_runs = {
    'linear': ({}, lin_acc),
    'rbf': ({}, rbf_acc),
    'poly': ({'degree': 2}, poly_acc),
}

for i in C_values:
    for kernel_name, (extra_kwargs, acc_list) in kernel_runs.items():
        clf = svm.SVC(kernel=kernel_name, C=i, **extra_kwargs)
        clf.fit(X_train, y_train)
        # score() predicts on X_test itself, so no separate predict() call is needed.
        acc_list.append(clf.score(X_test, y_test))
    

In [193]:
# Highest accuracy found in any of the three per-kernel lists.
best_acc = max(map(max, (lin_acc, rbf_acc, poly_acc)))
best_acc

0.8767772511848341

In [202]:
# Print each kernel's title followed by its accuracy list (one value per C).
for title, scores in (
    ("Linear Kernel", lin_acc),
    ("Rbf Kernel", rbf_acc),
    ("Poly_ Kernel", poly_acc),
):
    print(title)
    print(scores)

Linear Kernel
[0.8151658767772512, 0.8483412322274881, 0.8672985781990521, 0.8672985781990521, 0.8720379146919431, 0.8625592417061612, 0.8530805687203792]
Rbf Kernel
[0.6445497630331753, 0.6445497630331753, 0.6445497630331753, 0.8104265402843602, 0.8720379146919431, 0.8767772511848341, 0.8672985781990521]
Poly_ Kernel
[0.6445497630331753, 0.6445497630331753, 0.6445497630331753, 0.7914691943127962, 0.8578199052132701, 0.8578199052132701, 0.8672985781990521]


# Part 2

In [34]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch import optim
import random

In [30]:
class set_up_data(Dataset):
    """Map-style dataset over a 2-D table whose last column is the target.

    Parameters
    ----------
    filename : pandas.DataFrame or array-like
        Despite the name, this is the table itself, not a path: its ``.values``
        are used when present, otherwise the object is treated as an array.
        Every cell must be convertible to a float; a ``ValueError`` is raised
        by NumPy otherwise (for example if string labels are still present).

    Each item is a ``(features, target)`` pair of float32 NumPy arrays with
    shapes ``(n_columns - 1,)`` and ``(1,)``.
    """

    def __init__(self,filename):
        # Cast once to float32. A frame with mixed column dtypes would otherwise
        # yield an object array, which the default DataLoader collate rejects,
        # and float64 inputs would not match nn.Linear's float32 weights.
        data_matrix = np.asarray(getattr(filename, "values", filename), dtype=np.float32)
        self.data = data_matrix[:,:-1]
        # Slice with -1: (not -1) so the target keeps a trailing axis of length 1.
        self.target = data_matrix[:,-1:]
        self.n_samples = self.data.shape[0]

    def __len__(self):
        """Return the number of rows in the table."""
        return self.n_samples

    def __getitem__(self,index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.data[index],self.target[index]
        

In [57]:
# Encode the class column (label 41) as floats: 'RB' -> 1.0, anything else -> 0.0.
# The comparison is vectorised and also treats an already-encoded 1.0 as positive,
# so re-running this cell keeps the labels instead of collapsing them all to 0.0.
# Assigning the result back replaces the object-dtype column with a float column.
is_positive = data[41].eq('RB') | data[41].eq(1.0)
data[41] = is_positive.astype(float)

data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,3.919,2.6909,0,0,0,0,0,31.4,2,0,...,0,0,0,2.949,1.591,0,7.253,0,0,1
1,4.170,2.1144,0,0,0,0,0,30.8,1,1,...,0,0,0,3.315,1.967,0,7.257,0,0,1
2,3.932,3.2512,0,0,0,0,0,26.7,2,4,...,0,0,1,3.076,2.417,0,7.601,0,0,1
3,3.000,2.7098,0,0,0,0,0,20.0,0,2,...,0,0,1,3.046,5.000,0,6.690,0,0,1
4,4.236,3.3944,0,0,0,0,0,29.4,2,4,...,0,0,0,3.351,2.405,0,8.003,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1050,5.431,2.8955,0,0,0,2,0,32.1,4,1,...,0,6,1,3.573,2.242,1,8.088,0,0,0
1051,5.287,3.3732,0,0,9,0,0,35.3,0,9,...,0,3,0,3.787,3.083,3,9.278,0,0,0
1052,4.869,1.7670,0,1,9,0,5,44.4,0,4,...,4,13,0,3.848,2.576,5,9.537,1,0,0
1053,5.158,1.6914,2,0,36,0,9,56.1,0,0,...,1,16,0,5.808,2.055,8,11.055,0,1,0


In [58]:
# Hold out 20% of the rows. The seed is fixed (same value as the earlier
# scikit-learn split) so the train/test partition is the same on every run.
train_data,test_data = train_test_split(data,test_size=0.2,random_state=42)
# Number of input features: every column except the trailing target column.
in_feat = (train_data.shape[1]-1)
# Wrap the training frame as a Dataset and serve it in shuffled batches of 256.
train_data = set_up_data(train_data)
train_dl = DataLoader(train_data,batch_size=256,shuffle=True)

In [59]:
class my_model(nn.Module):
    """Fully connected network with ReLU hidden layers and a sigmoid output.

    ``h_sizes`` lists the layer widths starting with the input width: a linear
    layer is created for every consecutive pair of entries, and a final linear
    layer maps ``h_sizes[-1]`` to ``n_out`` units.
    """

    def __init__(self, h_sizes, n_out=1):
        super().__init__()
        # One Linear per (width, next width) pair, in order.
        self.layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(h_sizes, h_sizes[1:])]
        )
        self.out = nn.Linear(h_sizes[-1], n_out)

    def forward(self, x):
        """Apply each hidden layer followed by ReLU, then the sigmoid output layer."""
        hidden = x
        for dense in self.layers:
            hidden = F.relu(dense(hidden))
        return F.sigmoid(self.out(hidden))


In [45]:
# Train a binary classifier with SGD (momentum 0.9) on binary cross-entropy.
# With h_sizes = [in_feat, 2], my_model builds Linear(in_feat, 2) + ReLU
# followed by a one-unit sigmoid output layer.
model1 = [in_feat,2]
model = my_model(model1)
optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum=0.9)
criterion=nn.BCELoss()
no_of_epochs = 20
for epoch in range(no_of_epochs):
    # Running totals over the batches of this epoch: summed batch losses and
    # summed per-batch accuracies.
    epoch_loss = 0
    epoch_accuracy = 0
    # NOTE(review): `i` is not read anywhere in the visible part of this cell.
    i = 0

    for info,labels in train_dl:
        # BCELoss needs float targets with the same shape as the model output.
        dummy_labels = labels.float()
        dummy_labels = dummy_labels.view(-1,1)
        # Cast the inputs to float32 so they match the dtype of the layer weights.
        output = model(info.float())
        # Fixed: this previously referenced the undefined name `ouput`.
        loss = criterion(output,dummy_labels)
        # Detach before accumulating so the running total does not keep every
        # batch's autograd graph alive.
        epoch_loss += loss.detach()
        # Fraction of samples whose thresholded probability matches the label.
        accuracy = ((output > 0.5).float() == dummy_labels).float().mean()
        epoch_accuracy += accuracy
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    