# 2a_SVM

In [59]:
from sklearn import svm 
from sklearn.model_selection import cross_val_score, StratifiedShuffleSplit, GridSearchCV
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt 

df = pd.read_csv(r'./dataset/train.csv')
train = df.to_numpy()
ytrain = train[:5000, 0]
xtrain = train[:5000, 1:]

df = pd.read_csv(r'./dataset/test.csv')
test = df.to_numpy()
ytest = test[:500,0]
xtest = test[:500,1:]

In [60]:
def getOptimalSVM(kernel, x, y):
    
    #Search for best parameters
    C_range = np.logspace(-2, 12, 15)
    gamma_range = np.logspace(-9, 5, 15)
    param_grid = dict(gamma=gamma_range, C=C_range)
    cv = StratifiedShuffleSplit(n_splits=4, test_size=0.25)
    grid = GridSearchCV(svm.SVC(kernel=kernel), param_grid=param_grid, cv=cv).fit(x, y)
    
    #Check the results of the Cross validation search
    results = grid.cv_results_
    for avg, C, gamma in zip(results['mean_test_score'], results['param_C'], results['param_gamma']):
        print(f"{kernel.upper():}(C={C}, gamma={gamma}): Average accuracy was: {avg}")


    print(f"{kernel.upper()}: The best parameters are %s with a score of %0.5f \n" % (grid.best_params_, grid.best_score_))
        
    #Return the best svm 
    return svm.SVC(kernel=kernel, C=grid.best_params_['C'], gamma=grid.best_params_['gamma']).fit(xtrain,ytrain)



In [61]:
def calculateAccuracy(x, y, kernel, clf):
    predictions = clf.predict(x)
    print(f"Accuracy for \'{kernel}\':{accuracy_score(y, predictions)}")

In [63]:
linearSVM = getOptimalSVM('linear', xtrain, ytrain)
rbfSVM = getOptimalSVM('rbf', xtrain, ytrain)

LINEAR(C=0.01, gamma=1e-09): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=1e-08): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=1e-07): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=1e-06): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=1e-05): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=0.0001): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=0.001): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=0.01): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=0.1): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=1.0): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=10.0): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=100.0): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=1000.0): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=10000.0): Average accuracy was: 0.9106
LINEAR(C=0.01, gamma=100000.0): Average accuracy was: 0.9106
LINEAR(C=0.1, gamma=1e-09): Average accuracy was: 0.9106
LINEAR(C=0.1, gamma=1e-08): Average accuracy was: 0.9106
LINEAR(C=0.1, g

RBF(C=0.01, gamma=1e-09): Average accuracy was: 0.1112
RBF(C=0.01, gamma=1e-08): Average accuracy was: 0.1112
RBF(C=0.01, gamma=1e-07): Average accuracy was: 0.266
RBF(C=0.01, gamma=1e-06): Average accuracy was: 0.1112
RBF(C=0.01, gamma=1e-05): Average accuracy was: 0.1112
RBF(C=0.01, gamma=0.0001): Average accuracy was: 0.1112
RBF(C=0.01, gamma=0.001): Average accuracy was: 0.1112
RBF(C=0.01, gamma=0.01): Average accuracy was: 0.1112
RBF(C=0.01, gamma=0.1): Average accuracy was: 0.1112
RBF(C=0.01, gamma=1.0): Average accuracy was: 0.1112
RBF(C=0.01, gamma=10.0): Average accuracy was: 0.1112
RBF(C=0.01, gamma=100.0): Average accuracy was: 0.1112
RBF(C=0.01, gamma=1000.0): Average accuracy was: 0.1112
RBF(C=0.01, gamma=10000.0): Average accuracy was: 0.1112
RBF(C=0.01, gamma=100000.0): Average accuracy was: 0.1112
RBF(C=0.1, gamma=1e-09): Average accuracy was: 0.1112
RBF(C=0.1, gamma=1e-08): Average accuracy was: 0.5436
RBF(C=0.1, gamma=1e-07): Average accuracy was: 0.892
RBF(C=0.1, gam

In [64]:
calculateAccuracy(xtest, ytest, 'linear', linearSVM)
calculateAccuracy(xtest, ytest, 'rbf', rbfSVM)

Accuracy for 'linear':0.902
Accuracy for 'rbf':0.944
