In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

from sklearn.model_selection import train_test_split

import sklearn.metrics as sklmet
from sklearn import svm 

from sklearn.model_selection import GridSearchCV


from sklearn.metrics import confusion_matrix

from sklearn import preprocessing

# NOTE(review): raw_df is not referenced by any of the cells visible in this
# notebook section -- confirm it is still needed before keeping this load.
raw_df = pd.read_csv('15134.csv')

In [5]:
#function for fitting using rbf kernel

def rbf_fit(X_train,y_train,C_range,gam_range,cv=4):
    """Grid-search an RBF-kernel SVC over C and gamma, scored by accuracy.

    Parameters
    ----------
    X_train, y_train :
        Training features and labels handed to ``GridSearchCV.fit``.
    C_range :
        Candidate values for the SVC ``C`` parameter.
    gam_range :
        Candidate values for the RBF ``gamma`` parameter.
    cv :
        Cross-validation setting forwarded to ``GridSearchCV``. Defaults to
        4, the value this function previously hard-coded.

    Returns
    -------
    tuple
        ``(best_score, best_params)``: the best mean cross-validated accuracy
        found and the parameter dict that produced it.
    """
    parameters = [{'C': C_range, 'kernel': ['rbf'], 'gamma': gam_range}]
    grid_search = GridSearchCV(estimator = svm.SVC(),
                               param_grid = parameters,
                               scoring = 'accuracy',
                               cv = cv,                    # 4-fold cross validation by default
                               n_jobs = -1)                # use every available core
    grid_search = grid_search.fit(X_train, y_train)

    return grid_search.best_score_, grid_search.best_params_

#function for fitting using linear kernel

def lin_fit(X_train,y_train,C_range,cv=10):
    """Grid-search a linear-kernel SVC over C, scored by accuracy.

    Parameters
    ----------
    X_train, y_train :
        Training features and labels handed to ``GridSearchCV.fit``.
    C_range :
        Candidate values for the SVC ``C`` parameter.
    cv :
        Cross-validation setting forwarded to ``GridSearchCV``. Defaults to
        10, the value this function previously hard-coded.

    Returns
    -------
    tuple
        ``(best_score, best_params)``: the best mean cross-validated accuracy
        found and the parameter dict that produced it.
    """
    parameters = [{'C': C_range, 'kernel': ['linear']}]
    grid_search = GridSearchCV(estimator = svm.SVC(),
                               param_grid = parameters,
                               scoring = 'accuracy',
                               cv = cv,                    # 10-fold cross validation by default
                               n_jobs = -1)                # use every available core
    grid_search = grid_search.fit(X_train, y_train)

    return grid_search.best_score_, grid_search.best_params_

#function for producing confusion matrix for linear kernel

def conf_lin(X_train,y_train,X_test,y_test,C_,z):
    """Fit a linear SVC, plot and save its confusion matrix, print a report.

    Parameters
    ----------
    X_train, y_train :
        Data the classifier is fitted on.
    X_test, y_test :
        Held-out data the classifier is evaluated on.
    C_ :
        Value of the SVC ``C`` parameter.
    z :
        File name stem; the heatmap is written to ``z + ".jpg"``.

    Returns
    -------
    tuple
        ``(axes, None)`` where ``axes`` is the Axes holding the heatmap. The
        second element is always ``None``; it is kept so the return shape
        matches ``conf_rbf``.
    """
    classifier = svm.SVC(kernel='linear', C=C_)
    classifier.fit(X_train,y_train)
    y_predict = classifier.predict(X_test)

    # Previously this function returned an undefined name `f` and never built
    # the confusion matrix; produce the figure the same way conf_rbf does.
    mat = confusion_matrix(y_test, y_predict)

    # Draw on a dedicated figure so earlier plots are never drawn over.
    fig, ax = plt.subplots()
    # Transposed so rows are predicted labels and columns are true labels.
    f = sns.heatmap(mat.T, annot=True, fmt='d', cbar=True, ax=ax)
    ax.set_xlabel('true label')
    ax.set_ylabel('predicted label')

    fig.savefig(z+".jpg", dpi=300)

    acc = sklmet.classification_report(y_test,y_predict)
    print(acc)
    return f, None

#function for producing confusion matrix for rbf kernel

def conf_rbf(X_train,y_train,X_test,y_test,C_,g,z):
    """Fit an RBF SVC, plot and save its confusion matrix, print a report.

    Parameters
    ----------
    X_train, y_train :
        Data the classifier is fitted on.
    X_test, y_test :
        Held-out data the classifier is evaluated on.
    C_ :
        Value of the SVC ``C`` parameter.
    g :
        Value of the RBF ``gamma`` parameter.
    z :
        File name stem; the heatmap is written to ``z + ".jpg"``.

    Returns
    -------
    tuple
        ``(axes, None)`` where ``axes`` is the Axes holding the heatmap. The
        second element is always ``None`` (the original returned the result
        of ``print``), kept for backward compatibility.
    """
    classifier = svm.SVC(kernel='rbf',gamma=g, C=C_)
    classifier.fit(X_train,y_train)
    y_predict = classifier.predict(X_test)
    mat = confusion_matrix(y_test, y_predict)

    # Draw on a dedicated figure: plotting on the implicit current Axes would
    # stack heatmaps and colorbars when several matrices are drawn in a row.
    fig, ax = plt.subplots()
    # Transposed so rows are predicted labels and columns are true labels.
    f = sns.heatmap(mat.T, annot=True, fmt='d', cbar=True, ax=ax)
    ax.set_xlabel('true label')
    ax.set_ylabel('predicted label')

    fig.savefig(z+".jpg", dpi=300)

    acc = sklmet.classification_report(y_test,y_predict)
    print(acc)
    return f, None
 
def lin_pred(X_train,y_train,x_predict,C_):
    """Train a linear-kernel SVC and return its predictions for ``x_predict``.

    ``C_`` is the SVC ``C`` parameter. The model is fitted on
    ``(X_train, y_train)`` each time the function is called.
    """
    model = svm.SVC(C=C_, kernel='linear')
    # fit() returns the estimator itself, so the calls can be chained.
    return model.fit(X_train, y_train).predict(x_predict)

#function for producing confusion matrix for rbf kernel

def rbf_pred(X_train,y_train,X_predict,C_,g):
    """Train an RBF-kernel SVC and return its predictions for ``X_predict``.

    ``C_`` and ``g`` are the SVC ``C`` and ``gamma`` parameters. The model is
    fitted on ``(X_train, y_train)`` each time the function is called.
    """
    model = svm.SVC(C=C_, gamma=g, kernel='rbf')
    # fit() returns the estimator itself, so the calls can be chained.
    return model.fit(X_train, y_train).predict(X_predict)
    

In [6]:
# Load the labelled training table and the table whose labels are predicted later.
train_df, test_df = (pd.read_csv(csv_name)
                     for csv_name in ('train_set.csv', 'test_set.csv'))

In [7]:
# Model inputs: the 25 feature columns f1 ... f25 of the training table.
feature_df = train_df[['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11',
       'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f20', 'f21',
       'f22', 'f23', 'f24', 'f25']]

X_unnormalized = np.asarray(feature_df)

y = np.asarray(train_df['label'])

# Split BEFORE scaling: fitting the scaler on every row would leak the
# held-out rows' min/max into the training data. The split depends only on
# the number of rows and random_state, so the row partition is the same as
# when the already-scaled matrix was split.
X_train_unnormalized, X_test_unnormalized, y_train, y_test = train_test_split(
    X_unnormalized, y, test_size=0.2, random_state = 4)

min_max_scaler = preprocessing.MinMaxScaler()

# Learn the scaling from the training split only, then apply it unchanged to
# the held-out split (whose values may therefore fall slightly outside [0, 1]).
X_train = min_max_scaler.fit_transform(X_train_unnormalized)
X_test = min_max_scaler.transform(X_test_unnormalized)

# Full feature matrix under the same scaling, kept under its original name.
X = min_max_scaler.transform(X_unnormalized)

In [8]:
# NOTE(review): this cell recomputes the same variables as the preceding
# preprocessing cell; one of the two is redundant and can likely be removed.

# Model inputs: the 25 feature columns f1 ... f25 of the training table.
feature_df = train_df[['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11',
       'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f20', 'f21',
       'f22', 'f23', 'f24', 'f25']]

X_unnormalized = np.asarray(feature_df)

y = np.asarray(train_df['label'])

# Split BEFORE scaling: fitting the scaler on every row would leak the
# held-out rows' min/max into the training data. The split depends only on
# the number of rows and random_state, so the row partition is the same as
# when the already-scaled matrix was split.
X_train_unnormalized, X_test_unnormalized, y_train, y_test = train_test_split(
    X_unnormalized, y, test_size=0.2, random_state = 4)

min_max_scaler = preprocessing.MinMaxScaler()

# Learn the scaling from the training split only, then apply it unchanged to
# the held-out split (whose values may therefore fall slightly outside [0, 1]).
X_train = min_max_scaler.fit_transform(X_train_unnormalized)
X_test = min_max_scaler.transform(X_test_unnormalized)

# Full feature matrix under the same scaling, kept under its original name.
X = min_max_scaler.transform(X_unnormalized)

In [9]:
# Same 25 feature columns, taken from the table to predict on.
pred_feature_df = test_df[['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11',
       'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f20', 'f21',
       'f22', 'f23', 'f24', 'f25']]

X_predict_unnormalized = np.asarray(pred_feature_df)

# Apply the scaler that was already fitted on the training data. Calling
# fit_transform here would refit it on the test table, so test features would
# be scaled differently from the features the classifiers are trained on
# (and the fitted training scaler would be overwritten).
X_predict = min_max_scaler.transform(X_predict_unnormalized)

#### __Linear kernel__

In [None]:
# Grid-search C for the linear kernel over 50 evenly spaced values in
# [0.0001, 2.3]; the cell displays (best CV accuracy, best parameters).
lin_fit(X_train,y_train,np.linspace(0.0001,2.3,50)) 

In [None]:
# C is hard-coded here. It coincides with grid point index 37 of
# np.linspace(0.0001, 2.3, 50) -- presumably the best C reported by the
# lin_fit search; re-check it whenever the data or the grid changes.
conf_lin(X_train,y_train,X_test,y_test,1.7367591836734693,"ukda_25fe_lin")

In [None]:
# Predict labels for the scaled test_df features with a linear SVC trained on
# X_train, using the same hard-coded C as the conf_lin call.
y_predict_lin25fe = lin_pred(X_train,y_train,X_predict,1.7367591836734693)

In [None]:
# Grid-search C and gamma for the RBF kernel, each over 10 evenly spaced
# values in [2, 5] (100 combinations); the cell displays
# (best CV accuracy, best parameters).
rbf_fit(X_train,y_train,np.linspace(2,5,10),np.linspace(2,5,10))

In [None]:
# C and gamma are hard-coded here. They coincide with grid points of
# np.linspace(2, 5, 10) (indices 4 and 5) -- presumably the best parameters
# reported by the rbf_fit search; re-check them if the data or grid changes.
# The last argument is the file stem: the heatmap is saved as ukda_25fe_rbf.jpg.
conf_rbf(X_train,y_train,X_test,y_test,3.333333333333333,3.6666666666666665,"ukda_25fe_rbf")