In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, log_loss

In [2]:
df = pd.read_csv('Satellite.csv', sep=';')
df.head()

Unnamed: 0,x.1,x.2,x.3,x.4,x.5,x.6,x.7,x.8,x.9,x.10,...,x.28,x.29,x.30,x.31,x.32,x.33,x.34,x.35,x.36,classes
0,92,115,120,94,84,102,106,79,84,102,...,104,88,121,128,100,84,107,113,87,grey soil
1,84,102,106,79,84,102,102,83,80,102,...,100,84,107,113,87,84,99,104,79,grey soil
2,84,102,102,83,80,102,102,79,84,94,...,87,84,99,104,79,84,99,104,79,grey soil
3,80,102,102,79,84,94,102,79,80,94,...,79,84,99,104,79,84,103,104,79,grey soil
4,84,94,102,79,80,94,98,76,80,102,...,79,84,103,104,79,79,107,109,87,grey soil


In [3]:
le = LabelEncoder()
df['classes'] = le.fit_transform(df['classes'])
df.head()

Unnamed: 0,x.1,x.2,x.3,x.4,x.5,x.6,x.7,x.8,x.9,x.10,...,x.28,x.29,x.30,x.31,x.32,x.33,x.34,x.35,x.36,classes
0,92,115,120,94,84,102,106,79,84,102,...,104,88,121,128,100,84,107,113,87,2
1,84,102,106,79,84,102,102,83,80,102,...,100,84,107,113,87,84,99,104,79,2
2,84,102,102,83,80,102,102,79,84,94,...,87,84,99,104,79,84,99,104,79,2
3,80,102,102,79,84,94,102,79,80,94,...,79,84,99,104,79,84,103,104,79,2
4,84,94,102,79,80,94,98,76,80,102,...,79,84,103,104,79,79,107,109,87,2


In [5]:
X = df.drop('classes', axis =1)
y = df['classes']

In [6]:
X_train, X_test, y_train,y_test = train_test_split(X,y, test_size=0.3, random_state=24, stratify=y)

In [7]:
# svc = SVC(probability=True, random_state=24, decision_function_shape='ovo')   # rbf = radial basis function
svc = SVC(probability=True, random_state=24, decision_function_shape='ovr')   # rbf = radial basis function
svc.fit(X_train, y_train)

y_pred=svc.predict(X_test)

In [8]:
print(f'Accuracy : {accuracy_score(y_test, y_pred)}')

y_pred_prob = svc.predict_proba(X_test)
print(f'Log loss : {log_loss(y_test, y_pred_prob)}')

Accuracy : 0.8834800621439669
Log loss : 0.2991297114029514


### SVC with GridSearch

In [9]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [13]:
params = {'kernel':['rbf','linear'],
         'C':np.linspace(0.01, 4, 10),
         'gamma':np.linspace(0.01,4,10),
         'decision_function_shape':['ovo', 'ovr']}

svc = SVC(probability=True, random_state=24)
# kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
# gcv = GridSearchCV(svc, param_grid=params, scoring='accuracy', cv=kfold)
rgcv = RandomizedSearchCV(svc, param_distributions=params, verbose=3, scoring='neg_log_loss', random_state=24)
rgcv.fit(X, y)

print(rgcv.best_params_)
print(rgcv.best_score_)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END C=0.8966666666666667, decision_function_shape=ovr, gamma=3.1133333333333333, kernel=linear;, score=-0.364 total time= 1.2min
[CV 2/5] END C=0.8966666666666667, decision_function_shape=ovr, gamma=3.1133333333333333, kernel=linear;, score=-0.380 total time= 1.3min
[CV 3/5] END C=0.8966666666666667, decision_function_shape=ovr, gamma=3.1133333333333333, kernel=linear;, score=-0.389 total time= 1.3min
[CV 4/5] END C=0.8966666666666667, decision_function_shape=ovr, gamma=3.1133333333333333, kernel=linear;, score=-0.412 total time= 1.3min
[CV 5/5] END C=0.8966666666666667, decision_function_shape=ovr, gamma=3.1133333333333333, kernel=linear;, score=-0.386 total time= 1.2min
[CV 1/5] END C=1.7833333333333334, decision_function_shape=ovr, gamma=1.34, kernel=rbf;, score=-1.722 total time=  26.2s
[CV 2/5] END C=1.7833333333333334, decision_function_shape=ovr, gamma=1.34, kernel=rbf;, score=-1.721 total time=  26.2s
[CV 3/5