In [1]:
import numpy as np
from sklearn import linear_model
from sklearn import model_selection
from sklearn import metrics
from sklearn import svm
from sklearn import preprocessing
from matplotlib import pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn import ensemble
from sklearn import tree
from sklearn import neighbors

In [2]:
X = np.load('dataset_x.npy')
Y = np.load('dataset_y.npy')

In [4]:
print(X)

[[-0.0881262   0.03043791  0.05741488 ...  0.01776126  0.05415184
   0.05634119]
 [-0.14593935 -0.02094049 -0.03384292 ...  0.03885734  0.05799268
   0.1301012 ]
 [-0.0948165   0.00470475  0.0088682  ...  0.05119714  0.06376068
   0.08496776]
 ...
 [-0.1526313   0.12161384  0.06752439 ... -0.12221479  0.06272715
   0.02702113]
 [-0.1812481   0.11436729  0.07435165 ... -0.09963466  0.038975
   0.02060723]
 [-0.15870148  0.1204138   0.07571484 ... -0.11005765  0.09271675
   0.00505629]]


In [5]:
print(np.max(X))

0.5336859226226807


In [6]:
print(np.min(X))

-0.46611472964286804


In [7]:
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, Y, test_size = 0.33, random_state = 42, stratify = Y)

In [51]:
svm = svm.SVC(probability=True)

In [52]:
gammas = np.linspace(0, 10, num = 11)
Cs = np.linspace(1, 10, 20)
parameters = {'kernel':('linear', 'rbf'), 'C':Cs,'gamma': gammas, 'shrinking':(True,False)}

In [53]:
classifier = GridSearchCV(svm, parameters)

In [54]:
classifier.fit(x_train, y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'kernel': ('linear', 'rbf'), 'C': array([ 1.     ,  1.47368,  1.94737,  2.42105,  2.89474,  3.36842,
        3.84211,  4.31579,  4.78947,  5.26316,  5.73684,  6.21053,
        6.68421,  7.15789,  7.63158,  8.10526,  8.57895,  9.05263,
        9.52632, 10.     ]), 'gamma': array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]), 'shrinking': (True, False)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [55]:
params = classifier.best_params_
print(params)

{'C': 1.0, 'gamma': 0.0, 'kernel': 'linear', 'shrinking': True}


In [56]:
svm.set_params(**params)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0, kernel='linear',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [57]:
svm.fit(x_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0, kernel='linear',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [58]:
accuracy = metrics.accuracy_score(y_test, svm.predict(x_test))

In [59]:
print('Test accuracy: ' + str(accuracy * 100) + "%")

Test accuracy: 100.0%


In [60]:
import pickle

In [61]:
pickle.dump(svm, open('./svm_model', 'wb'))

In [29]:
clf1 = neighbors.KNeighborsClassifier(n_neighbors=5)
clf2 = svm.SVC(probability=True)
clf3 = tree.DecisionTreeClassifier()
clf4 = neighbors.KNeighborsClassifier(n_neighbors=10)
clf5 = tree.DecisionTreeClassifier()


In [36]:
estimators = [
    ('knn5', clf1),
    ('svm', clf2),
    ('tree', clf3),
    ('knn10', clf4),
    ('tree_1', clf4),
]

In [37]:
clf = ensemble.VotingClassifier(estimators=estimators, voting='soft')

In [38]:
clf.fit(x_train, y_train)



VotingClassifier(estimators=[('knn5', KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform')), ('svm', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scali...      min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'))],
         flatten_transform=None, n_jobs=None, voting='soft', weights=None)

In [39]:
y_pred = clf.predict(x_test)

In [40]:
accuracy = metrics.accuracy_score(y_test, clf.predict(x_test))

In [41]:
print('Test accuracy: ' + str(accuracy * 100) + "%")

Test accuracy: 90.50445103857567%
