In [1]:
import pandas as pd

from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import cross_validate, cross_val_predict, cross_val_score

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, Normalizer

import sys
sys.path.insert(0, '../System')

from classify import get_feature_table, get_feature_groups, train_classify

import pickle

In [2]:
# Restore the two previously saved classifiers from the project's clfs folder.
# NOTE: pickle.load can execute arbitrary code while unpickling, so these
# .sav files must come from a trusted source.
with open("../System/clfs/grid_svc.sav", 'rb') as file:
    grid_clf = pickle.load(file)

with open("../System/clfs/standard_linear_svc.sav", 'rb') as file:
    stan_clf = pickle.load(file)

# Two-stage estimator: scale the features, then classify with the loaded
# stan_clf. MinMaxScaler(feature_range=(0, 1)) and
# RobustScaler(quantile_range=(25, 75)) are imported above and can be swapped
# in as the 'normalizer' step instead of StandardScaler.
pipeline = Pipeline(steps=[
    ('normalizer', StandardScaler()),  # step 1: standardize every feature
    ('clf', stan_clf),                 # step 2: classifier
])

In [3]:
# Load the training table, then split it into the target column (`labels`)
# and the remaining feature columns (`feats`).
feats = pd.read_csv("../System/feats_train.csv")
# pop() returns the "class" column and removes it from `feats` in one step.
labels = feats.pop("class")

In [4]:
# 10-fold cross-validation of the scaler + classifier pipeline on the training
# table, scored with macro-averaged precision and recall.
scoring = ['precision_macro', 'recall_macro']
scores = cross_validate(
    pipeline,
    feats,
    labels,
    scoring=scoring,
    cv=10,
    # Ask for the train-fold scores explicitly. The output shown for this cell
    # contains train_precision_macro / train_recall_macro, but scikit-learn
    # changed the default of return_train_score to False in 0.21, so without
    # this argument a re-run on a current version would silently drop them.
    return_train_score=True,
)
# Dict of per-fold arrays: fit/score times plus test_* and train_* metrics.
scores

{'fit_time': array([0.04829073, 0.06598258, 0.03093076, 0.03601027, 0.03440928,
        0.03092527, 0.03231716, 0.03727651, 0.05417919, 0.02736974]),
 'score_time': array([0.00290418, 0.00214362, 0.00202727, 0.00284362, 0.00962186,
        0.00211668, 0.00212407, 0.00203872, 0.00201964, 0.00253892]),
 'test_precision_macro': array([0.8880597 , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 0.94067797, 0.86956522]),
 'train_precision_macro': array([1.        , 0.99893162, 0.99893162, 0.99893162, 0.99893162,
        0.99893162, 0.99893162, 0.99893162, 0.9978678 , 1.        ]),
 'test_recall_macro': array([0.85576923, 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 0.93269231, 0.82352941]),
 'train_recall_macro': array([1.        , 0.99892934, 0.99892934, 0.99892934, 0.99892934,
        0.99892934, 0.99892934, 0.99892934, 0.99785867, 1.        ])}

In [5]:
# Run the project helper train_classify (imported from ../System/classify) on
# the 'feats' feature group with stan_clf. The assignment itself displays
# nothing, so the Precision/Recall/Accuracy/F1 lines under this cell are
# printed by the call.
# NOTE(review): the bare classifier is passed here, not `pipeline`, so the
# StandardScaler step is not part of this object. Whether train_classify
# scales the data itself is not visible from this notebook -- confirm, since
# these scores are well below the per-fold cross-validation scores above.
predictions = train_classify(['feats'], stan_clf)

Precision: 0.6757275429872434
Recall: 0.7186897880539499
Accuracy: 0.7360308285163777
F1-Score: 0.7313725490196079


In [6]:
# Same evaluation as the previous cell, but with grid_clf (the model loaded
# from grid_svc.sav); the metrics under this cell are printed by the call.
# NOTE(review): this reuses the name `predictions`, so the result of the
# earlier stan_clf run is no longer reachable once this cell has executed.
predictions = train_classify(['feats'], grid_clf)

Precision: 0.6770990306110974
Recall: 0.7341040462427746
Accuracy: 0.7389210019267822
F1-Score: 0.7376573088092934


In [7]:
# Display the repr of grid_clf to inspect the hyper-parameters stored in the
# pickled model.
grid_clf

SVC(C=0.01, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [8]:
# Display the repr of stan_clf to inspect its hyper-parameters. Although it
# was loaded from "standard_linear_svc.sav", the repr shows an SVC with
# kernel='linear', not a LinearSVC. Per the scikit-learn SVC docs, gamma is
# the kernel coefficient for 'rbf', 'poly' and 'sigmoid' only, so the gamma
# value listed here has no effect with the linear kernel.
stan_clf

SVC(C=3.698481284123085, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.20300389999423507,
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [9]:
# Build a fresh, unfitted SVC with a linear kernel and show its repr, i.e. the
# library's default hyper-parameters -- presumably as a baseline to compare
# against the two loaded models above.
SVC(kernel='linear')

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)