# SVM Model (Classification)

In [None]:
import non_time_series_utils as utils

import numpy as np

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
from sklearn.metrics import log_loss

In [None]:
# TODO(review): the original note here reads "import data instead of
# generation" -- presumably the generated frame is a placeholder for loading
# a real dataset; confirm before relying on these results.
df = utils.generate_df()

# Overwrite the 'total_points' column with whatever utils.get_classes_from_y
# returns for its raw values (presumably discrete class labels -- confirm
# against utils).
point_values = df['total_points'].values
df['total_points'] = utils.get_classes_from_y(point_values)

In [None]:
# Two-stage split: first carve off the training frame (split_rate=0.6), then
# divide the remainder into test and validation frames (split_rate=0.5).
# NOTE(review): the exact meaning of split_rate is defined in utils -- confirm.
train_df, holdout_df = utils.split_df_to_train_test(df, split_rate=0.6)
test_df, val_df = utils.split_df_to_train_test(holdout_df, split_rate=0.5)

In [None]:
# Separate features (X) from targets (y) for the training and validation
# frames; the training frame is processed first, then the validation frame.
(train_X, train_y), (val_X, val_y) = (
    utils.split_df_to_X_y(train_df),
    utils.split_df_to_X_y(val_df),
)

In [None]:
# Fit the standardiser on the training features only; the same fitted scaler
# is then applied to the validation and test features further down.
scaler = StandardScaler()
scaler.fit(train_X)

In [None]:
# Search over the SVM regularisation strength C and keep the fitted model
# that achieves the lowest log-loss on the validation split.
best_params = {
    'C': 0,
    'val_loss': np.inf,  # any finite loss beats this starting value
    'model': None
}

C_list = np.arange(1, 5)

# The scaled feature matrices do not depend on C, so transform them once
# instead of on every iteration.
train_X_scaled = scaler.transform(train_X)
val_X_scaled = scaler.transform(val_X)

# NOTE(review): the original computed 'balanced' class weights inside the loop
# but never passed them to the model (and the positional call form fails on
# current scikit-learn). The dead computation is removed; the model is still
# trained unweighted, exactly as before. If class balancing was intended,
# pass class_weight='balanced' to SVC.
for i, C in enumerate(C_list, start=1):
    print('Iteration {} / {}'.format(i, len(C_list)))

    model = SVC(C=C, kernel='linear', probability=True)
    model.fit(train_X_scaled, train_y)

    # log_loss expects (y_true, y_pred) where y_pred holds per-class
    # probabilities, so use predict_proba rather than hard labels.
    # labels=model.classes_ ties the probability columns to the classes seen
    # in training even if the validation split is missing one of them.
    val_proba = model.predict_proba(val_X_scaled)
    loss = log_loss(val_y, val_proba, labels=model.classes_)
    if loss < best_params['val_loss']:
        best_params = {
            'C': C,
            'val_loss': loss,
            'model': model
        }
print('Best params: C={} (loss={:.2f})'.format(best_params['C'], best_params['val_loss']))

In [None]:
# Separate features (X) from targets (y) for the held-out test frame.
test_X, test_y = utils.split_df_to_X_y(test_df)

In [None]:
# Score the model selected by the C search on the held-out test split.
model = best_params['model']
# log_loss expects (y_true, y_pred) with y_pred as per-class probabilities, so
# pass the true labels first and use predict_proba instead of hard labels.
# labels=model.classes_ keeps the probability columns aligned with the classes
# the model was trained on.
log_loss(test_y, model.predict_proba(scaler.transform(test_X)), labels=model.classes_)