# SVM multiclass model with 'rbf' kernel ???
Parameters of the model:
- $C_{\text{raw}} = 10 000$
- $C_{\text{scaled}} = 250$

In [1]:
import pandas as pd
import pickle
from sklearn.preprocessing import MinMaxScaler
from src.model.SVM.SVM_module import *
from src.train.metrics import *

In [2]:
data = ManageData()
# Loading the training features and labels
data.LoadTrainFeatures()

Training features and labels loaded!
Example:
X: [-2.03834213e+02  1.65272430e+02 -2.71203651e+01  3.98504753e+01
  1.52355738e+01  1.52752247e+01 -5.82818985e+00  1.24469271e+01
 -2.53585100e+00  5.68161488e+00  2.86470144e-02  4.47464501e-03
  2.23200000e+03  1.20789442e+03  1.52517064e+03  2.32513772e+01
  2.57816840e+01  2.43512780e+01  2.44582548e+01  2.38234885e+01
  2.04741587e+01  2.52746605e+01  6.54243573e-04  6.98350232e-01
  5.18728181e-01  4.41854394e-01  1.96658298e-01  2.66243911e+00] 
y: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [3]:
# Loading the test features and labels
data.LoadTestFeatures()

Test features and labels loaded!
Example:
X: [ 4.24629608e+02  1.57892838e+02 -6.61561127e+01  3.53172836e+01
 -1.08259525e+01  1.99779091e+01  9.03547859e+00  1.16253910e+01
 -6.59319687e+00  9.90838528e+00  4.58988983e-02  1.11563884e-01
  2.98343434e+03  1.57618474e+03  1.50766778e+03  2.51165358e+01
  1.80690291e+01  1.96382736e+01  1.99261764e+01  1.70384129e+01
  2.10494327e+01  5.28803644e+01  2.43222667e-03  7.72531224e-01
  6.70360129e-01  6.16755698e-01  3.44813168e-01  2.22349119e+00] 
y: [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]


## Using non-normalized (raw) features:

In [14]:
# fitting the model
model_raw = multilabelSVM(C=1e4, kernel='rbf')
model_raw.fit(X=data.X_train, y=data.y_train)

Training completed successfully!


In [15]:
# save the model to disk
filename = '.\src\model\SVM\saved_models\SVM_model_rbf_raw.sav'
pickle.dump(model_raw, open(filename, 'wb'))

In [16]:
# first few predictions of the model
prediction_raw = model_raw.predict(X=data.X_test)
prediction_raw[:10]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

### Calculating various metrics for the given model:

In [17]:
metrics_raw = get_metrics(y_pred=torch.from_numpy(prediction_raw), y_true=torch.from_numpy(data.y_test), return_per_instrument=True)
metrics_raw = pd.Series(metrics_raw)
metrics_raw.head()

accuracy            tensor(0.8404)
hamming_distance    tensor(0.1596)
f1                  tensor(0.0176)
precision           tensor(0.0758)
recall              tensor(0.0099)
dtype: object

## Using scaled (normalized) features:

In [18]:
# Scaling the data
X_train_scaled = MinMaxScaler().fit_transform(data.X_train)
X_test_scaled = MinMaxScaler().fit_transform(data.X_test)

print('Scaled X_train example:\n', X_train_scaled[0], '\nScaled X_test example:\n', X_test_scaled[0])

Scaled X_train example:
 [0.23957868 0.58511031 0.54715048 0.57268476 0.7364767  0.53690207
 0.53250999 0.63510213 0.57247042 0.50040833 0.12563909 0.01155732
 0.20787152 0.18946834 0.35583313 0.56409949 0.64934186 0.57493308
 0.44173564 0.53803514 0.53268812 0.32445488 0.0044112  0.42691092
 0.46760361 0.33671273 0.12232241 0.10313562] 
Scaled X_test example:
 [0.75514135 0.57322875 0.19222452 0.55274369 0.47494303 0.70844048
 0.61235926 0.60762031 0.4747611  0.53467113 0.36493338 0.35864436
 0.29861641 0.27876588 0.30596512 0.50368287 0.24704974 0.32195861
 0.27762333 0.15087662 0.28380524 0.94714733 0.02254196 0.54175792
 0.77086355 0.58997553 0.31938742 0.04619178]


In [19]:
# fitting the model
model_norm = multilabelSVM(C=2.5e2, kernel='rbf')
model_norm.fit(X=X_train_scaled, y=data.y_train)

Training completed successfully!


In [20]:
# save the model to disk
filename = '.\src\model\SVM\saved_models\SVM_model_rbf_norm.sav'
pickle.dump(model_norm, open(filename, 'wb'))

In [21]:
# first few predictions of the model
prediction_norm = model_norm.predict(X=X_test_scaled)
prediction_norm[:10]

array([[0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

### Calculating various metrics for the given model:

In [22]:
metrics_norm = get_metrics(y_pred=torch.from_numpy(prediction_norm), y_true=torch.from_numpy(data.y_test), return_per_instrument=True)
metrics_norm = pd.Series(metrics_norm)
metrics_norm.head()

accuracy            tensor(0.7895)
hamming_distance    tensor(0.2105)
f1                  tensor(0.1948)
precision           tensor(0.3203)
recall              tensor(0.1960)
dtype: object