# Libraries and dataset import

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.metrics import mean_squared_error

In [None]:
# Load the training split: space-separated values, no header row, and any
# blanks following a separator are skipped.
df_train = pd.read_csv(
    'monks_3_train',
    sep=' ',
    header=None,
    skipinitialspace=True,
)
# Preview the first ten rows.
df_train.head(n=10)

In [None]:
# Remove the column labelled 7 in place; it is not used as a feature or label
# (presumably the per-row sample identifier -- TODO confirm against the data).
df_train.drop(labels=7, axis=1, inplace=True)

In [None]:
# Load the test split with the same parsing options as the training split and
# discard the column labelled 7 straight away.
df_test = pd.read_csv(
    'monks_3_test',
    sep=' ',
    header=None,
    skipinitialspace=True,
).drop(columns=7)

In [None]:
# Inspect the dtype pandas inferred for each remaining column of the
# training frame.
df_train.dtypes

In [None]:
# Replace the attribute columns labelled 1..6 with one-hot indicator columns;
# every other column is carried over unchanged.
df_train = pd.get_dummies(df_train, columns=list(range(1, 7)))

In [None]:
# Feature frame: columns at positions 1..17 of the encoded training frame
# (position 0 is taken as the label further down).
# NOTE(review): the upper bound 18 presumably matches the number of one-hot
# columns produced by get_dummies -- confirm against X_train.shape.
X_train = df_train.iloc[:, 1:18]

In [None]:
# Sanity check: (number of rows, number of selected feature columns).
X_train.shape

In [None]:
# Convert the feature DataFrame into a plain NumPy array for the estimator.
X_train = X_train.to_numpy()

In [None]:
X_train  # data: the training feature array

In [None]:
# Labels: the first column (position 0) of the training frame.
y_train = df_train.iloc[:, 0]

In [None]:
# Convert the label Series into a NumPy array.
y_train = y_train.to_numpy()
y_train  # labels: the training target array

In [None]:
# One-hot encode the attribute columns labelled 1..6 of the test split, then
# align the result to the columns of the (already encoded) training frame.
#
# Encoding each split on its own only produces matching feature columns when
# both splits happen to contain every category value. Reindexing guarantees
# the test frame has exactly the training columns, in the same order:
# indicator columns missing from the test split are added as all-zero, and
# indicators for categories never seen in training are dropped.
# Requires df_train to have been one-hot encoded beforehand (earlier cell).
df_test = pd.get_dummies(data=df_test, columns=range(1, 7))
df_test = df_test.reindex(columns=df_train.columns, fill_value=0)

In [None]:
# Test features: the same column positions used for X_train, converted
# directly into a NumPy array.
X_test = df_test.iloc[:, 1:18].to_numpy()

In [None]:
# Test labels: first column (position 0) of the test frame as a NumPy array.
y_test = df_test.iloc[:, 0].to_numpy()

# Model Selection

In [None]:
# Hyper-parameter search space for SVC, expressed as a list of sub-grids;
# the grid search expands each dict independently.
#
# The linear kernel has its own sub-grid without 'gamma': SVC only uses gamma
# for the 'rbf', 'poly' and 'sigmoid' kernels, so crossing 'linear' with the
# gamma values would cross-validate the very same model once per gamma value.
# The set of distinct models searched is unchanged; only the duplicated
# linear candidates are gone.
param_grid = [
    {
        'kernel': ['linear'],
        'C': [2**i for i in range(-3, 5)],  # 2^-3 .. 2^4
    },
    {
        'kernel': ['rbf', 'sigmoid'],
        'C': [2**i for i in range(-3, 5)],
        'gamma': [0.01, 0.1, 2, 4, 8],
    },
    {
        'kernel': ['poly'],
        'C': [2**i for i in range(-3, 5)],
        'degree': [1, 2, 3, 4, 5],
        'gamma': [0.01, 0.1, 0.2, 0.5, 2, 4, 8],
    },
]

In [None]:
# Exhaustive search over param_grid, scored with 5-fold stratified
# cross-validation (shuffled with a fixed seed so the folds are reproducible).
# fit() returns the fitted search object, so construction and fitting are
# chained into one expression.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
).fit(X_train, y_train)

# Report the winning configuration and its mean cross-validated score.
print(grid.best_estimator_, grid.best_score_)

In [None]:
# The search was built with the default refit=True, so best_estimator_ has
# already been retrained on the whole training set with the winning
# hyper-parameters. Fitting it again on the same data would only repeat that
# work, so the model is used as-is.
clf = grid.best_estimator_
# Mean accuracy of the selected model on the held-out test split.
clf.score(X_test, y_test)

In [None]:
# Predict the training labels with the refitted best model (the search object
# forwards predict() to best_estimator_) and measure the mean squared error
# against the true labels.
y_pred_train = grid.predict(X_train)
mean_squared_error(y_true=y_train, y_pred=y_pred_train)

In [None]:
# Same measurement on the test split: predictions from the refitted best
# model, scored by mean squared error against the true test labels.
y_pred_test = grid.predict(X_test)
mean_squared_error(y_true=y_test, y_pred=y_pred_test)