In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
from sklearn import model_selection
from sklearn import svm
from sklearn import metrics
from sklearn import datasets
from sklearn import preprocessing

In [None]:
from sklearn import pipeline

In [None]:
# Load scikit-learn's breast cancer dataset.
data = datasets.load_breast_cancer()

In [None]:
# Feature matrix of the loaded dataset.
X = data.data

In [None]:
# Target labels of the loaded dataset.
y = data.target

### Validacioni skup

In [None]:
# Hold out 30% of the samples as the test set; the split is stratified on y
# and seeded so it is reproducible.
X_train_validation, X_test, y_train_validation, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=25,
)

In [None]:
# Split the remaining data again: 20% of it becomes the validation set used
# for hyperparameter selection, the rest is the training set.
X_train, X_validation, y_train, y_validation = model_selection.train_test_split(
    X_train_validation,
    y_train_validation,
    test_size=0.2,
    stratify=y_train_validation,
    random_state=25,
)

Pravimo nizove potencijalnih optimalnih vrednosti regularizacionog parametra `C` i sirine kernela `gamma`:

In [None]:
# Candidate grids: ten powers of ten for C (1e-5 .. 1e4) and six for gamma (1e-3 .. 1e2).
Cs = np.array([pow(10, exponent) for exponent in range(-5, 5)])
gammas = np.array([pow(10, exponent) for exponent in range(-3, 3)])

In [None]:
print(Cs, gammas)

In [None]:
# Standardize using statistics learned from the training set only, then apply
# the same transformation to both the training and the validation set.
scaler_tv = preprocessing.StandardScaler().fit(X_train)
X_train, X_validation = scaler_tv.transform(X_train), scaler_tv.transform(X_validation)

In [None]:
# Manual grid search: every (C, gamma) pair is trained on the training set and
# scored by accuracy on the validation set.
best_score = 0
best_params = {'C': 0, 'gamma': 0}

for C in Cs:
    for gamma in gammas:
        # Train the candidate model on the training set.
        model = svm.SVC(C=C, gamma=gamma).fit(X_train, y_train)
        # Evaluate the candidate on the validation set.
        y_predicted = model.predict(X_validation)
        score = metrics.accuracy_score(y_validation, y_predicted)

        # Only a strictly better score replaces the incumbent, so the first
        # of several tied candidates is the one that is kept.
        if score <= best_score:
            continue
        best_score = score
        best_params.update(C=C, gamma=gamma)

In [None]:
best_score

In [None]:
best_params

In [None]:
# Standardize using statistics learned from the combined train+validation data,
# then apply the same transformation to that data and to the test set.
scaler_tt = preprocessing.StandardScaler().fit(X_train_validation)
X_train_validation, X_test = scaler_tt.transform(X_train_validation), scaler_tt.transform(X_test)

In [None]:
# best_params holds exactly the 'C' and 'gamma' keys, so it can be unpacked
# straight into the SVC constructor.
model = svm.SVC(**best_params)

In [None]:
# Fit the model on the combined train+validation data.
model.fit(X_train_validation, y_train_validation)

In [None]:
# Predict labels for the held-out test set.
y_predicted = model.predict(X_test)

In [None]:
# Accuracy of the predictions on the test set.
metrics.accuracy_score(y_test, y_predicted)

### Unakrsna validacija za evaluaciju modela sa izabranim parametrima

In [None]:
# Fresh, unfitted SVC with the manually selected hyperparameters; best_params
# holds exactly the 'C' and 'gamma' keys, so it can be unpacked directly.
model = svm.SVC(**best_params)

In [None]:
scaler = preprocessing.StandardScaler()

In [None]:
# Chain the scaler and the classifier into a single estimator that can be
# passed to cross-validation as one unit.
svc_pipeline =  pipeline.make_pipeline(scaler, model)

In [None]:
# 10-fold cross-validated accuracy of the scaler+SVC pipeline on the whole dataset.
scores = model_selection.cross_val_score(
    svc_pipeline,
    X,
    y,
    scoring='accuracy',
    cv=10,
)

In [None]:
scores

In [None]:
scores.mean()

### Izbor vrednosti hiperparametara unakrsnom validacijom

In [None]:
# Pipeline with explicitly named steps, so the grid can address the SVC's
# parameters through the 'svm__<parameter>' prefix.
grid_model = pipeline.Pipeline(
    steps=[
        ('scaler', preprocessing.StandardScaler()),
        ('svm', svm.SVC()),
    ]
)
# Candidate values: C in 1e-5 .. 1e4 and gamma in 1e-3 .. 1e2 (powers of ten).
params = dict(
    svm__C=[pow(10, exponent) for exponent in range(-5, 5)],
    svm__gamma=[pow(10, exponent) for exponent in range(-3, 3)],
)

In [None]:
# Exhaustive search over `params`, scored by 10-fold cross-validated accuracy;
# training-fold scores are recorded as well.
grid = model_selection.GridSearchCV(
    grid_model,
    param_grid=params,
    scoring='accuracy',
    cv=10,
    return_train_score=True,
)

In [None]:
# Run the grid search on the train+validation data.
# NOTE(review): X_train_validation was already standardized by scaler_tt in an
# earlier cell, so the pipeline's StandardScaler re-scales data whose statistics
# were computed over all folds — consider searching on the unscaled split instead.
grid.fit(X_train_validation, y_train_validation)

In [None]:
grid.best_score_

In [None]:
grid.best_params_

### Finalni model

In [None]:
# Build the model from the hyperparameters chosen by the cross-validated grid
# search. GridSearchCV reports them under the pipeline-prefixed names
# ('svm__C', 'svm__gamma'); `best_params` belongs to the earlier manual
# validation-set search and would ignore the grid search result.
model_best_grid = svm.SVC(C=grid.best_params_['svm__C'], gamma=grid.best_params_['svm__gamma'])

In [None]:
# Fit the model on the combined train+validation data.
model_best_grid.fit(X_train_validation, y_train_validation)

In [None]:
# Predict labels for the held-out test set.
y_pred = model_best_grid.predict(X_test)

In [None]:
metrics.accuracy_score(y_test, y_pred) # this is the value we report as the measure of model quality

In [None]:
# Standardize the full dataset for the final fit.
# NOTE(review): this overwrites X with its standardized version, so the raw
# feature matrix is no longer available under this name after the cell runs.
final_scaler = preprocessing.StandardScaler()
X = final_scaler.fit_transform(X)

In [None]:
# This is the model we save: it is fit on X and y as they stand at this point
# in the notebook, using the hyperparameters selected by the cross-validated
# grid search (`grid.best_params_`) rather than those from the earlier manual
# validation-set search (`best_params`).
final_model = svm.SVC(C=grid.best_params_['svm__C'], gamma=grid.best_params_['svm__gamma'])
final_model.fit(X, y)

In [None]:
import pickle

In [None]:
# Persist the final model. It was trained on features standardized by
# final_scaler, so the fitted scaler is saved next to it; without the scaler
# the model cannot be applied to new, unscaled samples.
with open('svm_final_model.model', 'wb') as pickle_file:
    pickle.dump(final_model, pickle_file)

with open('svm_final_scaler.scaler', 'wb') as pickle_file:
    pickle.dump(final_scaler, pickle_file)