#   Grid Search and Cross-validation

## Import libraries

In [42]:
import numpy as np
import pandas as pd

from google.colab import files
import joblib

## Importing the dataset

In [43]:
# Name of the CSV file that holds the training samples
training_file_name = 'Geometry and duck backward elimination with Support Vector Classification data' + '.csv'

# Load the CSV, then discard its first column: the file is read with a
# column of row indexes that is not a feature
data = pd.read_csv(training_file_name)
data = data.drop(columns=data.columns[0])

## Choose an equal number of samples from each class

In [44]:
# Separate the rows according to the value of the 'segmented' column:
# one frame for the rows labelled 1 and one for the rows labelled 0
data_class_1 = data[data['segmented'].eq(1)]
data_class_0 = data[data['segmented'].eq(0)]

In [None]:
# Report how many rows each class contains (label on one line, count on the next)
print('class 1 lenght:', len(data_class_1), sep='\n')

print('class 0 lenght:', len(data_class_0), sep='\n')

In [46]:
# Take the same number of samples from each class so the classes are balanced.
# 6800 is the requested number of samples per class. It is capped at the size
# of the smaller class because DataFrame.sample draws without replacement by
# default and raises a ValueError when n is larger than the number of rows.
n_sampels = min(6800, len(data_class_1), len(data_class_0))
# Fixed seed so that the same rows are drawn on every run
random_state = 7

data_class_1 = data_class_1.sample(n=n_sampels, random_state=random_state)
data_class_0 = data_class_0.sample(n=n_sampels, random_state=random_state)

In [47]:
# Stack the two class frames on top of each other, then shuffle the rows:
# frac=1.0 draws every row, in an order fixed by random_state
data_equal = pd.concat([data_class_1, data_class_0]).sample(
    frac=1.0,
    random_state=random_state,
)

In [48]:
# Target: the 'segmented' column
y = data_equal['segmented']
# Features: every remaining column (the 'segmented' label is removed)
X = data_equal.drop(columns='segmented')

## Grid Search and Cross-validation

In [57]:
# Bring both model classes into scope and create the model:
# a logistic regression with its default settings
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

classifier = LogisticRegression()

In [23]:
# Hyperparameters for the Grid Search:
# candidate values tried for the 'C' and 'gamma' entries of the parameter grid
C = [0.1, 0.2, 0.3, 0.5, 0.7, 1]
gamma = [0.1, 0.2, 0.3, 0.5, 0.7, 1]

In [None]:
# Applying Grid Search
# The grid below tunes 'kernel' and 'gamma', which are hyperparameters of SVC.
# LogisticRegression has neither of them, and GridSearchCV rejects grid keys
# that are not parameters of its estimator, so the search runs on an SVC.
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

parameters = [
    # First sub-grid: linear kernel, searched over C only
    {'kernel': ['linear'], 'C': C},
    # Second sub-grid: the remaining kernels, searched over C and gamma
    {'kernel': ['rbf', 'poly', 'sigmoid'], 'C': C, 'gamma': gamma},
]
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=parameters,
    scoring='accuracy',
    cv=10,      # 10-fold cross-validation for every parameter combination
    n_jobs=-1,  # run the fits in parallel on all available processors
)
grid_search.fit(X, y)

# Best mean cross-validated accuracy and the combination that achieved it
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy * 100))
print("Best Parameters:", best_parameters)

In [None]:
# Second Grid Search, over values close to the result of the first one
C = [0.5, 0.55, 0.6, 0.7, 0.75, 0.8, 0.85]
# NOTE(review): gamma and kernel are assigned here but are not part of the
# grid below, so only C is searched on `classifier` - confirm this is intended
gamma = [0.5, 0.55, 0.6, 0.7, 0.75, 0.8, 0.85]
kernel = 'rbf'

parameters = [{'C': C}]
grid_search = GridSearchCV(
    classifier,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
)
grid_search.fit(X, y)

# Winning combination and its mean cross-validated accuracy
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
print("Best Accuracy: {:.2f} %".format(100 * best_accuracy))
print("Best Parameters:", best_parameters)

In [None]:
# 20-fold cross-validation of an SVC with fixed hyperparameters
# (rbf kernel, gamma = 0.5, C = 0.5)
from sklearn.model_selection import cross_val_score

best_classifier = SVC(C=0.5, kernel='rbf', gamma=0.5)
accuracies = cross_val_score(best_classifier, X, y, cv=20)

# Mean and standard deviation of the fold scores, shown as percentages
print("Accuracy: {:.2f} %".format(100 * accuracies.mean()))
print("Standard Deviation: {:.2f} %".format(100 * accuracies.std()))

In [None]:
# 20-fold cross-validation of a LogisticRegression created with its
# default hyperparameters
from sklearn.model_selection import cross_val_score

best_classifier = LogisticRegression()
accuracies = cross_val_score(best_classifier, X, y, cv=20)

# Mean and standard deviation of the fold scores, shown as percentages
print("Accuracy: {:.2f} %".format(100 * accuracies.mean()))
print("Standard Deviation: {:.2f} %".format(100 * accuracies.std()))

In [None]:
# Fit best_classifier on the whole of X and y, converted to NumPy arrays first
feature_matrix = np.array(X)
label_vector = np.array(y)
best_classifier.fit(feature_matrix, label_vector)

## Save the model

In [63]:
# Write best_classifier to '<model_name>.pkl' with joblib, then hand the
# file to google.colab's files.download
model_name = 'LogisticRegression'
model_name_to_save = '{}.pkl'.format(model_name)
joblib.dump(best_classifier, model_name_to_save)
files.download(model_name_to_save)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>