# SVC Model using GridSearch

In [1]:
# Retrieve variables from other jupyter notebook
%store -r X_train_scaled
%store -r X_test_scaled
%store -r y_train
%store -r y_test

In [2]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in /Users/annabelcheong/opt/anaconda3/lib/python3.8/site-packages (0.0)


In [3]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [4]:
import pandas as pd

In [5]:
# Create the SVC Model
from sklearn.svm import SVC 
model = SVC(kernel='linear')
model.fit(X_train_scaled, y_train)

SVC(kernel='linear')

In [6]:
print(model.score(X_train_scaled, y_train))
print(model.score(X_test_scaled, y_test))

0.846271218767881
0.8386727688787186


In [7]:
# Create the GridSearchCV model
# Create the GridSearch estimator along with a parameter object containing the values to adjust
from sklearn.model_selection import GridSearchCV

# Initial parameter values
# param_grid = {'C': [1, 5, 10, 50],
#               'gamma': [0.0001, 0.0005, 0.001, 0.005]}

# Refinement of values for best accuracy
param_grid = {'C': [350],
              'gamma': [0.0007]}
grid = GridSearchCV(model, param_grid, verbose=3)

In [8]:
# Train the model with GridSearch
grid.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END ............................C=350, gamma=0.0007; total time=   1.3s
[CV 2/5] END ............................C=350, gamma=0.0007; total time=   0.8s
[CV 3/5] END ............................C=350, gamma=0.0007; total time=   1.4s
[CV 4/5] END ............................C=350, gamma=0.0007; total time=   0.7s
[CV 5/5] END ............................C=350, gamma=0.0007; total time=   1.3s


GridSearchCV(estimator=SVC(kernel='linear'),
             param_grid={'C': [350], 'gamma': [0.0007]}, verbose=3)

In [9]:
# List the best parameters and the best score
print(grid.best_params_)
print(grid.best_score_)

{'C': 350, 'gamma': 0.0007}
0.8861305569098887


In [10]:
# Make predictions with the hypertuned model
predictions = grid.predict(X_test_scaled)
predictions

array([2, 0, 2, ..., 1, 0, 1])

In [11]:
# Calculate classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions,
                            target_names=["0", "1","2"]))

              precision    recall  f1-score   support

           0       0.82      0.69      0.75       411
           1       0.77      0.86      0.81       484
           2       0.98      1.00      0.99       853

    accuracy                           0.89      1748
   macro avg       0.86      0.85      0.85      1748
weighted avg       0.89      0.89      0.88      1748



# Save the Model

## Save original SVC model

In [12]:
# Save the svc model 
import joblib
filename = 'model3.sav'

# Print file name
joblib.dump(model, filename)

['model3.sav']

In [13]:
# Test loading model back in to ensure it saved properly
loaded_model = joblib.load('model3.sav')
result = loaded_model.score(X_test_scaled, y_test)
print(result)

0.8386727688787186


## Save improved GridSearchCV model

In [14]:
# Save the GridSearchCV model (with higher accuracy)
filename = 'model3_improved.sav'

# Print file name
joblib.dump(grid, filename)

['model3_improved.sav']

In [15]:
# Test loading model back in to ensure it saved properly
loaded_model = joblib.load('model3_improved.sav')
result = loaded_model.score(X_test_scaled, y_test)
print(result)

0.8850114416475973
