In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import validation_curve
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

# Read the MNIST sample CSV at the path below into a DataFrame.
# NOTE(review): the column labels this produces ('6', '0', '0.1', ...) look like a data
# row that pandas promoted to a header; if the file has no header row, pass header=None
# and update the label column name used further down — TODO confirm.
df = pd.read_csv(filepath_or_buffer="/content/sample_data/mnist_train_small.csv")

# Report (rows, columns) as a quick sanity check on the load.
print(df.shape)

(19999, 785)


In [None]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how="any")

In [None]:
# Preview the first five rows to check the column layout.
df.head(n=5)

Unnamed: 0,6,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.581,0.582,0.583,0.584,0.585,0.586,0.587,0.588,0.589,0.590
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Labels: the column named '6' (presumably the digit class — TODO confirm).
y = df['6']

# Features: all remaining columns, divided by 255 and then standardized per column.
# NOTE(review): scale() sees the whole dataset before it is split into train and test,
# so test rows influence the standardization statistics; fitting a StandardScaler on
# the training split only would avoid that leakage.
X = scale(df.drop(columns='6') / 255.0)

In [None]:
# Hold out 60% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): test_size=0.6 leaves only 40% of the data for training — confirm intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.6, random_state=42
)

# Show the resulting split sizes.
print("X-Train Shape: {}".format(X_train.shape))
print("X-Test Shape: {}".format(X_test.shape))

X-Train Shape: (7999, 784)
X-Test Shape: (12000, 784)


In [None]:
# Shuffled 5-fold splitter with a fixed seed so every candidate sees the same folds.
folds = KFold(n_splits=5, shuffle=True, random_state=10)

# Values of the regularization parameter C to compare.
hyper_params = [{'C': [0.01, 0.1, 1]}]

# Linear-kernel SVC; all other settings stay at their defaults.
model_linear = SVC(kernel='linear')

# Exhaustive grid search scored by accuracy; per-fold training scores are kept as well
# so over-fitting can be spotted in cv_results_.
model_cv = GridSearchCV(
    model_linear,
    hyper_params,
    scoring='accuracy',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [None]:
# Collect the per-candidate timing and score statistics of the search into a table.
cv_results = pd.DataFrame(data=model_cv.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,2.944425,0.134596,1.259854,0.158785,0.01,{'C': 0.01},0.91375,0.924375,0.923125,0.923125,...,0.92049,0.004006,1,0.981247,0.980153,0.979372,0.981091,0.980781,0.980529,0.000689
1,2.949754,0.241951,1.139543,0.106891,0.1,{'C': 0.1},0.90375,0.914375,0.9175,0.914375,...,0.910738,0.005843,2,0.999219,0.999219,0.999062,0.999531,0.999531,0.999312,0.000188
2,2.935729,0.254531,1.069526,0.071035,1.0,{'C': 1},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,3,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [None]:
# best_score_ is the mean accuracy over the held-out CV folds for the winning candidate
# (the mean_test_score column of cv_results_), not a score on the training data.
best_score = model_cv.best_score_
# Parameter combination that achieved that score.
best_hyperparams = model_cv.best_params_

print("The best mean cross-validation score is {0} corresponding to hyperparameters {1}".format(best_score, best_hyperparams))

The best train score is 0.9204897592245151 corresponding to hyperparameters {'C': 0.01}


In [None]:
# Rebuild a linear SVC with the C value stored in best_hyperparams, train it on the
# training split and predict the held-out test split.
model = SVC(kernel="linear", C=best_hyperparams['C'])
y_pred = model.fit(X_train, y_train).predict(X_test)

# metrics: fraction of test rows classified correctly
print("accuracy", accuracy_score(y_true=y_test, y_pred=y_pred), "\n")

accuracy 0.9251666666666667 



In [None]:
# Shuffled 5-fold splitter with a fixed seed so every candidate sees the same folds.
folds = KFold(n_splits=5, shuffle=True, random_state=10)

# Values of the regularization parameter C to compare.
# NOTE(review): in the recorded outputs every C >= 1 yields the same CV scores with a
# training accuracy of 1.0, so this range may not add information — confirm on re-run.
hyper_params = [{'C': [5, 10, 15]}]

# Linear-kernel SVC; all other settings stay at their defaults.
model_linear = SVC(kernel='linear')

# Exhaustive grid search scored by accuracy, keeping per-fold training scores too.
model_cv = GridSearchCV(
    model_linear,
    hyper_params,
    scoring='accuracy',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [None]:
# Collect the per-candidate timing and score statistics of the search into a table.
cv_results = pd.DataFrame(data=model_cv.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,2.918667,0.374325,1.120252,0.099271,5,{'C': 5},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
1,2.843867,0.252423,1.09812,0.111896,10,{'C': 10},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
2,2.897379,0.274699,1.119168,0.107616,15,{'C': 15},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [None]:
# best_score_ is the mean accuracy over the held-out CV folds for the winning candidate
# (the mean_test_score column of cv_results_), not a score on the training data.
best_score = model_cv.best_score_
# Parameter combination that achieved that score.
best_hyperparams = model_cv.best_params_

print("The best mean cross-validation score is {0} corresponding to hyperparameters {1}".format(best_score, best_hyperparams))

The best train score is 0.9099880393996248 corresponding to hyperparameters {'C': 5}


In [None]:
# Rebuild a linear SVC with the C value stored in best_hyperparams, train it on the
# training split and predict the held-out test split.
model = SVC(kernel="linear", C=best_hyperparams['C'])
y_pred = model.fit(X_train, y_train).predict(X_test)

# metrics: fraction of test rows classified correctly
print("accuracy", accuracy_score(y_true=y_test, y_pred=y_pred), "\n")

accuracy 0.9074166666666666 



In [None]:
# Shuffled 5-fold splitter with a fixed seed so every candidate sees the same folds.
folds = KFold(n_splits=5, shuffle=True, random_state=10)

# Values of the regularization parameter C to compare.
# NOTE(review): in the recorded outputs every C >= 1 yields the same CV scores with a
# training accuracy of 1.0, so this range may not add information — confirm on re-run.
hyper_params = [{'C': [100, 200, 500]}]

# Linear-kernel SVC; all other settings stay at their defaults.
model_linear = SVC(kernel='linear')

# Exhaustive grid search scored by accuracy, keeping per-fold training scores too.
model_cv = GridSearchCV(
    model_linear,
    hyper_params,
    scoring='accuracy',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [None]:
# Collect the per-candidate timing and score statistics of the search into a table.
cv_results = pd.DataFrame(data=model_cv.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,3.044639,0.480911,1.118822,0.155979,100,{'C': 100},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
1,2.830505,0.267984,1.140487,0.14303,200,{'C': 200},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
2,2.828633,0.330108,1.106408,0.120959,500,{'C': 500},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [None]:
# best_score_ is the mean accuracy over the held-out CV folds for the winning candidate
# (the mean_test_score column of cv_results_), not a score on the training data.
best_score = model_cv.best_score_
# Parameter combination that achieved that score.
best_hyperparams = model_cv.best_params_

print("The best mean cross-validation score is {0} corresponding to hyperparameters {1}".format(best_score, best_hyperparams))

The best train score is 0.9099880393996248 corresponding to hyperparameters {'C': 100}


In [None]:
# Rebuild a linear SVC with the C value stored in best_hyperparams, train it on the
# training split and predict the held-out test split.
model = SVC(kernel="linear", C=best_hyperparams['C'])
y_pred = model.fit(X_train, y_train).predict(X_test)

# metrics: fraction of test rows classified correctly
print("accuracy", accuracy_score(y_true=y_test, y_pred=y_pred), "\n")

accuracy 0.9074166666666666 



In [None]:
# Shuffled 5-fold splitter with a fixed seed so every candidate sees the same folds.
folds = KFold(n_splits=5, shuffle=True, random_state=10)

# Values of the regularization parameter C to compare.
# NOTE(review): in the recorded outputs every C >= 1 yields the same CV scores with a
# training accuracy of 1.0, so this range may not add information — confirm on re-run.
hyper_params = [{'C': [400, 600, 800]}]

# Linear-kernel SVC; all other settings stay at their defaults.
model_linear = SVC(kernel='linear')

# Exhaustive grid search scored by accuracy, keeping per-fold training scores too.
model_cv = GridSearchCV(
    model_linear,
    hyper_params,
    scoring='accuracy',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [None]:
# Collect the per-candidate timing and score statistics of the search into a table.
cv_results = pd.DataFrame(data=model_cv.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,2.837811,0.234588,1.139403,0.102503,400,{'C': 400},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
1,2.8161,0.285419,1.035118,0.112762,600,{'C': 600},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
2,2.843722,0.286988,1.070234,0.095863,800,{'C': 800},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [None]:
# best_score_ is the mean accuracy over the held-out CV folds for the winning candidate
# (the mean_test_score column of cv_results_), not a score on the training data.
best_score = model_cv.best_score_
# Parameter combination that achieved that score.
best_hyperparams = model_cv.best_params_

print("The best mean cross-validation score is {0} corresponding to hyperparameters {1}".format(best_score, best_hyperparams))

The best train score is 0.9099880393996248 corresponding to hyperparameters {'C': 400}


In [None]:
# Rebuild a linear SVC with the C value stored in best_hyperparams, train it on the
# training split and predict the held-out test split.
model = SVC(kernel="linear", C=best_hyperparams['C'])
y_pred = model.fit(X_train, y_train).predict(X_test)

# metrics: fraction of test rows classified correctly
print("accuracy", accuracy_score(y_true=y_test, y_pred=y_pred), "\n")

accuracy 0.9074166666666666 



In [None]:
# Shuffled 5-fold splitter with a fixed seed so every candidate sees the same folds.
folds = KFold(n_splits=5, shuffle=True, random_state=10)

# Values of the regularization parameter C to compare.
# NOTE(review): in the recorded outputs every C >= 1 yields the same CV scores with a
# training accuracy of 1.0, so this range may not add information — confirm on re-run.
hyper_params = [{'C': [1000, 2000, 4000]}]

# Linear-kernel SVC; all other settings stay at their defaults.
model_linear = SVC(kernel='linear')

# Exhaustive grid search scored by accuracy, keeping per-fold training scores too.
model_cv = GridSearchCV(
    model_linear,
    hyper_params,
    scoring='accuracy',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [None]:
# Collect the per-candidate timing and score statistics of the search into a table.
cv_results = pd.DataFrame(data=model_cv.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,2.829519,0.311222,1.140221,0.115115,1000,{'C': 1000},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
1,2.760602,0.326189,1.134456,0.107514,2000,{'C': 2000},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
2,2.711688,0.217591,1.232229,0.206062,4000,{'C': 4000},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [None]:
# best_score_ is the mean accuracy over the held-out CV folds for the winning candidate
# (the mean_test_score column of cv_results_), not a score on the training data.
best_score = model_cv.best_score_
# Parameter combination that achieved that score.
best_hyperparams = model_cv.best_params_

print("The best mean cross-validation score is {0} corresponding to hyperparameters {1}".format(best_score, best_hyperparams))

The best train score is 0.9099880393996248 corresponding to hyperparameters {'C': 1000}


In [None]:
# Rebuild a linear SVC with the C value stored in best_hyperparams, train it on the
# training split and predict the held-out test split.
model = SVC(kernel="linear", C=best_hyperparams['C'])
y_pred = model.fit(X_train, y_train).predict(X_test)

# metrics: fraction of test rows classified correctly
print("accuracy", accuracy_score(y_true=y_test, y_pred=y_pred), "\n")

accuracy 0.9074166666666666 



In [None]:
# Shuffled 5-fold splitter with a fixed seed so every candidate sees the same folds.
folds = KFold(n_splits=5, shuffle=True, random_state=10)

# Values of the regularization parameter C to compare.
# NOTE(review): in the recorded outputs every C >= 1 yields the same CV scores with a
# training accuracy of 1.0, so this range may not add information — confirm on re-run.
hyper_params = [{'C': [4500, 6000, 8000]}]

# Linear-kernel SVC; all other settings stay at their defaults.
model_linear = SVC(kernel='linear')

# Exhaustive grid search scored by accuracy, keeping per-fold training scores too.
model_cv = GridSearchCV(
    model_linear,
    hyper_params,
    scoring='accuracy',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [None]:
# Collect the per-candidate timing and score statistics of the search into a table.
cv_results = pd.DataFrame(data=model_cv.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,2.701103,0.243007,1.059587,0.077552,4500,{'C': 4500},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
1,2.769645,0.302249,1.049248,0.069454,6000,{'C': 6000},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
2,2.742543,0.300159,1.051422,0.081876,8000,{'C': 8000},0.904375,0.9125,0.915,0.91375,...,0.909988,0.004675,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [None]:
# best_score_ is the mean accuracy over the held-out CV folds for the winning candidate
# (the mean_test_score column of cv_results_), not a score on the training data.
best_score = model_cv.best_score_
# Parameter combination that achieved that score.
best_hyperparams = model_cv.best_params_

print("The best mean cross-validation score is {0} corresponding to hyperparameters {1}".format(best_score, best_hyperparams))

The best train score is 0.9099880393996248 corresponding to hyperparameters {'C': 4500}


In [None]:
# Rebuild a linear SVC with the C value stored in best_hyperparams, train it on the
# training split and predict the held-out test split.
model = SVC(kernel="linear", C=best_hyperparams['C'])
y_pred = model.fit(X_train, y_train).predict(X_test)

# metrics: fraction of test rows classified correctly
print("accuracy", accuracy_score(y_true=y_test, y_pred=y_pred), "\n")

accuracy 0.9074166666666666 



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import validation_curve
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

# Read the MNIST sample CSV at the path below into a DataFrame (this repeats the load
# performed at the top of the notebook, restarting the pipeline from the raw file).
# NOTE(review): the column labels this produces ('6', '0', '0.1', ...) look like a data
# row that pandas promoted to a header; if the file has no header row, pass header=None
# and update the label column name used further down — TODO confirm.
df = pd.read_csv(filepath_or_buffer="/content/sample_data/mnist_train_small.csv")

# Report (rows, columns) as a quick sanity check on the load.
print(df.shape)

(19999, 785)


In [None]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how="any")

In [None]:
# Labels: the column named '6' (presumably the digit class — TODO confirm).
y = df['6']

# Features: all remaining columns, divided by 255 and then standardized per column.
# NOTE(review): scale() sees the whole dataset before it is split into train and test,
# so test rows influence the standardization statistics; fitting a StandardScaler on
# the training split only would avoid that leakage.
X = scale(df.drop(columns='6') / 255.0)

In [None]:
# Hold out 60% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): test_size=0.6 leaves only 40% of the data for training — confirm intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.6, random_state=42
)

# Show the resulting split sizes.
print("X-Train Shape: {}".format(X_train.shape))
print("X-Test Shape: {}".format(X_test.shape))

X-Train Shape: (7999, 784)
X-Test Shape: (12000, 784)


In [None]:
# Shuffled 5-fold splitter with a fixed seed so every candidate sees the same folds.
folds = KFold(n_splits=5, shuffle=True, random_state=10)

# Values of C to compare at polynomial degree 2.
hyper_params = [{'C': [0.01, 0.1, 1], 'degree': [2]}]

# Polynomial-kernel SVC. gamma (and coef0) are left at their defaults here; note that
# gamma is used by the 'poly' kernel as well as 'rbf', so it could also be tuned.
model_poly = SVC(kernel='poly')

# Exhaustive grid search scored by accuracy, keeping per-fold training scores too.
model_cv_poly = GridSearchCV(
    model_poly,
    hyper_params,
    scoring='accuracy',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv_poly.fit(X_train, y_train)


Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [None]:
# Collect the per-candidate timing and score statistics of the search into a table.
cv_results = pd.DataFrame(data=model_cv_poly.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,params,split0_test_score,split1_test_score,split2_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,22.312552,0.619519,7.278246,0.439037,0.01,2,"{'C': 0.01, 'degree': 2}",0.179375,0.17625,0.15875,...,0.174273,0.007896,3,0.181278,0.18456,0.179559,0.182841,0.186562,0.18296,0.002447
1,17.101157,0.279573,6.089108,0.818017,0.1,2,"{'C': 0.1, 'degree': 2}",0.781875,0.799375,0.7725,...,0.784849,0.01034,2,0.812314,0.812158,0.80872,0.802782,0.824688,0.812132,0.007166
2,8.693902,0.506137,3.368426,0.408767,1.0,2,"{'C': 1, 'degree': 2}",0.93,0.941875,0.935625,...,0.93349,0.006994,1,0.972652,0.972027,0.969839,0.972027,0.976094,0.972528,0.002023


In [None]:
# best_score_ is the mean accuracy over the held-out CV folds for the winning candidate
# (the mean_test_score column of cv_results_), not a score on the training data.
best_score = model_cv_poly.best_score_
# Parameter combination that achieved that score.
best_hyperparams = model_cv_poly.best_params_

print("The best mean cross-validation score is {0} corresponding to hyperparameters {1}".format(best_score, best_hyperparams))

The best train score is 0.9334902282676673 corresponding to hyperparameters {'C': 1, 'degree': 2}


In [None]:
# Extract the best hyperparameters from the GridSearchCV results
# (assigned here so the cell does not rely on a variable created further down the notebook)
best_hyperparams_poly = model_cv_poly.best_params_

# Create a model with the best hyperparameters
model_poly_final = SVC(C=best_hyperparams_poly['C'], kernel='poly', degree=best_hyperparams_poly['degree'])

# Fit the model with the training data
model_poly_final.fit(X_train, y_train)

# Make predictions on the test set
y_pred_poly = model_poly_final.predict(X_test)

# Evaluate the model: fraction of test rows classified correctly
accuracy_poly = accuracy_score(y_test, y_pred_poly)
print("Accuracy for polynomial kernel:", accuracy_poly, "\n")

accuracy 0.9074166666666666 



In [None]:
# Shuffled 5-fold splitter with a fixed seed so every candidate sees the same folds.
folds = KFold(n_splits=5, shuffle=True, random_state=10)

# Values of C to compare at polynomial degree 2.
hyper_params = [{'C': [5, 10, 15], 'degree': [2]}]

# Polynomial-kernel SVC. gamma (and coef0) are left at their defaults here; note that
# gamma is used by the 'poly' kernel as well as 'rbf', so it could also be tuned.
model_poly = SVC(kernel='poly')

# Exhaustive grid search scored by accuracy, keeping per-fold training scores too.
model_cv_poly = GridSearchCV(
    model_poly,
    hyper_params,
    scoring='accuracy',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv_poly.fit(X_train, y_train)


Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [None]:
# Collect the per-candidate timing and score statistics of the search into a table.
cv_results = pd.DataFrame(data=model_cv_poly.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,params,split0_test_score,split1_test_score,split2_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,7.240135,0.756084,2.935151,0.287448,5,2,"{'C': 5, 'degree': 2}",0.9375,0.955625,0.953125,...,0.945366,0.009422,2,0.996718,0.995624,0.996249,0.995781,0.996875,0.99625,0.000494
1,6.597904,0.540296,2.566984,0.273588,10,2,"{'C': 10, 'degree': 2}",0.93875,0.95375,0.9525,...,0.945742,0.00855,1,0.99875,0.998906,0.998594,0.998906,0.99875,0.998781,0.000117
2,7.014677,0.397087,2.752012,0.230649,15,2,"{'C': 15, 'degree': 2}",0.940625,0.951875,0.95125,...,0.944991,0.009244,3,0.999687,0.999531,0.999375,0.999844,0.999844,0.999656,0.000182


In [None]:
# best_score_ is the mean accuracy over the held-out CV folds for the winning candidate
# (the mean_test_score column of cv_results_), not a score on the training data.
best_score = model_cv_poly.best_score_
# Parameter combination that achieved that score.
best_hyperparams = model_cv_poly.best_params_

print("The best mean cross-validation score is {0} corresponding to hyperparameters {1}".format(best_score, best_hyperparams))

The best train score is 0.9457415572232646 corresponding to hyperparameters {'C': 10, 'degree': 2}


In [None]:
# Take the winning parameter combination from the most recent polynomial grid search.
best_hyperparams_poly = model_cv_poly.best_params_

# Rebuild a polynomial-kernel SVC with those settings and train it on the training split.
model_poly_final = SVC(
    kernel='poly',
    C=best_hyperparams_poly['C'],
    degree=best_hyperparams_poly['degree'],
)
model_poly_final.fit(X_train, y_train)

# Predict the held-out test split and report the fraction classified correctly.
y_pred_poly = model_poly_final.predict(X_test)
accuracy_poly = accuracy_score(y_true=y_test, y_pred=y_pred_poly)
print("Accuracy for polynomial kernel:", accuracy_poly, "\n")


Accuracy for polynomial kernel: 0.9521666666666667 



In [None]:
# Shuffled 5-fold splitter with a fixed seed so every candidate sees the same folds.
folds = KFold(n_splits=5, shuffle=True, random_state=10)

# Values of C to compare at polynomial degree 3.
hyper_params = [{'C': [5, 10, 15], 'degree': [3]}]

# Polynomial-kernel SVC. gamma (and coef0) are left at their defaults here; note that
# gamma is used by the 'poly' kernel as well as 'rbf', so it could also be tuned.
model_poly = SVC(kernel='poly')

# Exhaustive grid search scored by accuracy, keeping per-fold training scores too.
model_cv_poly = GridSearchCV(
    model_poly,
    hyper_params,
    scoring='accuracy',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv_poly.fit(X_train, y_train)


Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [None]:
# Collect the per-candidate timing and score statistics of the search into a table.
cv_results = pd.DataFrame(data=model_cv_poly.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,params,split0_test_score,split1_test_score,split2_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,10.242707,0.201583,3.093144,0.446929,5,3,"{'C': 5, 'degree': 3}",0.935,0.94125,0.935,...,0.933491,0.004973,3,0.987654,0.987967,0.986248,0.988123,0.990313,0.988061,0.001307
1,9.609623,0.571217,2.990239,0.249403,10,3,"{'C': 10, 'degree': 3}",0.943125,0.95625,0.94875,...,0.944492,0.007505,2,0.996718,0.995624,0.994999,0.995937,0.997031,0.996062,0.000736
2,9.286613,0.462846,3.037809,0.343138,15,3,"{'C': 15, 'degree': 3}",0.946875,0.954375,0.951875,...,0.946617,0.006052,1,0.997968,0.998125,0.997968,0.997812,0.998437,0.998062,0.000212


In [None]:
# best_score_ is the mean accuracy over the held-out CV folds for the winning candidate
# (the mean_test_score column of cv_results_), not a score on the training data.
best_score = model_cv_poly.best_score_
# Parameter combination that achieved that score.
best_hyperparams = model_cv_poly.best_params_

print("The best mean cross-validation score is {0} corresponding to hyperparameters {1}".format(best_score, best_hyperparams))

The best train score is 0.9466172607879926 corresponding to hyperparameters {'C': 15, 'degree': 3}


In [None]:
# Take the winning parameter combination from the most recent polynomial grid search.
best_hyperparams_poly = model_cv_poly.best_params_

# Rebuild a polynomial-kernel SVC with those settings and train it on the training split.
model_poly_final = SVC(
    kernel='poly',
    C=best_hyperparams_poly['C'],
    degree=best_hyperparams_poly['degree'],
)
model_poly_final.fit(X_train, y_train)

# Predict the held-out test split and report the fraction classified correctly.
y_pred_poly = model_poly_final.predict(X_test)
accuracy_poly = accuracy_score(y_true=y_test, y_pred=y_pred_poly)
print("Accuracy for polynomial kernel:", accuracy_poly, "\n")


Accuracy for polynomial kernel: 0.9516666666666667 



In [None]:
# Shuffled 5-fold splitter with a fixed seed so every candidate sees the same folds.
folds = KFold(n_splits=5, shuffle=True, random_state=10)

# Values of C to compare at polynomial degree 3.
hyper_params = [{'C': [0.01, 0.1, 1], 'degree': [3]}]

# Polynomial-kernel SVC. gamma (and coef0) are left at their defaults here; note that
# gamma is used by the 'poly' kernel as well as 'rbf', so it could also be tuned.
model_poly = SVC(kernel='poly')

# Exhaustive grid search scored by accuracy, keeping per-fold training scores too.
model_cv_poly = GridSearchCV(
    model_poly,
    hyper_params,
    scoring='accuracy',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv_poly.fit(X_train, y_train)


Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [None]:
# Collect the per-candidate timing and score statistics of the search into a table.
cv_results = pd.DataFrame(data=model_cv_poly.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,params,split0_test_score,split1_test_score,split2_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,23.315842,1.451207,6.778617,0.806582,0.01,3,"{'C': 0.01, 'degree': 3}",0.161875,0.165,0.140625,...,0.15702,0.008553,3,0.170027,0.17112,0.169089,0.171902,0.176094,0.171646,0.00242
1,19.812444,0.345981,6.041945,0.479607,0.1,3,"{'C': 0.1, 'degree': 3}",0.38375,0.380625,0.358125,...,0.3793,0.014999,2,0.413346,0.418503,0.404751,0.41444,0.436406,0.417489,0.010465
2,13.253958,0.405967,4.610192,0.242891,1.0,3,"{'C': 1, 'degree': 3}",0.865625,0.884375,0.85625,...,0.866359,0.010086,1,0.921707,0.923426,0.910611,0.919987,0.931406,0.921427,0.006674


In [None]:
# best_score_ is the mean accuracy over the held-out CV folds for the winning candidate
# (the mean_test_score column of cv_results_), not a score on the training data.
best_score = model_cv_poly.best_score_
# Parameter combination that achieved that score.
best_hyperparams = model_cv_poly.best_params_

print("The best mean cross-validation score is {0} corresponding to hyperparameters {1}".format(best_score, best_hyperparams))

The best train score is 0.8663585053158224 corresponding to hyperparameters {'C': 1, 'degree': 3}


In [None]:
# Take the winning parameter combination from the most recent polynomial grid search.
best_hyperparams_poly = model_cv_poly.best_params_

# Rebuild a polynomial-kernel SVC with those settings and train it on the training split.
model_poly_final = SVC(
    kernel='poly',
    C=best_hyperparams_poly['C'],
    degree=best_hyperparams_poly['degree'],
)
model_poly_final.fit(X_train, y_train)

# Predict the held-out test split and report the fraction classified correctly.
y_pred_poly = model_poly_final.predict(X_test)
accuracy_poly = accuracy_score(y_true=y_test, y_pred=y_pred_poly)
print("Accuracy for polynomial kernel:", accuracy_poly, "\n")


Accuracy for polynomial kernel: 0.8909166666666667 



In [None]:
# Shuffled 5-fold splitter with a fixed seed so every candidate sees the same folds.
folds = KFold(n_splits=5, shuffle=True, random_state=10)

# Values of C to compare at polynomial degree 4.
hyper_params = [{'C': [5, 10, 15], 'degree': [4]}]

# Polynomial-kernel SVC. gamma (and coef0) are left at their defaults here; note that
# gamma is used by the 'poly' kernel as well as 'rbf', so it could also be tuned.
model_poly = SVC(kernel='poly')

# Exhaustive grid search scored by accuracy, keeping per-fold training scores too.
model_cv_poly = GridSearchCV(
    model_poly,
    hyper_params,
    scoring='accuracy',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv_poly.fit(X_train, y_train)


Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [None]:
# Collect the per-candidate timing and score statistics of the search into a table.
cv_results = pd.DataFrame(data=model_cv_poly.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,params,split0_test_score,split1_test_score,split2_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,17.793588,1.007547,4.780755,0.161418,5,4,"{'C': 5, 'degree': 4}",0.843125,0.85375,0.818125,...,0.838605,0.011938,3,0.929051,0.930458,0.9178,0.92702,0.937187,0.928303,0.006263
1,16.181683,0.762745,4.642398,0.284914,10,4,"{'C': 10, 'degree': 4}",0.87125,0.88375,0.864375,...,0.875235,0.006865,2,0.967807,0.967495,0.960463,0.967964,0.972969,0.967339,0.00399
2,17.188887,0.400467,4.796674,0.145066,15,4,"{'C': 15, 'degree': 4}",0.895625,0.904375,0.87875,...,0.893486,0.008505,1,0.982028,0.982028,0.977496,0.983747,0.984688,0.981998,0.002472


In [None]:
# best_score_ is the mean accuracy over the held-out CV folds for the winning candidate
# (the mean_test_score column of cv_results_), not a score on the training data.
best_score = model_cv_poly.best_score_
# Parameter combination that achieved that score.
best_hyperparams = model_cv_poly.best_params_

print("The best mean cross-validation score is {0} corresponding to hyperparameters {1}".format(best_score, best_hyperparams))

The best train score is 0.8934863977485928 corresponding to hyperparameters {'C': 15, 'degree': 4}


In [None]:
# Take the winning parameter combination from the most recent polynomial grid search.
best_hyperparams_poly = model_cv_poly.best_params_

# Rebuild a polynomial-kernel SVC with those settings and train it on the training split.
model_poly_final = SVC(
    kernel='poly',
    C=best_hyperparams_poly['C'],
    degree=best_hyperparams_poly['degree'],
)
model_poly_final.fit(X_train, y_train)

# Predict the held-out test split and report the fraction classified correctly.
y_pred_poly = model_poly_final.predict(X_test)
accuracy_poly = accuracy_score(y_true=y_test, y_pred=y_pred_poly)
print("Accuracy for polynomial kernel:", accuracy_poly, "\n")


Accuracy for polynomial kernel: 0.9046666666666666 



In [None]:
# Shuffled 5-fold splitter with a fixed seed so every candidate sees the same folds.
folds = KFold(n_splits=5, shuffle=True, random_state=10)

# Values of C to compare at polynomial degree 4.
hyper_params = [{'C': [0.01, 0.1, 1], 'degree': [4]}]

# Polynomial-kernel SVC. gamma (and coef0) are left at their defaults here; note that
# gamma is used by the 'poly' kernel as well as 'rbf', so it could also be tuned.
model_poly = SVC(kernel='poly')

# Exhaustive grid search scored by accuracy, keeping per-fold training scores too.
model_cv_poly = GridSearchCV(
    model_poly,
    hyper_params,
    scoring='accuracy',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv_poly.fit(X_train, y_train)


Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [None]:
# Collect the per-candidate timing and score statistics of the search into a table.
cv_results = pd.DataFrame(data=model_cv_poly.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,params,split0_test_score,split1_test_score,split2_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,26.039485,0.909119,6.658824,0.455522,0.01,4,"{'C': 0.01, 'degree': 4}",0.156875,0.1575,0.138125,...,0.152144,0.007148,3,0.16987,0.17112,0.170964,0.17112,0.175156,0.171646,0.001816
1,25.889098,1.913361,6.700681,0.337255,0.1,4,"{'C': 0.1, 'degree': 4}",0.26125,0.248125,0.23375,...,0.251158,0.010322,2,0.299578,0.298172,0.288326,0.301297,0.308281,0.299131,0.006423
2,27.417129,3.669768,6.749865,0.481353,1.0,4,"{'C': 1, 'degree': 4}",0.591875,0.594375,0.535625,...,0.588579,0.030174,1,0.693077,0.674949,0.65354,0.685888,0.70375,0.682241,0.017148


In [None]:
# best_score_ is the mean accuracy over the held-out CV folds for the winning candidate
# (the mean_test_score column of cv_results_), not a score on the training data.
best_score = model_cv_poly.best_score_
# Parameter combination that achieved that score.
best_hyperparams = model_cv_poly.best_params_

print("The best mean cross-validation score is {0} corresponding to hyperparameters {1}".format(best_score, best_hyperparams))

The best train score is 0.5885787210756723 corresponding to hyperparameters {'C': 1, 'degree': 4}


In [None]:
# Take the winning parameter combination from the most recent polynomial grid search.
best_hyperparams_poly = model_cv_poly.best_params_

# Rebuild a polynomial-kernel SVC with those settings and train it on the training split.
model_poly_final = SVC(
    kernel='poly',
    C=best_hyperparams_poly['C'],
    degree=best_hyperparams_poly['degree'],
)
model_poly_final.fit(X_train, y_train)

# Predict the held-out test split and report the fraction classified correctly.
y_pred_poly = model_poly_final.predict(X_test)
accuracy_poly = accuracy_score(y_true=y_test, y_pred=y_pred_poly)
print("Accuracy for polynomial kernel:", accuracy_poly, "\n")


Accuracy for polynomial kernel: 0.66075 

