In [6]:
#!pip install --user scikit_optimize
#!pip install --user tpot

In [7]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from skopt import BayesSearchCV
from skopt.space import Categorical, Real
from tpot import TPOTClassifier

# Load the data set
raisins = pd.read_csv('Raisin_Dataset.csv')
X = raisins.drop('Class', axis=1)
y = raisins['Class']

# Split the data set into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [8]:
# Create an SVC model
svm = SVC()

# Dictionary of parameters for GridSearchCV
parameters = {'kernel': ['linear', 'rbf', 'sigmoid'], 'C': [1,10,100]}

# Create a GridSearchCV model
grid = GridSearchCV(svm, parameters)

# Fit the GridSearchCV model to the training data
grid.fit(X, y)

# Print the model and hyperparameters obtained by GridSearchCV
print(grid.best_estimator_)

# Print a table summarizing the results of GridSearchCV
df = pd.concat([pd.DataFrame(grid.cv_results_['params']), pd.DataFrame(grid.cv_results_['mean_test_score'], columns=['Score'])], axis=1)
cv_table = df.pivot(index='kernel', columns='C')
print(cv_table)

# Print the accuracy of the final model on the test data
print('GridSearchCV test score: ', grid.score(X_test, y_test))

SVC(C=100, kernel='linear')
            Score                    
C             1         10        100
kernel                               
linear   0.853333  0.852222  0.854444
rbf      0.818889  0.833333  0.852222
sigmoid  0.222222  0.218889  0.217778
GridSearchCV test score:  0.7955555555555556


In [9]:
# Dictionary of parameters for BayesSearchCV
search_spaces = {'kernel': Categorical(['linear', 'rbf', 'sigmoid']), 'C': Real(1, 100, prior='uniform')}

# Create a BayesSearchCV model
bayes = BayesSearchCV(svm, search_spaces, n_iter= 10)

# Fit the BayesSearchCV model to the training data
bayes.fit(X_train, y_train)

# Print the model and hyperparameters obtained by BayesSearchCV
print(bayes.best_estimator_)

# Print the accuracy of the final model on the test data
print('Bayes Search CV test score: ', bayes.score(X_test, y_test))

SVC(C=77.9461057690015, kernel='linear')
Bayes Search CV test score:  0.8044444444444444


In [10]:
# Create a TPOTClassifier model
tpot = TPOTClassifier(generations= 2, population_size= 20)

# Fit the TPOTClassifier model to the training data
tpot.fit(X_train, y_train)

# Print the accuracy of the final model on the test data
print('TPOT test score: ', tpot.score(X_test, y_test))

# Export TPOTClassifier's final model to a separate file
tpot.export('tpot_pipeline.py')

TPOT test score:  0.8044444444444444
