In [3]:
import warnings

import pandas as pd
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC

warnings.simplefilter('ignore')


In [5]:
# Load the iris Bunch and build a single frame (feature columns + integer
# class label) purely for a quick visual check of the data.
iris = datasets.load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [8]:
# Feature matrix and class labels taken directly from the loaded dataset.
X, y = iris.data, iris.target

# test_size=0.6 holds out 60% of the samples, so the test set is larger than
# the training set; random_state fixes the shuffle so the split is repeatable.
# NOTE(review): a 60% hold-out is unusually large - confirm it is intentional.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.6,
    random_state=42,
)


In [12]:
# Baseline: RBF-kernel SVC with hand-set hyperparameters (no tuning), used as
# the reference point for the searches further down.
model = SVC(C=100, kernel='rbf', gamma=10).fit(X_train, y_train)

# Predicted class label for every held-out sample.
y_predict = model.predict(X_test)
y_predict


array([1, 2, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 1, 0, 0, 2, 1, 0, 0, 2, 2, 1, 1, 0,
       0, 1, 1, 2, 1, 2, 1, 2, 1, 0, 2, 1, 0, 0, 2, 1, 2, 2, 0, 0, 1, 0,
       1, 2, 0, 1, 2, 2, 2, 2, 1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 1, 0, 2, 0,
       0, 2])

In [14]:
# Share of held-out samples that the baseline SVC classifies correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_predict)
accuracy

0.9

#### Tuning SVC Hyperparameters with GridSearchCV

In [16]:
# Exhaustive search space for SVC: 4 x 3 x 4 = 48 candidate settings.
# Per the scikit-learn SVC docs, gamma only applies to the rbf/poly/sigmoid
# kernels, so the 'linear' candidates that differ only in gamma are equivalent.
param_grid_gridsearch = dict(
    C=[0.1, 1, 10, 100],
    kernel=['linear', 'rbf', 'poly'],
    gamma=[0.01, 0.1, 1, 'auto'],
)

# Every candidate is scored by 5-fold cross-validated accuracy on the
# training split only; the test split is not touched here.
model_gridsearch = SVC()
grid_search = GridSearchCV(
    estimator=model_gridsearch,
    param_grid=param_grid_gridsearch,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100],
                         'gamma': [0.01, 0.1, 1, 'auto'],
                         'kernel': ['linear', 'rbf', 'poly']},
             scoring='accuracy')

In [18]:
# Hyperparameter combination selected by the grid search fitted above
# (ranked by the 'accuracy' scoring passed to GridSearchCV).
best_params_grid = grid_search.best_params_
best_params_grid

{'C': 10, 'gamma': 0.01, 'kernel': 'linear'}

In [19]:
# best_estimator_ is the winning configuration; with GridSearchCV's default
# refit=True it has been refit on the full training split.
best_model_grid = grid_search.best_estimator_
y_pred_grid = best_model_grid.predict(X_test)

# Accuracy of the tuned SVC on the held-out samples.
accuracy_grid = accuracy_score(y_true=y_test, y_pred=y_pred_grid)
accuracy_grid

0.9777777777777777

#### Hyperparameter Tuning for Multiple Models
Manual Search

In [25]:
# Manual search, candidate 1: linear-kernel SVC with hand-picked settings.
# RandomForestClassifier and LogisticRegression (used by the next cells) are
# imported in the notebook's top import cell rather than here, so every
# dependency is declared in one place.
# Per the scikit-learn SVC docs, gamma only applies to the rbf/poly/sigmoid
# kernels; it is kept here to mirror the grid-search parameter set.
model1 = SVC(C=0.1, kernel='linear', gamma=0.01)
model1.fit(X_train, y_train)

# Score the hand-configured model on the held-out samples.
y1_predict = model1.predict(X_test)
accuracy1 = accuracy_score(y_test, y1_predict)
accuracy1

0.9777777777777777

In [26]:
# Manual search, candidate 2: random forest with hand-picked settings.
# random_state pins the forest's internal randomness so the reported accuracy
# is repeatable across runs (same seed value as the train/test split).
model2 = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=2,
    random_state=42,
)
model2.fit(X_train, y_train)

# Score the hand-configured forest on the held-out samples.
y2_predict = model2.predict(X_test)
accuracy2 = accuracy_score(y_test, y2_predict)
accuracy2

0.9666666666666667

In [27]:
# Manual search, candidate 3: L1-penalised logistic regression with
# hand-picked settings, fitted with the liblinear solver.
model3 = LogisticRegression(penalty='l1', C=0.1, solver='liblinear').fit(X_train, y_train)

# Score the hand-configured model on the held-out samples.
y3_predict = model3.predict(X_test)
accuracy3 = accuracy_score(y_true=y_test, y_pred=y3_predict)
accuracy3

0.6777777777777778

#### Using GridSearchCV

In [32]:
# Untuned estimators to search over, keyed by a display name that is also
# the key into param_grid below.
# The random forest is seeded so the selected hyperparameters and downstream
# accuracy are repeatable across runs (same seed value as the train/test split).
models = {
    'SVM': SVC(),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Logistic Regression': LogisticRegression()
}

# One search space per estimator; the keys must match those of `models`.
param_grid = {
    'SVM': {
        'C': [0.1, 1, 10, 100],
        'kernel': ['linear', 'rbf', 'poly'],
        'gamma': [0.01, 0.1, 1, 'auto']},
    'Random Forest': {
        'n_estimators': [10, 50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10]},
    'Logistic Regression': {
        'C': [0.1, 1, 10, 100],
        'penalty': ['l1', 'l2'],
        'solver': ['liblinear']
        }
}

# Best fitted estimator per model name, filled in by the loop below.
best_models = {}

# NOTE: the loop rebinds `grid_search` and `model`, replacing the objects
# created under those names by earlier cells; after this cell they refer to
# the last entry processed.
for name, model in models.items():
    # 5-fold cross-validated accuracy on the training split picks the winner.
    grid_search = GridSearchCV(model,
                               param_grid=param_grid[name],
                               scoring='accuracy',
                               cv=5)
    grid_search.fit(X_train, y_train)
    best_models[name] = grid_search.best_estimator_
    print(f"{name} - Optimal Hyperparameters: {grid_search.best_params_}")

SVM - Optimal Hyperparameters: {'C': 10, 'gamma': 0.01, 'kernel': 'linear'}
Random Forest - Optimal Hyperparameters: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 10}
Logistic Regression - Optimal Hyperparameters: {'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}


In [33]:
# Report how each tuned estimator performs on the held-out samples.
for name, model in best_models.items():
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    print(f"{name} - Test Accuracy: {accuracy}")

SVM - Test Accuracy: 0.9777777777777777
Random Forest - Test Accuracy: 0.9666666666666667
Logistic Regression - Test Accuracy: 0.9555555555555556
