## Using GridSearchCV to fine-tune the SVM's hyperparameters on the Iris dataset

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVM classifier. SVMs are sensitive to feature scale, so the features
# are standardised inside a pipeline: the scaler is then re-fitted on the
# training part of every CV fold and never sees the validation data.
svc = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC()),
])

# Define the parameter grid to search over.
# The keys must be parameters of the estimator being tuned (SVC, addressed as
# 'svc__<param>' through the pipeline). A list of dicts keeps kernel-specific
# parameters (gamma, degree) out of the combinations where they have no effect.
C_VALUES = [0.1, 1, 10, 100]
param_grid = [
    {'svc__kernel': ['linear'], 'svc__C': C_VALUES},
    {'svc__kernel': ['rbf', 'sigmoid'], 'svc__C': C_VALUES, 'svc__gamma': ['scale', 'auto', 0.01, 0.1, 1]},
    {'svc__kernel': ['poly'], 'svc__C': C_VALUES, 'svc__gamma': ['scale', 'auto'], 'svc__degree': [2, 3, 4]},
]

# Create GridSearchCV with the classifier, parameter grid, and cross-validation
grid_search = GridSearchCV(svc, param_grid, cv=3, scoring='accuracy', verbose=1)

# Perform the grid search on the training data
grid_search.fit(X_train, y_train)

# Print the best hyperparameters found
print("Best Hyperparameters for the iris dataset:", grid_search.best_params_)

# Evaluate the best model (refitted on the whole training set) on the held-out test data
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy of the model for the best hyperparameters:", accuracy)
print("Classification Report of the model for the best hyperparameters:\n", classification_report(y_test, y_pred))

## Using GridSearchCV to fine-tune the SVM's hyperparameters on the Breast Cancer dataset

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Load the Breast Cancer dataset
breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVM classifier. The raw features of this dataset span several orders
# of magnitude, which makes unscaled SVC fits slow and poorly conditioned, so the
# features are standardised inside a pipeline. The scaler is re-fitted on the
# training part of every CV fold and never sees the validation data.
svc = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC()),
])

# Define the parameter grid to search over.
# The keys must be parameters of the estimator being tuned (SVC, addressed as
# 'svc__<param>' through the pipeline). A list of dicts keeps kernel-specific
# parameters (gamma, degree) out of the combinations where they have no effect.
C_VALUES = [0.1, 1, 10, 100]
param_grid = [
    {'svc__kernel': ['linear'], 'svc__C': C_VALUES},
    {'svc__kernel': ['rbf', 'sigmoid'], 'svc__C': C_VALUES, 'svc__gamma': ['scale', 'auto', 0.01, 0.1, 1]},
    {'svc__kernel': ['poly'], 'svc__C': C_VALUES, 'svc__gamma': ['scale', 'auto'], 'svc__degree': [2, 3, 4]},
]

# Create GridSearchCV with the classifier, parameter grid, and cross-validation
grid_search = GridSearchCV(svc, param_grid, cv=5, scoring='accuracy', verbose=1)

# Perform the grid search on the training data
grid_search.fit(X_train, y_train)

# Print the best hyperparameters found
print("Best Hyperparameters for the breast cancer dataset:", grid_search.best_params_)

# Evaluate the best model (refitted on the whole training set) on the held-out test data
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy of the model for the best hyperparameters:", accuracy)
print("Classification Report of the model for the best hyperparameters:\n", classification_report(y_test, y_pred))

## Using GridSearchCV to fine-tune the SVM's hyperparameters on the Ionosphere dataset

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Load the Ionosphere dataset; the label column 'column_ai' is renamed to 'target'.
# A chained rename (instead of inplace=True) keeps the cell idempotent on re-run.
ionosphere = pd.read_csv(
    'https://raw.githubusercontent.com/hargurjeet/MachineLearning/Ionosphere/ionosphere_data.csv'
).rename(columns={'column_ai': 'target'})

X = ionosphere.drop(['target'], axis=1)
y = ionosphere.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVM classifier. Features are standardised inside a pipeline so the
# scaler is re-fitted on the training part of every CV fold and never sees the
# validation data.
svc = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC()),
])

# Define the parameter grid to search over.
# The keys must be parameters of the estimator being tuned (SVC, addressed as
# 'svc__<param>' through the pipeline). A list of dicts keeps kernel-specific
# parameters (gamma, degree) out of the combinations where they have no effect.
C_VALUES = [0.1, 1, 10, 100]
param_grid = [
    {'svc__kernel': ['linear'], 'svc__C': C_VALUES},
    {'svc__kernel': ['rbf', 'sigmoid'], 'svc__C': C_VALUES, 'svc__gamma': ['scale', 'auto', 0.01, 0.1, 1]},
    {'svc__kernel': ['poly'], 'svc__C': C_VALUES, 'svc__gamma': ['scale', 'auto'], 'svc__degree': [2, 3, 4]},
]

# Create GridSearchCV with the classifier, parameter grid, and cross-validation
grid_search = GridSearchCV(svc, param_grid, cv=5, scoring='accuracy', verbose=1)

# Perform the grid search on the training data
grid_search.fit(X_train, y_train)

# Print the best hyperparameters found
print("Best Hyperparameters for the ionosphere dataset:", grid_search.best_params_)

# Evaluate the best model (refitted on the whole training set) on the held-out test data
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy of the model for the best hyperparameters:", accuracy)
print("Classification Report of the model for the best hyperparameters:\n", classification_report(y_test, y_pred))

In [None]:
import seaborn as sns

# Iris: remember the feature names and labels, then replace the Bunch with a
# DataFrame of the features and take the feature columns as the model input.
iris = load_iris()
i, iris_Y = iris.feature_names, iris.target
iris = pd.DataFrame(iris.data, columns=i)
iris_X = iris[i]

# Breast cancer: same treatment as iris.
breast_cancer = load_breast_cancer()
b, breast_cancer_Y = breast_cancer.feature_names, breast_cancer.target
breast_cancer = pd.DataFrame(breast_cancer.data, columns=b)
breast_cancer_X = breast_cancer[b]

# Ionosphere: read the CSV, expose the label column 'column_ai' as 'target',
# then separate the features from the labels.
ionosphere = pd.read_csv(
    'https://raw.githubusercontent.com/hargurjeet/MachineLearning/Ionosphere/ionosphere_data.csv'
).rename(columns={'column_ai': 'target'})
ionosphere_X = ionosphere.drop(columns=['target'])
ionosphere_Y = ionosphere['target']


def _mean_svm_accuracy(X, y, test_size, iterations):
    """Return the mean test accuracy of a default ``SVC`` over ``iterations`` random splits."""
    total = 0
    for _ in range(iterations):
        # random_state=None: every repetition draws a fresh random split.
        train_X, test_X, train_y, test_y = train_test_split(X, y, random_state=None, test_size=test_size)
        model = SVC()
        model.fit(train_X, train_y)
        total += accuracy_score(test_y, model.predict(test_X))
    return total / iterations


def svm_accuracy_vs_split_plot(start_split=10, end_split=70, dataset='iris', iterations=500):
    """Plot the mean accuracy of a default SVC against the test-set fraction.

    For every test-set percentage from ``start_split`` to ``end_split``
    (inclusive, in steps of 10) the data is randomly split ``iterations``
    times, a fresh ``SVC()`` is fitted on the training part and scored on the
    test part, and the accuracies are averaged. The averages and the split
    fractions are printed and drawn as a seaborn line plot.

    Parameters
    ----------
    start_split, end_split : int
        First and last test-set size, in percent.
    dataset : {'iris', 'breast_cancer', 'ionosphere'}
        Which of the module-level datasets (``<dataset>_X`` / ``<dataset>_Y``)
        to evaluate.
    iterations : int
        Number of random splits averaged per test-set size.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported names.
    """
    # Look the globals up lazily so only the requested dataset has to exist.
    if dataset == 'iris':
        X, y = iris_X, iris_Y
    elif dataset == 'breast_cancer':
        X, y = breast_cancer_X, breast_cancer_Y
    elif dataset == 'ionosphere':
        # Previously this branch was overwritten with the breast-cancer data.
        X, y = ionosphere_X, ionosphere_Y
    else:
        raise ValueError(
            "Unknown dataset " + repr(dataset) + "; expected 'iris', 'breast_cancer' or 'ionosphere'")

    split_values = [percent / 100 for percent in range(start_split, end_split + 1, 10)]
    accuracies = [_mean_svm_accuracy(X, y, test_size, iterations) for test_size in split_values]

    print(accuracies)
    print(split_values)

    data = pd.DataFrame(list(zip(accuracies, split_values)), columns=['Accuracies', 'Train-Test Splits'])
    # The model evaluated here is an SVC with default hyperparameters, so the title says SVM.
    sns.lineplot(data=data, y='Accuracies', x='Train-Test Splits').set_title(
        dataset + ": Accuracy vs Train-Test Split plot with SVM model, over " + str(
            iterations) + " iterations on each split")



In [None]:
# Plot mean SVC test accuracy against test-set fraction for the iris data, using
# the function's defaults (test sizes 10%-70% in steps of 10, 500 random splits each).
svm_accuracy_vs_split_plot(dataset='iris')