# Grid Search

In [None]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from ml import *

In [None]:
# Hyperparameter search spaces, one list of sub-grids per model family.
# Each dict is an independent sub-grid, so only solver/penalty combinations
# that belong together are listed in the same dict.

# Regularisation strengths and iteration budgets shared by the logistic
# regression sub-grids.
_log_reg_C = np.logspace(-4, 4, 10)
_log_reg_max_iter = [100, 200, 300]

log_reg_param_grid = [
    # Solvers that support both L1 and L2 penalties.
    {'solver': ['liblinear', 'saga'], 'penalty': ['l1', 'l2'], 'C': _log_reg_C,
     'max_iter': _log_reg_max_iter},
    # Solvers that support only the L2 penalty.
    {'solver': ['newton-cg', 'lbfgs', 'sag'], 'penalty': ['l2'], 'C': _log_reg_C,
     'max_iter': _log_reg_max_iter},
    # Elastic-net is only available with 'saga' and needs an l1_ratio.
    {'solver': ['saga'], 'penalty': ['elasticnet'], 'C': _log_reg_C, 'max_iter': _log_reg_max_iter,
     'l1_ratio': np.linspace(0, 1, 10)},
    # Unpenalised models: C is omitted because there is nothing to regularise.
    # 'liblinear' is intentionally absent: it does not support fitting without
    # a penalty, so every such candidate could only fail.
    # NOTE(review): newer scikit-learn releases spell "no penalty" as None
    # instead of the string 'none' - confirm against the installed version.
    {'solver': ['newton-cg', 'lbfgs', 'sag', 'saga'], 'penalty': ['none'], 'max_iter': _log_reg_max_iter}
    ]

dec_tree_param_grid = [
        {'criterion': ['entropy'], 'max_depth': [None, 10, 20, 30]}
    ]

knn_param_grid = [
        {'n_neighbors': [3, 5, 7, 10]}
    ]

nn_param_grid = [
        # Solvers for which an initial learning rate is searched.
        {'solver': ['sgd', 'adam'], 'activation': ['tanh', 'relu'],
         'hidden_layer_sizes': [(50,), (100,), (50, 50)], 'learning_rate_init': [0.001, 0.01, 0.1],
         'max_iter': [200, 300, 400]},
        # 'lbfgs' sub-grid: no learning_rate_init is searched here.
        {'solver': ['lbfgs'], 'activation': ['tanh', 'relu'],
         'hidden_layer_sizes': [(50,), (100,), (50, 50)], 'max_iter': [200, 300, 400]}
    ]

In [None]:
# Placeholders for the current train/test split; each dataset section below
# rebinds all four names with the output of train_test_split.
X_train = X_test = y_train = y_test = None

In [None]:
def print_params(params):
    """Print each entry of *params* on its own line as an indented bullet.

    Args:
        params: Mapping of parameter name to value; it is traversed with
            ``.items()`` in its own iteration order.
    """
    for name, setting in params.items():
        bullet = "  - " + f"{name}: {setting}"
        print(bullet)

## Wine Quality Dataset

In [None]:
# Preprocess dataset: load the red-wine CSV and derive a binary label.
wineDF = pd.read_csv("./data/winequality-red.csv")

# Wines with a quality score above 6 are labelled 'good', all others 'bad'.
wineDF['label'] = wineDF['quality'].apply(lambda x: 'good' if x > 6 else 'bad')
# DataFrame.drop returns a new frame rather than modifying in place, so the
# result has to be reassigned. Without this, 'quality' (the column the label
# is computed from) would remain among the features and leak the target.
wineDF = wineDF.drop('quality', axis=1)

# Split into target vector and feature matrix (plain NumPy arrays).
wine_y = wineDF['label'].values
wine_X = wineDF.drop('label', axis=1).values

In [None]:
# Standardize dataset
# Fit the scaler on the wine features first, then apply it; the fitted
# scaler stays bound to `scaler`.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so test rows contribute to the scaling statistics - confirm
# this is intended.
scaler = StandardScaler().fit(wine_X)
wine_X = scaler.transform(wine_X)

In [None]:
# Hold out 20% of the wine samples, stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    wine_X, wine_y, test_size=0.2, stratify=wine_y
)

# Sweep each model family on the training split and print what
# hyperparam_sweep returns (presumably the selected parameters - verify
# against the `ml` module).
for heading, estimator, param_grid in (
    ("Logistic Regression:", LogisticRegression, log_reg_param_grid),
    ("\nDecision Tree:", DecisionTreeClassifier, dec_tree_param_grid),
    ("\nK-Nearest Neighbors:", KNeighborsClassifier, knn_param_grid),
    ("\nNeural Network:", MLPClassifier, nn_param_grid),
):
    print(heading)
    print_params(hyperparam_sweep(X_train, y_train, estimator, param_grid))


## Heart Disease Dataset

In [None]:
# Preprocess dataset: load the heart-disease CSV and make the target readable.
heartDF = pd.read_csv("./data/heart_statlog_cleveland_hungary_final.csv")

# Falsy target values become 'normal'; any truthy value becomes 'heart disease'.
heartDF['target'] = heartDF['target'].apply(
    lambda flag: 'normal' if not flag else 'heart disease'
)

# Feature matrix is every column except the target; both are NumPy arrays.
heart_X = heartDF.drop(columns='target').values
heart_y = heartDF['target'].values

In [None]:
# Standardize dataset
# A fresh scaler replaces the one bound to `scaler` earlier; it is fitted on
# the heart features and then applied to them.
# NOTE(review): fitting happens on all rows before the train/test split
# further down, so test rows contribute to the scaling statistics - confirm
# this is intended.
scaler = StandardScaler().fit(heart_X)
heart_X = scaler.transform(heart_X)

In [None]:
# Hold out 20% of the heart-disease samples, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    heart_X, heart_y, test_size=0.2, stratify=heart_y
)

# Sweep each model family on the training split and print what
# hyperparam_sweep returns (presumably the selected parameters - verify
# against the `ml` module).
for heading, estimator, param_grid in (
    ("Logistic Regression:", LogisticRegression, log_reg_param_grid),
    ("\nDecision Tree:", DecisionTreeClassifier, dec_tree_param_grid),
    ("\nK-Nearest Neighbors:", KNeighborsClassifier, knn_param_grid),
    ("\nNeural Network:", MLPClassifier, nn_param_grid),
):
    print(heading)
    print_params(hyperparam_sweep(X_train, y_train, estimator, param_grid))