In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset; the "Label" column is the target and every other column
# is used as a feature.
df = pd.read_csv("data/lfw_arnie_nonarnie.csv")

y = df["Label"]
X = df.drop(columns="Label")

# Hold out 20% of the rows for testing. Stratifying on y keeps the label
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=21,
)

In [None]:

# Candidate classifiers, keyed by the name that is also used as their
# pipeline step name in the grid-search loop.
models = {
    "KNeighborsClassifier": KNeighborsClassifier(),
    "LogisticRegression": LogisticRegression(),
    "DecisionTreeClassifier": DecisionTreeClassifier(),
}

# Hyperparameter grids, one per model. Keys follow the scikit-learn
# "<step name>__<parameter>" convention so they address the classifier step
# inside the pipeline.
params = {
    "KNeighborsClassifier": {
        # Start at 1: n_neighbors=0 is not a valid neighbour count, so every
        # CV fit for that candidate would fail and only add warnings/NaN scores.
        "KNeighborsClassifier__n_neighbors": range(1, 10),
    },
    "LogisticRegression": {
        "LogisticRegression__C": [0.01, 0.1, 1, 10],
    },
    "DecisionTreeClassifier": {
        "DecisionTreeClassifier__max_depth": [2, 5, 10],
        "DecisionTreeClassifier__min_samples_split": [2, 5, 10, 20],
        # Single-value entry: pins the tree's seed so results are repeatable.
        "DecisionTreeClassifier__random_state": [42],
    },
}

In [None]:
# Shuffled 5-fold splitter shared by every grid search so all models are
# scored on the same folds.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-model results, keyed by model name:
#   pipe_accuracies -> best mean CV accuracy found by the search
#   pipe_params     -> parameter combination that achieved it
#   pipelines       -> the fitted GridSearchCV object itself
pipe_accuracies, pipe_params, pipelines = {}, {}, {}

for name, model in models.items():
    # Scale the features, then classify; the classifier step is named after
    # the model so that the "<name>__<param>" grid keys resolve to it.
    pipeline = Pipeline([
        ("scaler", StandardScaler()),
        (name, model),
    ])

    gs = GridSearchCV(
        estimator=pipeline,
        param_grid=params[name],
        scoring="accuracy",
        cv=kf,
    )
    gs.fit(X_train, y_train)

    pipelines[name] = gs
    pipe_params[name] = gs.best_params_
    pipe_accuracies[name] = gs.best_score_
    
# Select the model with the highest cross-validated accuracy.
# max() over a dict iterates its keys, so without a key function it would
# return the alphabetically greatest model name rather than the best scorer.
best_model_name = max(pipe_accuracies, key=pipe_accuracies.get)
# Look the score and parameters up by the chosen name so that all three
# reported values are guaranteed to describe the same model.
best_model_cv_score = pipe_accuracies[best_model_name]
best_model_info = pipe_params[best_model_name]

print(f"Best Model: {best_model_name}")
print(f"Best Model Parameters: {best_model_info}")
print(f"Best Model CV Score: {best_model_cv_score}")

# Predict on the held-out test set with the fitted search object of the
# selected model.
best_search = pipelines[best_model_name]
y_pred = best_search.predict(X_test)

# Score the predictions against the true test labels with each metric, in
# the order accuracy, precision, recall, F1.
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)