In [1]:
import autorootcwd

In [7]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from src.data import heart_disease, titanic, artificial
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Silence all warnings for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and convergence
# warnings raised by the libraries used below will not be shown either.
import warnings

warnings.simplefilter("ignore")


In [8]:
# Load features and targets for the three datasets.
X_heart, y_heart = heart_disease.prepare_data()
X_titanic, y_titanic = titanic.prepare_data()
X_artificial, y_artificial, true_weights = artificial.generate_data(
    num_samples=1000, num_features=5, interaction_pairs=None
)

# Every dataset gets the same 80/20 hold-out split with a fixed seed, so the
# shared keyword arguments are spelled out once and reused.
split_kwargs = {"test_size": 0.2, "random_state": 42}

X_train_heart, X_test_heart, y_train_heart, y_test_heart = train_test_split(
    X_heart, y_heart, **split_kwargs
)
X_train_titanic, X_test_titanic, y_train_titanic, y_test_titanic = train_test_split(
    X_titanic, y_titanic, **split_kwargs
)
X_train_artificial, X_test_artificial, y_train_artificial, y_test_artificial = train_test_split(
    X_artificial, y_artificial, **split_kwargs
)

# Call .ravel() on each target split before handing it to the classifiers.
# NOTE(review): presumably the targets are array-like and this flattens them
# to 1-D -- confirm against what prepare_data()/generate_data() return.
y_train_heart, y_test_heart = y_train_heart.ravel(), y_test_heart.ravel()
y_train_titanic, y_test_titanic = y_train_titanic.ravel(), y_test_titanic.ravel()
y_train_artificial, y_test_artificial = y_train_artificial.ravel(), y_test_artificial.ravel()


In [9]:
# Instantiate one estimator of each kind, all with default hyperparameters.
lda, qda, knn = (
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis(),
    KNeighborsClassifier(),
)


In [11]:
# Define a function to evaluate the classifiers
def evaluate_classifiers(X_train, X_test, y_train, y_test, classifiers=None):
    """Fit each classifier on the training split and score it on the test split.

    Parameters
    ----------
    X_train, X_test
        Feature data passed to ``fit`` and to ``predict`` / ``predict_proba``.
    y_train, y_test
        Targets passed to ``fit`` and to the metric functions.
    classifiers : mapping of str -> estimator, optional
        Models to evaluate, keyed by the row label used in the result. Each
        estimator must provide ``fit``, ``predict`` and ``predict_proba``.
        When omitted, the module-level ``lda``, ``qda`` and ``knn`` objects
        are used under the labels "LDA", "QDA" and "KNN".

    Returns
    -------
    pandas.DataFrame
        One row per classifier (in mapping order) and the columns
        "Accuracy", "Precision", "Recall", "F1 Score", "ROC AUC".

    Notes
    -----
    The estimators are fitted in place, so after the call they hold the fit
    for the data passed here.
    """
    if classifiers is None:
        classifiers = {"LDA": lda, "QDA": qda, "KNN": knn}

    # Metrics that are computed from hard label predictions.
    label_metrics = (accuracy_score, precision_score, recall_score, f1_score)

    names = []
    rows = []
    for name, model in classifiers.items():
        model.fit(X_train, y_train)

        # Predict once and reuse the result for every label-based metric
        # instead of re-running inference per metric.
        y_pred = model.predict(X_test)
        # Second column of predict_proba is used as the score for ROC AUC.
        # NOTE(review): this assumes a binary target whose positive class is
        # the second column -- confirm for each dataset.
        y_score = model.predict_proba(X_test)[:, 1]

        row = [metric(y_test, y_pred) for metric in label_metrics]
        row.append(roc_auc_score(y_test, y_score))

        names.append(name)
        rows.append(row)

    # Return the scores
    return pd.DataFrame(
        rows,
        columns=["Accuracy", "Precision", "Recall", "F1 Score", "ROC AUC"],
        index=names,
    )


# Score the classifiers on each dataset.
heart_scores = evaluate_classifiers(X_train_heart, X_test_heart, y_train_heart, y_test_heart)
titanic_scores = evaluate_classifiers(X_train_titanic, X_test_titanic, y_train_titanic, y_test_titanic)
artificial_scores = evaluate_classifiers(X_train_artificial, X_test_artificial, y_train_artificial, y_test_artificial)

# Draw one annotated heatmap per dataset, stacked vertically in one figure.
scores_by_title = {
    "Heart Disease": heart_scores,
    "Titanic": titanic_scores,
    "Artificial": artificial_scores,
}
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(12, 12))
for ax, (title, scores) in zip(axes, scores_by_title.items()):
    sns.heatmap(scores, annot=True, ax=ax)
    ax.set_title(title)
plt.show()

TypeError: fit() got an unexpected keyword argument 'random_state'