In [21]:
from itertools import combinations

import numpy as np
import pandas as pd
from ISLP import load_data
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

In [22]:
# (a) Load the Weekly data set through ISLP's load_data helper
Weekly = load_data("Weekly")

In [23]:
# Instantiate one estimator per method, each with its default settings.
# These instances are module-level and get re-fitted by later cells.
log_reg, lda, qda, nb = (
    LogisticRegression(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis(),
    GaussianNB(),
)

In [24]:
# (j) Experiment with different combinations of predictors for each method.
#
# Every non-empty subset of the candidate predictors is fitted with logistic
# regression, LDA, QDA, KNN (K = 1..10) and Gaussian naive Bayes. The
# (method, subset) pair with the highest held-out accuracy is reported together
# with its confusion matrix.
#
# NOTE: the winner is selected on the same held-out weeks it is scored on, so
# the reported accuracy is an optimistic estimate of out-of-sample performance.

# Build the train/test split inside this cell so that it runs on a fresh kernel
# instead of relying on train_data / test_data / y_train / y_test left over from
# earlier kernel state (no visible cell defines them).
# NOTE(review): assumes the split is "train before 2009, test on 2009 onwards";
# the recorded confusion matrix sums to 104 weeks, which is consistent with a
# two-year hold-out — confirm against the exercise statement.
TEST_START_YEAR = 2009
in_training_period = Weekly['Year'] < TEST_START_YEAR
weekly_train = Weekly[in_training_period]
weekly_test = Weekly[~in_training_period]
direction_train = weekly_train['Direction']
direction_test = weekly_test['Direction']

# Positional slice: drops the first column and the last two columns.
# NOTE(review): for ISLP's Weekly these are presumably Year, Today and
# Direction, leaving the lag and volume columns — confirm with Weekly.columns.
candidate_predictors = list(Weekly.columns[1:-2])

KNN_K_VALUES = range(1, 11)  # K = 1..10

# Evaluation order matters: the comparison below is strict, so on a tie the
# earliest candidate in this list (and the earliest predictor subset) is kept.
classifiers = [
    ("Logistic Regression", log_reg),
    ("LDA", lda),
    ("QDA", qda),
    # One KNN estimator per K; the label is built with an f-string because
    # concatenating a str with the int K raises TypeError.
    *((f"KNN (K={k})", KNeighborsClassifier(n_neighbors=k)) for k in KNN_K_VALUES),
    ("Naive Bayes", nb),
]

best_accuracy = 0
best_cm = None
best_model = None
best_predictors = None

# Subset sizes run from 1 up to and including the full predictor set.
for num_predictors in range(1, len(candidate_predictors) + 1):
    for predictors in combinations(candidate_predictors, num_predictors):
        X_train = weekly_train[list(predictors)]
        X_test = weekly_test[list(predictors)]

        for model_name, classifier in classifiers:
            # fit() discards any previous fit, so estimators are safely reused
            # across predictor subsets.
            classifier.fit(X_train, direction_train)
            y_pred_test = classifier.predict(X_test)
            accuracy_test = accuracy_score(direction_test, y_pred_test)

            if accuracy_test > best_accuracy:
                best_accuracy = accuracy_test
                best_cm = confusion_matrix(direction_test, y_pred_test)
                best_model = model_name
                best_predictors = predictors

print("Best model:", best_model)
print("Best predictors:", best_predictors)
print("Best Accuracy:", best_accuracy)
print("Best Confusion Matrix:")
print(best_cm)

Best model: Logistic Regression
Best predictors: ('Lag2',)
Best Accuracy: 0.625
Best Confusion Matrix:
[[ 9 34]
 [ 5 56]]
