In [3]:
import pandas as pd
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from scipy.stats import mannwhitneyu
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler


# Load the team-level dataset (path is relative to the working directory).
df = pd.read_csv("DS 340 Final Dataset.csv")


# Target is the 'Playoff' column; predictors are every remaining column
# except 'Team' and the target itself.
y = df['Playoff']
X = df.drop(columns=['Team', 'Playoff'])


# Standardize all predictors (StandardScaler defaults: zero mean, unit
# variance per column), then wrap the resulting array back into a DataFrame
# that carries the original column names.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)
X_scaled_df = pd.DataFrame(data=X_scaled, columns=X.columns)


# Split the rows by outcome so the two groups can be compared descriptively.
# NOTE: no significance test is run in this step; only group means are
# computed and printed.
playoff_teams = df.loc[df['Playoff'] == 1]
non_playoff_teams = df.loc[df['Playoff'] == 0]


# Per-variable mean inside each group ('Team' and 'Playoff' are excluded),
# plus the playoff-minus-non-playoff gap as a third column.
playoff_means = playoff_teams.drop(columns=['Team', 'Playoff']).mean()
non_playoff_means = non_playoff_teams.drop(columns=['Team', 'Playoff']).mean()

means_comparison = pd.DataFrame({
    'Playoff Teams': playoff_means,
    'Non-Playoff Teams': non_playoff_means,
}).assign(
    Difference=lambda table: table['Playoff Teams'] - table['Non-Playoff Teams']
)

print("\nMeans comparison between playoff and non-playoff teams:")
print(means_comparison)


# Discriminant Analysis Implementation
# Hand-picked predictors for the discriminant model.
lda_features = ['OpponentPointsPerGame', 'OwnFieldGoalPercentage',
                'OpponentFieldGoalPercentage', 'OpponentAssistsPerGame']
X_discrim = df[lda_features]

# Disabled alternative (part of the novelty implementation): select the
# predictors through a `significant_variables` list instead of the
# hard-coded columns above.

# NOTE: this call re-fits the shared `scaler` on the four LDA predictors, so
# from here on it no longer holds the statistics of the full matrix `X`.
X_discrim_scaled = scaler.fit_transform(X_discrim)


# Fit the model and predict on the very same rows it was fitted on; every
# metric printed below is therefore in-sample.
lda = LinearDiscriminantAnalysis()
lda.fit(X_discrim_scaled, y)
y_pred_lda = lda.predict(X_discrim_scaled)


# Report one coefficient per (standardized) predictor.
print("\nDiscriminant Analysis Results:")
print("Coefficients:")
for feature_name, weight in zip(X_discrim.columns, lda.coef_[0]):
    print(f"{feature_name}: {weight:.4f}")


# Confusion matrix of actual vs. predicted labels.
print("\nConfusion Matrix (Discriminant Analysis):")
conf_matrix_lda = confusion_matrix(y, y_pred_lda)
print(conf_matrix_lda)


# Accuracy = the two diagonal cells of the 2x2 matrix over the total count.
correct_lda = conf_matrix_lda[0, 0] + conf_matrix_lda[1, 1]
accuracy_lda = correct_lda / conf_matrix_lda.sum()
print(f"Accuracy: {accuracy_lda:.4f}")


# Logistic Regression Implementation
# Two predictors feed the logistic model.
logit_features = ['OwnPointsPerGame', 'OpponentFieldGoalPercentage']
X_logit = df[logit_features]

# NOTE: the shared `scaler` is re-fitted here on the two logistic predictors,
# replacing whatever statistics it held before.
X_logit_scaled = scaler.fit_transform(X_logit)


# Fit with a fixed random_state, then predict on the same rows used for
# fitting; the metrics printed below are in-sample.
logit = LogisticRegression(random_state=42)
logit.fit(X_logit_scaled, y)
y_pred_logit = logit.predict(X_logit_scaled)


# Report one coefficient per (standardized) predictor.
print("\nLogistic Regression Results:")
print("Coefficients:")
for feature_name, weight in zip(X_logit.columns, logit.coef_[0]):
    print(f"{feature_name}: {weight:.4f}")


# Confusion matrix of actual vs. predicted labels.
print("\nConfusion Matrix (Logistic Regression):")
conf_matrix_logit = confusion_matrix(y, y_pred_logit)
print(conf_matrix_logit)


# Accuracy = the two diagonal cells of the 2x2 matrix over the total count.
correct_logit = conf_matrix_logit[0, 0] + conf_matrix_logit[1, 1]
accuracy_logit = correct_logit / conf_matrix_logit.sum()
print(f"Accuracy: {accuracy_logit:.4f}")


# Line each team's actual outcome up against both models' predictions so the
# misclassified teams (false positives and false negatives) can be listed.
team_results = (
    df[['Team', 'Playoff']]
    .rename(columns={'Playoff': 'Actual'})
    .assign(
        LDA_Prediction=y_pred_lda,
        Logistic_Prediction=y_pred_logit,
    )
)

# Rows where the discriminant model disagrees with the actual outcome.
lda_missed = team_results['Actual'].ne(team_results['LDA_Prediction'])
print("\nTeams misclassified by Discriminant Analysis:")
print(team_results.loc[lda_missed])

# Rows where the logistic model disagrees with the actual outcome.
logit_missed = team_results['Actual'].ne(team_results['Logistic_Prediction'])
print("\nTeams misclassified by Logistic Regression:")
print(team_results.loc[logit_missed])

# Side-by-side accuracy of the two methods, shown as percentages.
print("\nComparison of methods:")
print(f"Discriminant Analysis accuracy: {accuracy_lda:.2%}")
print(f"Logistic Regression accuracy: {accuracy_logit:.2%}")


Means comparison between playoff and non-playoff teams:
                                       Playoff Teams  Non-Playoff Teams  \
GP                                          77.43750          77.500000   
W                                           47.93750          28.214286   
L                                           29.50000          49.285714   
MIN                                         48.23750          48.271429   
OwnPointsPerGame                           116.01250         111.250000   
OwnFieldGoalPercentage                      47.50000          45.771429   
OpponentFieldGoalPercentage                 46.13125          47.342857   
OwnThreePointPercentage                     36.57500          35.335714   
OwnFreeThrowPercentage                      78.22500          78.142857   
OpponentThreePointFieldGoalPercentage       35.81250          36.250000   
OwnAssistsPerGame                           26.95000          26.078571   
OwnTotalReboundsPerGame                    