In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
import numpy as np

# Load the quarterback sheet and keep only rows with no missing values,
# so every ranker/stat pair is computed over the same set of rows.
df = pd.read_csv('2024FFBallers_QBs.csv')
df_cleaned = df.dropna()

# Split the table: one frame with the rankers' columns, one with everything
# that is neither a ranker column nor the 'Name' column.
ranker_columns = ['Andy', 'Jason', 'Mike']
rankers = df_cleaned[ranker_columns]
stats = df_cleaned.drop(columns=['Name'] + ranker_columns)

# Put every stat on a common scale (zero mean, unit variance) before fitting.
scaler = StandardScaler()
stats_scaled = scaler.fit_transform(stats)

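# Measure each ranker/stat association as the slope of a one-variable linear
# regression on the standardized stat. This is proportional to the Pearson
# correlation but not bounded to [-1, 1].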
def calculate_correlations(rankers, stats_scaled, stat_names=None):
    """Return the univariate regression slope of every ranker on every stat.

    For each ranker column and each stat column, this computes the slope of
    the ordinary-least-squares line ``ranker ~ intercept + slope * stat``.
    Despite the function's name, the result is a slope, not a Pearson
    correlation: when a stat column has unit population variance the slope
    equals ``r * std(ranker)``, so values can fall outside ``[-1, 1]``.

    Args:
        rankers: DataFrame with one column per ranker and one row per
            observation.
        stats_scaled: 2-D array-like (rows x stats) of predictor values, with
            rows aligned to ``rankers``.
        stat_names: Optional labels for the columns of ``stats_scaled``. When
            omitted, labels are taken from ``stats_scaled.columns`` if it has
            that attribute, otherwise from the module-level ``stats`` frame
            (the original behaviour).

    Returns:
        A nested dict ``{ranker: {stat: slope}}``.

    Raises:
        ValueError: If ``stats_scaled`` is not 2-D, has no rows, or the
            number of labels does not match its number of columns.
    """
    features = np.asarray(stats_scaled, dtype=float)
    if features.ndim != 2:
        raise ValueError("stats_scaled must be 2-D (rows x stats)")
    if features.shape[0] == 0:
        raise ValueError("stats_scaled has no rows to fit")

    if stat_names is None:
        # Prefer labels carried by the input itself; only fall back to the
        # module-level `stats` frame when the input is a bare array.
        stat_names = getattr(stats_scaled, "columns", None)
        if stat_names is None:
            stat_names = stats.columns
    stat_names = list(stat_names)
    if len(stat_names) != features.shape[1]:
        raise ValueError(
            f"got {len(stat_names)} stat names for "
            f"{features.shape[1]} stat columns"
        )

    # Closed-form OLS slope with intercept: sum(xc * yc) / sum(xc ** 2),
    # where xc and yc are the mean-centred predictor and target.
    centered = features - features.mean(axis=0)
    # A constant column has no slope; report 0.0 instead of dividing by zero.
    constant = features.max(axis=0) == features.min(axis=0)
    centered[:, constant] = 0.0
    spread = (centered ** 2).sum(axis=0)
    spread[constant] = 1.0

    correlations = {}
    for ranker in rankers.columns:
        target = np.asarray(rankers[ranker], dtype=float)
        slopes = centered.T @ (target - target.mean()) / spread
        correlations[ranker] = dict(zip(stat_names, slopes))
    return correlations

# Compute the per-ranker, per-stat values for the cleaned data set.
correlations = calculate_correlations(rankers, stats_scaled)

# Tabulate the nested dict: outer keys (rankers) become columns and inner
# keys (stats) become the row index.
correlation_df = pd.DataFrame.from_dict(correlations)

print(correlation_df)


                        Andy     Jason      Mike
EPA+CPOE Composite -0.987501  0.399342 -0.089778
Adj. EPA/Play      -1.359620  0.064239 -0.702269
EPA/Play           -1.384715  0.009360 -0.832031
Success Rate       -2.305772 -1.851779 -2.239593
Cmp%               -0.641909  0.383843  0.131787
Expected Cmp%      -1.266538 -1.752022 -1.940152
CPOE                0.027599  1.222247  1.075453
Air Yards          -0.007416  0.031698  0.590916
