<a href="https://colab.research.google.com/github/bhaveshasasik/nfl_game_predictor/blob/main/qb_impact_scores.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import os

In [None]:
# Column labels for the QB passing table. They are passed as `names=` below,
# so they replace the header row that ships with the CSV.
column_names = ["Rk", "Player", "Age", "Team", "Pos", "G", "GS", "QBrec", "Cmp", "Att",
                "Cmp%", "Yds", "TD", "TD%", "Int", "Int%", "1D", "Succ%", "Lng", "Y/A",
                "AY/A", "Y/C", "Y/G", "Rate", "QBR", "Sk", "Sk Yds", "Sk%", "NY/A",
                "ANY/A", "4QC", "GWD"]

# header=0 consumes the file's own header line; names= substitutes ours.
data = pd.read_csv("sample_data/qb_stats.csv", header=0, names=column_names)

data.columns = data.columns.str.strip()

# Check and adjust numeric columns
numeric_columns = ['Yds', 'Cmp', 'Att', 'TD', 'Int', 'Rate', 'Succ%']
numeric_columns = [col for col in numeric_columns if col in data.columns]  # Keep only valid columns

# Convert to numeric, coercing errors to NaN
data[numeric_columns] = data[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Keep quarterbacks only. Normalise the position label first so values such as
# " qb" still match. `.copy()` makes the filtered frame independent of the
# original, so columns assigned in later cells are written to `data` itself
# rather than to a slice (avoids SettingWithCopyWarning).
data['Pos'] = data['Pos'].str.strip().str.upper()
data = data[data['Pos'] == 'QB'].copy()

# Display the first few rows
print(data.head())


   Rk           Player  Age   Team Pos   G  GS   QBrec  Cmp    Att  ...  \
0   1  Tua Tagovailoa    25   MIA   QB  17  17  11-6-0  388  560.0  ...   
1   2      Jared Goff    29   DET   QB  17  17  12-5-0  407  605.0  ...   
2   3    Dak Prescott    30   DAL   QB  17  17  12-5-0  410  590.0  ...   
3   4      Josh Allen    27   BUF   QB  17  17  11-6-0  385  579.0  ...   
4   5     Brock Purdy    24   SFO   QB  16  16  12-4-0  308  444.0  ...   

     Y/G   Rate   QBR  Sk  Sk Yds   Sk%  NY/A  ANY/A 4QC   GWD  
0  272.0  101.1  60.8  29     171  4.92  7.56   7.48   2     2  
1  269.1   97.9  60.3  30     197  4.72  6.89   6.99   2        
2  265.6  105.9  72.7  39     255  6.20  6.77   7.28   2     3  
3  253.3   92.2  69.6  24     152  3.98  6.89   6.51   2     4  
4  267.5  113.0  72.8  28     153  5.93  8.74   9.01   0     0  

[5 rows x 32 columns]


In [None]:
# Per-attempt efficiency features.
# A quarterback with zero attempts would otherwise produce +/-inf (x / 0);
# mapping 0 attempts to NaN keeps those rows as missing values instead.
attempts = data['Att'].replace(0, np.nan)
yards_per_attempt = data['Yds'] / attempts

data['Yards_per_Attempt'] = yards_per_attempt
data['Touchdowns_per_Attempt'] = data['TD'] / attempts
# NOTE(review): this is the same quantity as Yards_per_Attempt (Yds / Att).
# It is kept because later cells reference the column by name -- confirm
# whether a different statistic was intended here.
data['Passing_Yards_per_Attempt'] = yards_per_attempt
data['Interceptions_per_Attempt'] = data['Int'] / attempts
data['Success_Rate'] = data['Succ%'] / 100  # Assuming Succ% is a percentage
# 'Y/G' is used as loaded; the former `data['Y/G'] = data['Y/G']` was a no-op.


In [None]:
# Columns that are cleaned, scaled and combined into the impact score.
metrics = [
    'Yards_per_Attempt',
    'Touchdowns_per_Attempt',
    'Passing_Yards_per_Attempt',
    'Interceptions_per_Attempt',
    'Success_Rate',
    'Y/G',
]


In [None]:
# Sanitise the metric columns before scaling.
# Order matters: infinities must be turned into NaN *before* fillna(0),
# otherwise the NaNs created from +/-inf are never filled and propagate
# through scaling into the final score.
cleaned_metrics = data[metrics].apply(pd.to_numeric, errors='coerce')  # Coerce strings to NaN
cleaned_metrics = cleaned_metrics.replace([np.inf, -np.inf], np.nan)
data[metrics] = cleaned_metrics.fillna(0)

In [None]:
# Rescale every metric column with MinMaxScaler (default settings) so the
# weighted sum below combines values on a common scale.
scaler = MinMaxScaler()
scaled_metric_values = scaler.fit_transform(data[metrics])
data[metrics] = scaled_metric_values

In [None]:
# Weighted combination of the min-max scaled metrics. The weights sum to 1.0,
# so the score stays in [0, 1] as long as each scaled metric is in [0, 1].
#
# Interceptions are a negative outcome: the scaled rate is inverted (1 - x) so
# that a higher interception rate lowers the score. Adding the raw rate, as
# before, rewarded interceptions.
#
# NOTE(review): Yards_per_Attempt and Passing_Yards_per_Attempt are both
# computed as Yds / Att earlier in the notebook, so yards per attempt carries
# an effective weight of 0.55 -- confirm this is intended.
data['Impact_Score'] = (
    0.4 * data['Yards_per_Attempt'] +      # Focus on yard efficiency
    0.3 * data['Touchdowns_per_Attempt'] +  # Prioritize scoring ability
    0.15 * data['Passing_Yards_per_Attempt'] +  # Adjust for passing contribution
    0.05 * (1 - data['Interceptions_per_Attempt']) +  # Penalize interceptions slightly
    0.05 * data['Success_Rate'] +         # Capture overall play success
    0.05 * data['Y/G']                    # Account for consistency in yardage
)

In [None]:
# Order rows by team (A-Z) and, within each team, by impact score from
# highest to lowest; the first row of every team group is then its
# top-scoring player.
ranked_within_team = data.sort_values(
    by=['Team', 'Impact_Score'],
    ascending=[True, False],
)
top_players_per_team = ranked_within_team.groupby('Team').head(1)

In [None]:
# Report the highest-scoring quarterback for each team. The frame was filtered
# to Pos == 'QB' when it was loaded, so the heading says quarterbacks (it
# previously said "Running Back").
report_columns = [
    'Team', 'Player', 'Att', 'Impact_Score',
    'Yards_per_Attempt', 'Passing_Yards_per_Attempt',
    'Interceptions_per_Attempt', 'Touchdowns_per_Attempt',
    'Success_Rate', 'Y/G',
]
print("Top Impactful Quarterback per Team in 2023 based on Combined Impact Score:\n")
print(top_players_per_team[report_columns])

Top Impactful Running Back per Team in 2023 based on Combined Impact Score:

     Team               Player    Att  Impact_Score  Yards_per_Attempt  \
33   ARI         Kyler Murray   268.0      0.432966           0.453560   
20   ATL       Desmond Ridder   388.0      0.431780           0.493870   
14   BAL        Lamar Jackson   457.0      0.535120           0.543793   
3    BUF           Josh Allen   579.0      0.512412           0.502497   
54   CAR          Andy Dalton    58.0      0.385006           0.420550   
21   CHI        Justin Fields   370.0      0.454582           0.467860   
31   CIN        Jake Browning   243.0      0.522225           0.538316   
34   CLE           Joe Flacco   204.0      0.585870           0.535241   
2    DAL         Dak Prescott   590.0      0.555694           0.517178   
18   DEN       Russell Wilson   447.0      0.504073           0.464055   
1    DET           Jared Goff   605.0      0.515704           0.510945   
6    GNB          Jordan Love   579