In [2]:
import pandas as pd

# Prompt the user for the season year. Surrounding whitespace is stripped so an
# accidental leading/trailing space does not end up inside the file name.
year_input = input("Enter the NFL passing stat year: ").strip()

# Construct the CSV file path based on user input
csv_file_path = f"nfl_passing_data{year_input}.csv"

# Read the CSV file into a DataFrame.
# NOTE: if any handler below fires, `df` is not (re)assigned by this cell, so
# later cells will either fail or silently reuse a `df` left over from a
# previous run of the kernel.
try:
    df = pd.read_csv(csv_file_path)

    # Display the DataFrame
    print(df)

except FileNotFoundError:
    print(f"CSV file for the year {year_input} not found.")
except pd.errors.EmptyDataError:
    print(f"CSV file for the year {year_input} is empty.")
except pd.errors.ParserError:
    print(f"Error parsing CSV file for the year {year_input}.")
except Exception as e:
    # Last-resort handler: report anything not covered by the cases above.
    print(f"An unexpected error occurred: {e}")


Enter the NFL passing stat year: 2020
      Rk            Player   Tm  Age Pos   G  GS   QBrec  Cmp  Att  ...  \
0      1   Deshaun Watson*  HOU   25  QB  16  16  4-12-0  382  544  ...   
1      2  Patrick Mahomes*  KAN   25  QB  15  15  14-1-0  390  588  ...   
2      3         Tom Brady  TAM   43  QB  16  16  11-5-0  401  610  ...   
3      4         Matt Ryan  ATL   35  QB  16  16  4-12-0  407  626  ...   
4      5       Josh Allen*  BUF   24  QB  16  16  13-3-0  396  572  ...   
..   ...               ...  ...  ...  ..  ..  ..     ...  ...  ...  ...   
107  108        Brett Kern  TEN   34   P  13   0     NaN    0    1  ...   
108  109        D.J. Moore  CAR   23  WR  15  14     NaN    0    1  ...   
109  110       Zach Pascal  IND   26  WR  16  14     NaN    0    1  ...   
110  111     Sammy Watkins  KAN   27  WR  10   9     NaN    0    1  ...   
111  112     Isaiah Wright  WAS   23  WR  14   6     NaN    0    1  ...   

       Y/G   Rate   QBR  Sk  Yds.1  Sk%  NY/A  ANY/A  4QC  GW

In [3]:

df_copy = df.copy()  # Work on a copy so the originally loaded frame stays untouched

# Fill NaN values in the 'QBrec' column with 'DNS'.
# The result is assigned back explicitly: calling fillna(..., inplace=True) on
# the selected column is chained assignment, which pandas deprecates and which
# does not update `df_copy` when Copy-on-Write is enabled.
df_copy['QBrec'] = df_copy['QBrec'].fillna('DNS')

df_copy

Unnamed: 0,Rk,Player,Tm,Age,Pos,G,GS,QBrec,Cmp,Att,...,Y/G,Rate,QBR,Sk,Yds.1,Sk%,NY/A,ANY/A,4QC,GWD
0,1,Deshaun Watson*,HOU,25,QB,16,16,4-12-0,382,544,...,301.4,112.4,63.7,49,293,8.3,7.64,8.22,,
1,2,Patrick Mahomes*,KAN,25,QB,15,15,14-1-0,390,588,...,316.0,108.2,78.1,22,147,3.6,7.53,8.33,3.0,3.0
2,3,Tom Brady,TAM,43,QB,16,16,11-5-0,401,610,...,289.6,102.2,66.0,21,143,3.3,7.12,7.53,3.0,3.0
3,4,Matt Ryan,ATL,35,QB,16,16,4-12-0,407,626,...,286.3,93.3,59.8,41,257,6.1,6.48,6.52,,
4,5,Josh Allen*,BUF,24,QB,16,16,13-3-0,396,572,...,284.0,107.2,76.6,26,159,4.3,7.33,7.82,2.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,108,Brett Kern,TEN,34,P,13,0,DNS,0,1,...,0.0,39.6,0.0,0,0,0.0,0.00,0.00,,
108,109,D.J. Moore,CAR,23,WR,15,14,DNS,0,1,...,0.0,39.6,,0,0,0.0,0.00,0.00,,
109,110,Zach Pascal,IND,26,WR,16,14,DNS,0,1,...,0.0,39.6,1.8,0,0,0.0,0.00,0.00,,
110,111,Sammy Watkins,KAN,27,WR,10,9,DNS,0,1,...,0.0,0.0,0.0,0,0,0.0,0.00,-45.00,,


In [29]:
# Display the top 10 records (name and QBrec) in their original format

# Requires the dataframe 'df' loaded earlier in the notebook.
# Build a fresh working copy whose 'QBrec' column is converted to strings
# (missing values therefore become the text 'nan').
df_copy = df.assign(QBrec=lambda frame: frame['QBrec'].astype(str))
# Define a custom sorting key function
def custom_sort_key(record):
    """Turn a 'W-L-T' record string into a tuple usable as a sort key.

    Returns (wins - losses, wins, ties). The placeholder strings 'nan' and
    'DNS' map to (-9999, -9999, -9999).
    """
    if record in ('nan', 'DNS'):
        return (-9999,) * 3

    wins, losses, ties = [int(part) for part in record.split('-')]
    margin = wins - losses
    return (margin, wins, ties)

# Attach a sortable tuple to every row, derived from its 'QBrec' string
df_copy['SortKey'] = df_copy['QBrec'].apply(custom_sort_key)

# Order the rows from the largest sort key to the smallest
sorted_df = df_copy.sort_values('SortKey', ascending=False)

# Keep only the player name and record for the first ten rows, then print
# them without the index column
top_10_records = sorted_df[['Player', 'QBrec']].head(10)
print("Top 10 Records (Best to Worst):")
print(top_10_records.to_string(index=False))






Top 10 Records (Best to Worst):
            Player  QBrec
  Patrick Mahomes* 14-1-0
       Josh Allen* 13-3-0
   Aaron Rodgers*+ 13-3-0
Ben Roethlisberger 12-3-0
   Russell Wilson* 12-4-0
     Lamar Jackson 11-4-0
    Baker Mayfield 11-5-0
    Ryan Tannehill 11-5-0
         Tom Brady 11-5-0
     Philip Rivers 11-5-0


In [26]:
# Display the best player overall, the best player under 25, the best player aged 25 to 29, and the best player aged 30 or over


# Define a custom sorting key function
def custom_sort_key(record):
    """Build a sort key from a row's 'QBrec' string and its 'Age'.

    Returns (wins - losses, wins, ties, age). Rows whose 'QBrec' is the
    placeholder 'nan' or 'DNS' map to (-9999, -9999, -9999, -9999).
    """
    qb_record = record['QBrec']
    if qb_record in ('nan', 'DNS'):
        return (-9999,) * 4

    wins, losses, ties = [int(part) for part in qb_record.split('-')]
    return (wins - losses, wins, ties, record['Age'])

# Calculate the custom sorting key for each row
df_copy['SortKey'] = df_copy.apply(custom_sort_key, axis=1)

# Sort the DataFrame by 'SortKey' in descending order (best record first)
sorted_df = df_copy.sort_values(by='SortKey', ascending=False)


def _report_best(heading, candidates):
    """Print the first row of `candidates` under `heading` and return it.

    `candidates` is expected to be already ordered best-first. When it has no
    rows, a short notice is printed and None is returned instead of raising
    IndexError from `.iloc[0]`.
    """
    print(heading)
    if candidates.empty:
        print("No qualifying players.")
        return None
    best = candidates.iloc[0]
    print("Player:", best['Player'])
    print("QBrec:", best['QBrec'])
    return best


# Best player under 25
best_under_25 = _report_best(
    "Best Player Under 25:",
    sorted_df[sorted_df['Age'] < 25],
)

# Best player aged 25 to 29 (25 inclusive, 30 exclusive)
best_under_30 = _report_best(
    "\nBest Player Under 30:",
    sorted_df[(sorted_df['Age'] >= 25) & (sorted_df['Age'] < 30)],
)

# Best player aged 30 or older
best_over_30 = _report_best(
    "\nBest Player Over 30:",
    sorted_df[sorted_df['Age'] >= 30],
)

# Best player regardless of age
best_overall = _report_best("\nBest Overall Player:", sorted_df)




Best Player Under 25:
Player: Josh Allen*
QBrec: 13-3-0

Best Player Under 30:
Player: Patrick Mahomes*
QBrec: 14-1-0

Best Player Over 30:
Player: Aaron Rodgers*+
QBrec: 13-3-0

Best Overall Player:
Player: Patrick Mahomes*
QBrec: 14-1-0


In [43]:

import plotly.express as px


# Filter the DataFrame to include only rows with position 'QB' whose 'GS' value is greater than 10
# NOTE(review): 'GS' is presumably games started rather than games played ('G') — confirm which one is intended
df_filtered = df[(df['GS'] > 10) & (df['Pos'] == 'QB')]

# Create the categories
def categorize_yards(yards):
    """Return the label of the yardage band that `yards` falls into.

    Bands: above 4000, 3000 to 4000 inclusive, 2000 up to (not including)
    3000, and everything else.
    """
    # Guard clauses run from the highest band down, so each check only needs
    # the band's lower bound.
    if yards > 4000:
        return 'Over 4000'
    if yards >= 3000:
        return '3000-4000'
    if yards >= 2000:
        return '2000-3000'
    return 'Below 2000'



# Fixed colour for each yardage band returned by categorize_yards
color_mapping = dict([
    ('Over 4000', 'red'),
    ('3000-4000', 'blue'),
    ('2000-3000', 'green'),
    ('Below 2000', 'orange'),
])

# Scatter of completions (x) against yards (y); each point is coloured by its
# yardage band and shows the player's name on hover
fig = px.scatter(
    df_filtered,
    x='Cmp',
    y='Yds',
    color=df_filtered['Yds'].apply(categorize_yards),
    color_discrete_map=color_mapping,
    hover_data=['Player'],
    title='Scatter Plot of Number of Yards vs Pass Completion',
)

# Axis titles
fig.update_layout(xaxis_title='Cmp', yaxis_title='Number of Yards')

# Render the figure
fig.show()



In [28]:
# top Players by Win-Loss Ratio (with more than 10 games played)


# Assuming df_copy is your DataFrame

def calculate_win_loss_ratio(row):
    """Return wins / (wins + losses) for a row, or None when it does not qualify.

    Parameters
    ----------
    row : mapping-like with 'QBrec' (a 'W-L-T' string) and 'G' entries.

    Returns
    -------
    float or None
        None is returned when 'QBrec' or 'G' is missing, when 'QBrec' is the
        'DNS' placeholder, when 'G' is 10 or fewer, when the record cannot be
        parsed into three integers, or when wins + losses is zero (which would
        otherwise raise ZeroDivisionError). Ties are not part of the ratio.
    """
    record, games = row['QBrec'], row['G']
    if pd.isna(record) or record == 'DNS' or pd.isna(games):
        return None

    # Only rows with more than 10 games are ranked
    if int(games) <= 10:
        return None

    try:
        wins, losses, _ties = map(int, record.split('-'))
    except ValueError:
        # Not a three-part integer record (e.g. the text 'nan')
        return None

    decided_games = wins + losses
    if decided_games == 0:
        # An all-ties record has no defined win-loss ratio
        return None

    # Win-Loss Ratio = Wins / (Wins + Losses)
    return wins / decided_games

# Compute the ratio row by row; rows that do not qualify end up missing
df_copy['Win_Loss_Ratio'] = df_copy.apply(calculate_win_loss_ratio, axis=1)

# Drop rows without a ratio, order best-first and keep the first ten
top_win_loss_players = (
    df_copy
    .dropna(subset=['Win_Loss_Ratio'])
    .sort_values(by='Win_Loss_Ratio', ascending=False)
    .head(10)
)
print("Top Quarterback by Win-Loss Ratio (with more than 10 games played):")
print(top_win_loss_players.loc[:, ['Player', 'Win_Loss_Ratio']])





Top Quarterback by Win-Loss Ratio (with more than 10 games played):
                Player  Win_Loss_Ratio
1     Patrick Mahomes*        0.933333
4          Josh Allen*        0.812500
6      Aaron Rodgers*+        0.812500
15  Ben Roethlisberger        0.800000
39         Taysom Hill        0.750000
19          Drew Brees        0.750000
8      Russell Wilson*        0.750000
21       Lamar Jackson        0.733333
17      Baker Mayfield        0.687500
14      Ryan Tannehill        0.687500


In [31]:
# Rank quarterbacks by the 'Rate' column, restricted to rows with 'GS' > 10.
# Note the data also has a separate 'QBR' column; this cell does not use it,
# so the printed heading refers to "QB Rate" rather than "QBR".

# Define the filtering conditions: 'GS' above 10 and position 'QB'
filter_condition = (df['GS'] > 10) & (df['Pos'] == 'QB')

# Apply the filter and order by 'Rate', highest first
filtered_and_ranked_players = df[filter_condition].sort_values(by='Rate', ascending=False)

# Select columns for comparison
comparison_columns = ['Player', 'Cmp%', 'Yds', 'TD', 'Int', 'Rate']

# Display top players and their performance metrics for comparison
top_players_comparison = filtered_and_ranked_players.head(10)
print("QB Rate Comparison for > 10 Games:")
print(top_players_comparison[comparison_columns])




QBR Comparison for > 10 Games:
              Player  Cmp%   Yds  TD  Int   Rate
6    Aaron Rodgers*+  70.7  4299  48    5  121.5
0    Deshaun Watson*  70.2  4823  33    7  112.4
1   Patrick Mahomes*  66.3  4740  38    6  108.2
4        Josh Allen*  69.2  4544  37   10  107.2
14    Ryan Tannehill  65.5  3819  33    7  106.5
19        Drew Brees  70.5  2942  24    6  106.4
8    Russell Wilson*  68.8  4212  40   13  105.1
7       Kirk Cousins  67.6  4265  35   13  105.0
2          Tom Brady  65.7  4633  40   12  102.2
10        Derek Carr  67.3  4103  27    9  101.4


In [45]:

import plotly.express as px

# 'Tm' is the team column, 'Pos' the player position, and 'QBrec' the QB
# record as a 'W-L-T' string, e.g. '12-1-1'.
#
# Earlier cells may have replaced missing 'QBrec' values in df_copy with
# placeholder text ('DNS' via fillna, or 'nan' via astype(str)), so notna()
# alone cannot identify rows with a real record, and a plain astype(float)
# fails on 'DNS'. Parse the wins first and treat anything non-numeric as
# missing.
qb_wins = pd.to_numeric(
    df_copy['QBrec'].astype(str).str.split('-').str[0],
    errors='coerce',
)

# Keep quarterbacks that have a parseable record, with only the needed columns.
# .loc plus assign yields an independent frame, avoiding assignment onto a
# chained slice.
has_record = (df_copy['Pos'] == 'QB') & qb_wins.notna()
team_df = (
    df_copy
    .loc[has_record, ['Tm', 'Yds', 'QBrec']]
    .assign(Wins=qb_wins[has_record].astype(float))
)

# Group by team and sum the passing yards and wins for each team
team_passing_yards = team_df.groupby('Tm')['Yds'].sum().reset_index()
team_wins = team_df.groupby('Tm')['Wins'].sum().reset_index()

# Merge the two dataframes on the 'Tm' column
team_analysis_df = pd.merge(team_passing_yards, team_wins, on='Tm')

# Sort the DataFrame based on total passing yards
team_analysis_df = team_analysis_df.sort_values(by='Yds', ascending=False)

# Create a scatter plot to visualize the correlation between passing yards and wins
fig = px.scatter(team_analysis_df, x='Yds', y='Wins', text='Tm', title='Team Analysis: Passing Yards vs Wins for QBs',
                 labels={'Yds': 'Total Passing Yards', 'Wins': 'Total Wins'},
                 trendline='ols')  # Adding a trendline directly here

# Show the plot
fig.show()


In [40]:

# 'Tm' is the team column, 'Pos' the player position, and 'QBrec' the QB
# record as a 'W-L-T' string, e.g. '12-1-1'.
qb_columns = ['Player', 'Age', 'Tm', 'Yds', 'TD', 'Att', 'G', 'Cmp', 'Cmp%', 'TD%', 'QBR', 'Int', 'Sk', 'QBrec']

# Keep quarterbacks that have a record. A single .loc selection followed by
# .copy() gives an independent frame, so adding a column below does not write
# onto a chained slice of `df`.
has_qb_record = (df['Pos'] == 'QB') & df['QBrec'].notna()
team_df = df.loc[has_qb_record, qb_columns].copy()

# Extract the number of wins (the part before the first '-') as an integer
team_df['Total Wins'] = team_df['QBrec'].str.split('-').str[0].astype(int)

# Rename the selected columns to descriptive headings
# ('QBR' and the other unlisted columns keep their names)
team_df = team_df.rename(columns={
    'Tm': 'Team',
    'Yds': 'Total Passing Yards',
    'TD': 'Total Touchdowns',
    'Att': 'Attempts',
    'G': 'Games Played',
    'Cmp': 'Completions',
    'Cmp%': 'Completion Percentage',
    'TD%': 'Touchdown Percentage',
    'Int': 'Total Interceptions',
    'Sk': 'Total Sacks'
})

# Display the resulting DataFrame
print(team_df.head(10))


             Player  Age Team  Total Passing Yards  Total Touchdowns  \
0   Deshaun Watson*   25  HOU                 4823                33   
1  Patrick Mahomes*   25  KAN                 4740                38   
2         Tom Brady   43  TAM                 4633                40   
3         Matt Ryan   35  ATL                 4581                26   
4       Josh Allen*   24  BUF                 4544                37   
5    Justin Herbert   22  LAC                 4336                31   
6   Aaron Rodgers*+   37  GNB                 4299                48   
7      Kirk Cousins   32  MIN                 4265                35   
8   Russell Wilson*   32  SEA                 4212                40   
9     Philip Rivers   39  IND                 4169                24   

   Attempts  Games Played  Completions  Completion Percentage  \
0       544            16          382                   70.2   
1       588            15          390                   66.3   
2       610 

In [42]:
# Write team_df to an Excel workbook in the working directory, leaving out the
# index column
team_df.to_excel(
    excel_writer='nfl_passing_data_modified.xlsx',
    index=False,
)
