In [162]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Show every column when a DataFrame is rendered (None removes the column cap).
pd.options.display.max_columns = None

In [94]:
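# Prerequisite: pd.read_parquet needs a parquet engine such as pyarrow.
# If it is missing from this environment, run: %pip install pyarrow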

In [165]:
# Season-level player statistics (parquet file from the nflverse raw-data folder).
player_data_full = pd.read_parquet(
    path='../data/raw/nflverse/player_stats_season.parquet',
)

# QB contract table; later cells read its 'player', 'year_signed', 'apy' and
# 'smooth_adjusted_apy' columns.
adj_salaries = pd.read_csv(
    filepath_or_buffer='../data/raw/salaries/QB_adjusted_salaries.csv',
)

In [166]:
## Keep quarterback rows only, restricted to the 'REG+POST' season_type
## (i.e. the rows that include postseason play, not just the regular season).
is_qb_reg_post = (
    (player_data_full['position'] == 'QB')
    & (player_data_full['season_type'] == 'REG+POST')
)

## filtering for relevant qb metrics
qb_features = [
    'season', 'player_display_name', 'games',
    'completions', 'attempts', 'passing_yards', 'passing_tds',
    'interceptions', 'sacks', 'sack_fumbles', 'passing_first_downs',
    'passing_epa', 'passing_2pt_conversions', 'pacr', 'dakota',
    'rushing_yards', 'rushing_tds', 'fantasy_points',
]

## One .loc selection (rows + columns) followed by .copy(): `qb` gets new
## columns assigned further down, and assigning into a chained slice of
## player_data_full raises SettingWithCopyWarning on pandas versions without
## copy-on-write.
qb = player_data_full.loc[is_qb_reg_post, qb_features].copy()
qb.sample(2)

Unnamed: 0,season,player_display_name,games,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_fumbles,passing_first_downs,passing_epa,passing_2pt_conversions,pacr,dakota,rushing_yards,rushing_tds,fantasy_points
18135,2013,Carson Palmer,16,362,572,4274.0,24,22.0,41.0,6,204.0,12.825727,0,0.811622,0.092944,3.0,0,217.26
15593,2011,Eli Manning,20,465,752,6152.0,38,17.0,39.0,7,277.0,135.870313,0,0.863196,0.135483,35.0,1,365.58


In [150]:
## Removing rookie contracts from the salary data.
## A player's earliest 'year_signed' in the table is treated as the rookie
## deal -- NOTE(review): this assumes every player's first recorded contract
## really is their rookie contract; confirm against the salary source.
##
## The cell is safe to re-run: 'rookie_year' is computed only once, from the
## unfiltered table. Recomputing it on already-filtered data would strip each
## player's *next* contract as well, and re-merging a second 'rookie_year'
## column would produce suffixed columns and a KeyError.
## NOTE(review): this cell must run after the salary table is loaded and
## before the next-contract lookups, otherwise rookie deals stay in the data.
if 'rookie_year' not in adj_salaries.columns:
    adj_salaries = adj_salaries.assign(
        # per-row broadcast of each player's earliest signing year
        rookie_year=adj_salaries.groupby('player')['year_signed'].transform('min')
    )

# Keep only contracts signed strictly after the player's first recorded one.
adj_salaries = adj_salaries.loc[
    adj_salaries['year_signed'] > adj_salaries['rookie_year']
].copy()

In [168]:
## Join each contract's salary to the seasons played *before* that contract was signed.
## e.g. Aaron Rodgers' 2018 contract is attached to seasons 2013-2017, his 2013 contract to seasons 2008-2012, etc.

def _next_contract_value(player_id, year, contract_df, value_col):
    """Return ``value_col`` from the first contract ``player_id`` signed after ``year``.

    Shared implementation behind ``next_salary`` and ``next_salary_smoothed``.

    Parameters
    ----------
    player_id : value compared against ``contract_df['player']``.
    year : season of play; only contracts with ``year_signed > year`` qualify
        (a contract signed in the same year is NOT considered).
    contract_df : DataFrame with 'player', 'year_signed' and ``value_col`` columns.
    value_col : name of the column whose value is returned.

    Returns
    -------
    The ``value_col`` entry of the earliest qualifying contract, or ``np.nan``
    when the player has no contract signed after ``year``.
    """
    signed_later = contract_df[(contract_df['player'] == player_id) &
                               (contract_df['year_signed'] > year)]
    if signed_later.empty:
        return np.nan

    # Earliest contract signed after `year`. A stable sort keeps the original
    # row order among contracts signed in the same year, so ties always
    # resolve to the same row.
    earliest = signed_later.sort_values('year_signed', kind='stable').iloc[0]
    return earliest[value_col]


def next_salary(player_id, year, contract_df):
    """Return 'apy' of the first contract ``player_id`` signed after ``year``.

    Returns ``np.nan`` when no such contract exists in ``contract_df``.
    """
    return _next_contract_value(player_id, year, contract_df, 'apy')


def next_salary_smoothed(player_id, year, contract_df):
    """Return 'smooth_adjusted_apy' of the first contract signed after ``year``.

    Same lookup as ``next_salary`` but reads the 'smooth_adjusted_apy' column.
    Returns ``np.nan`` when no such contract exists in ``contract_df``.
    """
    return _next_contract_value(player_id, year, contract_df, 'smooth_adjusted_apy')


In [169]:
### For every row of qb, look up the player's next contract (by display name
### and season) and store the resulting salary figures as two new columns.

def _salary_for_row(row, lookup):
    """Call ``lookup`` with this row's player name and season against adj_salaries."""
    return lookup(row['player_display_name'], row['season'], adj_salaries)


qb['salary_per_year'] = qb.apply(_salary_for_row, axis=1, lookup=next_salary)

qb['smoothed_salary_per_year'] = qb.apply(
    _salary_for_row, axis=1, lookup=next_salary_smoothed
)

In [173]:
#qb[qb.season == 2024]

In [153]:
# Spot check: three random Aaron Rodgers seasons with their joined salaries.
qb.loc[qb['player_display_name'].eq('Aaron Rodgers')].sample(3)

Unnamed: 0,season,player_display_name,games,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_fumbles,passing_first_downs,passing_epa,passing_2pt_conversions,pacr,dakota,rushing_yards,rushing_tds,fantasy_points,salary_per_year,smoothed_salary_per_year
8549,2005,Aaron Rodgers,3,9,16,65.0,0,1.0,3.0,2,3.0,-16.317948,0,0.0,0.046418,7.0,0,-2.7,12.704,2.17744
13168,2009,Aaron Rodgers,17,378,583,4856.0,34,8.0,54.0,8,214.0,134.679478,3,0.898427,0.163792,317.0,6,379.94,22.0,3.984133
10815,2007,Aaron Rodgers,2,20,28,218.0,1,0.0,3.0,0,9.0,7.166248,0,1.345679,0.218119,29.0,0,15.62,12.704,2.17744


In [160]:
# Training set: seasons up to and including 2023, keeping only rows that have
# a salary_per_year value.
# NOTE(review): the output filename says "2022" while the cutoff is 2023 --
# confirm which one is intended.
qb_train_data = (
    qb.loc[qb['season'].le(2023)]
    .dropna(subset=['salary_per_year'])
)
qb_train_data.to_csv('../data/final/qb_train_updated_2022.csv', index=False)

In [161]:
# Test set: seasons after 2023, keeping only rows that have a
# salary_per_year value.
# NOTE(review): the output filename says "2022" while the cutoff is 2023 --
# confirm which one is intended.
qb_test_data = (
    qb.loc[qb['season'].gt(2023)]
    .dropna(subset=['salary_per_year'])
)
qb_test_data.to_csv('../data/final/qb_test_updated_2022.csv', index=False)

In [145]:
# Eyeball seven random rows of the exported test set.
qb_test_data.sample(n=7)

Unnamed: 0,season,player_display_name,games,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_fumbles,passing_first_downs,passing_epa,passing_2pt_conversions,pacr,dakota,rushing_yards,rushing_tds,fantasy_points,salary_per_year,smoothed_salary_per_year
33186,2024,Sam Darnold,18,386,585,4564.0,36,13.0,57.0,6,218.0,21.623014,0,0.908078,0.105089,231.0,1,319.66,33.5,6.579258
33391,2024,Chris Oladokun,1,0,0,0.0,0,0.0,1.0,1,0.0,-1.389833,0,,,5.0,0,0.5,0.84,-0.448178
33457,2024,Tommy DeVito,2,31,44,257.0,0,0.0,6.0,0,15.0,-8.008932,0,0.977186,0.055948,32.0,0,13.48,1.03,-0.407295
33236,2024,Daniel Jones,10,216,341,2070.0,8,7.0,29.0,4,106.0,-33.381812,0,0.82503,0.052998,265.0,2,135.3,14.0,2.383453
33102,2024,Nick Mullens,4,2,2,38.0,0,0.0,0.0,0,2.0,4.868726,0,1.583333,,-2.0,0,1.32,2.25,-0.144789
33392,2024,Skylar Thompson,2,21,33,187.0,0,0.0,6.0,2,8.0,-15.511025,0,0.813043,0.049576,4.0,0,7.88,1.1,-0.392234
33072,2024,Marcus Mariota,5,34,44,364.0,4,0.0,3.0,0,22.0,19.813352,0,0.892157,0.261795,93.0,1,45.86,8.0,1.092436
