In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [3]:
# Requires pyarrow (parquet engine used by pd.read_parquet); if missing, run: %pip install pyarrow

In [2]:
# Input files, relative to the notebook's working directory.
PLAYER_STATS_PATH = '../data/raw/nflverse/player_stats_season.parquet'
WR_SALARIES_PATH = '../data/raw/salaries/WR_adjusted_salaries.csv'

# Season-level player stats (all positions) and WR contract/salary records.
player_data_full = pd.read_parquet(PLAYER_STATS_PATH)
adj_salaries = pd.read_csv(WR_SALARIES_PATH)

In [3]:
## columns kept for the WR analysis: identifiers plus receiving metrics
wr_features = ['season', 'player_display_name', 'games','receptions', 'targets', 'receiving_yards', 'receiving_tds', 'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa', 'target_share', 'fantasy_points']

## wide receivers only, restricted to the 'REG+POST' season_type rows
is_wr = player_data_full['position'] == 'WR'
is_reg_plus_post = player_data_full['season_type'] == 'REG+POST'

## Select rows and columns in one .loc call and take an explicit copy:
## later cells add columns to `wr`, and assigning into a chained slice of
## `player_data_full` triggers pandas' SettingWithCopyWarning.
wr = player_data_full.loc[is_wr & is_reg_plus_post, wr_features].copy()

## quick look at two random rows
wr.sample(2)

Unnamed: 0,season,player_display_name,games,receptions,targets,receiving_yards,receiving_tds,receiving_yards_after_catch,receiving_first_downs,receiving_epa,target_share,fantasy_points
31937,2023,Gunner Olszewski,2,1,1,0.0,0,0.0,0.0,-6.185573,0.034483,4.0
1208,1999,Alex Van Dyke,1,0,2,0.0,0,0.0,0.0,-2.295286,0.1,0.0


In [4]:
## Drop each player's earliest contract in the salary data, treating it as the rookie deal.
## NOTE(review): this assumes the first contract on record for every player is the
## rookie contract -- confirm the salary file covers each player's full history.

# Earliest signing year per player, one row per player.
rookie_years = (
    adj_salaries
    .groupby('player', as_index=False)['year_signed']
    .min()
    .rename(columns={'year_signed': 'rookie_year'})
)

# Attach that year to every contract, then keep only contracts signed after it.
adj_salaries = (
    adj_salaries
    .merge(rookie_years, on='player')
    .loc[lambda contracts: contracts['year_signed'] > contracts['rookie_year']]
)

In [7]:
## Join each contract's salary to the seasons played before that contract was signed.
## e.g. Aaron Rodgers' 2018 contract is joined to seasons 2013-2017, his 2013 contract to seasons 2008-2012, etc.

def next_salary(player_id, year, contract_df):
    """Return the `apy` of the first contract a player signed after `year`.

    Parameters
    ----------
    player_id : value compared against the `player` column of `contract_df`.
    year : season year; only contracts with `year_signed` strictly greater
        than this are considered.
    contract_df : DataFrame with `player`, `year_signed` and `apy` columns.

    Returns
    -------
    The `apy` value of the matching contract with the smallest `year_signed`,
    or `np.nan` when the player has no contract signed after `year`.
    """
    same_player = contract_df['player'] == player_id
    signed_later = contract_df['year_signed'] > year
    upcoming = contract_df[same_player & signed_later]

    # No later contract on record for this player.
    if len(upcoming) == 0:
        return np.nan

    # Earliest signing year first; its salary per year is the answer.
    earliest = upcoming.sort_values('year_signed').iloc[0]
    return earliest['apy']
    

def next_salary_smoothed(player_id, year, contract_df, type):
    """Return an adjusted salary figure from the first contract signed after `year`.

    Parameters
    ----------
    player_id : value compared against the `player` column of `contract_df`.
    year : season year; only contracts with `year_signed` strictly greater
        than this are considered.
    contract_df : DataFrame with `player` and `year_signed` columns plus the
        adjusted-salary column selected by `type`.
    type : {'smoothed', 'mean', 'median'}
        Which column to read: `smooth_adjusted_apy`, `mean_adjusted_apy` or
        `med_adjusted_apy` respectively. (The name shadows the builtin but is
        kept because it is part of the function's signature.)

    Returns
    -------
    The selected column's value from the matching contract with the smallest
    `year_signed`, or `np.nan` when no contract was signed after `year`.

    Raises
    ------
    ValueError
        If `type` is not one of the supported options. Previously an unknown
        value fell through every branch and silently returned None.
    """
    column_for_type = {
        'smoothed': 'smooth_adjusted_apy',
        'mean': 'mean_adjusted_apy',
        'median': 'med_adjusted_apy',
    }
    if type not in column_for_type:
        raise ValueError(
            f"unknown salary type {type!r}; expected one of {sorted(column_for_type)}"
        )

    future_contracts = contract_df[(contract_df['player'] == player_id) &
                                   (contract_df['year_signed'] > year)]
    if len(future_contracts) == 0:
        return np.nan

    # Earliest signing year first: that row is the player's next contract.
    next_contract = future_contracts.sort_values('year_signed').iloc[0]
    return next_contract[column_for_type[type]]

In [8]:
### For every row of `wr`, look up the player's next contract and attach its salary figures.

# Unadjusted salary per year from the next contract.
wr['salary_per_year'] = wr.apply(
    lambda season_row: next_salary(
        season_row['player_display_name'], season_row['season'], adj_salaries
    ),
    axis=1,
)

# Adjusted variants: output column -> `type` argument passed to next_salary_smoothed.
adjusted_salary_columns = {
    'smoothed_salary_per_year': 'smoothed',
    'mean_adj_salary_per_year': 'mean',
    'med_adj_salary_per_year': 'median',
}
for column_name, salary_type in adjusted_salary_columns.items():
    wr[column_name] = wr.apply(
        lambda season_row: next_salary_smoothed(
            season_row['player_display_name'],
            season_row['season'],
            adj_salaries,
            salary_type,
        ),
        axis=1,
    )

In [9]:
# Rename the contract columns to the names used in `wr` so both frames share join keys.
adj_salaries = adj_salaries.rename(
    columns={
        'player': 'player_display_name',
        'smooth_adjusted_apy': 'smoothed_salary_per_year',
    }
)

# Left-join on (player, smoothed salary) to bring `year_signed` of the matched
# contract onto each `wr` row; rows without a match keep NaN.
# NOTE(review): the key includes a float salary column, and a player with two
# contracts sharing the same smoothed salary would duplicate `wr` rows --
# confirm the row count is unchanged by this merge.
join_keys = ['player_display_name', 'smoothed_salary_per_year']
wr = wr.merge(
    adj_salaries[join_keys + ['year_signed']],
    on=join_keys,
    how='left',
)

In [10]:
# (rows, columns) of `wr` after the contract merge.
wr.shape

(5277, 17)

In [11]:
# Training set: rows whose matched contract was signed before 2024 and that
# have a known salary_per_year (rows with NaN year_signed are excluded by the comparison).
signed_before_2024 = wr['year_signed'].lt(2024)
wr_train_data = wr.loc[signed_before_2024].dropna(subset=['salary_per_year'])
wr_train_data.to_csv('../data/cleaned/wr_train.csv', index=False)

In [12]:
# Test set: rows whose matched contract was signed in 2024 and that have a
# known salary_per_year.
signed_in_2024 = wr['year_signed'].eq(2024)
wr_test_data = wr.loc[signed_in_2024].dropna(subset=['salary_per_year'])
wr_test_data.to_csv('../data/cleaned/wr_test.csv', index=False)