In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Show every column when a DataFrame is rendered (None removes the column cap).
pd.options.display.max_columns = None

In [67]:
# Load the per-position dashboard extracts. Each frame exposes the player's
# display name under the shared column `player` and carries a constant
# `position` tag so the three frames can be stacked later.
qb = (
    pd.read_csv('data/dashboard/qb.csv')
    .rename(columns={'player_display_name': 'player'})
    .assign(position='QB')
)
rb = (
    pd.read_csv('data/dashboard/rb.csv')
    .rename(columns={'player_display_name': 'player'})
    .assign(position='RB')
)
wr = (
    pd.read_csv('data/dashboard/wr.csv')
    .rename(columns={'player_display_name': 'player'})
    .assign(position='WR')
)

In [68]:
# Columns kept for each position, in a fixed layout:
#   [player, position, <feature 1>, <feature 2>, <feature 3>, salary_per_year, predicted_salary]
qb_best_features = ['player', 'position', 'fantasy_points', 'passing_tds', 'rushing_yards', 'salary_per_year', 'predicted_salary']
rb_best_features = ['player', 'position', 'fantasy_points', 'rushing_yards','target_share','salary_per_year','predicted_salary']
wr_best_features = ['player', 'position', 'fantasy_points', 'target_share', 'receiving_yards', 'salary_per_year', 'predicted_salary']


def rename_cols(df):
    """Return a copy of `df` with position-agnostic column names.

    The columns at positions 2, 3 and 4 become `feature_1`, `feature_2` and
    `feature_3`; `player` becomes `name` and `salary_per_year` becomes
    `actual_salary`. The renaming is positional, so `df` must already be
    restricted to one of the `*_best_features` layouts above.
    """
    generic_names = dict(zip(df.columns[2:5], ['feature_1', 'feature_2', 'feature_3']))
    generic_names['player'] = 'name'
    generic_names['salary_per_year'] = 'actual_salary'
    return df.rename(columns=generic_names)


# Select the per-position columns, then map them onto the shared schema.
qb = rename_cols(qb[qb_best_features])
rb = rename_cols(rb[rb_best_features])
wr = rename_cols(wr[wr_best_features])

# Stack the three positions into one table with a fresh 0..n-1 index.
full = pd.concat([qb, rb, wr], ignore_index=True)
full.sample(5)

Unnamed: 0,name,position,feature_1,feature_2,feature_3,actual_salary,predicted_salary
200,Lil'Jordan Humphrey,WR,34.2,0.060317,162.0,1.125,1.615891
72,Jeff Wilson,RB,27.9,188.0,0.069597,1.75,1.033106
182,Sterling Shepard,WR,12.3,0.073333,57.0,1.3775,0.783973
113,Laviska Shenault,WR,45.7,0.115523,272.0,1.2925,2.426738
94,Rico Dowdle,RB,79.0,372.0,0.05176,1.255,1.504802


In [69]:
# Round every numeric column to two decimals for display in the dashboard.
numerics = ['feature_1', 'feature_2', 'feature_3', 'actual_salary', 'predicted_salary']
full[numerics] = full[numerics].round(decimals=2)

# Exclude Russell Wilson from the table.
# NOTE(review): the reason for this exclusion is not recorded here - confirm.
full = full[full['name'].ne('Russell Wilson')]
print(full.shape)

# Undervalued players: predicted salary exceeds actual salary by more than 3
# (same units as the salary columns).
salary_gap = full['predicted_salary'] - full['actual_salary']
full[salary_gap > 3]

Unnamed: 0,name,position,feature_1,feature_2,feature_3,actual_salary,predicted_salary
9,Desmond Ridder,QB,109.43,7.0,128.5,0.98,4.37
22,Joshua Dobbs,QB,202.66,13.0,421.0,2.25,12.46
26,Baker Mayfield,QB,324.6,34.0,194.0,33.33,39.17
29,Jake Browning,QB,143.44,12.0,127.0,0.97,9.06
107,Tim Patrick,WR,103.4,0.17,734.0,1.3,6.96
147,Rashid Shaheed,WR,111.6,0.14,719.0,3.09,8.91
192,Mike Evans,WR,229.0,0.25,1450.0,20.5,24.92
224,Josh Reynolds,WR,108.0,0.12,740.0,4.5,7.52


In [65]:
def _age_in_years(birth_dates, as_of):
    """Return age in completed calendar years as of `as_of`.

    Parameters
    ----------
    birth_dates : pd.Series
        Datetime-typed birth dates; NaT entries are allowed.
    as_of : pd.Timestamp
        Reference date the age is measured against.

    Returns
    -------
    pd.Series
        Whole years elapsed since birth, NaN where the birth date is NaT.

    Notes
    -----
    Dividing a day count by 365 ignores leap days, so the age ticks over
    several days before the actual birthday. Comparing calendar fields avoids
    that drift.
    """
    month = birth_dates.dt.month
    day = birth_dates.dt.day
    # True when this year's birthday has not happened yet on `as_of`.
    # Comparisons against NaN are False, so NaT rows stay NaN below.
    birthday_pending = (month > as_of.month) | ((month == as_of.month) & (day > as_of.day))
    return (as_of.year - birth_dates.dt.year) - birthday_pending.astype(int)


players = pd.read_csv('players.csv')

# Make the cell safe to re-run: without this, a second merge would produce
# height_x/height_y columns and the column selection below would raise KeyError.
full = full.drop(columns=['height', 'weight', 'age'], errors='ignore')

# Attach bio columns by display name; players with no match keep NaN values.
full = full.merge(
    players[['display_name', 'birth_date', 'height', 'weight']],
    how='left',
    left_on='name',
    right_on='display_name'
)

# A name matching several rows in players.csv is repeated by the merge; keep
# only the first row per name.
# NOTE(review): this also collapses distinct players who share a name - confirm
# that is acceptable.
full = full.drop_duplicates(subset='name', keep='first')

# Unparseable birth dates become NaT and therefore a NaN age.
full['birth_date'] = pd.to_datetime(full['birth_date'], errors='coerce')
today = pd.Timestamp.today()  # age is measured as of the day the notebook runs
full['age'] = _age_in_years(full['birth_date'], today)

# The merge key and the raw birth date are not needed once age is derived.
full = full.drop(columns=['display_name', 'birth_date'])

full = full[['name', 'height', 'weight', 'age', 'position', 'feature_1', 'feature_2', 'feature_3', 'actual_salary', 'predicted_salary']]
print(full.shape)
full.sample(5)

(227, 10)


Unnamed: 0,name,height,weight,age,position,feature_1,feature_2,feature_3,actual_salary,predicted_salary
80,Rashaad Penny,71.0,220.0,29.0,RB,3.8,33.0,0.05,1.29,0.8
18,Jacoby Brissett,76.0,235.0,32.0,QB,22.86,3.0,19.0,8.0,3.32
38,Derrick Henry,75.0,247.0,31.0,RB,218.66,1167.0,0.08,8.0,8.02
146,A.J. Brown,72.0,226.0,27.0,WR,183.6,0.3,1456.0,32.0,22.2
76,Gus Edwards,73.0,238.0,30.0,RB,182.5,870.0,0.06,3.25,4.33


In [66]:
# Persist the merged table for the dashboard. The DataFrame index is written
# as the first, unnamed column of the file.
full.to_csv('dashboard_data.csv', index=True)

In [70]:
# Meaning of the generic feature_1 / feature_2 / feature_3 columns, by position.

## QB

# feature_1 = fantasy points | feature_2 = passing TDs | feature_3 = rushing yards

## RB

# feature_1 = fantasy points | feature_2 = rushing yards | feature_3 = target share

## WR

# feature_1 = fantasy points | feature_2 = target share | feature_3 = receiving yards