In [1]:
import numpy as np
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt 
import dython.nominal as dy
import warnings
import nfl_data_py as nfl
from sklearn.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

warnings.filterwarnings('ignore')

# WR Model

**Prepare data for use**

Pull WR fantasy data from scraped CSV file

In [2]:
# Read only the columns this notebook needs, give them the names used by the
# later cells, and keep the wide-receiver rows.
fantasy00to22 = (
    pd.read_csv(
        'Data/fantasy00to22.csv',
        usecols=["DKPt", "Name", "PFR_ID", "Position", "Season"],
    )
    .rename(columns={'DKPt': 'PPR_Fantasy_Points', "PFR_ID": 'pfr_id'})
    .loc[lambda rows: rows['Position'] == 'WR']
)
fantasy00to22.head()

Unnamed: 0,PPR_Fantasy_Points,Name,pfr_id,Position,Season
78,12.9,Randy Moss,MossRa00,WR,2000
79,21.6,Randy Moss,MossRa00,WR,2000
80,4.0,Randy Moss,MossRa00,WR,2000
81,44.7,Randy Moss,MossRa00,WR,2000
82,25.8,Randy Moss,MossRa00,WR,2000


Use the average of the median and the mean of a player's fantasy points as the metric for that player's season

$$
    FanScore^{season} = \frac{FanPoints^{season}_{med} + FanPoints^{season}_{mean}}{2}
$$

In [3]:
# One pass over the player/season groups yields both the mean and the median
# of the fantasy points; the season score is the midpoint of those two.
fantasySeasonData00to22 = (
    fantasy00to22
    .groupby(["Name", "pfr_id", "Position", "Season"], as_index=False)
    .agg(
        Avg_PPR_Fantasy_Points=("PPR_Fantasy_Points", "mean"),
        Med_PPR_Fantasy_Points=("PPR_Fantasy_Points", "median"),
    )
    .assign(
        NFLFanScore=lambda seasons: (
            seasons["Avg_PPR_Fantasy_Points"] + seasons["Med_PPR_Fantasy_Points"]
        ) / 2
    )
)
fantasySeasonData00to22.head()

Unnamed: 0,Name,pfr_id,Position,Season,Avg_PPR_Fantasy_Points,Med_PPR_Fantasy_Points,NFLFanScore
0,A.J. Brown,BrowAJ00,WR,2019,14.50625,12.15,13.328125
1,A.J. Brown,BrowAJ00,WR,2020,18.607143,18.7,18.653571
2,A.J. Brown,BrowAJ00,WR,2021,14.607692,9.8,12.203846
3,A.J. Brown,BrowAJ00,WR,2022,18.623529,17.0,17.811765
4,A.J. Green,GreeA.00,WR,2011,15.333333,16.1,15.716667


I am assuming here that a successful fantasy career is one that has 5 good years (this obviously depends on the position).

Take top 5 seasons and average them

In [4]:
# Career score per player: the mean of that player's five highest season
# scores (nlargest simply returns every season when there are fewer than five).
fantasySeasonData00to22Top5 = (
    fantasySeasonData00to22
    .groupby(["Name", "pfr_id", "Position"], as_index=False)['NFLFanScore']
    .apply(lambda season_scores: season_scores.nlargest(5).mean())
)
fantasySeasonData00to22Top5.head()

Unnamed: 0,Name,pfr_id,Position,NFLFanScore
0,A.J. Brown,BrowAJ00,WR,15.499327
1,A.J. Green,GreeA.00,WR,19.505631
2,A.J. Jenkins,JenkA.00,WR,3.346667
3,Aaron Burbridge,BurbAa00,WR,3.18
4,Aaron Dobson,DobsAa00,WR,4.953283


Pull scraped rookie WR data from CSV and calculate FantasyPoints

In [5]:
# Score every row of the college log: 1 point per reception, 0.1 per
# receiving/rushing yard and 6 per receiving/rushing touchdown, then keep only
# the identifying columns plus the computed score.
rookies00to22 = (
    pd.read_csv('Data/collegeWR_stats.csv')
    .assign(
        FanPts=lambda log: (
            log["Rec"]
            + ((log["Rec_Yds"] + log["Rush_Yds"]) * 0.1)
            + ((log["Rec_TD"] + log["Rush_TD"]) * 6)
        )
    )
    .loc[:, ["Year", "cfb_id", "Position", "Name", "FanPts"]]
)
rookies00to22.head()

Unnamed: 0,Year,cfb_id,Position,Name,FanPts
0,2000.0,corey-brown-1,WR,Corey Brown,28.4
1,2000.0,corey-brown-1,WR,Corey Brown,26.8
2,2000.0,corey-brown-1,WR,Corey Brown,3.1
3,2000.0,corey-brown-1,WR,Corey Brown,18.9
4,2000.0,corey-brown-1,WR,Corey Brown,11.5


Calculate each college season's fantasy score using the same formula

In [6]:
# Per player-season: mean score (FanPts), number of rows in the group (G) and
# median score (Med_FanPts), all from a single groupby; FanScore is the
# midpoint of the median and the mean.
rookiesYrAvg = (
    rookies00to22
    .groupby(["Year", "Name", "cfb_id", "Position"], as_index=False)
    .agg(
        FanPts=("FanPts", "mean"),
        G=("FanPts", "size"),
        Med_FanPts=("FanPts", "median"),
    )
    .assign(FanScore=lambda seasons: (seasons["Med_FanPts"] + seasons["FanPts"]) / 2)
)
rookiesYrAvg.head()

Unnamed: 0,Year,Name,cfb_id,Position,FanPts,G,Med_FanPts,FanScore
0,2000.0,Aaron Lockett,aaron-lockett-1,WR,8.661538,13,8.1,8.380769
1,2000.0,Andrae Thurman,andrae-thurman-1,WR,4.057143,7,2.2,3.128571
2,2000.0,Andre Johnson,andre-johnson-1,WR,2.1,7,0.0,1.05
3,2000.0,Andre' Davis,andre-davis-1,WR,9.8625,8,8.0,8.93125
4,2000.0,Anquan Boldin,anquan-boldin-1,WR,12.016667,12,11.45,11.733333


Determine if player was an early declare

(We have to drop players whose last season was before 2003: we have no data before 2000, so their earlier seasons may be missing and their career length cannot be measured reliably.)

In [7]:
# First and last recorded season for every player, computed together so the
# two values are guaranteed to belong to the same cfb_id (no reliance on two
# separate groupby results lining up by index).
career_span = rookiesYrAvg.groupby(["cfb_id"], as_index=False).agg(
    first_year=("Year", "min"),
    last_year=("Year", "max"),
)

# Only players whose final recorded season is 2003 or later are kept.
recent_careers = career_span[career_span["last_year"] >= 2003]

# EarlyDeclare is True when the recorded seasons span at most three calendar
# years (last - first <= 2). Built with assign() on a fresh frame rather than
# by setting a column on a filtered slice, which triggers pandas'
# SettingWithCopyWarning.
ED = recent_careers[["cfb_id"]].assign(
    EarlyDeclare=(recent_careers["last_year"] - recent_careers["first_year"]) <= 2
)

# The inner merge drops players filtered out above. Any EarlyDeclare column
# left by a previous run of this cell is removed first, so re-running does not
# produce EarlyDeclare_x / EarlyDeclare_y duplicates.
rookiesYrAvg = (
    rookiesYrAvg
    .drop(columns="EarlyDeclare", errors="ignore")
    .merge(ED, on="cfb_id")
)
rookiesYrAvg.head()

Unnamed: 0,Year,Name,cfb_id,Position,FanPts,G,Med_FanPts,FanScore,EarlyDeclare
0,2000.0,B.J. Johnson,bj-johnson-1,WR,11.972727,11,9.7,10.836364,False
1,2001.0,B.J. Johnson,bj-johnson-1,WR,10.358333,12,7.65,9.004167,False
2,2002.0,B.J. Johnson,bj-johnson-1,WR,10.841667,12,6.7,8.770833,False
3,2003.0,B.J. Johnson,bj-johnson-1,WR,10.9,10,6.75,8.825,False
4,2000.0,Bernard Berrian,bernard-berrian-1,WR,14.3125,8,14.45,14.38125,False


Take best year

In [8]:
# Reduce to one row per (cfb_id, EarlyDeclare) pair, keeping the highest
# season FanScore in each group.
rookiesYrAvg = (
    rookiesYrAvg
    .groupby(["cfb_id", "EarlyDeclare"], as_index=False)
    .agg(FanScore=("FanScore", "max"))
)
rookiesYrAvg.head()

Unnamed: 0,cfb_id,EarlyDeclare,FanScore
0,aaron-burbridge-1,False,18.385714
1,aaron-dobson-1,False,14.976923
2,aaron-fuller-1,False,12.933333
3,aaron-kelly-1,False,20.473077
4,ace-sanders-1,True,11.653846


Load NFL combine data, which serves as the cfb_id-to-pfr_id lookup table

In [9]:
# Combine records for the 2000 through 2022 seasons (the range end, 2023, is
# exclusive); each record carries both a pfr_id and a cfb_id column.
combine_seasons = range(2000, 2023)
combineData = nfl.import_combine_data(combine_seasons)
combineData.head()

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player_name,pos,school,ht,wt,forty,bench,vertical,broad_jump,cone,shuttle
0,2000,2000.0,New York Jets,1.0,13.0,AbraJo00,,John Abraham,OLB,South Carolina,6-4,252.0,4.55,,,,,
1,2000,2000.0,Seattle Seahawks,1.0,19.0,AlexSh00,shaun-alexander-1,Shaun Alexander,RB,Alabama,6-0,218.0,4.58,,,,,
2,2000,2000.0,Kansas City Chiefs,6.0,188.0,AlfoDa20,,Darnell Alford,OT,Boston Col.,6-4,334.0,5.56,23.0,25.0,94.0,8.48,4.98
3,2000,,,,,,,Kyle Allamon,TE,Texas Tech,6-2,253.0,4.97,,29.0,104.0,7.29,4.49
4,2000,2000.0,Carolina Panthers,1.0,23.0,AndeRa21,,Rashard Anderson,CB,Jackson State,6-2,206.0,4.55,,34.0,123.0,7.18,4.15


In [10]:
# Inner join on cfb_id: only players present in both the college scores and
# the combine data survive, and each picks up its combine columns.
rookiesYrAvg00to22 = pd.merge(
    rookiesYrAvg,
    combineData,
    how="inner",
    on="cfb_id",
)
rookiesYrAvg00to22.head()

Unnamed: 0,cfb_id,EarlyDeclare,FanScore,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,player_name,pos,school,ht,wt,forty,bench,vertical,broad_jump,cone,shuttle
0,aaron-burbridge-1,False,18.385714,2016,2016.0,San Francisco 49ers,6.0,213.0,BurbAa00,Aaron Burbridge,WR,Michigan State,6-0,206.0,4.56,20.0,30.5,115.0,7.22,4.31
1,aaron-dobson-1,False,14.976923,2013,2013.0,New England Patriots,2.0,59.0,DobsAa00,Aaron Dobson,WR,Marshall,6-3,210.0,4.37,,,,,
2,aaron-fuller-1,False,12.933333,2020,,,,,FullAa01,Aaron Fuller,WR,Washington,5-11,188.0,4.59,,34.0,118.0,7.14,
3,aaron-kelly-1,False,20.473077,2009,,,,,,Aaron Kelly,WR,Clemson,6-4,204.0,4.49,11.0,38.0,119.0,7.25,4.45
4,ace-sanders-1,True,11.653846,2013,2013.0,Jacksonville Jaguars,4.0,101.0,SandAc00,Ace Sanders,WR,South Carolina,5-7,173.0,4.58,7.0,32.0,117.0,6.81,4.37


Function to convert heights to inches

In [11]:
def parse_ht(ht):
    """Convert a "feet-inches" height string to total inches.

    Parameters
    ----------
    ht : str or missing value
        Height formatted like ``"6-1"`` (6 feet, 1 inch).

    Returns
    -------
    float
        Height in inches (``"6-1"`` -> ``73.0``). ``nan`` is returned when
        ``ht`` is not a string (e.g. ``None`` or ``NaN``) or is blank, so a
        missing height propagates as missing instead of raising.

    Raises
    ------
    IndexError
        If the string contains no ``"-"`` separator.
    ValueError
        If either part cannot be converted to a number.
    """
    # Missing values arrive as None/NaN (not strings) or as empty text; none
    # of them can be split, so report them as NaN.
    if not isinstance(ht, str) or not ht.strip():
        return float("nan")

    # format: 6-1
    pieces = ht.split("-")
    feet = float(pieces[0])
    inches = float(pieces[1])
    return (12 * feet) + inches

Apply height function

In [12]:
# Convert "feet-inches" strings to inches. Only non-empty strings are handed
# to parse_ht; None, NaN and already-numeric values pass through unchanged.
# A plain truthiness check is not enough because NaN is truthy, and without
# the type check re-running this cell would feed floats to parse_ht and fail.
rookiesYrAvg00to22["ht"] = rookiesYrAvg00to22["ht"].apply(
    lambda raw_ht: parse_ht(raw_ht) if isinstance(raw_ht, str) and raw_ht else raw_ht
)
rookiesYrAvg00to22["ht"].head()

0    72.0
1    75.0
2    71.0
3    76.0
4    67.0
Name: ht, dtype: float64

Merge fantasy data with rookie data

In [13]:
# Join the college/combine rows to the NFL career scores on pfr_id, remove the
# listed columns, and keep only rows that have a draft_ovr value.
fullWRData = (
    rookiesYrAvg00to22
    .merge(fantasySeasonData00to22Top5, on="pfr_id")
    .drop(
        columns=[
            "pos", "draft_year", "draft_round", "Name",
            "bench", "cone", "shuttle", "broad_jump",
        ]
    )
    .dropna(subset=['draft_ovr'])
)