# 2024
Now, we have raw fantasy point projections for each player. These projections only take into account a player's career statistics. They do not factor in the __team/system__ around the player, which can be very important. From this [notebook](eda/eda.ipynb), we know that some external variables are important to prediciting fantasy success. Here, we will factor these externals in with player projections to create more accurate 2024 rankings.

In [1]:
# data science stuff
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# models
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from xgboost import XGBRegressor
from sklearn.pipeline import Pipeline

# data preprocessing, performance metrics
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.metrics import r2_score, mean_squared_error

# display
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

# global random_state
random_state = 9

In [8]:
# load data
master = pd.read_csv('data/final_data/master.csv')
adp = pd.read_csv('data/rankings/underdog_adps.csv')[['firstName', 'lastName', 'adp', 'projectedPoints', 'positionRank', 'slotName']]
offline_def_projections = pd.read_csv('./data/rankings/2024_offline_def_projections.csv')

# combine firstName and lastName in adp
adp['Player'] = adp['firstName'] + ' ' + adp['lastName']
adp = adp.drop(columns=['firstName', 'lastName'])

# drop target cols from df
master = master.drop(columns=[col for col in master.columns if 'Target' in col])

# get 2023 seasons
master_2023 = master[master['Year'] == 2023]

In [9]:
# merge
df = master_2023.merge(adp, on='Player', how='left').merge(offline_def_projections, on='Tm', how='left')

# view data
df.head()

Unnamed: 0,Player,Tm,Pos,Age,G,GS,Pass_Cmp,Pass_Att,Pass_Yds,Pass_TD,Pass_Int,Rush_Att,Rush_Yds,Rush_Y/A,Rush_TD,Rec_Tgt,Rec_Rec,Rec_Yds,Rec_Y/R,Rec_TD,Fmb,FmbLost,Key,Year,Scrim_Yds,Scrim_TD,num_games,games_played_pct,games_started_pct,ProBowl,AllPro,Exp,New_Team,Will_be_on_New_Team,Traded,Pass_Y/A,Touches,Cmp%,Catch%,Pass_Cmp_per_game,Pass_Att_per_game,Pass_Yds_per_game,Pass_TD_per_game,Pass_Int_per_game,Rush_Att_per_game,Rush_Yds_per_game,Rush_TD_per_game,Rec_Tgt_per_game,Rec_Rec_per_game,Rec_Yds_per_game,...,PPT_standard,PPT_half-ppr,PPT_ppr,PPT_6,SeasonOvrRank_standard,SeasonOvrRank_half-ppr,SeasonOvrRank_ppr,SeasonOvrRank_6,SeasonPosRank_standard,SeasonPosRank_half-ppr,SeasonPosRank_ppr,SeasonPosRank_6,PPGOvrRank_standard,PPGOvrRank_half-ppr,PPGOvrRank_ppr,PPGOvrRank_6,PPGPosRank_standard,PPGPosRank_half-ppr,PPGPosRank_ppr,PPGPosRank_6,PPTOvrRank_standard,PPTOvrRank_half-ppr,PPTOvrRank_ppr,PPTOvrRank_6,PPTPosRank_standard,PPTPosRank_half-ppr,PPTPosRank_ppr,PPTPosRank_6,VORP_standard_10tm,VORP_half-ppr_10tm,VORP_ppr_10tm,VORP_6_10tm,VORP_standard_12tm,VORP_half-ppr_12tm,VORP_ppr_12tm,VORP_6_12tm,VORP_standard_10tm_3WR,VORP_half-ppr_10tm_3WR,VORP_ppr_10tm_3WR,VORP_6_10tm_3WR,VORP_standard_12tm_3WR,VORP_half-ppr_12tm_3WR,VORP_ppr_12tm_3WR,VORP_6_12tm_3WR,adp,projectedPoints,positionRank,slotName,OffLine_rank,Def_rank
0,Quintin Morris,BUF,TE,24,15,0,0,0,0,0,0,0,0,0.0,0,3.0,2,26,13.0,1,0.0,0.0,MorrQu00,2023,26,1,17,0.882353,0.0,False,False,1,False,False,False,0.0,2,0.0,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.133333,1.733333,...,4.3,4.8,5.3,5.3,416.0,428.0,435.0,436.0,81.0,86.0,89.0,89.0,442.0,463.0,477.0,479.0,82.0,88.0,94.0,94.0,6.0,6.0,6.0,6.0,3.0,3.0,3.0,3.0,-91.9,-117.42,-139.9,-139.9,-81.8,-104.2,-132.92,-134.92,-91.9,-117.42,-139.9,-139.9,-81.8,-104.2,-132.92,-134.92,-,18.0,,TE,8.0,17.0
1,Geoff Swaim,ARI,TE,30,14,8,0,0,0,0,0,0,0,0.0,0,11.0,10,94,9.4,0,0.0,0.0,SwaiGe00,2023,94,0,17,0.823529,0.571429,False,False,8,True,False,False,0.0,10,0.0,0.909091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.785714,0.714286,6.714286,...,0.94,1.44,1.94,1.94,410.0,390.0,386.0,388.0,79.0,77.0,77.0,77.0,436.0,426.0,416.0,416.0,80.0,79.0,78.0,78.0,270.0,257.0,249.0,249.0,89.0,89.0,89.0,89.0,-91.1,-112.62,-131.1,-131.1,-81.0,-99.4,-124.12,-126.12,-91.1,-112.62,-131.1,-131.1,-81.0,-99.4,-124.12,-126.12,-,0.0,,TE,26.0,32.0
2,Brycen Hopkins,LAR,TE,26,15,0,0,0,0,0,0,0,0,0.0,0,7.0,5,78,15.6,0,0.0,0.0,HopkBr01,2023,78,0,17,0.882353,0.0,False,False,3,False,False,False,0.0,5,0.0,0.714286,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.466667,0.333333,5.2,...,1.56,2.06,2.56,2.56,422.0,420.0,418.0,422.0,83.0,82.0,84.0,84.0,452.0,455.0,459.0,461.0,85.0,85.0,87.0,87.0,130.0,125.0,119.0,119.0,33.0,32.0,32.0,32.0,-92.7,-116.72,-137.7,-137.7,-82.6,-103.5,-130.72,-132.72,-92.7,-116.72,-137.7,-137.7,-82.6,-103.5,-130.72,-132.72,-,0.0,,TE,16.0,23.0
3,Tim Jones,JAX,WR,25,17,2,0,0,0,0,0,0,0,0.0,0,18.0,11,83,7.545455,0,0.0,0.0,JoneTi00,2023,83,0,17,1.0,0.117647,False,False,1,False,False,False,0.0,11,0.0,0.611111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.058824,0.647059,4.882353,...,0.754545,1.254545,1.754545,1.754545,418.0,394.0,388.0,390.0,165.0,161.0,160.0,160.0,454.0,440.0,434.0,434.0,179.0,178.0,176.0,176.0,301.0,280.0,275.0,275.0,175.0,171.0,167.0,167.0,-128.9,-163.7,-195.9,-195.9,-118.3,-151.9,-185.1,-185.1,-109.9,-145.1,-170.9,-170.9,-95.08,-113.4,-137.9,-137.9,-,19.9,,WR,,
4,Dare Ogunbowale,HOU,RB,29,12,0,0,0,0,0,0,8,35,4.375,0,7.0,2,18,9.0,0,0.0,0.0,OgunDa00,2023,53,0,17,0.705882,0.0,False,False,6,False,False,False,0.0,10,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.666667,2.916667,0.0,0.583333,0.166667,1.5,...,0.53,0.63,0.73,0.73,447.0,449.0,453.0,455.0,114.0,115.0,116.0,116.0,464.0,480.0,483.0,486.0,122.0,125.0,127.0,127.0,387.0,399.0,398.0,399.0,85.0,85.0,85.0,85.0,-138.8,-159.6,-178.1,-178.1,-130.1,-146.04,-167.2,-167.2,-138.8,-159.6,-178.1,-178.1,-130.1,-146.04,-167.2,-167.2,216.0,19.4,RB111,RB,22.0,9.0


In [29]:
# create cols
df['Passing Incentive'] = df['Def_rank'] - df['OffLine_rank']
df['Rushing Incentive'] = (df['OffLine_rank'] + df['Def_rank']) / 2

In [30]:
df[['Team', 'OffLine_rank', 'Def_rank', 'Passing Incentive']].drop_duplicates().sort_values('Passing Incentive', ascending=False).T

Unnamed: 0,11,9,6,77,105,7,63,30,36,13,22,3,28,27,31,68,66,4,19,59,5,64,2,0,39,32,17,26,50,1,24,23,232
Team,PHI,IND,ATL,DEN,LAC,DET,CAR,BUF,TB,LAR,ARI,MIN,CHI,NYG,GB,WAS,PIT,CIN,KC,CLE,NYJ,TEN,MIA,DAL,SEA,LVR,JAC,HOU,NE,SF,BAL,NO,UNS
OffLine_rank,2.0,3.0,6.0,15.0,12.0,1.0,20.0,8.0,17.0,16.0,26.0,13.0,11.0,29.0,14.0,27.0,9.0,21.0,7.0,4.0,5.0,30.0,18.0,10.0,31.0,19.0,23.0,22.0,28.0,24.0,25.0,32.0,
Def_rank,21.0,22.0,24.0,31.0,27.0,10.0,29.0,17.0,25.0,23.0,32.0,18.0,16.0,30.0,15.0,28.0,7.0,19.0,4.0,1.0,2.0,26.0,14.0,3.0,20.0,8.0,12.0,9.0,13.0,5.0,6.0,11.0,
Passing Incentive,19.0,19.0,18.0,16.0,15.0,9.0,9.0,9.0,8.0,7.0,6.0,5.0,5.0,1.0,1.0,1.0,-2.0,-2.0,-3.0,-3.0,-3.0,-4.0,-4.0,-7.0,-11.0,-11.0,-11.0,-13.0,-15.0,-19.0,-19.0,-21.0,


- The teams on the left (PHI, IND, ATL) are the most likely to have __high-scoring offenses NOT factoring in skill players__.
- They are more incentivized to pass given their strong front 5 and weak defense.
- They could still run the ball if they want, but will likely need to pass in order to keep up with their opponents.

In [33]:
df[['Team', 'OffLine_rank', 'Def_rank', 'Rushing Incentive']].drop_duplicates().sort_values('Rushing Incentive').T

Unnamed: 0,59,5,7,19,0,66,11,9,30,32,28,1,31,6,26,24,3,2,17,13,105,4,50,36,23,77,63,39,68,64,22,27,232
Team,CLE,NYJ,DET,KC,DAL,PIT,PHI,IND,BUF,LVR,CHI,SF,GB,ATL,HOU,BAL,MIN,MIA,JAC,LAR,LAC,CIN,NE,TB,NO,DEN,CAR,SEA,WAS,TEN,ARI,NYG,UNS
OffLine_rank,4.0,5.0,1.0,7.0,10.0,9.0,2.0,3.0,8.0,19.0,11.0,24.0,14.0,6.0,22.0,25.0,13.0,18.0,23.0,16.0,12.0,21.0,28.0,17.0,32.0,15.0,20.0,31.0,27.0,30.0,26.0,29.0,
Def_rank,1.0,2.0,10.0,4.0,3.0,7.0,21.0,22.0,17.0,8.0,16.0,5.0,15.0,24.0,9.0,6.0,18.0,14.0,12.0,23.0,27.0,19.0,13.0,25.0,11.0,31.0,29.0,20.0,28.0,26.0,32.0,30.0,
Rushing Strength,2.5,3.5,5.5,5.5,6.5,8.0,11.5,12.5,12.5,13.5,13.5,14.5,14.5,15.0,15.5,15.5,15.5,16.0,17.5,19.5,19.5,20.0,20.5,21.0,21.5,23.0,24.5,25.5,27.5,28.0,29.0,29.5,


- This is a more predictable trait. Teams on the left should run the ball with their strong o-lines and defenses.
- Teams on the right are very unlikely to run the ball, as they'll likely be playing from behind and have shite o-lines.
- Once again, these __rankings do not factor in skill players__.

In [9]:
# look at Exp in 1970
df[df['Year'] == 1970]['Exp'].max()

21

## Team analysis:
- High __negative values__ in the "diff" col indicate a team that should be weighted more towards offense.
- High __positive values__ in the "diff" col indicate a team that may score less points.

__ARI__:
- Worst DEF means they'll likely be playing from behind alot.
- Gaudy stats for Murray, MHJ, and McBride.

__ATL__:
- Great line. Bijan should have strong season.
- However, the weak defense may entice an explosive passing attack as well (London, Pitts).

__BAL__:
- Bad line but Henry has been good without one throughout his career. 
- Strong def should cancel out for Henry.
- Quick throws and scrambles for Lamar.

__BUF__:
- Slightly weighted towards an offensive team.
- Really like James Cook.
- Allen should be consistent. Who will explode in the passing game?

__CAR__:
- Like Arizona, the Panthers should be playing behind a lot
- __Diontae Johnson__ going as WR35 has extreme upside.
- 

__CHI__:
- 

__CIN__:
- 

__CLE__:
- 

__DAL__:
- 

__DEN__:
- 

__DET__:
- 

__GB__:
- 

__HOU__:
- 

__IND__:
- 

__JAC__:
- 

__KC__:
- 

__LAC__:
- 

__LAR__:
- 

__LVR__:
- 

__MIA__:
- 

__MIN__:
- 

__NE__:
- 

__NO__:
- 

__NYG__:
- 

__NYJ__:
- 

__PHI__:
- Highest diff. This could be a team that plays in shootouts consistently.
- Should try to get at least one in the draft.
- Smith at WR21 could be a league-winning pick

__PIT__:
- 

__SEA__:
- 

__SF__:
- 

__TB__:
- 

__TEN__:
- 

__WAS__:
- 
- 

In [53]:
def view_player(player):
    return df[df['Player'] == player]

In [66]:
view_player('Breece Hall')

Unnamed: 0,Rank,Tier,Player,Pos,Team,Bye,ADP,Floor,Consensus,FP,Ceiling,MVP,OffLine_rank,Def_rank,projected_returner
15,16,2,Breece Hall,RB,NYJ,12,1.07,236.2,296.9,295.2,366.0,63.9,5.0,2.0,


In [65]:
view_player('Bijan Robinson')

Unnamed: 0,Rank,Tier,Player,Pos,Team,Bye,ADP,Floor,Consensus,FP,Ceiling,MVP,OffLine_rank,Def_rank,projected_returner
16,17,2,Bijan Robinson,RB,ATL,12,1.05,229.6,287.4,302.1,362.5,63.9,6.0,24.0,


In [64]:
df.sort_values('ADP', ascending=True).head(30)

Unnamed: 0,Rank,Tier,Player,Pos,Team,Bye,ADP,Floor,Consensus,FP,Ceiling,MVP,OffLine_rank,Def_rank,projected_returner
9,10,1,Christian McCaffrey,RB,SF,9,1.01,254.8,348.6,330.9,407.0,71.3,24.0,5.0,
8,9,1,Tyreek Hill,WR,MIA,6,1.02,273.0,323.6,332.9,409.5,71.4,18.0,14.0,
7,8,1,CeeDee Lamb,WR,DAL,7,1.03,279.6,334.2,367.9,408.3,75.2,10.0,3.0,
13,14,2,Amon-Ra St. Brown,WR,DET,5,1.04,262.6,298.6,312.7,359.6,66.1,1.0,10.0,
16,17,2,Bijan Robinson,RB,ATL,12,1.05,229.6,287.4,302.1,362.5,63.9,6.0,24.0,
12,13,2,Ja'Marr Chase,WR,CIN,12,1.06,271.2,289.0,308.2,388.3,67.0,21.0,19.0,
15,16,2,Breece Hall,RB,NYJ,12,1.07,236.2,296.9,295.2,366.0,63.9,5.0,2.0,
11,12,2,Justin Jefferson,WR,MIN,6,1.08,267.8,301.2,311.4,392.3,67.7,13.0,18.0,
19,20,3,A.J. Brown,WR,PHI,5,1.09,238.0,281.3,267.4,323.6,58.9,2.0,21.0,
20,21,3,Jahmyr Gibbs,RB,DET,5,1.1,221.1,246.9,269.6,334.3,58.0,1.0,10.0,


## 2024 Questions
Here we will investigate specific data subsets to further solidify 2024 projections.

### Team Swap
Notable team swaps in 2024:

__QB__:
- ARod (didn't play in 2023)
- Kirk Cousins
- Wilson & Fields
- Minshew

__RB__:
- Saquon Barkley
- Derrick Henry
- Josh Jacobs
- Joe Mixon
- Aaron Jones
- Swift
- Pollard
- Moss
- Ekeler

__WR__:
- Stefon Diggs
- Keenan Allen
- Ridley
- Hollywood
- Diontae
- Curtis Samuel
- Jeudy
- Mike Williams
- Gabe Davis

No notable swaps at TE in 2024.

In [None]:
# sort by Year, player

# get all rows where 'New_Team' == 1 and the row before it
df[(df['New_Team'] == 1)]

# compare

In [None]:
# QBs
# account for injuries (ARod 2023)

In [None]:
# RBs

In [None]:
# WRs

In [None]:
# TEs