# Rookie Data

In [40]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [41]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from fantasy_football import Fantasy


# Import fantasy class which will be used for building datasets
football = Fantasy()

In [42]:
rookie = football.getYearlyRookieData(2020)

In [43]:
# Get points per game for labels


QB_rookie_data = rookie[rookie["Pos"] == "QB"].reset_index(drop=True)
RB_rookie_data = rookie[rookie["Pos"] == "RB"].reset_index(drop=True)
WR_rookie_data = rookie[rookie["Pos"] == "WR"].reset_index(drop=True)
TE_rookie_data = rookie[rookie["Pos"] == "TE"].reset_index(drop=True)


QB_rookie_data.head()

Unnamed: 0,Pick,Team,Player,Pos,passing_Att,passing_Cmp,passing_Cmp %,passing_Yds/Att,passing_Pass Yds,passing_TD,...,downs_3rd Att,downs_3rd Md,downs_4th Att,downs_4th Md,downs_Rec 1st,downs_Rec 1st%,downs_Rush 1st,downs_Rush 1st%,downs_Scrm Plys,AVG
0,1,49ers,Alex Smith,QB,561,325,57.9,6.2,3455,16,...,219,88,12,6,172,53.1,93,21.3,1004,3.0
1,24,Packers,Aaron Rodgers,QB,598,382,63.9,7.6,4550,36,...,241,84,12,5,152,50.8,86,18.1,1047,0.4
2,148,Packers,Ingle Martin,QB,626,383,61.2,6.3,3964,20,...,239,91,16,5,224,53.5,58,16.1,1075,-0.5
3,193,Bengals,Reggie McNeal,QB,538,362,67.3,7.3,3935,32,...,220,72,12,3,182,54.0,73,20.0,1027,0.4
4,3,Falcons,Matt Ryan,QB,555,336,60.5,6.4,3573,18,...,219,91,17,6,167,55.3,119,25.4,1041,12.4


In [44]:
from sklearn.model_selection import train_test_split

WR_features, WR_labels = football.prepRookieData(WR_rookie_data, standardize= True)

WR_selected, WR_optimal_cols = football.getBestFeatures(WR_features, WR_labels, numFeatures=5)

WR_X_train, WR_X_test, WR_y_train, WR_y_test = train_test_split(WR_selected, WR_labels, test_size=0.2, random_state=42)

# Get optimal model
WR_model = football.get_model(WR_X_train, WR_y_train)
WR_model

In [45]:
WR_pred = WR_model.predict(WR_X_test)

In [46]:
from sklearn.metrics import mean_squared_error

# Reset index of y_test
WR_y_test = WR_y_test.reset_index(drop=True)

WR_mse = mean_squared_error(WR_y_test, WR_pred)

# Make results df to see visual per player
WR_results = pd.DataFrame()

WR_results['Y_pred'] = WR_pred
WR_results['Y_true'] = WR_y_test
WR_results['difference'] = WR_results['Y_true'] - WR_results['Y_pred']

In [47]:
TE_features, TE_labels = football.prepRookieData(TE_rookie_data, standardize= True)

TE_selected, TE_optimal_cols = football.getBestFeatures(TE_features, TE_labels, numFeatures=5)

TE_X_train, TE_X_test, TE_y_train, TE_y_test = train_test_split(TE_selected, TE_labels, test_size=0.2, random_state=42)

# Get optimal model
TE_model = football.get_model(TE_X_train, TE_y_train)
TE_model

In [48]:
TE_pred = TE_model.predict(TE_X_test)

In [49]:
# Reset index of y_test
TE_y_test = TE_y_test.reset_index(drop=True)

TE_mse = mean_squared_error(TE_y_test, TE_pred)

# Make results df to see visual per player
TE_results = pd.DataFrame()

TE_results['Y_pred'] = TE_pred
TE_results['Y_true'] = TE_y_test
TE_results['difference'] = TE_results['Y_true'] - TE_results['Y_pred']

In [50]:
QB_features, QB_labels = football.prepRookieData(QB_rookie_data, standardize= True)

QB_selected, QB_optimal_cols = football.getBestFeatures(QB_features, QB_labels, numFeatures=5)

QB_X_train, QB_X_test, QB_y_train, QB_y_test = train_test_split(QB_selected, QB_labels, test_size=0.2, random_state=42)

# Get optimal model
QB_model = football.get_model(QB_X_train, QB_y_train)
QB_model

In [51]:
QB_pred = QB_model.predict(QB_X_test)

In [52]:
# Reset index of y_test
QB_y_test = QB_y_test.reset_index(drop=True)

QB_mse = mean_squared_error(QB_y_test, QB_pred)

# Make results df to see visual per player
QB_results = pd.DataFrame()

QB_results['Y_pred'] = QB_pred
QB_results['Y_true'] = QB_y_test
QB_results['difference'] = QB_results['Y_true'] - QB_results['Y_pred']

In [53]:
RB_features, RB_labels = football.prepRookieData(RB_rookie_data, standardize= True)

RB_selected, RB_optimal_cols = football.getBestFeatures(RB_features, RB_labels, numFeatures=5)

RB_X_train, RB_X_test, RB_y_train, RB_y_test = train_test_split(RB_selected, RB_labels, test_size=0.2, random_state=42)

# Get optimal model
RB_model = football.get_model(RB_X_train, RB_y_train)
RB_model

In [54]:
RB_pred = RB_model.predict(RB_X_test)

In [55]:
# Reset index of y_test
RB_y_test = RB_y_test.reset_index(drop=True)

RB_mse = mean_squared_error(RB_y_test, RB_pred)

# Make results df to see visual per player
RB_results = pd.DataFrame()

RB_results['Y_pred'] = RB_pred
RB_results['Y_true'] = RB_y_test
RB_results['difference'] = RB_results['Y_true'] - RB_results['Y_pred']

In [56]:
test_year = football.getYearlyRookieData(2022, last_year= True)

QB_test_year = test_year[test_year["Pos"] == "QB"].reset_index(drop=True)
RB_test_year = test_year[test_year["Pos"] == "RB"].reset_index(drop=True)
WR_test_year = test_year[test_year["Pos"] == "WR"].reset_index(drop=True)
TE_test_year = test_year[test_year["Pos"] == "TE"].reset_index(drop=True)

WR_test_features, WR_test_labels = football.prepRookieData(WR_test_year, standardize= True)
QB_test_features, QB_test_labels = football.prepRookieData(QB_test_year, standardize= True)
RB_test_features, RB_test_labels = football.prepRookieData(RB_test_year, standardize= True)
TE_test_features, TE_test_labels = football.prepRookieData(TE_test_year, standardize= True)



In [57]:
WR_y_pred = WR_model.predict(WR_test_features[WR_optimal_cols])

WR_pred_df = pd.DataFrame()

WR_pred_df['Player'] = WR_test_year['Player']
WR_pred_df['2023_pred'] = WR_y_pred
WR_pred_df['2022_ppg'] = WR_test_labels

WR_pred_df = WR_pred_df.sort_values(by='2023_pred', ascending=False).reset_index(drop=True)
WR_pred_df

Unnamed: 0,Player,2023_pred,2022_ppg
0,Garrett Wilson,5.246689,7.8
1,Jameson Williams,4.951097,2.4
2,Christian Watson,4.474271,8.8
3,Drake London,4.419105,6.3
4,Skyy Moore,4.354251,1.3
5,Jahan Dotson,4.333893,8.0
6,Tyquan Thornton,4.141826,3.7
7,Alec Pierce,4.064401,4.5
8,George Pickens,3.849356,6.7
9,Chris Olave,3.663266,8.4


In [58]:
TE_y_pred = TE_model.predict(TE_test_features[TE_optimal_cols])

TE_pred_df = pd.DataFrame()

TE_pred_df['Player'] = TE_test_year['Player']
TE_pred_df['2023_pred'] = TE_y_pred
TE_pred_df['2022_ppg'] = TE_test_labels

TE_pred_df = TE_pred_df.sort_values(by='2023_pred', ascending=False).reset_index(drop=True)
TE_pred_df

Unnamed: 0,Player,2023_pred,2022_ppg
0,Trey McBride,3.412966,2.7
1,Charlie Kolar,3.292757,4.9
2,Greg Dulcich,3.248373,5.3
3,Isaiah Likely,3.108568,4.0
4,Daniel Bellinger,2.943353,4.1
5,Jelani Woods,2.852995,4.1
6,Jeremy Ruckert,2.737443,0.4
7,Cole Turner,2.669895,0.3
8,James Mitchell,2.350403,2.2
9,Cade Otton,2.11687,3.2


In [59]:
QB_y_pred = QB_model.predict(QB_test_features[QB_optimal_cols])

QB_pred_df = pd.DataFrame()

QB_pred_df['Player'] = QB_test_year['Player']
QB_pred_df['2023_pred'] = QB_y_pred
QB_pred_df['2022_ppg'] = QB_test_labels

QB_pred_df = QB_pred_df.sort_values(by='2023_pred', ascending=False).reset_index(drop=True)
QB_pred_df

Unnamed: 0,Player,2023_pred,2022_ppg
0,Matt Corral,16.676649,0.0
1,Malik Willis,12.064304,3.2
2,Desmond Ridder,11.881455,9.7
3,Kenny Pickett,11.389441,12.2
4,Sam Howell,10.433469,19.3
5,Skylar Thompson,7.145969,2.9
6,Bailey Zappe,6.594409,10.4
7,Brock Purdy,2.314487,12.3


In [60]:
RB_y_pred = RB_model.predict(RB_test_features[RB_optimal_cols])

RB_pred_df = pd.DataFrame()

RB_pred_df['Player'] = RB_test_year['Player']
RB_pred_df['2023_pred'] = RB_y_pred
RB_pred_df['2022_ppg'] = RB_test_labels

RB_pred_df = RB_pred_df.sort_values(by='2023_pred', ascending=False).reset_index(drop=True)
RB_pred_df

Unnamed: 0,Player,2023_pred,2022_ppg
0,Rachaad White,10.94661,5.2
1,James Cook,9.987317,5.1
2,Kenneth Walker III,8.946625,11.7
3,Isaiah Spiller,8.633931,1.1
4,Dameon Pierce,8.276394,10.5
5,Snoop Conner,7.786744,2.0
6,Brian Robinson Jr.,7.365356,8.6
7,Breece Hall,7.287464,13.7
8,Hassan Haskins,6.95251,1.1
9,Ty Chandler,6.934603,1.0
