In [94]:
# Import packages

import pandas as pd 
import numpy as np 
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from joblib import dump, load

pd.set_option('display.max_columns', None)

In [95]:
# Import data

qb = pd.read_csv('../modeling/qb_value_modeling/qb_value_data/qb_with_values.csv')
qb = qb.drop(columns = ['Unnamed: 0'])
rb = pd.read_csv('../modeling/rush_offense_value_modeling/rush_offense_value_data/rb_with_values.csv')
rb = rb.drop(columns = ['Unnamed: 0'])
pass_def = pd.read_csv('../modeling/pass_defense_value_modeling/pass_defense_value_data/pass_defense_with_values.csv')
pass_def = pass_def.drop(columns = ['Unnamed: 0'])
rush_def = pd.read_csv('../modeling/rush_defense_value_modeling/rush_defense_value_data/rush_defense_with_values.csv')
rush_def = rush_def.drop(columns = ['Unnamed: 0'])

In [96]:
# Put data into one data frame that just has the game identifiers and value numbers

qb = qb.copy()[['season', 'week', 'team', 'opponent', 'score', 'opponent_score', 'qb', 'passing_value']]
rb = rb.copy()[['season', 'week', 'team', 'opponent', 'score', 'opponent_score', 'rushing_value', 'qb_rushing_value_pct']]
pass_def = pass_def.copy()[['season', 'week', 'team', 'opponent', 'score', 'opponent_score', 'pass_def_value']]
rush_def = rush_def.copy()[['season', 'week', 'team', 'opponent', 'score', 'opponent_score', 'rush_def_value']]
df = qb.copy()
df = df.merge(rb).merge(pass_def).merge(rush_def)
df = df.drop_duplicates()
df

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb,passing_value,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value
0,2014,1,ARI,LAC,18,17,C. Palmer,0.604567,0.194004,0.270270,0.493942,0.855757
1,2014,1,ATL,NO,37,34,M. Ryan,0.846419,0.721665,0.120968,0.254835,0.216091
2,2014,1,BAL,CIN,16,23,J. Flacco,0.310023,0.578230,0.073684,0.191127,0.693342
3,2014,1,BUF,CHI,23,20,E. Manuel,0.422696,0.763580,0.116162,0.696008,0.408553
4,2014,1,CAR,TB,20,14,D. Anderson,0.712989,0.312710,0.087719,0.843228,0.321939
...,...,...,...,...,...,...,...,...,...,...,...,...
4740,2022,10,TB,SEA,21,16,T. Brady,0.909411,0.611409,0.000000,0.372324,0.741143
4741,2022,10,TEN,DEN,17,10,R. Tannehill,0.570619,0.098195,0.187500,0.708770,0.570952
4742,2022,10,WAS,PHI,32,21,T. Heinicke,0.534081,0.675205,0.089172,0.897627,0.159860
4743,2022,11,GB,TEN,17,27,A. Rodgers,0.487381,0.269715,0.000000,0.043549,0.537460


In [97]:
# Save raw data frame with values

df.to_csv('../data/value_models_combined.csv')

In [123]:
# Save data frame with past 6 games rolling stats

# Notes:
# Pure average for now, not weighted (possible future adjustment?)
# Resets every new season. Previous iterations had used the final 6 games of the last season to predict week 1,
# but not sure if thats the best way to do it since things vary a lot from season to season. Can revisit later.

base = df.copy()[['season', 'team', 'week', 'qb']]

qb_rolling = df.groupby(by=['season', 'team', 'week', 'qb']).mean().reset_index().groupby(
    by=['season', 'team', 'qb']).rolling(
    6, closed='left', min_periods=1).mean().reset_index(
    level=['qb', 'season', 'team'])[['qb', 'season', 'team', 'passing_value']]

other_rolling = df.groupby(by=['season', 'team', 'week']).mean().rolling(
    6, closed='left', min_periods=1).mean().reset_index(
    level=['season', 'team', 'week'])[['season', 'team', 'week', 'rushing_value', 
                                       'qb_rushing_value_pct', 'pass_def_value', 'rush_def_value']]

base = qb_rolling.merge(other_rolling)
base = base[['season', 'team', 'week', 'qb', 'passing_value', 'rushing_value', 
                                       'qb_rushing_value_pct', 'pass_def_value', 'rush_def_value']]

  qb_rolling = df.groupby(by=['season', 'team', 'week', 'qb']).mean().rolling(
  other_rolling = df.groupby(by=['season', 'team', 'week']).mean().rolling(


In [124]:
base

Unnamed: 0,season,team,week,qb,passing_value,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value
0,2014,ARI,1,C. Palmer,,,,,
1,2014,ARI,2,D. Stanton,0.604567,0.194004,0.270270,0.493942,0.855757
2,2014,ARI,3,D. Stanton,0.428231,0.391087,0.135135,0.477170,0.820346
3,2014,ARI,5,D. Stanton,0.535634,0.452549,0.170676,0.386578,0.717531
4,2014,ARI,6,C. Palmer,0.475280,0.371278,0.128007,0.331807,0.656903
...,...,...,...,...,...,...,...,...,...
4740,2022,WAS,6,C. Wentz,0.435165,0.445968,0.176340,0.425100,0.560599
4741,2022,WAS,7,T. Heinicke,0.292100,0.492684,0.178924,0.511041,0.472391
4742,2022,WAS,8,T. Heinicke,0.243559,0.523155,0.156401,0.524770,0.548866
4743,2022,WAS,9,T. Heinicke,0.244251,0.460470,0.163158,0.521156,0.640387


In [125]:
current = base[base.season==2022]
current[current.team == 'LV']

Unnamed: 0,season,team,week,qb,passing_value,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value
4608,2022,LV,1,D. Carr,0.417879,0.540968,0.174493,0.566478,0.270568
4609,2022,LV,2,D. Carr,0.315332,0.538961,0.174493,0.448187,0.396405
4610,2022,LV,3,D. Carr,0.269735,0.462231,0.182638,0.522957,0.371302
4611,2022,LV,4,D. Carr,0.338547,0.454356,0.191577,0.40447,0.376737
4612,2022,LV,5,D. Carr,0.354941,0.570247,0.156234,0.449292,0.478653
4613,2022,LV,7,D. Carr,0.377072,0.624449,0.154004,0.327968,0.522275
4614,2022,LV,8,D. Carr,0.488709,0.671456,0.083491,0.345824,0.513135
4615,2022,LV,8,J. Stidham,0.458406,0.671456,0.083491,0.345824,0.513135
4616,2022,LV,9,D. Carr,0.44982,0.593296,0.083491,0.359617,0.444101
4617,2022,LV,10,D. Carr,0.470136,0.552155,0.064973,0.255357,0.50602


In [126]:
df[df.team=='LV'].tail(11)

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb,passing_value,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value
4418,2021,19,LV,CIN,19,26,D. Carr,0.412575,0.832673,0.192308,0.186235,0.594935
4455,2022,1,LV,LAC,19,24,D. Carr,0.242689,0.51012,0.0,0.095614,0.827463
4487,2022,2,LV,ARI,23,29,D. Carr,0.293302,0.499345,0.111111,0.717448,0.075474
4519,2022,3,LV,TEN,22,24,D. Carr,0.553403,0.671853,0.175258,0.090085,0.538289
4551,2022,4,LV,DEN,32,23,D. Carr,0.289745,0.793759,0.195349,0.577519,0.882914
4585,2022,5,LV,KC,29,30,D. Carr,0.681519,0.731097,0.019231,0.169965,0.391064
4645,2022,7,LV,HOU,38,20,D. Carr,0.871595,0.822565,0.0,0.424311,0.363604
4673,2022,8,LV,NO,0,24,D. Carr,0.060872,0.04116,0.0,0.178376,0.413264
4674,2022,8,LV,NO,0,24,J. Stidham,0.241789,0.04116,0.0,0.178376,0.413264
4704,2022,9,LV,JAX,20,27,D. Carr,0.675298,0.252498,0.0,0.091885,0.446986


In [131]:
qb_rolling = df.groupby(by=['season', 'team', 'week', 'qb']).mean().reset_index()
qb_rolling[qb_rolling.team == 'LV'].tail(10)

  qb_rolling = df.groupby(by=['season', 'team', 'week', 'qb']).mean().reset_index()


Unnamed: 0,season,team,week,qb,score,opponent_score,passing_value,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value
4608,2022,LV,1,D. Carr,19.0,24.0,0.242689,0.51012,0.0,0.095614,0.827463
4609,2022,LV,2,D. Carr,23.0,29.0,0.293302,0.499345,0.111111,0.717448,0.075474
4610,2022,LV,3,D. Carr,22.0,24.0,0.553403,0.671853,0.175258,0.090085,0.538289
4611,2022,LV,4,D. Carr,32.0,23.0,0.289745,0.793759,0.195349,0.577519,0.882914
4612,2022,LV,5,D. Carr,29.0,30.0,0.681519,0.731097,0.019231,0.169965,0.391064
4613,2022,LV,7,D. Carr,38.0,20.0,0.871595,0.822565,0.0,0.424311,0.363604
4614,2022,LV,8,D. Carr,0.0,24.0,0.060872,0.04116,0.0,0.178376,0.413264
4615,2022,LV,8,J. Stidham,0.0,24.0,0.241789,0.04116,0.0,0.178376,0.413264
4616,2022,LV,9,D. Carr,20.0,27.0,0.675298,0.252498,0.0,0.091885,0.446986
4617,2022,LV,10,D. Carr,20.0,25.0,0.465886,0.227262,0.0,0.43485,0.13401


In [148]:
df[['season', 'team', 'week', 'score', 'opponent_score', 'rushing_value', 'qb_rushing_value_pct', 
   'pass_def_value', 'rush_def_value']].drop_duplicates().groupby(by=['season', 'team']).rolling(
    6, closed='left', min_periods=1).mean().reset_index(
    level=['season', 'team'])

Unnamed: 0,season,team,week,score,opponent_score,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value
0,2014,ARI,,,,,,,
33,2014,ARI,1.00,18.000000,17.000000,0.194004,0.270270,0.493942,0.855757
65,2014,ARI,1.50,21.500000,15.500000,0.391087,0.135135,0.477170,0.820346
126,2014,ARI,2.00,22.000000,15.000000,0.452549,0.170676,0.386578,0.717531
158,2014,ARI,2.75,21.500000,21.500000,0.371278,0.128007,0.331807,0.656903
...,...,...,...,...,...,...,...,...,...
4628,2022,WAS,3.00,18.000000,25.600000,0.445198,0.211608,0.423382,0.528998
4656,2022,WAS,3.50,17.000000,22.500000,0.492684,0.178924,0.511041,0.472391
4687,2022,WAS,4.50,16.166667,22.333333,0.523155,0.156401,0.524770,0.548866
4714,2022,WAS,5.50,14.500000,19.000000,0.460470,0.163158,0.521156,0.640387


In [151]:
qb_rolling = df.groupby(by=['season', 'team', 'week', 'qb']).mean().reset_index().groupby(
    by=['season', 'team', 'qb']).rolling(
    6, closed='left', min_periods=1).mean().reset_index(
    level=['qb', 'season', 'team'])[['qb', 'season', 'team', 'passing_value']]
qb_rolling

  qb_rolling = df.groupby(by=['season', 'team', 'week', 'qb']).mean().reset_index().groupby(


Unnamed: 0,qb,season,team,passing_value
0,C. Palmer,2014,ARI,
4,C. Palmer,2014,ARI,0.604567
5,C. Palmer,2014,ARI,0.589692
6,C. Palmer,2014,ARI,0.641113
7,C. Palmer,2014,ARI,0.637922
...,...,...,...,...
4740,C. Wentz,2022,WAS,0.333638
4741,T. Heinicke,2022,WAS,
4742,T. Heinicke,2022,WAS,0.361894
4743,T. Heinicke,2022,WAS,0.416002


In [153]:

base = df.copy()[['season', 'team', 'week', 'qb']]
base

Unnamed: 0,season,team,week,qb
0,2014,ARI,1,C. Palmer
1,2014,ATL,1,M. Ryan
2,2014,BAL,1,J. Flacco
3,2014,BUF,1,E. Manuel
4,2014,CAR,1,D. Anderson
...,...,...,...,...
4740,2022,TB,10,T. Brady
4741,2022,TEN,10,R. Tannehill
4742,2022,WAS,10,T. Heinicke
4743,2022,GB,11,A. Rodgers
