In [15]:
# Import packages

import pandas as pd 
import numpy as np 
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from joblib import dump, load

# Remove the cap on how many columns pandas shows when a frame is displayed
pd.set_option('display.max_columns', None)

In [16]:
# Load the output of each value model

# Every file comes back with an 'Unnamed: 0' column, which is dropped right after the read.
passing = pd.read_csv(
    '../modeling/pass_offense_value_modeling_v2/passing_value_model_v2_data/passing_with_value.csv'
).drop(columns=['Unnamed: 0'])

rb = pd.read_csv(
    '../modeling/rush_offense_value_modeling_v2/rushing_value_model_v2_data/rushing_with_value.csv'
).drop(columns=['Unnamed: 0'])

pass_def = pd.read_csv(
    '../modeling/pass_defense_value_modeling_v2/passing_defense_value_model_v2_data/pass_def_with_value.csv'
).drop(columns=['Unnamed: 0'])

rush_def = pd.read_csv(
    '../modeling/rush_defense_value_modeling_v2/rush_defense_value_model_v2_data/rush_def_with_value.csv'
).drop(columns=['Unnamed: 0'])

qb = pd.read_csv(
    '../modeling/qb_value_modeling_v2/qb_value_model_v2_data/qb_with_value.csv'
).drop(columns=['Unnamed: 0'])

qb_def = pd.read_csv(
    '../modeling/qb_defense_value_modeling_v2/qb_defense_value_model_v2_data/qb_def_with_value.csv'
).drop(columns=['Unnamed: 0'])

st = pd.read_csv(
    '../modeling/special_teams_value_modeling_v2/special_teams_value_model_v2_data/special_teams_with_value.csv'
).drop(columns=['Unnamed: 0'])

In [17]:
# Reduce every frame to the game identifiers plus its own value column(s), then combine them

GAME_KEY_COLS = ['season', 'week', 'team', 'opponent', 'score', 'opponent_score']

qb = qb[GAME_KEY_COLS + ['qb', 'qb_value']].copy()
passing = passing[GAME_KEY_COLS + ['qb', 'passing_value']].copy()
# 'qb_rushing_value_pct' is not read from the rushing data; it is filled with a constant 0.
rb = rb[GAME_KEY_COLS + ['rushing_value']].copy()
rb['qb_rushing_value_pct'] = 0
qb_def = qb_def[GAME_KEY_COLS + ['qb_def_value']].copy()
pass_def = pass_def[GAME_KEY_COLS + ['pass_def_value']].copy()
rush_def = rush_def[GAME_KEY_COLS + ['rush_def_value']].copy()
st = st[GAME_KEY_COLS + ['special_teams_value']].copy()

# merge() without `on` joins on every column name the two frames have in common.
df = (
    qb.copy()
    .merge(passing)
    .merge(rb)
    .merge(qb_def)
    .merge(pass_def)
    .merge(rush_def)
    .merge(st)
    .drop_duplicates()
)
df.head()

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb,qb_value,passing_value,rushing_value,qb_rushing_value_pct,qb_def_value,pass_def_value,rush_def_value,special_teams_value
0,2014,1,ARI,LAC,18,17,C. Palmer,0.491486,0.472651,-1.258735,0,0.361331,0.014745,1.468394,-0.996743
1,2014,1,ATL,NO,37,34,M. Ryan,1.997776,1.515659,0.682493,0,-1.047285,-0.748551,-1.005162,-0.243872
2,2014,1,BAL,CIN,16,23,J. Flacco,-0.509634,-0.552992,0.180459,0,-0.457796,-0.934747,0.727127,-0.4421
3,2014,1,BUF,CHI,23,20,E. Manuel,-0.171272,-0.063069,1.014793,0,0.198708,0.700995,-0.187514,0.780553
4,2014,1,CAR,TB,20,14,D. Anderson,0.728679,0.690009,-0.984362,0,0.53196,1.035469,-0.392165,-0.72529


In [18]:
# Fix team names

# Abbreviation -> full franchise name. Built once here rather than on every
# row-wise call. Several abbreviations deliberately map to the same name
# (OAK/LV, LAR/LA, WSH/WAS).
TEAM_FULL_NAMES = {
    'ARI': 'Arizona Cardinals',
    'ATL': 'Atlanta Falcons',
    'BAL': 'Baltimore Ravens',
    'BUF': 'Buffalo Bills',
    'CAR': 'Carolina Panthers',
    'CHI': 'Chicago Bears',
    'CIN': 'Cincinnati Bengals',
    'CLE': 'Cleveland Browns',
    'DAL': 'Dallas Cowboys',
    'DEN': 'Denver Broncos',
    'DET': 'Detroit Lions',
    'GB': 'Green Bay Packers',
    'HOU': 'Houston Texans',
    'IND': 'Indianapolis Colts',
    'JAX': 'Jacksonville Jaguars',
    'KC': 'Kansas City Chiefs',
    'OAK': 'Las Vegas Raiders',
    'LV': 'Las Vegas Raiders',
    'LAC': 'Los Angeles Chargers',
    'LAR': 'Los Angeles Rams',
    'LA': 'Los Angeles Rams',
    'MIA': 'Miami Dolphins',
    'MIN': 'Minnesota Vikings',
    'NE': 'New England Patriots',
    'NO': 'New Orleans Saints',
    'NYG': 'New York Giants',
    'NYJ': 'New York Jets',
    'PHI': 'Philadelphia Eagles',
    'PIT': 'Pittsburgh Steelers',
    'SF': 'San Francisco 49ers',
    'SEA': 'Seattle Seahawks',
    'TB': 'Tampa Bay Buccaneers',
    'TEN': 'Tennessee Titans',
    'WSH': 'Washington Football Team',
    'WAS': 'Washington Football Team',
}


def fix_team_names(game, is_team=True):
    """Return the full franchise name for one side of a game row.

    Parameters
    ----------
    game : mapping-like (e.g. a DataFrame row)
        Must support ``game['team']`` when ``is_team`` is true, otherwise
        ``game['opponent']``; the value is a team abbreviation.
    is_team : bool, default True
        True looks up the 'team' entry, False looks up the 'opponent' entry.

    Returns
    -------
    str
        The full name registered for the abbreviation.

    Raises
    ------
    KeyError
        If the abbreviation is not in the mapping. The message names both the
        abbreviation and the column it came from.
    """
    column = 'team' if is_team else 'opponent'
    abbreviation = game[column]
    try:
        return TEAM_FULL_NAMES[abbreviation]
    except KeyError:
        # Same exception type as a plain dict lookup, but with enough context
        # to find the offending value in the data.
        raise KeyError(
            f"Unknown team abbreviation {abbreviation!r} in column {column!r}"
        ) from None
    
# Row-wise lookup of the full names for both sides of each game.
# Extra keyword arguments given to DataFrame.apply are forwarded to the function.
df['team_full'] = df.apply(fix_team_names, axis=1, is_team=True)
df['opponent_full'] = df.apply(fix_team_names, axis=1, is_team=False)
df.head()

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb,qb_value,passing_value,rushing_value,qb_rushing_value_pct,qb_def_value,pass_def_value,rush_def_value,special_teams_value,team_full,opponent_full
0,2014,1,ARI,LAC,18,17,C. Palmer,0.491486,0.472651,-1.258735,0,0.361331,0.014745,1.468394,-0.996743,Arizona Cardinals,Los Angeles Chargers
1,2014,1,ATL,NO,37,34,M. Ryan,1.997776,1.515659,0.682493,0,-1.047285,-0.748551,-1.005162,-0.243872,Atlanta Falcons,New Orleans Saints
2,2014,1,BAL,CIN,16,23,J. Flacco,-0.509634,-0.552992,0.180459,0,-0.457796,-0.934747,0.727127,-0.4421,Baltimore Ravens,Cincinnati Bengals
3,2014,1,BUF,CHI,23,20,E. Manuel,-0.171272,-0.063069,1.014793,0,0.198708,0.700995,-0.187514,0.780553,Buffalo Bills,Chicago Bears
4,2014,1,CAR,TB,20,14,D. Anderson,0.728679,0.690009,-0.984362,0,0.53196,1.035469,-0.392165,-0.72529,Carolina Panthers,Tampa Bay Buccaneers


In [19]:
# Write the per-game value frame to disk

# index=True is the to_csv default, spelled out here: the row index is written as the first column.
df.to_csv('../data/value_models_combined.csv', index=True)

In [21]:
# Build the data frame of rolling averages over the previous 6 games

# Notes:
# - Plain (unweighted) mean for now; weighting is a possible future adjustment.
# - The window is NOT reset at season boundaries, so early-season rows draw on games from the
#   previous season. An earlier iteration reset every season, but accuracy for the first games
#   of a season was worse that way. To go back to per-season windows, add 'season' to both
#   groupby key lists below.
# - closed='left' keeps the current game out of its own window, so the first game of every
#   group has no history and comes out as NaN.

ROLLING_WINDOW = 6
offense_value_cols = ['qb_value', 'passing_value', 'rushing_value', 'qb_rushing_value_pct']
defense_value_cols = ['qb_def_value', 'pass_def_value', 'rush_def_value', 'special_teams_value']

# Rolling windows follow row order, so hand them the games in chronological order.
# sort_values keeps the original index labels, which the joins below align on.
games_in_order = df.sort_values(['season', 'week'])

offense_base = df[['season', 'week', 'team', 'qb']].copy()
defense_base = df[['season', 'week', 'team']].copy()

# Pick the value columns BEFORE rolling so the string columns of df never reach mean().
# Older pandas dropped such columns with a FutureWarning; newer versions raise instead.
offense_rolling = (
    games_in_order
    .groupby(['team', 'qb'])[offense_value_cols]
    .rolling(ROLLING_WINDOW, closed='left', min_periods=1)
    .mean()
    .reset_index(level=['team', 'qb'], drop=True)
)

# df can hold several rows for one team-game (presumably one per quarterback -- verify), all
# repeating the same defensive values; keep one so a game is counted once in the window.
# 'season' is part of the duplicate key so games from different seasons are never collapsed.
defense_rolling = (
    games_in_order
    .drop_duplicates(subset=['season', 'week', 'team'] + defense_value_cols)
    .groupby(['team'])[defense_value_cols]
    .rolling(ROLLING_WINDOW, closed='left', min_periods=1)
    .mean()
    .reset_index(level=['team'], drop=True)
)

offense = offense_base.join(offense_rolling)
# dropna removes the rows that received no defensive average: the de-duplicated rows and each
# team's first game.
defense = defense_base.join(defense_rolling).dropna()

# Explicit keys: these are the only columns the two frames share.
combined = offense.merge(defense, how='left', on=['season', 'week', 'team'])

  offense_rolling = df.groupby(by=['team', 'qb']).rolling(


In [22]:
# NOTE: the rolling-stats cell above looks right, but it has not been spot-checked yet;
# do that on the next pass through this notebook.

combined.loc[combined['team'] == 'LV'].tail(15)

Unnamed: 0,season,week,team,qb,qb_value,passing_value,rushing_value,qb_rushing_value_pct,qb_def_value,pass_def_value,rush_def_value,special_teams_value
4386,2022,3,LV,D. Carr,-0.227711,-0.307331,0.205813,0.0,-0.294375,-0.295871,0.139652,0.839096
4418,2022,4,LV,D. Carr,-0.078059,-0.142402,0.262186,0.0,-0.524189,-0.514883,-0.036681,0.735322
4450,2022,5,LV,D. Carr,-0.162883,-0.282181,0.46844,0.0,-0.586023,-0.581146,0.360221,0.544774
4508,2022,7,LV,D. Carr,0.016762,-0.095306,0.54866,0.0,-0.603932,-0.550665,0.194729,0.30643
4536,2022,8,LV,D. Carr,0.136058,0.087521,0.612734,0.0,-0.49984,-0.44558,0.078853,0.49553
4564,2022,9,LV,D. Carr,0.038059,-0.011856,0.412003,0.0,-0.490017,-0.362218,-0.217381,0.403212
4591,2022,10,LV,D. Carr,0.091725,0.169041,0.306876,0.0,-0.684263,-0.684642,0.143948,0.398046
4620,2022,11,LV,D. Carr,0.007174,0.116217,0.070542,0.0,-0.655773,-0.530316,-0.149098,0.666
4649,2022,12,LV,D. Carr,0.114275,0.278586,-0.141497,0.0,-0.694216,-0.567988,-0.344332,0.817222
4679,2022,13,LV,D. Carr,0.232577,0.38525,-0.153452,0.0,-0.534342,-0.479651,-0.150268,0.483556


In [23]:
# Per-game values for the same team, to compare against the rolling averages
df.loc[df['team'] == 'LV'].tail(15)

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb,qb_value,passing_value,rushing_value,qb_rushing_value_pct,qb_def_value,pass_def_value,rush_def_value,special_teams_value,team_full,opponent_full
4386,2022,3,LV,TEN,22,24,D. Carr,0.46462,0.405115,0.423888,0,-0.963374,-1.0208,0.235539,-0.218493,Las Vegas Raiders,Tennessee Titans
4418,2022,4,LV,DEN,32,23,D. Carr,-0.213134,-0.407256,1.346756,0,0.084533,0.180543,1.344218,-0.504775,Las Vegas Raiders,Denver Broncos
4450,2022,5,LV,KC,29,30,D. Carr,0.371816,0.536725,0.720135,0,-1.248102,-1.173216,-0.236504,1.170337,Las Vegas Raiders,Kansas City Chiefs
4508,2022,7,LV,HOU,38,20,D. Carr,0.74104,1.042196,1.143365,0,-0.072493,-0.191728,-0.324343,0.578142,Las Vegas Raiders,Houston Texans
4536,2022,8,LV,NO,0,24,D. Carr,-1.320261,-1.207371,-1.130636,0,-0.947739,-0.769853,-0.159348,0.69266,Las Vegas Raiders,New Orleans Saints
4564,2022,9,LV,JAX,20,27,D. Carr,0.506269,0.644839,-0.662253,0,-0.958401,-1.132799,0.004129,0.670404,Las Vegas Raiders,Jacksonville Jaguars
4591,2022,10,LV,IND,20,25,D. Carr,-0.042685,0.088167,-0.994115,0,-0.792435,-0.094845,-1.522742,1.389234,Las Vegas Raiders,Indianapolis Colts
4620,2022,11,LV,DEN,22,16,D. Carr,0.42947,0.56696,0.07452,0,-0.146125,-0.045489,0.172814,0.402555,Las Vegas Raiders,Denver Broncos
4649,2022,12,LV,SEA,40,34,D. Carr,1.08163,1.17671,0.648405,0,-0.28886,-0.643191,0.927884,-0.831662,Las Vegas Raiders,Seattle Seahawks
4679,2022,13,LV,LAC,27,20,D. Carr,-0.450916,-0.835251,0.698422,0,0.460542,0.674367,0.031113,0.614703,Las Vegas Raiders,Los Angeles Chargers


In [24]:
# Display the rolling-average frame
combined

Unnamed: 0,season,week,team,qb,qb_value,passing_value,rushing_value,qb_rushing_value_pct,qb_def_value,pass_def_value,rush_def_value,special_teams_value
0,2014,1,ARI,C. Palmer,,,,,,,,
1,2014,1,ATL,M. Ryan,,,,,,,,
2,2014,1,BAL,J. Flacco,,,,,,,,
3,2014,1,BUF,E. Manuel,,,,,,,,
4,2014,1,CAR,D. Anderson,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
4866,2022,21,KC,P. Mahomes,0.577461,0.538192,0.433467,0.0,0.685704,0.901990,-0.379622,-0.444345
4867,2022,21,PHI,J. Hurts,0.795804,0.137651,2.439576,0.0,0.398366,0.584781,-0.512271,0.134125
4868,2022,21,SF,J. Johnson,,,,,0.407030,0.407733,0.362456,-0.121462
4869,2022,22,KC,P. Mahomes,0.609899,0.563517,0.272761,0.0,0.591341,0.796702,-0.412985,-0.538741


In [25]:
# Add the full team name to the rolling frame

# Only the 'team' side is mapped here; the opponent mapping is left out for this frame.
# Extra keyword arguments given to DataFrame.apply are forwarded to the function.
combined['team_full'] = combined.apply(fix_team_names, axis=1, is_team=True)
combined.head()

Unnamed: 0,season,week,team,qb,qb_value,passing_value,rushing_value,qb_rushing_value_pct,qb_def_value,pass_def_value,rush_def_value,special_teams_value,team_full
0,2014,1,ARI,C. Palmer,,,,,,,,,Arizona Cardinals
1,2014,1,ATL,M. Ryan,,,,,,,,,Atlanta Falcons
2,2014,1,BAL,J. Flacco,,,,,,,,,Baltimore Ravens
3,2014,1,BUF,E. Manuel,,,,,,,,,Buffalo Bills
4,2014,1,CAR,D. Anderson,,,,,,,,,Carolina Panthers


In [26]:
# Write the 6-game rolling frame to disk

# index=True is the to_csv default, spelled out here: the row index is written as the first column.
combined.to_csv('../data/value_models_combined_6_game_rolling.csv', index=True)

In [27]:
# Sandbox for testing

In [28]:
# Spot check: one team's rolling values for one season
combined.loc[combined['team'].eq('BAL') & combined['season'].eq(2022)]

Unnamed: 0,season,week,team,qb,qb_value,passing_value,rushing_value,qb_rushing_value_pct,qb_def_value,pass_def_value,rush_def_value,special_teams_value,team_full
4306,2022,1,BAL,L. Jackson,-0.418474,-0.687724,0.864288,0.0,-0.899491,-0.941661,0.124414,0.776423,Baltimore Ravens
4338,2022,2,BAL,L. Jackson,-0.29927,-0.568898,0.591427,0.0,-0.635148,-0.620926,0.160961,0.916828,Baltimore Ravens
4370,2022,3,BAL,L. Jackson,0.025442,-0.14563,0.371509,0.0,-1.044445,-1.05637,-0.035974,0.808228,Baltimore Ravens
4402,2022,4,BAL,L. Jackson,0.278732,-0.096402,0.368862,0.0,-0.769345,-0.703351,-0.137023,0.831129,Baltimore Ravens
4434,2022,5,BAL,L. Jackson,0.375215,-0.005689,0.682965,0.0,-0.392228,-0.243045,-0.057349,0.833132,Baltimore Ravens
4466,2022,6,BAL,L. Jackson,0.403083,0.076619,0.737802,0.0,-0.204396,-0.046331,-0.184955,0.85418,Baltimore Ravens
4494,2022,7,BAL,L. Jackson,0.360778,0.011227,0.974305,0.0,-0.31626,-0.156922,-0.334284,0.689958,Baltimore Ravens
4522,2022,8,BAL,L. Jackson,0.267972,-0.121397,1.013009,0.0,-0.472353,-0.342904,-0.583146,0.717186,Baltimore Ravens
4551,2022,9,BAL,L. Jackson,0.01685,-0.38425,1.409225,0.0,-0.117026,-0.018103,-0.417191,0.735114,Baltimore Ravens
4605,2022,11,BAL,L. Jackson,-0.088105,-0.336426,1.243871,0.0,-0.04193,-0.023917,-0.150212,0.695078,Baltimore Ravens


In [29]:
# Spot check: the first season in the data, where the opening row has no history yet
combined.loc[combined['team'].eq('ATL') & combined['season'].eq(2014)]

Unnamed: 0,season,week,team,qb,qb_value,passing_value,rushing_value,qb_rushing_value_pct,qb_def_value,pass_def_value,rush_def_value,special_teams_value,team_full
1,2014,1,ATL,M. Ryan,,,,,,,,,Atlanta Falcons
33,2014,2,ATL,M. Ryan,1.997776,1.515659,0.682493,0.0,-1.047285,-0.748551,-1.005162,-0.243872,Atlanta Falcons
65,2014,3,ATL,M. Ryan,0.27302,-0.013581,0.805317,0.0,-1.031131,-0.921694,-0.693784,0.801254,Atlanta Falcons
96,2014,4,ATL,M. Ryan,0.739454,0.589468,-0.188257,0.0,-0.343842,-0.073727,0.232607,0.757188,Atlanta Falcons
123,2014,5,ATL,M. Ryan,0.662997,0.597149,0.12374,0.0,-0.551587,-0.356761,-0.219113,0.479095,Atlanta Falcons
153,2014,6,ATL,M. Ryan,0.522968,0.496475,0.026127,0.0,-0.533754,-0.402483,-0.240881,0.409503,Atlanta Falcons
183,2014,7,ATL,M. Ryan,0.288921,0.286688,0.00384,0.0,-0.649819,-0.613592,-0.232749,0.641817,Atlanta Falcons
213,2014,8,ATL,M. Ryan,-0.27335,-0.177721,-0.164106,0.0,-0.598957,-0.628825,0.085043,0.61268,Atlanta Falcons
269,2014,10,ATL,M. Ryan,0.048197,0.184738,-0.37109,0.0,-0.547431,-0.595971,0.212574,0.267767,Atlanta Falcons
295,2014,11,ATL,M. Ryan,-0.116204,0.021545,-0.004592,0.0,-0.807435,-0.945603,-0.090732,0.221073,Atlanta Falcons
