In [1]:
# Import packages

import pandas as pd 
import numpy as np 
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from joblib import dump, load

pd.set_option('display.max_columns', None)

In [2]:
# Import data

# passing = pd.read_csv('../modeling/pass_offense_value_modeling_adjusted/passing_value_model_adjusted_data/passing_with_value.csv')
# passing = passing.drop(columns = ['Unnamed: 0'])
rb = pd.read_csv('../modeling/rush_offense_value_modeling_adjusted/rush_offense_value_modeling_adjusted_data/rushing_offense_values_with_adjustment.csv')
rb = rb.drop(columns = ['Unnamed: 0'])
# pass_def = pd.read_csv('../modeling/pass_defense_value_modeling_adjusted/passing_defense_value_model_v2_data/pass_def_with_value.csv')
# pass_def = pass_def.drop(columns = ['Unnamed: 0'])
rush_def = pd.read_csv('../modeling/rush_defense_value_modeling_adjusted/rush_defense_value_modeling_adjusted_data/rushing_defense_values_with_adjustment.csv')
rush_def = rush_def.drop(columns = ['Unnamed: 0'])
qb = pd.read_csv('../modeling/qb_value_modeling_adjusted/qb_value_modeling_adjusted_data/qb_values_with_adjustment.csv')
qb = qb.drop(columns = ['Unnamed: 0'])
qb_def = pd.read_csv('../modeling/qb_defense_value_modeling_adjusted/qb_defense_value_modeling_adjusted_data/qb_defense_values_with_adjustment.csv')
qb_def = qb_def.drop(columns = ['Unnamed: 0'])
st = pd.read_csv('../modeling/special_teams_value_modeling_v2/special_teams_value_model_v2_data/special_teams_with_value.csv')
st = st.drop(columns = ['Unnamed: 0'])

In [3]:
# Put data into one data frame that just has the game identifiers and value numbers

qb = qb.copy()[['season', 'week', 'team', 'opponent', 'score', 'opponent_score', 'qb', 'qb_adjusted_value']]
# passing = passing.copy()[['season', 'week', 'team', 'opponent', 'score', 'opponent_score', 'qb', 'passing_value']]
# rb = rb.copy()[['season', 'week', 'team', 'opponent', 'score', 'opponent_score', 'rushing_value', 'qb_rushing_value_pct']]
rb = rb.copy()[['season', 'week', 'team', 'opponent', 'score', 'opponent_score', 'rushing_adjusted_value']]
rb['qb_rushing_value_pct'] = 0
qb_def = qb_def.copy()[['season', 'week', 'team', 'opponent', 'score', 'opponent_score', 'qb_def_adjusted_value']]
# pass_def = pass_def.copy()[['season', 'week', 'team', 'opponent', 'score', 'opponent_score', 'pass_def_value']]
rush_def = rush_def.copy()[['season', 'week', 'team', 'opponent', 'score', 'opponent_score', 'rush_def_adjusted_value']]
st = st.copy()[['season', 'week', 'team', 'opponent', 'score', 'opponent_score', 'special_teams_value']]
df = qb.copy()
# df = df.merge(passing).merge(rb).merge(qb_def).merge(pass_def).merge(rush_def).merge(st)
df = df.merge(rb).merge(qb_def).merge(rush_def).merge(st)
df = df.drop_duplicates()
df.head()

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb,qb_adjusted_value,rushing_adjusted_value,qb_rushing_value_pct,qb_def_adjusted_value,rush_def_adjusted_value,special_teams_value
0,2014,1,ARI,LAC,18,17,C. Palmer,,,0,,,-0.996743
1,2014,1,ATL,NO,37,34,M. Ryan,,,0,,,-0.243872
2,2014,1,BAL,CIN,16,23,J. Flacco,,,0,,,-0.4421
3,2014,1,BUF,CHI,23,20,E. Manuel,,,0,,,0.780553
4,2014,1,CAR,TB,20,14,D. Anderson,,,0,,,-0.72529


In [4]:
# Fix team names

def fix_team_names(game, is_team=True):
    team_mapping = {
        'ARI':'Arizona Cardinals',
        'ATL':'Atlanta Falcons',
        'BAL':'Baltimore Ravens',
        'BUF':'Buffalo Bills',
        'CAR':'Carolina Panthers',
        'CHI':'Chicago Bears',
        'CIN':'Cincinnati Bengals',
        'CLE':'Cleveland Browns',
        'DAL':'Dallas Cowboys',
        'DEN':'Denver Broncos',
        'DET':'Detroit Lions',
        'GB':'Green Bay Packers',
        'HOU':'Houston Texans',
        'IND':'Indianapolis Colts',
        'JAX':'Jacksonville Jaguars',
        'KC':'Kansas City Chiefs',
        'OAK':'Las Vegas Raiders',
        'LV':'Las Vegas Raiders',
        'LAC':'Los Angeles Chargers',
        'LAR':'Los Angeles Rams',
        'LA':'Los Angeles Rams',
        'MIA':'Miami Dolphins',
        'MIN':'Minnesota Vikings',
        'NE':'New England Patriots',
        'NO':'New Orleans Saints',
        'NYG':'New York Giants',
        'NYJ':'New York Jets',
        'PHI':'Philadelphia Eagles',
        'PIT':'Pittsburgh Steelers',
        'SF':'San Francisco 49ers',
        'SEA':'Seattle Seahawks',
        'TB':'Tampa Bay Buccaneers',
        'TEN':'Tennessee Titans',
        'WSH':'Washington Football Team',
        'WAS':'Washington Football Team'
    }
    
    if is_team:
        return team_mapping[game['team']]
    
    else:
        return team_mapping[game['opponent']]
    
df['team_full'] = df.apply(lambda x: fix_team_names(x, is_team=True), axis=1)
df['opponent_full'] = df.apply(lambda x: fix_team_names(x, is_team=False), axis=1)
df.head()

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb,qb_adjusted_value,rushing_adjusted_value,qb_rushing_value_pct,qb_def_adjusted_value,rush_def_adjusted_value,special_teams_value,team_full,opponent_full
0,2014,1,ARI,LAC,18,17,C. Palmer,,,0,,,-0.996743,Arizona Cardinals,Los Angeles Chargers
1,2014,1,ATL,NO,37,34,M. Ryan,,,0,,,-0.243872,Atlanta Falcons,New Orleans Saints
2,2014,1,BAL,CIN,16,23,J. Flacco,,,0,,,-0.4421,Baltimore Ravens,Cincinnati Bengals
3,2014,1,BUF,CHI,23,20,E. Manuel,,,0,,,0.780553,Buffalo Bills,Chicago Bears
4,2014,1,CAR,TB,20,14,D. Anderson,,,0,,,-0.72529,Carolina Panthers,Tampa Bay Buccaneers


In [5]:
qb_def[qb_def.week==2]

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb_def_adjusted_value
32,2014,2,ARI,NYG,25,14,-0.737241
33,2014,2,ATL,CIN,10,24,-0.879286
34,2014,2,BAL,PIT,26,6,0.629275
35,2014,2,BUF,MIA,29,10,1.100186
36,2014,2,CAR,DET,24,7,0.730025
...,...,...,...,...,...,...,...
4355,2022,2,SEA,SF,7,27,-0.379889
4356,2022,2,SF,SEA,27,7,1.508990
4357,2022,2,TB,NO,20,10,1.823388
4358,2022,2,TEN,BUF,7,41,-0.787880


In [6]:
# Save raw data frame with values

df.to_csv('../data/adjusted_value_models_combined.csv')

In [7]:
# Save data frame with past 6 games rolling stats

# Notes:
# Pure average for now, not weighted (possible future adjustment?)
# Resets every new season. Previous iterations had used the final 6 games of the last season to predict week 1,
# but not sure if thats the best way to do it since things vary a lot from season to season. Can revisit later.

# Accuracy for model isn't great in general, but its worse for the beginning games of the season using the above. 
# Going to try this using the previous season's games as well. 

offense_base = df.copy()[['season', 'week', 'team', 'qb']]
defense_base = df.copy()[['season', 'week', 'team']]

# To undo, add season back to the groupby and levels
offense_rolling = df.groupby(by=['team', 'qb']).rolling(
    6, closed='left', min_periods=1).mean()[[
    'qb_adjusted_value', 'rushing_adjusted_value', 'qb_rushing_value_pct']].reset_index(
    level=['team',  'qb'])[['qb_adjusted_value', 'rushing_adjusted_value', 'qb_rushing_value_pct']]

defense_rolling = df[['season', 'week', 'team', 'qb_def_adjusted_value', 'rush_def_adjusted_value', 'special_teams_value']].drop_duplicates().groupby(
    by=['team']).rolling(6, closed='left', min_periods=1).mean()[[
    'qb_def_adjusted_value', 'rush_def_adjusted_value', 'special_teams_value']].reset_index(
    level=['team'])[['qb_def_adjusted_value', 'rush_def_adjusted_value', 'special_teams_value']]

offense = offense_base.join(offense_rolling)
defense = defense_base.join(defense_rolling).dropna()

combined = offense.merge(defense, how='left')

  offense_rolling = df.groupby(by=['team', 'qb']).rolling(


In [8]:
# NOTE: Pretty sure everything in previous code block is good, but do some checks next time i look at this

combined[(combined.team=='ATL') & (combined.season==2014)].head(16)

Unnamed: 0,season,week,team,qb,qb_adjusted_value,rushing_adjusted_value,qb_rushing_value_pct,qb_def_adjusted_value,rush_def_adjusted_value,special_teams_value
1,2014,1,ATL,M. Ryan,,,,,,
33,2014,2,ATL,M. Ryan,,,0.0,,,
65,2014,3,ATL,M. Ryan,-1.307454,0.874004,0.0,-0.879286,-0.600544,0.801254
96,2014,4,ATL,M. Ryan,0.066511,-0.506235,0.0,-0.879286,-0.600544,0.757188
123,2014,5,ATL,M. Ryan,0.206473,0.076393,0.0,-0.984527,-1.105372,0.479095
153,2014,6,ATL,M. Ryan,0.154375,-0.02592,0.0,-0.790908,-0.897583,0.409503
183,2014,7,ATL,M. Ryan,-0.050746,-0.054898,0.0,-0.906684,-0.724975,0.641817
213,2014,8,ATL,M. Ryan,-0.281893,-0.061745,0.0,-0.844054,-0.390739,0.61268
269,2014,10,ATL,M. Ryan,0.043859,-0.211775,0.0,-0.82795,-0.213371,0.267767
295,2014,11,ATL,M. Ryan,-0.110725,0.130727,0.0,-0.784788,-0.112773,0.221073


In [9]:
df[(df.team=='ATL') & (df.season==2014)].head(15)

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb,qb_adjusted_value,rushing_adjusted_value,qb_rushing_value_pct,qb_def_adjusted_value,rush_def_adjusted_value,special_teams_value,team_full,opponent_full
1,2014,1,ATL,NO,37,34,M. Ryan,,,0,,,-0.243872,Atlanta Falcons,New Orleans Saints
33,2014,2,ATL,CIN,10,24,M. Ryan,-1.307454,0.874004,0,-0.879286,-0.600544,1.846381,Atlanta Falcons,Cincinnati Bengals
65,2014,3,ATL,TB,56,14,M. Ryan,1.440476,-1.886474,0,,,0.669056,Atlanta Falcons,Tampa Bay Buccaneers
96,2014,4,ATL,MIN,28,41,M. Ryan,0.486396,1.241649,0,-1.089767,-1.610199,-0.355185,Atlanta Falcons,Minnesota Vikings
123,2014,5,ATL,NYG,20,30,M. Ryan,-0.001918,-0.332857,0,-0.403669,-0.482005,0.131137,Atlanta Falcons,New York Giants
153,2014,6,ATL,CHI,13,27,M. Ryan,-0.871229,-0.170812,0,-1.254014,-0.207154,1.803385,Atlanta Falcons,Chicago Bears
183,2014,7,ATL,BAL,7,29,M. Ryan,-1.437632,-0.095979,0,-0.593535,0.946209,-0.418695,Atlanta Falcons,Baltimore Ravens
213,2014,8,ATL,DET,21,22,M. Ryan,0.647058,-0.02618,0,-0.798767,0.286295,-0.223097,Atlanta Falcons,Detroit Lions
269,2014,10,ATL,TB,27,17,M. Ryan,0.512972,0.168542,0,-0.568976,0.390219,0.388894,Atlanta Falcons,Tampa Bay Buccaneers
295,2014,11,ATL,CAR,19,17,M. Ryan,0.308796,-1.332182,0,0.280896,-0.236249,1.286911,Atlanta Falcons,Carolina Panthers


In [10]:
df[df.team == 'LV'].tail(15)

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb,qb_adjusted_value,rushing_adjusted_value,qb_rushing_value_pct,qb_def_adjusted_value,rush_def_adjusted_value,special_teams_value,team_full,opponent_full
4378,2022,3,LV,TEN,22,24,D. Carr,0.535601,0.337344,0,-0.931799,0.100311,-0.218493,Las Vegas Raiders,Tennessee Titans
4410,2022,4,LV,DEN,32,23,D. Carr,-0.223133,1.363907,0,0.045585,1.095087,-0.504775,Las Vegas Raiders,Denver Broncos
4442,2022,5,LV,KC,29,30,D. Carr,0.234723,0.626897,0,-0.93817,-0.009525,1.170337,Las Vegas Raiders,Kansas City Chiefs
4500,2022,7,LV,HOU,38,20,D. Carr,0.692904,1.110959,0,-0.262806,-0.268463,0.578142,Las Vegas Raiders,Houston Texans
4527,2022,8,LV,NO,0,24,D. Carr,-1.348601,-1.043091,0,-0.932614,0.08732,0.69266,Las Vegas Raiders,New Orleans Saints
4555,2022,9,LV,JAX,20,27,D. Carr,0.499375,-0.81846,0,-1.029325,0.176854,0.670404,Las Vegas Raiders,Jacksonville Jaguars
4582,2022,10,LV,IND,20,25,D. Carr,0.070181,-0.955259,0,-0.963758,-1.718785,1.389234,Las Vegas Raiders,Indianapolis Colts
4611,2022,11,LV,DEN,22,16,D. Carr,0.697495,-0.041618,0,-0.351074,0.038151,0.402555,Las Vegas Raiders,Denver Broncos
4640,2022,12,LV,SEA,40,34,D. Carr,1.148768,0.680231,0,-0.299975,0.944338,-0.831662,Las Vegas Raiders,Seattle Seahawks
4670,2022,13,LV,LAC,27,20,D. Carr,-0.549901,0.565149,0,0.360074,0.068165,0.614703,Las Vegas Raiders,Los Angeles Chargers


In [11]:
combined

Unnamed: 0,season,week,team,qb,qb_adjusted_value,rushing_adjusted_value,qb_rushing_value_pct,qb_def_adjusted_value,rush_def_adjusted_value,special_teams_value
0,2014,1,ARI,C. Palmer,,,,,,
1,2014,1,ATL,M. Ryan,,,,,,
2,2014,1,BAL,J. Flacco,,,,,,
3,2014,1,BUF,E. Manuel,,,,,,
4,2014,1,CAR,D. Anderson,,,,,,
...,...,...,...,...,...,...,...,...,...,...
4857,2022,21,KC,P. Mahomes,0.626274,0.390027,0.0,0.641969,-0.456237,-0.444345
4858,2022,21,PHI,J. Hurts,0.756473,2.287960,0.0,0.513945,-0.159588,0.134125
4859,2022,21,SF,J. Johnson,,,,0.559229,0.398191,-0.121462
4860,2022,22,KC,P. Mahomes,0.666601,0.280515,0.0,0.579033,-0.464682,-0.538741


In [12]:
# fix team names

combined['team_full'] = combined.apply(lambda x: fix_team_names(x, is_team=True), axis=1)
# combined['opponent_full'] = combined.apply(lambda x: fix_team_names(x, is_team=False), axis=1)
combined.head()

Unnamed: 0,season,week,team,qb,qb_adjusted_value,rushing_adjusted_value,qb_rushing_value_pct,qb_def_adjusted_value,rush_def_adjusted_value,special_teams_value,team_full
0,2014,1,ARI,C. Palmer,,,,,,,Arizona Cardinals
1,2014,1,ATL,M. Ryan,,,,,,,Atlanta Falcons
2,2014,1,BAL,J. Flacco,,,,,,,Baltimore Ravens
3,2014,1,BUF,E. Manuel,,,,,,,Buffalo Bills
4,2014,1,CAR,D. Anderson,,,,,,,Carolina Panthers


In [13]:
# Save aggregated data frame with values

combined.to_csv('../data/adjusted_value_models_combined_6_game_rolling.csv')

In [14]:
# Sandbox for testing

In [15]:
combined[(combined.team == 'BAL') & (combined.season==2022)]

Unnamed: 0,season,week,team,qb,qb_adjusted_value,rushing_adjusted_value,qb_rushing_value_pct,qb_def_adjusted_value,rush_def_adjusted_value,special_teams_value,team_full
4298,2022,1,BAL,L. Jackson,-0.429986,0.776442,0.0,-0.947986,0.157411,0.776423,Baltimore Ravens
4330,2022,2,BAL,L. Jackson,-0.339047,0.495349,0.0,-0.692315,0.192469,0.916828,Baltimore Ravens
4362,2022,3,BAL,L. Jackson,0.032903,0.247179,0.0,-1.100807,-0.026739,0.808228,Baltimore Ravens
4394,2022,4,BAL,L. Jackson,0.23581,0.226215,0.0,-0.863102,-0.105486,0.831129,Baltimore Ravens
4426,2022,5,BAL,L. Jackson,0.379862,0.540616,0.0,-0.402611,0.025398,0.833132,Baltimore Ravens
4458,2022,6,BAL,L. Jackson,0.435177,0.620529,0.0,-0.204091,-0.133498,0.85418,Baltimore Ravens
4486,2022,7,BAL,L. Jackson,0.405073,0.872694,0.0,-0.274165,-0.267357,0.689958,Baltimore Ravens
4514,2022,8,BAL,L. Jackson,0.338014,0.869879,0.0,-0.411264,-0.461854,0.717186,Baltimore Ravens
4542,2022,9,BAL,L. Jackson,0.029609,1.264946,0.0,-0.031939,-0.323345,0.735114,Baltimore Ravens
4596,2022,11,BAL,L. Jackson,-0.047083,1.137034,0.0,0.050003,-0.059724,0.695078,Baltimore Ravens
