In [19]:
# Import packages

import pandas as pd 
import numpy as np 
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from joblib import dump, load

# Render every column when a DataFrame is displayed (None = no column limit)
pd.options.display.max_columns = None

In [20]:
# Load the four value-model outputs (pass offense, rush offense, pass defense, rush defense)

def _read_value_csv(csv_path):
    """Read one value-model CSV and discard its 'Unnamed: 0' column."""
    return pd.read_csv(csv_path).drop(columns=['Unnamed: 0'])

qb = _read_value_csv('../modeling/pass_offense_value_modeling_v2/passing_value_model_v2_data/passing_with_value.csv')
rb = _read_value_csv('../modeling/rush_offense_value_modeling_v2/rushing_value_model_v2_data/rushing_with_value.csv')
pass_def = _read_value_csv('../modeling/pass_defense_value_modeling_v2/passing_defense_value_model_v2_data/pass_def_with_value.csv')
rush_def = _read_value_csv('../modeling/rush_defense_value_modeling_v2/rush_defense_value_model_v2_data/rush_def_with_value.csv')

In [24]:
# Reduce every frame to the game identifiers plus its value column(s), then combine
# them into a single frame.

# Identifier columns present in all four frames; they are the merge keys below.
# Spelling them out keeps the join stable if an upstream file ever gains another
# column name that two frames happen to share.
game_keys = ['season', 'week', 'team', 'opponent', 'score', 'opponent_score']

# Select first, then copy: only the kept columns are duplicated.
qb = qb[game_keys + ['qb', 'passing_value']].copy()
# NOTE: qb_rushing_value_pct is not read from the rushing file for now; it is filled
# with a constant 0 so the column still exists in the combined frame.
rb = rb[game_keys + ['rushing_value']].copy()
rb['qb_rushing_value_pct'] = 0
pass_def = pass_def[game_keys + ['pass_def_value']].copy()
rush_def = rush_def[game_keys + ['rush_def_value']].copy()

# Inner-join everything onto the quarterback rows, then remove exact duplicate rows.
# The surviving rows keep their index labels (no reset), which later cells may rely on.
df = (
    qb.merge(rb, on=game_keys)
    .merge(pass_def, on=game_keys)
    .merge(rush_def, on=game_keys)
    .drop_duplicates()
)
df.head()

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb,passing_value,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value
0,2014,1,ARI,LAC,18,17,C. Palmer,0.472651,-1.258735,0,-0.014745,-1.468394
1,2014,1,ATL,NO,37,34,M. Ryan,1.515659,0.682493,0,0.748551,1.005162
2,2014,1,BAL,CIN,16,23,J. Flacco,-0.552992,0.180459,0,0.934747,-0.727127
3,2014,1,BUF,CHI,23,20,E. Manuel,-0.063069,1.014793,0,-0.700995,0.187514
4,2014,1,CAR,TB,20,14,D. Anderson,0.690009,-0.984362,0,-1.035469,0.392165


In [11]:
# Fix team names

# Team abbreviation -> full franchise name. Several abbreviations can point at the same
# franchise (e.g. 'OAK' and 'LV'). Built once here rather than inside the function,
# because the function is called once per row.
_TEAM_FULL_NAMES = {
    'ARI': 'Arizona Cardinals',
    'ATL': 'Atlanta Falcons',
    'BAL': 'Baltimore Ravens',
    'BUF': 'Buffalo Bills',
    'CAR': 'Carolina Panthers',
    'CHI': 'Chicago Bears',
    'CIN': 'Cincinnati Bengals',
    'CLE': 'Cleveland Browns',
    'DAL': 'Dallas Cowboys',
    'DEN': 'Denver Broncos',
    'DET': 'Detroit Lions',
    'GB': 'Green Bay Packers',
    'HOU': 'Houston Texans',
    'IND': 'Indianapolis Colts',
    'JAX': 'Jacksonville Jaguars',
    'KC': 'Kansas City Chiefs',
    'OAK': 'Las Vegas Raiders',
    'LV': 'Las Vegas Raiders',
    'LAC': 'Los Angeles Chargers',
    'LAR': 'Los Angeles Rams',
    'LA': 'Los Angeles Rams',
    'MIA': 'Miami Dolphins',
    'MIN': 'Minnesota Vikings',
    'NE': 'New England Patriots',
    'NO': 'New Orleans Saints',
    'NYG': 'New York Giants',
    'NYJ': 'New York Jets',
    'PHI': 'Philadelphia Eagles',
    'PIT': 'Pittsburgh Steelers',
    'SF': 'San Francisco 49ers',
    'SEA': 'Seattle Seahawks',
    'TB': 'Tampa Bay Buccaneers',
    'TEN': 'Tennessee Titans',
    'WSH': 'Washington Football Team',
    'WAS': 'Washington Football Team',
}


def fix_team_names(game, is_team=True):
    """Return the full franchise name for one side of a game record.

    Parameters
    ----------
    game : mapping-like
        Anything indexable by column name (a DataFrame row, a dict, ...) that holds a
        team abbreviation under 'team' and/or 'opponent'.
    is_team : bool, default True
        If True, translate ``game['team']``; otherwise translate ``game['opponent']``.

    Returns
    -------
    str
        The full franchise name for the selected abbreviation.

    Raises
    ------
    KeyError
        If the selected column is missing from ``game``, or if its abbreviation is not
        in the mapping (the message names the offending value and column).
    """
    column = 'team' if is_team else 'opponent'
    abbreviation = game[column]
    try:
        return _TEAM_FULL_NAMES[abbreviation]
    except KeyError:
        # Same exception type as a plain dict lookup, but say what failed and where.
        raise KeyError(
            f"Unknown team abbreviation {abbreviation!r} in column {column!r}"
        ) from None
    
# Attach full franchise names for both sides of every game; DataFrame.apply forwards
# the extra keyword straight to fix_team_names for each row.
df['team_full'] = df.apply(fix_team_names, axis=1, is_team=True)
df['opponent_full'] = df.apply(fix_team_names, axis=1, is_team=False)
df.head()

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb,passing_value,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value,team_full,opponent_full
0,2014,1,ARI,LAC,18,17,C. Palmer,0.472651,-1.258735,0,-0.014745,-1.468394,Arizona Cardinals,Los Angeles Chargers
1,2014,1,ATL,NO,37,34,M. Ryan,1.515659,0.682493,0,0.748551,1.005162,Atlanta Falcons,New Orleans Saints
2,2014,1,BAL,CIN,16,23,J. Flacco,-0.552992,0.180459,0,0.934747,-0.727127,Baltimore Ravens,Cincinnati Bengals
3,2014,1,BUF,CHI,23,20,E. Manuel,-0.063069,1.014793,0,-0.700995,0.187514,Buffalo Bills,Chicago Bears
4,2014,1,CAR,TB,20,14,D. Anderson,0.690009,-0.984362,0,-1.035469,0.392165,Carolina Panthers,Tampa Bay Buccaneers


In [12]:
# Persist the merged per-game value frame.
# index=True is the to_csv default: the row index is written as the first CSV column.

df.to_csv('../data/value_models_combined.csv', index=True)

In [13]:
# Build the "previous 6 games" rolling features and combine offense with defense

# Notes:
# - Plain (unweighted) average for now; weighting is a possible future adjustment.
# - Windows reset every new season. Previous iterations had used the final 6 games of
#   the last season to predict week 1, but it is not clear that is the best way to do
#   it since things vary a lot from season to season. Can revisit later.
# - closed='left' keeps the current game out of its own window, so the first row of
#   every group has an empty window and comes out as NaN.

rolling_games = 6
offense_value_cols = ['passing_value', 'rushing_value', 'qb_rushing_value_pct']
defense_value_cols = ['pass_def_value', 'rush_def_value']

offense_base = df[['season', 'week', 'team', 'qb']].copy()
defense_base = df[['season', 'week', 'team']].copy()

# Offense is averaged per (season, team, qb).
# - The value columns are selected BEFORE aggregating: df also carries string columns
#   (e.g. opponent), and taking a rolling mean over the whole frame relies on pandas
#   silently dropping them, which newer pandas versions no longer do.
# - The stable sort makes every window chronological regardless of df's row order.
#   The result keeps df's original index labels, so the joins below still line up.
offense_rolling = (
    df.sort_values(['season', 'week'], kind='stable')
    .groupby(by=['season', 'team', 'qb'])[offense_value_cols]
    .rolling(rolling_games, closed='left', min_periods=1)
    .mean()
    .reset_index(level=['season', 'team', 'qb'], drop=True)
)

# Defense is averaged per (season, team). Rows that repeat the same game (one row per
# quarterback) are collapsed first so a game is only counted once in the window.
defense_games = df[['season', 'week', 'team'] + defense_value_cols].drop_duplicates()
defense_rolling = (
    defense_games.sort_values(['season', 'week'], kind='stable')
    .groupby(by=['season', 'team'])[defense_value_cols]
    .rolling(rolling_games, closed='left', min_periods=1)
    .mean()
    .reset_index(level=['season', 'team'], drop=True)
)

# Both rolling frames are indexed by df's original row labels, so join on the index.
offense = offense_base.join(offense_rolling)
# dropna removes rows with no defensive average: the collapsed duplicate-game rows and
# each team's first game of a season.
defense = defense_base.join(defense_rolling).dropna()

# Left-merge on the game keys so every offensive row (including a second quarterback
# in the same game) picks up that game's defensive averages when they exist.
combined = offense.merge(defense, how='left', on=['season', 'week', 'team'])

  offense_rolling = df.groupby(by=['season', 'team', 'qb']).rolling(


In [14]:
# NOTE: the previous code block looks correct, but it has not been verified yet;
# do some checks the next time this is revisited. Spot check: last LV rows of `combined`.

combined.loc[combined['team'] == 'LV'].tail(15)

Unnamed: 0,season,week,team,qb,passing_value,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value
4540,2022,4,LV,D. Carr,-0.215512,0.155383,0.0,0.49636,0.103418
4574,2022,5,LV,D. Carr,-0.263448,0.453226,0.0,0.327135,-0.258491
4634,2022,7,LV,D. Carr,-0.103413,0.506608,0.0,0.496351,-0.159492
4662,2022,8,LV,D. Carr,0.087521,0.612734,0.0,0.44558,-0.078853
4663,2022,8,LV,J. Stidham,,,,0.44558,-0.078853
4691,2022,9,LV,D. Carr,-0.011856,0.412003,0.0,0.362218,0.217381
4718,2022,10,LV,D. Carr,0.169041,0.306876,0.0,0.684642,-0.143948
4747,2022,11,LV,D. Carr,0.116217,0.070542,0.0,0.530316,0.149098
4776,2022,12,LV,D. Carr,0.278586,-0.141497,0.0,0.567988,0.344332
4806,2022,13,LV,D. Carr,0.38525,-0.153452,0.0,0.479651,0.150268


In [15]:
# Per-game (non-rolling) values for the same team, for comparison with the rolling frame
df.loc[df['team'] == 'LV'].tail(15)

Unnamed: 0,season,week,team,opponent,score,opponent_score,qb,passing_value,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value,team_full,opponent_full
4540,2022,4,LV,DEN,32,23,D. Carr,-0.407256,1.346756,0,-0.180543,-1.344218,Las Vegas Raiders,Denver Broncos
4574,2022,5,LV,KC,29,30,D. Carr,0.536725,0.720135,0,1.173216,0.236504,Las Vegas Raiders,Kansas City Chiefs
4634,2022,7,LV,HOU,38,20,D. Carr,1.042196,1.143365,0,0.191728,0.324343,Las Vegas Raiders,Houston Texans
4662,2022,8,LV,NO,0,24,D. Carr,-1.207371,-1.130636,0,0.769853,0.159348,Las Vegas Raiders,New Orleans Saints
4663,2022,8,LV,NO,0,24,J. Stidham,-0.274394,-1.130636,0,0.769853,0.159348,Las Vegas Raiders,New Orleans Saints
4691,2022,9,LV,JAX,20,27,D. Carr,0.644839,-0.662253,0,1.132799,-0.004129,Las Vegas Raiders,Jacksonville Jaguars
4718,2022,10,LV,IND,20,25,D. Carr,0.088167,-0.994115,0,0.094845,1.522742,Las Vegas Raiders,Indianapolis Colts
4747,2022,11,LV,DEN,22,16,D. Carr,0.56696,0.07452,0,0.045489,-0.172814,Las Vegas Raiders,Denver Broncos
4776,2022,12,LV,SEA,40,34,D. Carr,1.17671,0.648405,0,0.643191,-0.927884,Las Vegas Raiders,Seattle Seahawks
4806,2022,13,LV,LAC,27,20,D. Carr,-0.835251,0.698422,0,-0.674367,-0.031113,Las Vegas Raiders,Los Angeles Chargers


In [16]:
# Display the combined rolling frame (the rendered output elides the middle rows)
combined

Unnamed: 0,season,week,team,qb,passing_value,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value
0,2014,1,ARI,C. Palmer,,,,,
1,2014,1,ATL,M. Ryan,,,,,
2,2014,1,BAL,J. Flacco,,,,,
3,2014,1,BUF,E. Manuel,,,,,
4,2014,1,CAR,D. Anderson,,,,,
...,...,...,...,...,...,...,...,...,...
4999,2022,21,KC,P. Mahomes,0.538192,0.433467,0.0,-0.901990,0.379622
5000,2022,21,PHI,J. Hurts,0.137651,2.439576,0.0,-0.584781,0.512271
5001,2022,21,SF,J. Johnson,,,,-0.407733,-0.362456
5002,2022,22,KC,P. Mahomes,0.563517,0.272761,0.0,-0.796702,0.412985


In [17]:
# Attach the full franchise name to the rolling frame.
# Only the team side is mapped: `combined` has no 'opponent' column, so the
# opponent lookup is not applied here.

combined['team_full'] = combined.apply(fix_team_names, axis=1, is_team=True)
combined.head()

Unnamed: 0,season,week,team,qb,passing_value,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value,team_full
0,2014,1,ARI,C. Palmer,,,,,,Arizona Cardinals
1,2014,1,ATL,M. Ryan,,,,,,Atlanta Falcons
2,2014,1,BAL,J. Flacco,,,,,,Baltimore Ravens
3,2014,1,BUF,E. Manuel,,,,,,Buffalo Bills
4,2014,1,CAR,D. Anderson,,,,,,Carolina Panthers


In [18]:
# Persist the rolling-average frame.
# index=True is the to_csv default: the row index is written as the first CSV column.

combined.to_csv('../data/value_models_combined_6_game_rolling.csv', index=True)