In [11]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 30 21:45:18 2023

@author: ian
"""

# Import packages

import pandas as pd 
import numpy as np 
import os

# Import data

def _load_value_table(csv_path):
    """Read one CSV and discard its ``Unnamed: 0`` column.

    ``drop`` raises ``KeyError`` if the file has no ``Unnamed: 0`` column.
    """
    table = pd.read_csv(csv_path)
    return table.drop(columns=['Unnamed: 0'])


# Adjusted (step 06) values
rb = _load_value_table('data/06_seasontest_adjusted_rushing_values.csv')
pass_def = _load_value_table('data/06_seasontest_adjusted_pass_defense_values.csv')
rush_def = _load_value_table('data/06_seasontest_adjusted_rush_defense_values.csv')
qb = _load_value_table('data/06_seasontest_adjusted_qb_values.csv')

# Initial (step 04) values
st = _load_value_table('data/04_seasontest_initial_st_values.csv')
extra = _load_value_table('data/04_seasontest_initial_extra_values.csv')

# Put data into one data frame that just has the game identifiers and value numbers

# Columns present in every table; together they identify one team's game.
game_keys = ['season', 'week', 'team', 'opponent', 'score', 'opponent_score']

# (An earlier variant of the qb selection used 'qb_value' and 'passing_value'
# instead of 'passing_value_adjusted'.)
qb = qb.copy()[game_keys + ['qb', 'passing_value_adjusted']]
rb = rb.copy()[game_keys + ['rushing_value_adjusted']]
pass_def = pass_def.copy()[game_keys + ['pass_def_value_adjusted']]
rush_def = rush_def.copy()[game_keys + ['rush_def_value_adjusted']]
st = st.copy()[game_keys + ['special_teams_value']]
extra = extra.copy()[game_keys + ['total_plays_standardized',
                                  'total_possession_time_standardized',
                                  'pass_percentage_standardized']]

# merge() without `on` joins on every column the two frames share (here the
# game keys) and defaults to an inner join, so a game missing from any one
# table is dropped from the result.
df = qb.copy()
for value_table in (rb, pass_def, rush_def, st, extra):
    df = df.merge(value_table)
df = df.drop_duplicates()

# Add current week data

# Snapshot of the combined rows before the current-week rows are appended.
df2 = df.copy()
current_week_data = pd.read_csv('data/04_seasontest_current_week_data.csv')

# Each current-week game becomes two rows: one seen from the home side ...
home_week = (
    current_week_data[['season', 'week', 'home', 'away', 'home_qb']]
    .rename(columns={'home': 'team', 'away': 'opponent', 'home_qb': 'qb'})
)
# ... and one seen from the away side.
away_week = (
    current_week_data[['season', 'week', 'home', 'away', 'away_qb']]
    .rename(columns={'away': 'team', 'home': 'opponent', 'away_qb': 'qb'})
)

# Columns the current-week rows lack (scores, value columns) come out as NaN.
# reset_index() is called without drop=True, so the previous row labels are
# kept as a regular 'index' column in the result.
df = pd.concat([df2, home_week, away_week], axis=0).reset_index()

# Fix team names

# Move to helper functions

# Team abbreviation -> full franchise name. Built once at module level instead
# of inside the function, because fix_team_names is applied row by row.
# Some franchises are reachable through two abbreviations (OAK/LV, LAR/LA,
# WSH/WAS); both spellings map to the same full name.
_TEAM_FULL_NAMES = {
    'ARI': 'Arizona Cardinals',
    'ATL': 'Atlanta Falcons',
    'BAL': 'Baltimore Ravens',
    'BUF': 'Buffalo Bills',
    'CAR': 'Carolina Panthers',
    'CHI': 'Chicago Bears',
    'CIN': 'Cincinnati Bengals',
    'CLE': 'Cleveland Browns',
    'DAL': 'Dallas Cowboys',
    'DEN': 'Denver Broncos',
    'DET': 'Detroit Lions',
    'GB': 'Green Bay Packers',
    'HOU': 'Houston Texans',
    'IND': 'Indianapolis Colts',
    'JAX': 'Jacksonville Jaguars',
    'KC': 'Kansas City Chiefs',
    'OAK': 'Las Vegas Raiders',
    'LV': 'Las Vegas Raiders',
    'LAC': 'Los Angeles Chargers',
    'LAR': 'Los Angeles Rams',
    'LA': 'Los Angeles Rams',
    'MIA': 'Miami Dolphins',
    'MIN': 'Minnesota Vikings',
    'NE': 'New England Patriots',
    'NO': 'New Orleans Saints',
    'NYG': 'New York Giants',
    'NYJ': 'New York Jets',
    'PHI': 'Philadelphia Eagles',
    'PIT': 'Pittsburgh Steelers',
    'SF': 'San Francisco 49ers',
    'SEA': 'Seattle Seahawks',
    'TB': 'Tampa Bay Buccaneers',
    'TEN': 'Tennessee Titans',
    'WSH': 'Washington Football Team',
    'WAS': 'Washington Football Team',
}


def fix_team_names(game, is_team=True):
    """Return the full franchise name for one side of a game.

    Parameters
    ----------
    game : mapping-like (for example a DataFrame row)
        Must support ``game['team']`` and ``game['opponent']``; the value
        looked up must be one of the abbreviations in ``_TEAM_FULL_NAMES``.
    is_team : bool, default True
        If truthy, translate ``game['team']``; otherwise ``game['opponent']``.

    Returns
    -------
    str
        The full team name.

    Raises
    ------
    KeyError
        If the abbreviation (including a missing/NaN value) is not in the
        table. The message names the column and the offending value so the
        bad row can be located.
    """
    column = 'team' if is_team else 'opponent'
    abbreviation = game[column]
    try:
        return _TEAM_FULL_NAMES[abbreviation]
    except KeyError:
        # Same exception type as a plain dict lookup, with a more useful message.
        raise KeyError(
            f"unknown {column} abbreviation: {abbreviation!r}"
        ) from None
    
# Attach the full franchise name for both sides of every row. DataFrame.apply
# forwards extra keyword arguments (here is_team) to the applied function;
# the first call relies on the function's default of is_team=True.
df['team_full'] = df.apply(fix_team_names, axis=1)
df['opponent_full'] = df.apply(fix_team_names, axis=1, is_team=False)

# Save raw data frame with values

df.to_csv('data/07_seasontest_adjusted_value_models_combined.csv')

# Save data frame with past 5 games rolling stats

# Weighted average approach

# Weights per window length; later positions in the window get larger weights.
# The 3-element set sums to 0.99 as written, which is why weighted_avg
# normalises by the weight sum instead of assuming the weights sum to 1.
_WEIGHTS_BY_WINDOW_LENGTH = {
    1: (1,),
    2: (.4, .6),
    3: (.23, .33, .43),
    4: (.1, .2, .3, .4),
    5: (.05, .15, .2, .25, .35),
}


def weighted_avg(values):
    """Weighted average of a window of 1 to 5 values.

    Parameters
    ----------
    values : sequence of numbers (list, ndarray, Series)
        The window, in the order the weights should be applied. Its length
        selects the weight set from ``_WEIGHTS_BY_WINDOW_LENGTH``.

    Returns
    -------
    float
        ``sum(w * v) / sum(w)``. Dividing by the weight sum makes the result a
        true average for every window length; previously a 3-element window
        was scaled by 0.99 because its weights do not sum to 1. A NaN anywhere
        in ``values`` still yields NaN.

    Raises
    ------
    ValueError
        If ``len(values)`` is not between 1 and 5 (previously this surfaced
        as an ``UnboundLocalError`` on the unassigned ``weights``).
    """
    window_length = len(values)
    weights = _WEIGHTS_BY_WINDOW_LENGTH.get(window_length)
    if weights is None:
        raise ValueError(
            'weighted_avg supports windows of 1 to 5 values, got '
            + str(window_length)
        )
    return np.average(values, weights=weights)

# Uncomment and use this if even weights are desired

# def weighted_avg(values):
#     if len(values) == 1:
#         weights = np.array([1])
#     elif len(values) == 2:
#         weights = np.array([.5, .5])
#     elif len(values) == 3:
#         weights = np.array([.333, .333, .333])
#     elif len(values) == 4:
#         weights = np.array([.25, .25, .25, .25])
#     elif len(values) == 5:
#         weights = np.array([.2, .2, .2, .2, .2])
    
#     return np.sum(weights * values)


# Identifier columns that the rolling values are joined back onto.
offense_base = df[['season', 'week', 'team_full', 'qb']].copy()
defense_base = df[['season', 'week', 'team_full']].copy()

offense_value_cols = ['passing_value_adjusted', 'rushing_value_adjusted',
                      'total_plays_standardized', 'total_possession_time_standardized',
                      'pass_percentage_standardized']
defense_value_cols = ['pass_def_value_adjusted', 'rush_def_value_adjusted', 'special_teams_value']

# Rolling window of 5 rows per (team, qb) group, in the frame's current row
# order. closed='left' leaves the current row out of its own window, so each
# row is summarised from the rows before it; min_periods=1 allows shorter
# windows at the start of a group. Dropping the group levels afterwards
# restores the original row labels so the result can be joined by index.
# NOTE(review): groups are keyed on the exact `qb` string. The displayed frame
# holds both 'D. Jones'-style and 'Matthew Stafford'-style names; if one player
# appears under both spellings the rows land in separate groups. Verify.
offense_rolling = (
    df[['team_full', 'qb'] + offense_value_cols]
    .groupby(by=['team_full', 'qb'])
    .rolling(5, closed='left', min_periods=1)
    .apply(weighted_avg)
    .reset_index(level=['team_full', 'qb'], drop=True)
)

# Defensive and special-teams values are rolled per team only.
defense_rolling = (
    df[['team_full'] + defense_value_cols]
    .groupby(by=['team_full'])
    .rolling(5, closed='left', min_periods=1)
    .apply(weighted_avg)
    .reset_index(level=['team_full'], drop=True)
)

offense = offense_base.join(offense_rolling)
# Rows with any missing defensive value are discarded before the merge.
defense = defense_base.join(defense_rolling).dropna()

# Left merge on the shared columns (season, week, team_full): every offense
# row is kept, with NaN where no defense row survived the dropna above.
combined = offense.merge(defense, how='left')

# Save aggregated data frame with values

combined.to_csv('data/07_seasontest_adjusted_value_models_aggregated.csv')

print('07_adjusted_value_models_combination_and_aggregation Complete')

07_adjusted_value_models_combination_and_aggregation Complete


In [17]:
# Show only the rows whose team abbreviation is 'NYG'.
df.loc[df.team == 'NYG']

Unnamed: 0,index,season,week,team,opponent,score,opponent_score,qb,passing_value_adjusted,rushing_value_adjusted,pass_def_value_adjusted,rush_def_value_adjusted,special_teams_value,total_plays_standardized,total_possession_time_standardized,pass_percentage_standardized,team_full,opponent_full
23,23,2022,1,NYG,TEN,21.0,20.0,D. Jones,,,,,0.955098,-0.869675,-0.358917,-1.105563,New York Giants,Tennessee Titans
55,55,2022,2,NYG,CAR,19.0,16.0,D. Jones,-0.11873,-1.131197,1.080286,-1.192864,1.938629,0.986934,1.27364,-0.267887,New York Giants,Carolina Panthers
87,87,2022,3,NYG,DAL,16.0,23.0,D. Jones,-1.058303,1.336523,-0.318995,-1.370052,-0.123295,0.63882,-0.729427,0.468597,New York Giants,Dallas Cowboys
119,119,2022,4,NYG,CHI,20.0,12.0,D. Jones,-0.491497,0.810202,0.59578,-0.015182,-1.289792,-0.173446,0.192987,-2.630289,New York Giants,Chicago Bears
151,151,2022,5,NYG,GB,27.0,22.0,D. Jones,1.723347,0.455023,-0.254477,-0.211884,-0.112723,-0.521561,0.401399,-0.660547,New York Giants,Green Bay Packers
180,180,2022,6,NYG,BAL,24.0,20.0,D. Jones,1.23792,-0.391276,0.145457,-1.365414,0.992859,-0.289484,0.328069,-0.434268,New York Giants,Baltimore Ravens
208,208,2022,7,NYG,JAX,23.0,17.0,D. Jones,0.280867,1.246991,-0.779162,0.363079,-0.01132,0.870896,0.860676,-1.169137,New York Giants,Jacksonville Jaguars
236,236,2022,8,NYG,SEA,13.0,27.0,D. Jones,-0.029495,-0.901689,0.38705,0.63833,-2.591894,0.05863,0.721735,-0.104278,New York Giants,Seattle Seahawks
290,290,2022,10,NYG,HOU,24.0,16.0,D. Jones,2.209673,-0.314092,-0.674783,0.818168,-0.395974,0.290706,0.667702,-2.41364,New York Giants,Houston Texans
319,319,2022,11,NYG,DET,18.0,31.0,D. Jones,-0.193184,0.032795,-0.238006,-0.906206,-1.744459,0.754858,-0.521015,0.619635,New York Giants,Detroit Lions


In [10]:
# Display the frame.
# NOTE(review): this cell's execution count (In [10]) precedes the main cell's
# (In [11]) and the recorded output has no team_full/opponent_full columns, so
# the output presumably predates the current `df`. Re-run in order to confirm.
df

Unnamed: 0,index,season,week,team,opponent,score,opponent_score,qb,passing_value_adjusted,rushing_value_adjusted,pass_def_value_adjusted,rush_def_value_adjusted,special_teams_value,total_plays_standardized,total_possession_time_standardized,pass_percentage_standardized
0,0,2022,1,ARI,KC,21.0,44.0,K. Murray,,,,,0.252584,-0.289484,-1.192564,0.869194
1,1,2022,1,ATL,NO,26.0,27.0,M. Mariota,,,,,1.054228,1.219011,0.760330,-1.041990
2,2,2022,1,BAL,NYJ,24.0,9.0,L. Jackson,,,,,1.589315,-1.333827,-0.683113,0.473806
3,3,2022,1,BUF,LA,31.0,10.0,J. Allen,,,,,0.477646,-0.753637,0.181409,0.042109
4,4,2022,1,CAR,CLE,24.0,26.0,B. Mayfield,,,,,-0.428776,-1.681941,-2.057086,0.520107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593,10,2023,1,LAR,SEA,,,Matthew Stafford,,,,,,,,
594,11,2023,1,GB,CHI,,,Jordan Love,,,,,,,,
595,12,2023,1,LV,DEN,,,Jimmy Garoppolo,,,,,,,,
596,13,2023,1,DAL,NYG,,,D. Prescott,,,,,,,,


In [29]:
# Show only the rows whose team abbreviation is 'GB'.
# NOTE(review): the recorded output is an AttributeError. This cell ran as
# In [29], presumably after the In [27] cell had rebound `df` to a frame with
# `home`/`away` columns and no `team` column. Confirm and re-run top to bottom.
df[df.team == 'GB']

AttributeError: 'DataFrame' object has no attribute 'team'

In [27]:
# Rebinds `df` to the contents of the final predictive CSV.
df = pd.read_csv('data/final_predictive_dataframe.csv')

In [28]:
# Display the frame currently bound to `df`.
df

Unnamed: 0.1,Unnamed: 0,season,week,home,away,home_qb,away_qb,passing_value_adjusted_home,rushing_value_adjusted_home,pass_def_value_adjusted_home,...,total_plays_standardized_away,pass_percentage_standardized_home,pass_percentage_standardized_away,home_moneyline,away_moneyline,home_spread,away_spread,total_score_line,home_implied_prob,away_implied_prob
0,0,2023,1,MIN,TB,K. Cousins,B. Mayfield,0.673185,-0.393193,-0.434404,...,,0.602722,,-205,170,-4.5,4.5,45.5,0.672131,0.37037
1,1,2023,1,PIT,SF,K. Pickett,B. Purdy,0.780551,0.181826,-0.187822,...,-0.005191,-0.893167,-0.768058,-105,-125,2.0,-2.0,41.5,0.512195,0.555556
2,2,2023,1,BAL,HOU,L. Jackson,C.J. Stroud,0.121977,0.328151,-0.04023,...,,-0.747644,,-500,380,-9.5,9.5,43.5,0.833333,0.208333
3,3,2023,1,IND,JAX,A. Richardson,T. Lawrence,,,,...,-0.138635,,0.741425,170,-205,3.5,-3.5,46.0,0.37037,0.672131
4,4,2023,1,CLE,CIN,D. Watson,J. Burrow,-0.211152,0.442853,0.359798,...,0.308112,-0.33539,0.787408,102,-122,1.5,-1.5,47.0,0.49505,0.54955
5,5,2023,1,NO,TEN,D. Carr,R. Tannehill,,,,...,-0.776844,,0.219419,-148,124,-3.0,3.0,42.0,0.596774,0.446429
6,6,2023,1,WAS,ARI,S. Howell,J. Dobbs,-1.284806,-0.082328,-0.122658,...,,-1.952529,,-340,270,-7.0,7.0,38.0,0.772727,0.27027
7,7,2023,1,ATL,CAR,D. Ridder,B. Young,0.0479,-0.218146,-0.121759,...,,-0.911153,,-192,160,-3.5,3.5,40.5,0.657534,0.384615
8,8,2023,1,NE,PHI,M. Jones,J. Hurts,-0.240548,-0.266291,-0.275203,...,0.853491,0.631381,-0.910959,154,-185,3.5,-3.5,44.5,0.393701,0.649123
9,9,2023,1,LAC,MIA,J. Herbert,T. Tagovailoa,0.620026,-0.263069,0.441181,...,-1.496281,0.429984,0.50639,-162,136,-3.0,3.0,51.0,0.618321,0.423729


In [33]:
# Rebinds `df` to the step-05 aggregated CSV (a different file from the
# step-07 output written by the main cell).
df = pd.read_csv('data/05_seasontest_value_models_aggregated.csv')

In [34]:
# Show only the rows whose team abbreviation is 'GB'.
df.loc[df.team == 'GB']

Unnamed: 0.1,Unnamed: 0,season,week,team,qb,passing_value,rushing_value,total_plays_standardized,total_possession_time_standardized,pass_percentage_standardized,pass_def_value,rush_def_value,special_teams_value,team_full
11,11,2022,1,GB,A. Rodgers,,,,,,,,,Green Bay Packers
43,43,2022,2,GB,A. Rodgers,-1.11819,0.287694,-0.521561,-0.424529,1.225112,-0.96063,0.214257,-0.357025,Green Bay Packers
75,75,2022,3,GB,A. Rodgers,0.183339,0.326167,-0.115427,0.575075,-0.025805,0.458858,-0.190358,0.061943,Green Bay Packers
107,107,2022,4,GB,A. Rodgers,0.141313,-0.154513,-0.250554,0.611771,0.059232,0.494608,0.10671,-0.157338,Green Bay Packers
139,139,2022,5,GB,A. Rodgers,-0.001626,0.083764,0.02962,0.827871,-0.104999,0.632868,-0.279113,-0.133811,Green Bay Packers
170,170,2022,6,GB,A. Rodgers,0.110901,0.253827,-0.121046,0.357405,0.351738,-0.308777,-0.451942,-0.050687,Green Bay Packers
198,198,2022,7,GB,A. Rodgers,-0.263099,-0.653026,0.389522,0.71209,0.450177,0.707425,-0.827548,-0.505547,Green Bay Packers
227,227,2022,8,GB,A. Rodgers,-0.381807,-0.520603,-0.643217,-0.309126,1.076474,0.174942,-0.176403,-0.673987,Green Bay Packers
253,253,2022,9,GB,A. Rodgers,-0.389982,0.278537,-0.097838,0.333862,0.295285,-0.014367,-0.994273,-0.859983,Green Bay Packers
280,280,2022,10,GB,A. Rodgers,-0.759135,0.395865,0.006596,0.298355,0.938756,-0.112165,0.079887,-0.610783,Green Bay Packers
