In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn import metrics
import numpy as np
import pandas as pd
import os
import random
import csv
from sys import platform
from IPython.core.display import HTML
from sklearn.cross_validation import KFold
from sklearn.metrics import accuracy_score
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.grid_search import GridSearchCV
# Read both custom stylesheets; the with-block closes the file handles
# instead of leaving them open until garbage collection.
with open('style-table.css') as table_css, open('style-notebook.css') as notebook_css:
    css = table_css.read() + notebook_css.read()
# Last expression of the cell: renders the combined CSS into the notebook.
HTML('<style>{}</style>'.format(css))



In [2]:
def filter_coefficients(threshold, df):
    '''
    Keep only the coefficients whose magnitude is strictly greater than the threshold.

    Entries at or below the threshold become NaN, and every column left without
    a single surviving value is removed from the returned data frame.
    '''
    cutoff = float(threshold)
    exceeds_cutoff = df.abs() > cutoff
    masked = df.where(exceeds_cutoff)
    return masked.dropna(axis = 1, how = 'all')



def generate_command(platform, type, filter_value):
    '''
    Build the Python assignment statement (as a string) that, once passed to exec(),
    defines the variable holding the csv path for one model's filtered coefficients.

    platform: value of sys.platform; 'win32' selects a backslash separator, anything else '/'
    type: 'nfl_savant' selects the NFL Savant names; any other value selects the armchair names
    filter_value: text label of the threshold, used in both the variable and the file name

    ex (non-Windows):
    'path_nfl_savant_filtered_coeff_gt_point_05_csv = os.getcwd() + "/nfl_savant_filtered_coeff_gt_point_05.csv"'
    '''
    model = 'nfl_savant' if type == 'nfl_savant' else 'armchair'

    if platform == 'win32':
        # The separator ends up inside a string literal of the generated code, so the
        # backslash has to be escaped there as well. A lone backslash would fuse with the
        # first letter of the file name into an escape sequence once the command is exec'd.
        separator = '\\\\'
    else:
        separator = '/'

    variable_name = 'path_' + model + '_filtered_coeff_gt_' + filter_value + '_csv'
    file_name = model + '_filtered_coeff_gt_' + filter_value + '.csv'
    return variable_name + ' = os.getcwd() + "' + separator + file_name + '"'

In [3]:
#text labels of the thresholds the coefficients are filtered by;
#each label becomes part of a path variable name and of a csv file name
filter_values = ['point_05', 'point_1', 'point_5', '1']

#os.path.join uses the separator of the running OS, so no Windows/Unix branch is needed
path_nfl_savant_coefficients = os.path.join(os.getcwd(), 'model_coefficients.csv')
path_armchair_coefficients = os.path.join(os.getcwd(), 'armchair_model_coefficients.csv')

#generate paths to save each model's filtered coefficients
#(exec defines one module-level path_..._csv variable per model and filter value;
#the executed text is built only from the hard-coded labels above)
for filter_value in filter_values:
    command_1 = generate_command(platform, 'nfl_savant', filter_value)
    command_2 = generate_command(platform, 'armchair', filter_value)
    exec(command_1)
    exec(command_2)

In [4]:
#load coefficients data for each model into data frame
#read_csv with index_col=0 and parse_dates=True is the documented equivalent of the
#deprecated DataFrame.from_csv, which newer pandas releases no longer provide
nfl_savant_df = pd.read_csv(path_nfl_savant_coefficients, index_col = 0, parse_dates = True)
armchair_df = pd.read_csv(path_armchair_coefficients, index_col = 0, parse_dates = True)

In [5]:
#create a column for season
#(built from a plain dict because DataFrame.from_items is deprecated and gone from newer pandas)
years = [('season', [2014, 2015, 2016])]
season = pd.DataFrame(dict(years))

#add season as a column and make it the index of each coefficients data frame
#.values assigns by position, so re-running this cell after the index has already been
#replaced gives the same result instead of an all-NaN index caused by index alignment;
#a row-count mismatch raises an error rather than silently producing NaN seasons
nfl_savant_df = nfl_savant_df.assign(season = season['season'].values).set_index('season')
armchair_df = armchair_df.assign(season = season['season'].values).set_index('season')

In [6]:
#preview the Arm Chair coefficients data frame, now indexed by season
armchair_df.head()

Unnamed: 0_level_0,qtr,min,sec,ptso,ptsd,dwn,ytg,yfog,sg,nh,...,def_SEA,def_SF,def_TB,def_TEN,def_WAS,zone_1,zone_2,zone_3,zone_4,zone_5
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014,0.003796,0.010008,0.001906,0.02836,-0.023614,-0.585047,-0.097252,-0.000773,-0.45331,-0.11292,...,-0.024927,0.034687,-0.170916,-0.088349,-0.018495,0.104791,-0.006159,0.144737,0.226392,0.251399
2015,-0.073693,0.012782,0.001276,0.036182,-0.023492,-0.513906,-0.095968,0.000479,-0.50263,-0.148691,...,0.073708,0.011196,0.02463,0.045883,0.106854,0.187098,0.060523,0.121253,0.188721,0.241028
2016,-0.021557,0.012612,0.000153,0.030949,-0.026128,-0.558941,-0.091371,0.001345,-0.461554,-0.265159,...,0.081581,0.098335,-0.092978,-0.133074,0.023945,0.269206,0.094472,0.06888,0.138287,0.192109


In [7]:
#preview the NFL Savant coefficients data frame, now indexed by season
nfl_savant_df.head()

Unnamed: 0_level_0,Down,ToGo,YardLine,Quarter,Minute,Second,OffenseTeam_ARI,OffenseTeam_ATL,OffenseTeam_BAL,OffenseTeam_BUF,...,DefenseTeam_TEN,DefenseTeam_WAS,YardLineDirection_OPP,YardLineDirection_OWN,Formation_FIELD GOAL,Formation_NO HUDDLE,Formation_NO HUDDLE SHOTGUN,Formation_SHOTGUN,Formation_UNDER CENTER,Formation_WILDCAT
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014,-0.629607,-0.097678,-0.002408,0.005571,0.021869,0.00114,-0.327398,-0.640033,-0.658237,0.041452,...,0.001829,0.021584,0.516291,0.299534,-0.006067,1.092858,-0.835603,-0.58488,1.149517,
2015,-0.592746,-0.096903,-0.003689,-0.031379,0.02292,0.001596,-0.498641,-0.503927,-0.555428,0.866139,...,0.417566,-0.032279,0.629386,0.366176,0.900642,1.064511,-1.177246,-0.884053,1.091708,
2016,-0.580591,-0.08873,-0.000254,-0.00772,0.020869,0.001449,-0.574375,-0.518456,-0.264093,0.757913,...,-0.282804,0.131541,0.35372,0.333536,0.0,0.987787,-0.92049,-0.623192,1.209691,0.033461


**Coefficients from NFL Savant Model (model built assuming all plays are independent)**

*Model coefficients with an absolute value greater than 0.05, 0.1, 0.5, and 1.0*

In [8]:
#NFL Savant coefficients with |value| > 0.05: filter, save to csv, then display
nfl_savant_coef_gt_point_05 = filter_coefficients(threshold = 0.05, df = nfl_savant_df)
nfl_savant_coef_gt_point_05.to_csv(path_or_buf = path_nfl_savant_filtered_coeff_gt_point_05_csv)
nfl_savant_coef_gt_point_05

Unnamed: 0_level_0,Down,ToGo,OffenseTeam_ARI,OffenseTeam_ATL,OffenseTeam_BAL,OffenseTeam_BUF,OffenseTeam_CAR,OffenseTeam_CHI,OffenseTeam_CIN,OffenseTeam_CLE,...,DefenseTeam_TB,DefenseTeam_TEN,DefenseTeam_WAS,YardLineDirection_OPP,YardLineDirection_OWN,Formation_FIELD GOAL,Formation_NO HUDDLE,Formation_NO HUDDLE SHOTGUN,Formation_SHOTGUN,Formation_UNDER CENTER
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014,-0.629607,-0.097678,-0.327398,-0.640033,-0.658237,,0.326679,-0.12894,0.403289,,...,,,,0.516291,0.299534,,1.092858,-0.835603,-0.58488,1.149517
2015,-0.592746,-0.096903,-0.498641,-0.503927,-0.555428,0.866139,0.733031,0.650081,0.310501,-0.217597,...,0.110249,0.417566,,0.629386,0.366176,0.900642,1.064511,-1.177246,-0.884053,1.091708
2016,-0.580591,-0.08873,-0.574375,-0.518456,-0.264093,0.757913,0.560477,,,-0.130207,...,-0.051566,-0.282804,0.131541,0.35372,0.333536,,0.987787,-0.92049,-0.623192,1.209691


In [9]:
#NFL Savant coefficients with |value| > 0.1: filter, save to csv, then display
nfl_savant_coef_gt_point_1 = filter_coefficients(threshold = 0.1, df = nfl_savant_df)
nfl_savant_coef_gt_point_1.to_csv(path_or_buf = path_nfl_savant_filtered_coeff_gt_point_1_csv)
nfl_savant_coef_gt_point_1

Unnamed: 0_level_0,Down,OffenseTeam_ARI,OffenseTeam_ATL,OffenseTeam_BAL,OffenseTeam_BUF,OffenseTeam_CAR,OffenseTeam_CHI,OffenseTeam_CIN,OffenseTeam_CLE,OffenseTeam_DAL,...,DefenseTeam_TB,DefenseTeam_TEN,DefenseTeam_WAS,YardLineDirection_OPP,YardLineDirection_OWN,Formation_FIELD GOAL,Formation_NO HUDDLE,Formation_NO HUDDLE SHOTGUN,Formation_SHOTGUN,Formation_UNDER CENTER
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014,-0.629607,-0.327398,-0.640033,-0.658237,,0.326679,-0.12894,0.403289,,0.210456,...,,,,0.516291,0.299534,,1.092858,-0.835603,-0.58488,1.149517
2015,-0.592746,-0.498641,-0.503927,-0.555428,0.866139,0.733031,0.650081,0.310501,-0.217597,-0.124217,...,0.110249,0.417566,,0.629386,0.366176,0.900642,1.064511,-1.177246,-0.884053,1.091708
2016,-0.580591,-0.574375,-0.518456,-0.264093,0.757913,0.560477,,,-0.130207,,...,,-0.282804,0.131541,0.35372,0.333536,,0.987787,-0.92049,-0.623192,1.209691


In [10]:
#NFL Savant coefficients with |value| > 0.5: filter, save to csv, then display
nfl_savant_coef_gt_point_5 = filter_coefficients(threshold = 0.5, df = nfl_savant_df)
nfl_savant_coef_gt_point_5.to_csv(path_or_buf = path_nfl_savant_filtered_coeff_gt_point_5_csv)
nfl_savant_coef_gt_point_5

Unnamed: 0_level_0,Down,OffenseTeam_ARI,OffenseTeam_ATL,OffenseTeam_BAL,OffenseTeam_BUF,OffenseTeam_CAR,OffenseTeam_CHI,OffenseTeam_NYJ,OffenseTeam_PHI,OffenseTeam_SD,OffenseTeam_SEA,OffenseTeam_SF,YardLineDirection_OPP,Formation_FIELD GOAL,Formation_NO HUDDLE,Formation_NO HUDDLE SHOTGUN,Formation_SHOTGUN,Formation_UNDER CENTER
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2014,-0.629607,,-0.640033,-0.658237,,,,0.580537,0.658064,,0.721316,,0.516291,,1.092858,-0.835603,-0.58488,1.149517
2015,-0.592746,,-0.503927,-0.555428,0.866139,0.733031,0.650081,,0.949345,0.556475,,,0.629386,0.900642,1.064511,-1.177246,-0.884053,1.091708
2016,-0.580591,-0.574375,-0.518456,,0.757913,0.560477,,,,,,1.125225,,,0.987787,-0.92049,-0.623192,1.209691


In [11]:
#NFL Savant coefficients with |value| > 1: filter, save to csv, then display
nfl_savant_coef_gt_1 = filter_coefficients(threshold = 1, df = nfl_savant_df)
nfl_savant_coef_gt_1.to_csv(path_or_buf = path_nfl_savant_filtered_coeff_gt_1_csv)
nfl_savant_coef_gt_1

Unnamed: 0_level_0,OffenseTeam_SF,Formation_NO HUDDLE,Formation_NO HUDDLE SHOTGUN,Formation_UNDER CENTER
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014,,1.092858,,1.149517
2015,,1.064511,-1.177246,1.091708
2016,1.125225,,,1.209691


**Coefficients from Arm Chair Analysis Model (model built assuming plays are dependent)**

In [12]:
#Arm Chair coefficients with |value| > 0.05: filter, save to csv, then display
armchair_coef_gt_point_05 = filter_coefficients(threshold = 0.05, df = armchair_df)
armchair_coef_gt_point_05.to_csv(path_or_buf = path_armchair_filtered_coeff_gt_point_05_csv)
armchair_coef_gt_point_05

Unnamed: 0_level_0,qtr,dwn,ytg,sg,nh,uc,timo,timd,off_ARI,off_ATL,...,def_SEA,def_SF,def_TB,def_TEN,def_WAS,zone_1,zone_2,zone_3,zone_4,zone_5
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014,,-0.585047,-0.097252,-0.45331,-0.11292,1.174469,0.194015,-0.086322,-0.207913,-0.493286,...,,,-0.170916,-0.088349,,0.104791,,0.144737,0.226392,0.251399
2015,-0.073693,-0.513906,-0.095968,-0.50263,-0.148691,1.301252,0.132419,-0.101043,-0.738538,-0.354967,...,0.073708,,,,0.106854,0.187098,0.060523,0.121253,0.188721,0.241028
2016,,-0.558941,-0.091371,-0.461554,-0.265159,1.224508,0.221849,-0.163389,-0.658671,-0.549492,...,0.081581,0.098335,-0.092978,-0.133074,,0.269206,0.094472,0.06888,0.138287,0.192109


In [13]:
#Arm Chair coefficients with |value| > 0.1: filter, save to csv, then display
armchair_coef_gt_point_1 = filter_coefficients(threshold = 0.1, df = armchair_df)
armchair_coef_gt_point_1.to_csv(path_or_buf = path_armchair_filtered_coeff_gt_point_1_csv)
armchair_coef_gt_point_1

Unnamed: 0_level_0,dwn,sg,nh,uc,timo,timd,off_ARI,off_ATL,off_BAL,off_BUF,...,def_PHI,def_PIT,def_SD,def_TB,def_TEN,def_WAS,zone_1,zone_3,zone_4,zone_5
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014,-0.585047,-0.45331,-0.11292,1.174469,0.194015,,-0.207913,-0.493286,-0.539201,,...,-0.190801,0.105311,0.164759,-0.170916,,,0.104791,0.144737,0.226392,0.251399
2015,-0.513906,-0.50263,-0.148691,1.301252,0.132419,-0.101043,-0.738538,-0.354967,-0.481738,0.861939,...,,,,,,0.106854,0.187098,0.121253,0.188721,0.241028
2016,-0.558941,-0.461554,-0.265159,1.224508,0.221849,-0.163389,-0.658671,-0.549492,-0.345183,0.783428,...,,,,,-0.133074,,0.269206,,0.138287,0.192109


In [14]:
#Arm Chair coefficients with |value| > 0.5: filter, save to csv, then display
armchair_coef_gt_point_5 = filter_coefficients(threshold = 0.5, df = armchair_df)
armchair_coef_gt_point_5.to_csv(path_or_buf = path_armchair_filtered_coeff_gt_point_5_csv)
armchair_coef_gt_point_5

Unnamed: 0_level_0,dwn,sg,uc,off_ARI,off_ATL,off_BAL,off_BUF,off_CHI,off_MIN,off_NE,off_NYG,off_OAK,off_PIT,off_SD,off_SF
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2014,-0.585047,,1.174469,,,-0.539201,,,-0.501865,,0.667651,,,0.604038,
2015,-0.513906,-0.50263,1.301252,-0.738538,,,0.861939,0.584948,,-0.516173,,0.796753,0.57449,,
2016,-0.558941,,1.224508,-0.658671,-0.549492,,0.783428,,,,,,,,1.109997


In [15]:
#Arm Chair coefficients with |value| > 1: filter, save to csv, then display
armchair_coef_gt_1 = filter_coefficients(threshold = 1, df = armchair_df)
armchair_coef_gt_1.to_csv(path_or_buf = path_armchair_filtered_coeff_gt_1_csv)
armchair_coef_gt_1

Unnamed: 0_level_0,uc,off_SF
season,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,1.174469,
2015,1.301252,
2016,1.224508,1.109997
