<a href="https://colab.research.google.com/github/doug14226/colab1/blob/master/Playoffs2019.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# A Demonstration of Python for Machine Learning
# Predicting Scores for the 2019 NFL Playoffs
Copyright (c) 2018 Douglas Lange.  
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

#Section 1: Get data from Google Drive


---
Get teams2018.csv, games_2018.csv, drives_2018.csv, and games_Playoff_2019.csv from Google Drive.
Executing this cell produces a Playoffs2019.log file.  If faults occur, please download Playoffs2019.log and post a description of the error, with Playoffs2019.log attached, at https://github.com/doug14226/colab/issues.  Be sure to mention 
Playoffs2019.ipynb in the issue.  

---
To download Playoffs2019.log, go to the last cell, click the download_Log button, and run the cell.


In [0]:
%%shell
# Fetch the four CSV inputs from Google Drive, skipping any that are already
# present. All command output is appended to Playoffs2019.log for troubleshooting.
echo $0 >> Playoffs2019.log

# fetch FILE LABEL DRIVE_ID
#   Prints "LABEL Previously Loaded" when FILE already exists and is non-empty;
#   otherwise downloads it. The download is written to FILE.part and renamed
#   only on success, because `wget -O` creates the output file even when the
#   transfer fails -- which would otherwise make a failed download look like a
#   previously loaded file on the next run.
fetch() {
  local file="$1" label="$2" id="$3"
  if [ -s "$file" ] && ls "$file" &>> Playoffs2019.log; then
    echo "$label Previously Loaded"
  elif wget -O "$file.part" "https://drive.google.com/uc?export=download&id=$id" &>> Playoffs2019.log; then
    mv "$file.part" "$file"
  else
    rm -f "$file.part"
    echo "Download of $file failed; see Playoffs2019.log" >&2
    return 1
  fi
}

# Attempt every file even if an earlier one fails, then report overall status.
status=0
fetch teams2018.csv "Teams" "1hoZp74vmwpN5QTUVQRGhnrmcX-JmmtoI" || status=1
fetch games_2018.csv "Games" "1zZY7d3B42MKtjYw34UGVBWkm7R3FQJtk" || status=1
fetch drives_2018.csv "Drives" "1Pp-eSgzjKgt_XMBNEihobMMlVBlP8LJq" || status=1
fetch games_Playoff_2019.csv "Games_Playoff" "1vpVYiV1kGFX4Mu6SxlUuJClM9RFs2Syk" || status=1
exit $status


#Section 2: Import required Python Packages

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import BayesianRidge
#if pd.__version__ == '0.22.0':
#  !pip3 install pandas --upgrade

#Section 3: Load teams, games, and drives into Pandas Dataframes.

In [0]:
# Read the three 2018 season tables downloaded in Section 1.
teams = pd.read_csv('teams2018.csv')
games = pd.read_csv('games_2018.csv', index_col='gsis_id')
drives = pd.read_csv('drives_2018.csv')

# Field-position bin edges shared by the start and end of every drive.
bin_edges = [0, 20, 40, 60, 80, 100]

# Label each drive with the bin its start / end position falls in and store the
# labels as columns 16 and 17 of the drives table.
drivebins = pd.cut(drives['drive_start'], bin_edges)
endbins = pd.cut(drives['drive_end'], bin_edges)
drives.insert(16, 'Start_Bin', drivebins)
drives.insert(17, 'End_Bin', endbins)

# Right-closed intervals matching the bins above, built by hand.
# (pandas 0.24 offers pd.interval_range(start=0, end=100, periods=5,
# closed='right') for this, but Colaboratory shipped the incompatible
# pandas 0.22 when this notebook was written.)
intervals = [
    pd.Interval(low, high, 'right')
    for low, high in zip(bin_edges[:-1], bin_edges[1:])
]


#Section 4: Definition of functions to calculate features from Pandas Data Frames

In [0]:
def possesion_ave(team,possession='pos_team'):
    """Return per-play averages for one team's drives.

    Parameters
    ----------
    team : value compared against the ``possession`` column of the
        module-level ``drives`` table.
    possession : column used to select the team's drives; ``'pos_team'``
        (default) or, as used elsewhere in this notebook, ``'def_team'``.

    Returns
    -------
    list
        One-element list: total ``penalty_yards`` divided by total
        ``play_count`` over the selected drives. ``[0.0]`` when the selected
        drives contain no plays, so that the feature never becomes NaN.
    """
    team_drives = drives[drives[possession].isin([team])]
    plays = team_drives['play_count'].sum()
    if plays == 0:
        # No plays (e.g. no drives for this team): avoid a 0/0 NaN that would
        # later break model fitting; mirrors count_result's 0.0 for empty sets.
        return [0.0]
    penalty_yards = team_drives['penalty_yards'].sum()
    # add more average-per-play features here
    return [penalty_yards / plays]
    
def possesion_starts(team,possession='pos_team'):
    """Return the share of a team's drives that start in each field bin.

    Parameters
    ----------
    team : value compared against the ``possession`` column of the
        module-level ``drives`` table.
    possession : column used to select the team's drives; ``'pos_team'``
        (default) or ``'def_team'``.

    Returns
    -------
    list of float
        One fraction per entry of the module-level ``intervals`` list, in
        order: drives whose ``Start_Bin`` equals that interval divided by the
        team's total number of drives. All zeros when the team has no drives.
    """
    team_drives = drives[drives[possession].isin([team])]
    ndrives = len(team_drives.index)
    if ndrives == 0:
        # Avoid ZeroDivisionError for a team with no recorded drives.
        return [0.0] * len(intervals)
    start_bins = team_drives['Start_Bin']
    return [int(start_bins.isin([interval]).sum()) / ndrives
            for interval in intervals]

def count_result(drives,bins,intervals,results):
    """Fraction of the drives in the given bins that ended with given results.

    Parameters
    ----------
    drives : DataFrame of drives; must have the ``bins`` column and, when any
        drive matches, a ``'result'`` column.
    bins : name of the column holding each drive's bin label.
    intervals : bin labels to keep.
    results : ``'result'`` values counted as a hit.

    Returns
    -------
    float
        hits / matching drives, or 0.0 when no drive falls in ``intervals``.
    """
    in_bins = drives[bins].isin(intervals)
    matching = int(in_bins.sum())
    if matching == 0:
        return 0.0
    hits = int((in_bins & drives['result'].isin(results)).sum())
    return hits / matching

def offensive_features(team,possession='pos_team'):
    """Return 12 drive-outcome rates for one team.

    Parameters
    ----------
    team : value compared against the ``possession`` column of the
        module-level ``drives`` table.
    possession : column used to select the team's drives; ``'pos_team'``
        (default) or ``'def_team'``.

    Returns
    -------
    list of float
        In order:
        0     share of drives ending in 'Fumble' or 'Interception'
        1     share of drives ending in 'Fumble, Safety' or 'Safety'
        2-5   touchdown rate of drives starting in intervals[0..3]
        6-9   field-goal rate of drives starting in intervals[0..3]
        10    touchdown rate of drives ending in intervals[4]
        11    share of drives that were punts with zero first downs
        All zeros when the team has no drives.
    """
    team_drives = drives[drives[possession].isin([team])]
    ndrives = len(team_drives.index)
    if ndrives == 0:
        # Avoid ZeroDivisionError for a team with no recorded drives.
        return [0.0] * 12

    results = team_drives['result']
    turnover_drives = team_drives[results.isin(['Fumble','Interception'])]
    safety_drives = team_drives[results.isin(['Fumble, Safety','Safety'])]
    punt_drives = team_drives[results.isin(['Punt'])]
    threeandout = punt_drives[punt_drives['first_downs'].eq(0)]

    features = [len(turnover_drives.index)/ndrives,
                len(safety_drives.index)/ndrives]
    # Scoring rates by starting bin; the last bin (intervals[4]) is not used
    # for drive starts, only for the drive-end rate below.
    for outcome in ('Touchdown', 'Field Goal'):
        for i in range(4):
            features.append(count_result(team_drives,'Start_Bin',[intervals[i]],[outcome]))
    # Red Zone Efficiency
    features.append(count_result(team_drives,'End_Bin',[intervals[4]],['Touchdown']))
    features.append(len(threeandout.index)/ndrives)
    return features


#Section 5: Create Offensive and Defensive feature Dataframes with Team as key

In [0]:
# Labels for the 18 per-team features, in the order produced by
# offensive_features() + possesion_starts() + possesion_ave().
# ('Ple40' previously carried a stray trailing comma inside the string.)
team_feature_labels = ['Turnover','safety','TDle20','TDle40','TDle60','TDle80',
                       'FGle20','FGle40','FGle60','FGle80','RZ','nfd',
                       'Ple20','Ple40','Ple60','Ple80','Ple100','pyp']


def _team_feature_frame(possession, labels):
    """Build a team-indexed feature table for drives selected via `possession`.

    One row per entry of teams['team_id'] (no hard-coded team count); the
    columns are the 18 features named by `labels`.
    """
    rows = [
        offensive_features(team, possession)
        + possesion_starts(team, possession)
        + possesion_ave(team, possession)
        for team in teams['team_id']
    ]
    return pd.DataFrame(rows, index=teams['team_id'], columns=labels, dtype=float)


# Pdata: features of the drives where the team had possession.
Pdata = _team_feature_frame('pos_team', team_feature_labels)

# Ddata: the same features computed over the drives the team defended;
# column names get a 'D' prefix.
Ddata = _team_feature_frame('def_team', ['D' + label for label in team_feature_labels])
    

#Section 6: Create Feature and Result (Scores) Dataframes for 2018 Season

In [0]:
# Keep only regular-season games; each game contributes two training rows
# (one per team), so the tables are sized by the data rather than a
# hard-coded 512 that would leave all-zero rows if fewer games were present.
Regular_season = games[games['season_type'].isin(['Regular'])]

# 'Home' flag + 18 offensive features of the scoring team + 18 defensive
# features of its opponent. ('Ple40'/'DPle40' previously carried a stray comma.)
offense_labels = ['Turnover','safety','TDle20','TDle40','TDle60','TDle80',
                  'FGle20','FGle40','FGle60','FGle80','RZ','nfd',
                  'Ple20','Ple40','Ple60','Ple80','Ple100','pyp']
feature_labels = ['Home'] + offense_labels + ['D' + label for label in offense_labels]

feature_rows = []
score_rows = []
for x in Regular_season.itertuples(index=False, name=None):
    # Positional layout of a games row (after the gsis_id index):
    # x[1]/x[2] = first team and its score, x[3]/x[4] = second team and its
    # score, x[5] = truthy when the first team is NOT playing at home.
    # The first team is treated as the home side -- TODO confirm against the CSV header.
    t1, t2 = x[1], x[3]
    nh = x[5]
    h = 0.0 if nh else 1.0

    # Row for the first team: its offence against the second team's defence.
    feature_rows.append([h] + Pdata.loc[t1].tolist() + Ddata.loc[t2].tolist())
    score_rows.append(x[2])

    # Row for the second team, which never gets the home flag.
    feature_rows.append([0.0] + Pdata.loc[t2].tolist() + Ddata.loc[t1].tolist())
    score_rows.append(x[4])

Features = pd.DataFrame(feature_rows, columns=feature_labels, dtype=float)
Scores = pd.DataFrame({'Scores': score_rows}, dtype=float)


#Section 7: Find the weights for each feature in the model

In [0]:
# Fit a Bayesian ridge regression of points scored on the 37 features.
clf = BayesianRidge(compute_score=False)
X = Features.values
y = np.ravel(Scores.values)
clf.fit(X, y)

# Feature names in the same order as the columns of X.
# ('Ple40'/'DPle40' previously carried a stray comma inside the string.)
offense_labels = ['Turnover','safety','TDle20','TDle40','TDle60','TDle80',
                  'FGle20','FGle40','FGle60','FGle80','RZ','nfd',
                  'Ple20','Ple40','Ple60','Ple80','Ple100','pyp']
l = ['Home'] + offense_labels + ['D' + label for label in offense_labels]

# zip() silently truncates, so make sure every coefficient gets a label.
assert len(l) == len(clf.coef_), "feature labels do not match model coefficients"

# Materialise the pairs: in Python 3 a bare zip object prints as
# '<zip object at ...>' and can only be iterated once.
z = list(zip(l, clf.coef_))
for c, f in z:
    print(c, f)

#Section 9: Use the model to predict playoff results

In [0]:
Playoffs = pd.read_csv('games_Playoff_2019.csv',index_col = 'gsis_id')

# Same 37-column layout used for training.
# ('Ple40'/'DPle40' previously carried a stray comma inside the string.)
offense_labels = ['Turnover','safety','TDle20','TDle40','TDle60','TDle80',
                  'FGle20','FGle40','FGle60','FGle80','RZ','nfd',
                  'Ple20','Ple40','Ple60','Ple80','Ple100','pyp']
playoff_labels = ['Home'] + offense_labels + ['D' + label for label in offense_labels]

# Feature row 20 (first row of the 11th game) never receives the home flag,
# whatever the CSV says -- presumably the neutral-site final; TODO confirm.
NO_HOME_ADVANTAGE_ROW = 20

# Two rows per game, collected in a list so the table always has exactly
# 2 * len(Playoffs) rows. A pre-sized 11-row frame left zero rows behind (and
# made the inserts below fail) whenever the CSV held fewer than 6 games.
playoff_rows = []
for x in Playoffs.itertuples(index=False, name=None):
    # x[1] = first (home) team, x[3] = second (away) team,
    # x[5] = 0 when the first team really plays at home.
    t1, t2 = x[1], x[3]
    nothome = x[5]
    first_row = len(playoff_rows)
    if first_row != NO_HOME_ADVANTAGE_ROW and nothome == 0:
        home_flag = 1.0
    else:
        home_flag = 0.0
    playoff_rows.append([home_flag] + Pdata.loc[t1].tolist() + Ddata.loc[t2].tolist())
    playoff_rows.append([0.0] + Pdata.loc[t2].tolist() + Ddata.loc[t1].tolist())

PlayoffFeatures = pd.DataFrame(playoff_rows, columns=playoff_labels, dtype=float)

X = PlayoffFeatures.values
PlayoffPredict = clf.predict(X)

# Even rows are the first (home) team of each game, odd rows the second (away).
Playoffs.insert(2,'pred_home_score',PlayoffPredict[0::2])
Playoffs.insert(5,'pred_away_score',PlayoffPredict[1::2])
Playoffs.to_csv('Playoffs.csv')
Playoffs