In [1]:
import nflgame
import pandas as pd
from __future__ import absolute_import, division, print_function

In [2]:
def adjust_team(teamname):
    """Return the team code to use for ``teamname``.

    'STL' is mapped to 'LA'; every other value is returned unchanged.
    """
    return 'LA' if teamname == 'STL' else teamname

In [3]:
def weekly(g,chosen):
    """Total one team's rushing and passing numbers for a single game.

    Parameters
    ----------
    g : game object exposing ``g.players.rushing()`` and
        ``g.players.passing()``; each yielded player must carry a ``team``
        attribute plus the ``rushing_*`` / ``passing_*`` counters read below.
    chosen : team code (after ``adjust_team`` normalisation) to total for.

    Returns
    -------
    list
        ``[rush attempts, rush yards, rush TDs, pass attempts,
        pass completions, pass yards, pass TDs]``.
    """
    # Keep only the players whose (normalised) team is the requested one.
    rushers = [p for p in g.players.rushing() if adjust_team(p.team) == chosen]
    passers = [p for p in g.players.passing() if adjust_team(p.team) == chosen]

    rush_attempts = sum(p.rushing_att for p in rushers)
    rush_yards = sum(p.rushing_yds for p in rushers)
    rush_tds = sum(p.rushing_tds for p in rushers)

    pass_attempts = sum(p.passing_att for p in passers)
    pass_yards = sum(p.passing_yds for p in passers)
    pass_tds = sum(p.passing_tds for p in passers)
    pass_completions = sum(p.passing_cmp for p in passers)

    return [rush_attempts, rush_yards, rush_tds,
            pass_attempts, pass_completions, pass_yards, pass_tds]

In [4]:
def set_scores(g):
    """Prepare the team list and score bookkeeping for one game.

    Parameters
    ----------
    g : game object exposing ``home``, ``away`` and the per-quarter
        ``score_home_q1..q4`` / ``score_away_q1..q4`` attributes.

    Returns
    -------
    tuple
        ``(teams, score, score_qum)`` where

        * ``teams`` is ``[home, away]`` with 'STL' replaced by 'LA',
        * ``score`` maps each team to a running score starting at 0,
        * ``score_qum`` maps each team to four numbers: the points the team
          had accumulated *before* quarters 1, 2, 3 and 4 respectively.
    """
    teams = ['LA' if code == 'STL' else code for code in (g.home, g.away)]
    home, away = teams

    points_by_quarter = {
        home: [g.score_home_q1, g.score_home_q2, g.score_home_q3, g.score_home_q4],
        away: [g.score_away_q1, g.score_away_q2, g.score_away_q3, g.score_away_q4],
    }

    score_qum = {}
    for team in teams:
        entering = [0]
        # Only the first three quarters feed the "score entering quarter N"
        # totals; fourth-quarter points are never needed for that.
        for quarter_points in points_by_quarter[team][:3]:
            entering.append(entering[-1] + quarter_points)
        score_qum[team] = entering

    score = {home: 0, away: 0}
    return teams, score, score_qum

In [5]:
class PlayParser:
    """Extract model features from the string form of a single play.

    The text is expected to look like::

        (OFFENSE, YARDLINE, Q<n>, <down> and <distance>) (MM:SS) description

    Everything between the first character and the first ``)`` is split on
    commas into ``self.first``; the text inside the next parentheses is kept
    as the clock string ``self.second``; the remainder (including the clock)
    is kept as ``self.play`` and searched for keywords.

    Plays that do not fit this shape, are outside quarters 1-4, or are field
    goals, punts or "No Play" entries are flagged as null (see ``is_null``).
    The accessor methods other than ``is_null``, ``offense``, ``play_points``
    and the keyword checks are only meaningful for non-null plays.
    """

    def __init__(self,raw_play_int,teams):
        """Parse ``raw_play_int`` (anything with a ``str()`` form).

        ``teams`` is the two-element ``[home, away]`` list for the game, with
        team codes already normalised ('STL' -> 'LA').
        """
        raw_play = str(raw_play_int)
        header_end = raw_play.find(')')
        self.first = raw_play[1:header_end].split(',')
        play = raw_play[header_end+1:]
        # Skip the leading " (" so only the clock text remains.
        self.second = play[2:play.find(')')]
        self.play = str(play)
        self.teams = teams
        self.raw_play = raw_play

        # A header with four fields but an unparseable quarter is treated as
        # a null play rather than raising from the constructor.
        quarter = self._quarter_or_none() if len(self.first) == 4 else None
        self.isnull = (quarter is None
                       or quarter > 4
                       or not self.second.replace(':','').isdigit()
                       or 'field goal' in self.play
                       or 'punt' in self.play
                       or 'No Play' in self.play)

    def _quarter_or_none(self):
        """Return the quarter number from the header, or None if unparseable."""
        try:
            return int(self.first[2].replace('Q',''))
        except (IndexError, ValueError):
            return None

    def _normalize_team(self,code):
        """Map the 'STL' team code to 'LA'; leave any other code unchanged."""
        return 'LA' if code == 'STL' else code

    def is_null(self):
        """Return True when the play should be skipped by the feature builder."""
        return self.isnull

    def down(self):
        """Return the down as an int (left side of '<down> and <distance>')."""
        return int(self.first[3].split(' and ')[0])

    def distance(self):
        """Return the distance as an int (right side of '<down> and <distance>')."""
        return int(self.first[3].split(' and ')[1])

    def offense(self):
        """Return the team code from the header's first field ('STL' -> 'LA')."""
        return self._normalize_team(self.first[0])

    def defense(self):
        """Return the other team in ``self.teams``.

        Raises ``ValueError`` if the offense is not one of ``self.teams``.
        """
        # Look the opponent up in this parser's own team list; relying on a
        # module-level ``teams`` variable only worked by accident.
        offense = self.offense()
        opponent = self.teams[1 - self.teams.index(offense)]
        return self._normalize_team(opponent)

    def get_score(self,score,squm,pquarter):
        """Attach the running score to this play and return it.

        ``score`` maps team -> current points and is modified in place.
        ``squm`` maps team -> list of points held when each quarter began.
        ``pquarter`` is the quarter of the previously processed play; when it
        differs from this play's quarter, ``score`` is reset to the
        start-of-quarter totals from ``squm``.

        Must be called before ``offense_score`` / ``defense_score``.
        """
        quarter = self.quarter()
        if pquarter != quarter:
            for t in self.teams:
                score[t] = squm[t][quarter-1]
        self.score = score
        return score

    def offense_score(self):
        """Return the offense's points from the score set by ``get_score``."""
        return self.score[self.offense()]

    def defense_score(self):
        """Return the defense's points from the score set by ``get_score``."""
        return self.score[self.defense()]

    def is_pass(self):
        """Return 1 if the text mentions 'pass' or 'sacked', else 0."""
        return int('pass' in self.play or 'sacked' in self.play)

    def play_points(self):
        """Return the points implied by keywords in the play text.

        Checked in order: 'TOUCHDOWN' -> 6, good field goal -> 3,
        good extra point -> 1, successful conversion attempt -> 2, else 0.
        """
        if 'TOUCHDOWN' in self.play:
            return 6
        if 'field goal' in self.play and 'GOOD' in self.play:
            return 3
        if 'extra point' in self.play and 'GOOD' in self.play:
            return 1
        if('CONVERSION ATTEMPT' in self.play
               and 'SUCCEEDS' in self.play):
            return 2
        return 0

    def resulting_yards(self):
        """Return the integer between the first 'for ' and the first ' yard'.

        Returns 0 when that slice is not an integer (e.g. 'for no gain').
        """
        yards = self.play[self.play.find('for ')+4:self.play.find(' yard')]
        try:
            return int(yards)
        except ValueError:
            return 0

    def quarter(self):
        """Return the quarter number parsed from the header's 'Q<n>' field."""
        return int(self.first[2].replace('Q',''))

    def time_in_half(self):
        """Return the clock in seconds, plus 900 for quarters 1 and 3.

        Presumably the clock counts down, which would make this the time
        remaining in the half -- confirm against the data source.
        """
        time_already = ((4-self.quarter())%2)*900
        time = self.second
        # Clock strings such as ':45' have no minutes part; pad with a zero.
        if len(self.second)<4:
            time = '0'+self.second
        temp = [int(x) for x in time.split(':')]
        return time_already + temp[0]*60+temp[1]

    def yards_to_goal(self):
        """Return the distance to the goal line derived from the yard-line field.

        A yard line with no team prefix yields 50. A yard line on the
        possessing team's side yields ``100 - line``; otherwise ``line``.
        """
        poss_team = self.first[0]
        temp = self.first[1][1:].split(' ')
        if len(temp)==1:
            return 50
        half = temp[0]
        los = int(temp[1])
        if poss_team==half:
            return 100-los
        return los

    def is_penalty(self):
        """Return True if the play text contains 'PENALTY'."""
        return ('PENALTY' in self.play)

    def is_interception(self):
        """Return True if the play text contains 'INTERCEPT'."""
        return ('INTERCEPT' in self.play)

    def is_reversed(self):
        """Return True if the play text contains 'REVERSED'."""
        return ('REVERSED' in self.play)

    def is_fumble(self):
        """Return True if the play text contains 'FUMBLES'."""
        return ('FUMBLES' in self.play)

    def is_completion(self):
        """Return True if the text mentions 'pass' and not 'incomplete'."""
        return (('incomplete' not in self.play) and ('pass' in self.play))

In [6]:
def _resync_quarter(parsed_play):
    """Return the play's quarter if it is usable for a score resync, else None.

    Usable means the header's third field parses as an integer no greater
    than 4 (the start-of-quarter totals only cover quarters 1-4).
    """
    try:
        quarter = int(parsed_play.first[2].replace('Q',''))
    except (IndexError, ValueError):
        return None
    return quarter if quarter <= 4 else None


# One row per usable play, and one row per (team, game) with box-score totals.
plays = []
games = []
for year in range(2011,2017):
    for week in range(1,18):
        for g in nflgame.games(year,week=week):
            teams,score,score_cumulative = set_scores(g)
            previous_quarter = 1
            for play in g.drives.plays():
                p = PlayParser(play,teams)
                play_score = p.play_points()

                # Resync the running score whenever the quarter changes. The
                # comparison must use the quarter of the *previous* play, so
                # previous_quarter is only updated after get_score has run.
                # Null plays take part too: otherwise points from a field goal
                # or extra point that opens a quarter would be wiped by the
                # resync on the next regular play.
                play_quarter = _resync_quarter(p)
                if play_quarter is not None:
                    score = p.get_score(score,score_cumulative,previous_quarter)
                    previous_quarter = play_quarter

                if not p.is_null():
                    plays.append([p.offense(),p.defense(),year,week,p.down(),p.distance(),p.time_in_half(),
                                  p.yards_to_goal(),p.offense_score(),p.defense_score(),play_score,
                                  p.resulting_yards(),p.is_penalty(),p.is_completion(),p.is_interception(),
                                  p.is_reversed(),p.is_fumble(),p.is_pass()])

                # Credit the points after the row is recorded, so each row
                # carries the score as it stood before the play.
                if p.offense() in teams:
                    score[p.offense()] += play_score
            for team in teams:
                games.append([team,teams[1-teams.index(team)],year,week]+weekly(g,team))

In [142]:
# Column labels, in the same order as the values appended to `plays`.
play_columns = [
    'Offense', 'Defense', 'Year', 'Week', 'Down', 'Distance',
    'TimeInHalf', 'YardsToGoal', 'OffenseScore', 'DefenseScore',
    'Points', 'Result', 'IsPenalty', 'IsCompletion',
    'IsInterception', 'IsReversed', 'IsFumble', 'IsPass',
]
df_plays = pd.DataFrame(plays, columns=play_columns)

In [143]:
# Column labels, in the same order as the values appended to `games`.
game_columns = [
    'Offense', 'Defense', 'Year', 'Week', 'RushAttempt',
    'RushYards', 'RushTds', 'PassAttempt',
    'PassCompletions', 'PassYards', 'PassTds',
]
df_games = pd.DataFrame(games, columns=game_columns)

In [144]:
# Season totals per team: tempO groups by the team on offense, tempD by the
# team on defense (i.e. what that team's opponents produced against it).
# The stat columns are selected before aggregating so that only they are
# summed, instead of summing every column and discarding the rest.
stat_columns = ['RushAttempt', 'RushYards', 'RushTds', 'PassAttempt',
                'PassCompletions', 'PassYards', 'PassTds']
tempO = df_games.groupby(['Offense', 'Year'])[stat_columns].sum().reset_index()
tempD = df_games.groupby(['Defense', 'Year'])[stat_columns].sum().reset_index()

In [145]:
# Tag every defensive season total with a '_Def' suffix.
names = {stat: stat + '_Def'
         for stat in ('RushAttempt', 'RushYards', 'RushTds', 'PassAttempt',
                      'PassCompletions', 'PassYards', 'PassTds')}
tempD.rename(columns=names, inplace=True)

In [146]:
# Relabel each season's totals with the following year, so that the merges on
# 'Year' further down attach the *previous* season's totals to every game.
# Note: running this cell more than once shifts the labels again.
tempO['Year'] += 1
tempD['Year'] += 1

In [147]:
# Attach the offense's season totals to each game row. `suffixes` must be an
# ordered (left, right) pair: with an unordered set the two suffixes can swap,
# and the drop below would then remove the season totals instead of the
# per-game columns (recent pandas versions reject a set outright).
df_games = pd.merge(df_games,tempO,on=['Offense','Year'],how='left',suffixes=('_x','_Off'))
# Discard the per-game stat columns (suffixed '_x'); only the '_Off' season
# totals are kept.
df_games = df_games.drop(['RushAttempt_x','RushYards_x','RushTds_x',
                          'PassAttempt_x','PassCompletions_x','PassYards_x',
                          'PassTds_x',
               ],axis=1)

In [148]:
# Attach the defense's season totals ('_Def' columns) to each game row.
df_games = df_games.merge(tempD, how='left', on=['Defense', 'Year'])

In [149]:
# Give every play the season totals of its offense/defense pairing for that game.
dataset = df_plays.merge(df_games, how='left',
                         on=['Offense', 'Defense', 'Year', 'Week'])

In [150]:
# Discard plays with any missing value (e.g. no season totals were merged in),
# then renumber the rows from zero.
dataset = dataset.dropna().reset_index(drop=True)

# Derived columns.
dataset['PassInt'] = dataset['IsPass'] * 1
dataset['OffYearRatio'] = dataset['PassAttempt_Off'] / dataset['RushAttempt_Off']
dataset['DefYearRatio'] = dataset['PassAttempt_Def'] / dataset['RushAttempt_Def']
dataset['ScoreDiff'] = dataset['OffenseScore'] - dataset['DefenseScore']

dataset.to_csv('AllVariables_temp.csv')