In [1]:
import csv
import collections
import numpy as np

In [2]:
# Load every row of the play-by-play CSV into memory, one dict per row keyed
# by the CSV header.
# NOTE: despite its name, `data_dict` is a *list* of row dicts; the name is
# kept because later cells refer to it.
# newline='' is what the csv module asks for so that line breaks embedded in
# quoted fields are handed to the parser untouched.
with open('NFLPlaybyPlay2015.csv', 'r', newline='') as csv_file:
    data_dict = list(csv.DictReader(csv_file))

In [3]:

# Bucket the flat list of rows by their GameID, then keep only the per-game
# lists of rows (the ids themselves are not needed afterwards).
rows_by_game = {}
for row in data_dict:
    rows_by_game.setdefault(row['GameID'], []).append(row)
games = list(rows_by_game.values())

In [4]:
# Columns of a play that are converted to integers to form its feature
# vector; the order here is the column order of the resulting arrays.
fields = [
    'PosTeamScore',
    'DefTeamScore',
    'TimeSecs',
    'down',
    'yrdline100',
    'ydstogo',
]

In [5]:
def has_data_for_winner(play):
    """Return True if `play` carries enough information to decide a winner.

    A play qualifies when both score columns parse as integers and neither
    team column holds the 'NA' placeholder.

    Args:
        play: mapping of column name to raw value for a single play.

    Returns:
        bool: True when both scores and both teams are usable, else False.

    Raises:
        KeyError: if 'posteam' or 'DefensiveTeam' is absent from `play`
            (only reached once both scores have parsed).
    """
    try:
        int(play['DefTeamScore'])
        int(play['PosTeamScore'])
    except (KeyError, TypeError, ValueError):
        # Missing column, None value, or a non-numeric marker such as 'NA'.
        # Only these are swallowed so that KeyboardInterrupt/SystemExit and
        # genuine programming errors still surface.
        return False
    return play['posteam'] != 'NA' and play['DefensiveTeam'] != 'NA'

def extract_features(play, field_names=None):
    """Convert the selected columns of `play` into a list of integers.

    Args:
        play: mapping of column name to raw value for a single play.
        field_names: iterable of column names to extract, in output order.
            Defaults to the module-level `fields` list when omitted.

    Returns:
        list[int] with one entry per requested column, or None if any of the
        columns is missing or does not parse as an integer.
    """
    if field_names is None:
        field_names = fields
    try:
        return [int(play[name]) for name in field_names]
    except (KeyError, TypeError, ValueError):
        # Missing column, None value, or a non-numeric marker such as 'NA':
        # the play has no complete feature vector. Anything else (including
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return None

class Game:
    """The plays of a single game, with helpers to label them by outcome.

    Plays run by the eventual winner while in possession are "positive"
    examples; plays where the eventual winner was defending are "negative"
    examples.
    """

    def __init__(self, game_data):
        """Store the plays of one game.

        Args:
            game_data: sequence of play dicts belonging to the same game.
        """
        self.game_data = game_data

    def get_winner(self):
        """Return the team that is ahead on the last usable play.

        Plays are scanned from the end of the game backwards until one
        passes `has_data_for_winner`; the scores on that play decide the
        winner.

        Returns:
            The winning team's value from that play ('posteam' or
            'DefensiveTeam'), or None when the scores are tied or when no
            play in the game has usable data (including an empty game).
        """
        # Walk the whole game backwards instead of a fixed number of
        # trailing plays, so short or empty games cannot index out of range
        # and long unusable tails cannot leave the result undefined.
        last_play = next(
            (play for play in reversed(self.game_data) if has_data_for_winner(play)),
            None,
        )
        if last_play is None:
            return None

        def_score = int(last_play['DefTeamScore'])
        pos_score = int(last_play['PosTeamScore'])
        if def_score > pos_score:
            return last_play['DefensiveTeam']
        if pos_score > def_score:
            return last_play['posteam']
        return None

    def _plays_where_winner_is(self, column):
        """Return the plays whose `column` value equals the game's winner."""
        winner = self.get_winner()
        if winner is None:
            # No winner: nothing can be labelled. Returning early also avoids
            # matching rows whose team column happens to be None.
            return []
        return [play for play in self.game_data if play[column] == winner]

    @staticmethod
    def _features_of(plays):
        """Extract features once per play, dropping plays that yield none."""
        candidates = (extract_features(play) for play in plays)
        return [features for features in candidates if features]

    def plays_offensive_team_won(self):
        """Return the plays in which the eventual winner had possession."""
        return self._plays_where_winner_is('posteam')

    def plays_defensive_team_won(self):
        """Return the plays in which the eventual winner was defending."""
        return self._plays_where_winner_is('DefensiveTeam')

    def positive_features(self):
        """Return feature lists for plays where the winner had possession."""
        return self._features_of(self.plays_offensive_team_won())

    def negative_features(self):
        """Return feature lists for plays where the winner was defending."""
        return self._features_of(self.plays_defensive_team_won())
    

In [6]:
# Collect the labelled feature rows of every game, then stack each class into
# a single NumPy array (one row per play).
positive_rows, negative_rows = [], []
for game in games:
    current_game = Game(game)
    positive_rows.extend(current_game.positive_features())
    negative_rows.extend(current_game.negative_features())
positive_examples = np.array(positive_rows)
negative_examples = np.array(negative_rows)

In [7]:
# Report the shape of each example array: positives first, then negatives.
for examples in (positive_examples, negative_examples):
    print(examples.shape)

(19568, 6)
(19294, 6)


In [8]:
# Display the positive examples; columns follow the order of `fields`.
positive_examples

array([[   0,    0, 3481,    1,   66,   10],
       [   0,    0, 3445,    2,   64,    8],
       [   0,    0, 3404,    3,   66,   10],
       ..., 
       [  24,   29,   32,    2,    6,    6],
       [  30,   29,   28,    3,    6,    6],
       [  31,   29,    5,    1,   51,   10]])

In [9]:
# Display the negative examples; columns follow the order of `fields`.
negative_examples

array([[   0,    0, 3600,    1,   80,   10],
       [   0,    0, 3568,    2,   76,    6],
       [   0,    0, 3527,    3,   78,    8],
       ..., 
       [  29,   31,   24,    1,   80,   10],
       [  29,   31,   18,    1,   64,   10],
       [  29,   31,   12,    2,   58,    4]])