# Simulating NFL games with Monte Carlo


I want to model NFL games using teams' past performance. The idea is to take the game state (offense, defense, score, and clock), put together a portfolio of what those teams have done in the past, and then randomly choose one of those past outcomes to move the game forward. Picture stacking rocks to build a wall: you choose a bunch of rocks and put them together one at a time until eventually you're left with something resembling a wall. The same principle applies here. A football game is built of drives. One drive happens, then another, and another, until the teams run out of time.

In [1]:
# Load relevant packages
import pandas as pd
import numpy as np
import math
from time import time
import random
from joblib import Parallel, delayed
import multiprocessing
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# Directory that holds the csv inputs used throughout this notebook
datapath = "/home/welced12/git/football_analytics/data/"

# Game-level table: one row per game, carries the season/week columns
gamedata = pd.read_csv(datapath+'espn_gamedata2009-2017.csv')

# Drive-level table: one row per drive
alldrives = pd.read_csv(datapath+'espn_drives2009-2017.csv')

# Attach each game's season and week to every one of its drives.
# A left join keeps every drive row even if its game is missing from gamedata.
alldrives = alldrives.merge(right=gamedata[['gameId','season','week']],
                            how='left', left_on='gameId', right_on='gameId')

# Show a few random rows as a sanity check on the merged table
alldrives.sample(3)

Unnamed: 0.1,Unnamed: 0,away,away_score_after,away_score_before,drive,home,home_score_after,home_score_before,offense,plays,...,FG,punt,turnover,EoH,secs_rem,starting_fieldposition,time_in_secs,left_in_half,season,week
44867,44867,JAX,9,9,18,OAK,23,23,JAX,3,...,0,1,0,0,1048.0,0.0,131,1048.0,2016,7
7924,7924,GB,10,10,8,MIA,7,7,GB,8,...,0,1,0,0,2336.0,-33.0,298,536.0,2010,6
7550,7550,BUF,20,20,15,JAX,27,27,BUF,3,...,0,1,0,0,982.0,-35.0,101,982.0,2010,5


In [6]:
# Show the first drive as a Series. `.iloc` is positional, so ask for
# position 0 directly rather than feeding it the first row's index *label*,
# which is only correct when that label happens to be 0.
alldrives.iloc[0]

Unnamed: 0                          0
away                              PIT
away_score_after                    0
away_score_before                   0
drive                               0
home                              TEN
home_score_after                    0
home_score_before                   0
offense                           PIT
plays                               3
result                           Punt
time                             1:44
yds_gained                          2
gameId                      290910023
uid                       290910023-0
TD                                  0
FG                                  0
punt                                1
turnover                            0
EoH                                 0
secs_rem                         3593
starting_fieldposition             -8
time_in_secs                      104
left_in_half                     1793
season                           2009
week                                1
Name: 0, dtype: object

# The necessary functions

In [43]:
def get_possible_drives(home, away, home_poss, time_rem, season, drives=None):
    """Return the historical drives relevant to one side of a matchup.

    A drive is kept when either
      * the team that will be on offense was the offense in that drive, or
      * the team that will be on defense took part in that game and was
        NOT the offense (i.e. it was defending).
    Only drives from `season - 5` through `season` are kept.

    Parameters
    ----------
    home, away : str
        Team abbreviations for the simulated game.
    home_poss : bool
        True to build the pool for the home team's possessions,
        False for the away team's possessions.
    time_rem : int
        Currently unused; kept so existing callers keep working.
    season : int
        Season being simulated.
    drives : pandas.DataFrame, optional
        Table to filter; needs `offense`, `home`, `away` and `season`
        columns. Defaults to the module-level `alldrives` table.

    Returns
    -------
    pandas.DataFrame
        The matching rows, in their original order.
    """
    if drives is None:
        drives = alldrives

    # Decide who attacks and who defends for this pool
    if home_poss:
        off_team, def_team = home, away
    else:
        off_team, def_team = away, home

    offense = drives.offense.astype(str)

    # Condition 1: the attacking team had the ball
    attacking = offense == off_team

    # Condition 2: the defending team was in the game but did not have the ball
    defending = (offense != def_team) & (
        (drives.home.astype(str) == def_team) |
        (drives.away.astype(str) == def_team)
    )

    # Keep the past five seasons, and nothing later than the simulated season.
    # Later drives would get a negative recency weight downstream and could
    # never be accepted, so leaving them in only wastes sampling attempts.
    in_window = (drives.season >= season - 5) & (drives.season <= season)

    return drives.loc[(attacking | defending) & in_window]

In [47]:
# Define a class that constitutes a game
class football_game:
    """State of one simulated football game, advanced one drive at a time.

    The object tracks the half, the seconds left in the half, both scores and
    which team has the ball. Before `get_next_drive` is called, the caller
    must assign two DataFrames of historical drives to the instance:
    `home_drives` (used when the home team has the ball) and `away_drives`
    (used when the away team has the ball).
    """

    def __init__(self, home, away, season=2018, week=1, quiet=False):
        """Set up a fresh game at the start of the first half.

        Parameters
        ----------
        home, away : str
            Team abbreviations.
        season, week : int
            When the simulated game takes place; used to weight how recent a
            historical drive is.
        quiet : bool
            When True, suppress the per-candidate debug printing.
        """
        self.home = home
        self.away = away
        self.quiet = quiet
        self.season = season
        self.week = week
        self.half = 1
        self.time_rem = 1800          # seconds left in the current half
        self.home_score = 0
        self.away_score = 0
        self.drive_base_weights = {}  # not read anywhere in this class

        # Coin flip decides which team gets the ball to start
        self.home_poss = random.randint(1, 2) == 1

    def get_drive_score(self, drive):
        """Return the fitness of a historical drive for the current game state.

        The fitness is the product of five weights (recency, home/away role,
        rematch, time remaining, score difference) and is used by
        `get_next_drive` as an acceptance probability.

        Parameters
        ----------
        drive : pandas.Series
            One row of the drives table.
        """
        # Weight for recency
        if self.season == drive['season']:
            # Falls linearly with the week gap; 16 weeks back gives 1/2
            w_age = 1 - (self.week - drive['week']) / 32
        else:
            # 1/2 for last season, 1/4 for two seasons back, and so on
            w_age = 1 / (2 * (self.season - drive['season']))

        # Weight for home/away: full weight if either team keeps its role
        same_role = (drive['home'] == self.home) or (drive['away'] == self.away)
        w_homeaway = 1 if same_role else 0.75

        # Weight to favor a rematch between these exact two teams
        teams = [drive['home'], drive['away']]
        rematch = (self.home in teams) and (self.away in teams)
        w_matchup = 1 if rematch else 0.75

        # Weight for time remaining
        w_time = 0
        if self.time_rem > 300:
            # Most of the time, time remaining won't dictate decisions.
            w_time = 1
        elif (not drive.EoH == 1) or (drive.time_in_secs < self.time_rem + 10):
            # Gaussian in the clock mismatch. The spread scales with the time
            # left, so more drives are plausible when more time remains.
            # End-of-half drives that ran much longer than the time available
            # skip this branch and keep a weight of 0.
            clock_gap = drive['left_in_half'] - self.time_rem
            stdev = self.time_rem / 3
            w_time = math.exp(-(clock_gap)**2 / (2 * stdev**2))

        # Weight for score difference, from the offense's point of view
        if self.home_poss:
            curr_score_diff = self.home_score - self.away_score
        else:
            curr_score_diff = self.away_score - self.home_score
        if drive['offense'] == drive['home']:
            hist_score_diff = drive['home_score_before'] - drive['away_score_before']
        else:
            hist_score_diff = drive['away_score_before'] - drive['home_score_before']
        # 98 == 2 * 7**2, i.e. a Gaussian with a standard deviation of 7 points
        w_score = math.exp(-(curr_score_diff - hist_score_diff)**2 / 98)

        # Drive fitness is the product of all weights
        return w_age * w_homeaway * w_matchup * w_time * w_score

    def get_candidate_drive(self):
        """Draw one random historical drive suited to the current clock.

        Returns
        -------
        pandas.DataFrame
            A single-row frame sampled from the possessing team's pool.

        Raises
        ------
        ValueError
            If no drive in the pool matches the current clock situation.
        """
        poss_drives = self.home_drives if self.home_poss else self.away_drives

        # Choose subset of drives based on time remaining in 1st/2nd half
        if self.time_rem > 300:
            poss_drives = poss_drives[poss_drives.left_in_half >= 240]
        elif self.half == 1:
            # Late first half: only late drives with over 1800 s left in game
            poss_drives = poss_drives[
                (poss_drives.left_in_half < 360) &
                (poss_drives.secs_rem > 1800)
            ]
        else:
            # Late second half: only late drives with under 1800 s left in game
            poss_drives = poss_drives[
                (poss_drives.left_in_half < 360) &
                (poss_drives.secs_rem < 1800)
            ]

        if poss_drives.empty:
            # sample() would also raise ValueError here, but with a message
            # that says nothing about the game situation.
            raise ValueError(
                "No historical drives available for game state "
                + str(self.game_sit_dict())
            )
        return poss_drives.sample(1)

    def get_next_drive(self):
        """Pick the next drive by rejection sampling and return it as a Series.

        Candidates are drawn with `get_candidate_drive` and accepted with
        probability equal to their fitness from `get_drive_score`. Debug
        output is printed unless the game was created with `quiet=True`.
        """
        verbose = not self.quiet
        if verbose:
            print("\nGame state")
            print(self.game_sit_dict())

        while True:
            # Choose a candidate drive and score its fitness
            candidate = self.get_candidate_drive()
            drive = candidate.loc[candidate.index[0], :]  # row as a Series
            fitness_score = self.get_drive_score(drive)

            # Accept the candidate with probability `fitness_score`
            accepted = random.random() < fitness_score

            if verbose:
                cols = ['offense', 'plays', 'time', 'result']
                print("Candidate Drive")
                print(candidate[cols])
                print(" ^ Fitness = ", fitness_score)
                if accepted:
                    print(" ^ Drive accepted")
                else:
                    print(" ^ Drive not accepted")

            if accepted:
                return drive

    def game_sit_dict(self):
        """Return the current game situation as a plain dict."""
        possessor = self.home if self.home_poss else self.away
        return {'home': self.home,
                'away': self.away,
                'offense': possessor,
                'half': self.half,
                'time_rem': self.time_rem,
                'home_score': self.home_score,
                'away_score': self.away_score}

    def record_drive(self, drive, drive_num=1):
        """Apply a chosen drive to the game state and log it.

        Updates both scores, the clock and the possession flag, and appends a
        column describing the drive to `self.gamestate_df`. That frame is
        created when `drive_num == 1`.

        Parameters
        ----------
        drive : pandas.Series
            The historical drive to replay.
        drive_num : int
            1 for the first drive of the game, larger for later drives.
        """
        # Snapshot of the game before this drive
        gamestate = self.game_sit_dict()

        # Clock: every drive takes at least 10 seconds
        elapsed = drive.time_in_secs
        if elapsed < 10:
            elapsed = 10

        # The home team in the historical drive might not be the home team in
        # the simulated game. `flip` says whether the historical home/away
        # columns must be swapped; None means it cannot be determined.
        if drive.offense == drive.home:
            flip = not self.home_poss
        elif drive.offense == drive.away:
            flip = self.home_poss
        else:
            flip = None
            if not self.quiet:
                print("Something went wrong in determining flipped possession")

        if flip is None:
            need_fallback = True
        else:
            hist_home = drive.home_score_after - drive.home_score_before
            hist_away = drive.away_score_after - drive.away_score_before
            if flip:
                home_delta, away_delta = hist_away, hist_home
            else:
                home_delta, away_delta = hist_home, hist_away
            # A single drive cannot change a score by less than 0 or more
            # than 8, so anything else means the recorded scores are bad.
            need_fallback = any(val < 0 or val > 8
                                for val in (home_delta, away_delta))

        if need_fallback:
            # Recalculate score change based on drive result; default 0 points
            home_delta = 0
            away_delta = 0
            if drive.FG == 1:
                points = 3
            elif drive.TD == 1:
                points = 7
            else:
                points = 0
            if self.home_poss:
                home_delta = points
            else:
                away_delta = points

        # Possession changes unless the team that did NOT have the ball scored
        if self.home_poss:
            change_poss = not (away_delta != 0)
        else:
            change_poss = not (home_delta != 0)

        # Add entry to simulated game history
        this_series = pd.Series(gamestate)
        this_series['home_score_after'] = self.home_score + home_delta
        this_series['away_score_after'] = self.away_score + away_delta
        this_series['result'] = drive.result
        this_series['time'] = elapsed

        entry = this_series.to_frame()
        if drive_num == 1:
            # First drive starts the history frame
            self.gamestate_df = entry
        else:
            # Later drives are appended as new columns
            self.gamestate_df = pd.concat([self.gamestate_df, entry], axis=1)

        # Update the game's state vars
        self.home_score += home_delta
        self.away_score += away_delta
        self.time_rem -= elapsed
        if change_poss:
            self.home_poss = not self.home_poss

# Function to simulate one game

In [39]:
def simulate_game( home, away, **kwargs ):
    """Simulate one full game between `home` and `away`.

    Keyword arguments are forwarded to `football_game`; `season`, `week` and
    `quiet` default to 2018, 1 and False when not given.

    Returns the finished game object. Besides its state it carries
    `drives_selected` (one row per chosen historical drive), `gamestate_df`
    (one row per recorded drive), `winner` (a team, or "OT" on a tie) and
    `result` (a summary dict).
    """
    # Fill in any option the caller left out
    for option, fallback in (('season', 2018), ('week', 1), ('quiet', False)):
        kwargs.setdefault(option, fallback)

    game = football_game( home, away, **kwargs )

    # Pools of historical drives for each team's possessions
    game.home_drives = get_possible_drives( home, away, True, 1800, game.season )
    game.away_drives = get_possible_drives( home, away, False, 1800, game.season )

    # The opening drive starts the drive-history frame
    opening = game.get_next_drive()
    game.drives_selected = opening.to_frame()
    drive_count = 1
    game.record_drive( opening, drive_count )

    for half in (1,2):
        game.half = half
        if half > 1:
            # Fresh clock for the second half
            # NOTE(review): possession simply carries over from the last
            # first-half drive -- confirm that is the intended model.
            game.time_rem = 1800

        # Play drives until the clock runs out or an end-of-half drive is picked
        while game.time_rem > 0:
            drive_count += 1

            picked = game.get_next_drive()
            game.drives_selected = pd.concat( [game.drives_selected, picked],
                                              axis=1 )
            game.record_drive( picked, drive_count )

            if picked.EoH == 1:
                break

    # Histories were built column-by-column; flip them to one row per drive
    game.drives_selected = game.drives_selected.transpose()
    game.gamestate_df = game.gamestate_df.transpose()

    # A tie after regulation is reported as "OT"
    if game.home_score > game.away_score:
        game.winner = game.home
    else:
        game.winner = game.away if game.away_score > game.home_score else "OT"

    game.result = {'home':game.home,
                   'home_score':game.home_score,
                   'away':game.away,
                   'away_score':game.away_score,
                   'winner':game.winner}

    return game

In [12]:
# Wrapper for single game
def sim_one_game(home, away, **kwargs):
    """Simulate a single game, print its result summary and return the game.

    All keyword arguments are passed straight through to `simulate_game`.
    """
    finished = simulate_game(home, away, **kwargs)
    summary = finished.result
    print(summary)
    return finished

In [48]:
# Demo: simulate one game with NO at home against NWE, using default options
g = sim_one_game( home="NO", away="NWE" )


Game state
{'home': 'NO', 'away': 'NWE', 'offense': 'NWE', 'half': 1, 'time_rem': 1800, 'home_score': 0, 'away_score': 0}
Candidate Drive
      offense  plays  time result
50954     CHI      3  1:56   Punt
 ^ Fitness =  0.14637659653707247
 ^ Drive not accepted
Candidate Drive
      offense  plays  time result
35949     ATL      5  3:08   Punt
 ^ Fitness =  0.04869628060817796
 ^ Drive not accepted
Candidate Drive
      offense  plays  time        result
34895     PIT      3  1:42  Interception
 ^ Fitness =  0.09279823156675629
 ^ Drive not accepted
Candidate Drive
      offense  plays  time     result
29077     SEA      8  3:47  Touchdown
 ^ Fitness =  0.0012659913111592433
 ^ Drive not accepted
Candidate Drive
      offense  plays  time     result
44490     CAR      3  1:04  Touchdown
 ^ Fitness =  0.019031524205148663
 ^ Drive not accepted
Candidate Drive
      offense  plays  time result
51473     BUF      5  2:44   Punt
 ^ Fitness =  0.3185121812131172
 ^ Drive not accepted
Candi

      offense  plays  time      result
45905      NO      8  2:27  Blocked FG
 ^ Fitness =  0.03423886557083248
 ^ Drive not accepted
Candidate Drive
      offense  plays  time       result
46385      NO      5  0:55  End of Half
 ^ Fitness =  0.020293938546540102
 ^ Drive not accepted
Candidate Drive
      offense  plays  time     result
28283      NO      3  1:36  Touchdown
 ^ Fitness =  0.005227810705544121
 ^ Drive not accepted
Candidate Drive
      offense  plays  time result
39421      NO      6  3:41   Punt
 ^ Fitness =  4.693061213891678e-08
 ^ Drive not accepted
Candidate Drive
      offense  plays  time      result
45470      NO      6  1:05  Field Goal
 ^ Fitness =  0.025630664250446702
 ^ Drive not accepted
Candidate Drive
      offense  plays  time result
43638      NO      7  3:40   Punt
 ^ Fitness =  4.175299415087049e-08
 ^ Drive not accepted
Candidate Drive
      offense  plays  time        result
31975      NO      2  0:41  Interception
 ^ Fitness =  6.603202313410927

 ^ Fitness =  0.06505626512758776
 ^ Drive accepted

Game state
{'home': 'NO', 'away': 'NWE', 'offense': 'NWE', 'half': 2, 'time_rem': 1431, 'home_score': 16, 'away_score': 10}
Candidate Drive
      offense  plays  time result
38138     PHI      8  4:14  Downs
 ^ Fitness =  0.08657116552564971
 ^ Drive not accepted
Candidate Drive
      offense  plays  time     result
34901     PIT     13  7:04  Touchdown
 ^ Fitness =  0.04102169415940024
 ^ Drive not accepted
Candidate Drive
      offense  plays  time result
47215      TB      5  2:57   Punt
 ^ Fitness =  0.05454821382115278
 ^ Drive not accepted
Candidate Drive
      offense  plays  time     result
52727     CAR     10  5:06  Touchdown
 ^ Fitness =  0.2783946947002689
 ^ Drive not accepted
Candidate Drive
      offense  plays  time result
53725     ATL      5  2:51   Punt
 ^ Fitness =  0.23888413590983787
 ^ Drive not accepted
Candidate Drive
      offense  plays  time result
46376     LAR      5  2:51   Punt
 ^ Fitness =  0.09739256