### Creating a simulation for baseball games, based on 538's riddler:
https://fivethirtyeight.com/features/can-you-turn-americas-pastime-into-a-game-of-yahtzee/

In [252]:
import random 
import numpy as np
import pandas as pd

#### First, I made a dice roller.  It outputs a list of 2 integers, from 1-6.  I had it print out the baseball action, which will be helpful for logging and debugging later on.

In [347]:
def roll_2_die(verbose=True):
    d = [random.randint(1,6), random.randint(1,6)]
    d = sorted(d)
    if verbose:
        if d in [[1,1]]:
            print 'rolled a double'
        elif d in [[1,2], [1,3], [1,4]]:
            print 'rolled a single'
        elif d in [[1,5]]:
            print 'rolled a base on error'    
        elif d in [[1,6]]:
            print 'rolled a base on balls'
        elif d in [[2,2,], [2,3], [2,4], [2,5]]:
            print 'rolled a strike'
        elif d in [[2,6]]:
            print 'rolled a foul out'
        elif d in [[3,3], [3,4], [3,5], [3,6]]:
            print 'rolled a out at 1st'
        elif d in [[4,4], [4,5], [4,6]]:
            print 'rolled a fly out'
        elif d in [[5,5]]:
            print 'rolled a double play'
        elif d in [[5,6]]:
            print 'rolled a triple'
        elif d in [[6,6]]:
            print 'rolled a home run'
        else:
            print 'error:', d
    
    return(d)

print'example output :', roll_2_die()


example output : rolled a strike
[2, 5]


#### I'm putting the running game stats in a dictionary: innings, outs, strikes etc.  I created an intialize game function for use in the multi-game simulation, so I could reset the game quickly.  

In [345]:
def initialize_game():
    game = {'_score': 0,
            '__inning': 1,
            '__outs': 0, 
            '__strikes':0, 
            'runner_on_1': False,
            'runner_on_2': False,
            'runner_on_3': False,
            }
    return(game)

game = initialize_game()
print 'the running game stats will look like this:',game

the running game stats will look like this: {'__inning': 1, '__strikes': 0, '_score': 0, '__outs': 0, 'runner_on_1': False, 'runner_on_2': False, 'runner_on_3': False}


#### Now I made a function that simulates 1 dice roll.  The game dictionary and the die values are inputs, and the function applies the appropriate increments to the stats of the game dictionary.   The riddle leaves some open questions about how to handle base runners.  For instance, it assumes that runners can score from 2nd on a single.  Fly out, foul out, and out on 1st are much less clear, so I've made some debatable assumptions as to when runners will and won't advance.

In [293]:
def roll_once(d, game):
    # 1,1 double
    if d in [[1,1]]:
        # runner on 3rd scores
        if game['runner_on_3']:
            game['_score'] += 1
            game['runner_on_3'] = False
        # runner on 2nd scores
        if game['runner_on_2']:
            game['_score'] += 1
            game['runner_on_2'] = False
        # runner on 1st advances to 3rd
        if game['runner_on_1']:
            game['runner_on_3'] = True
        # hitter is now on 2nd
        game['runner_on_2'] = True
         # reset strikes
        game['__strikes'] = 0       
    # singles
    elif d in [[1,2], [1,3], [1,4]]:
        # runner on 3rd scores
        if game['runner_on_3']:
            game['_score'] += 1
            game['runner_on_3'] = False
        # runner on 2nd scores
        if game['runner_on_2']:
            game['_score'] += 1
            game['runner_on_2'] = False
        # runner on 1st advances
        if game['runner_on_1']:
            game['runner_on_2'] = True
        # hitter is on 1st
        game['runner_on_1'] = True
        # reset strikes
        game['__strikes'] = 0
    # base on error
    elif d in [[1,5]]:
        # runner on 3rd scores
        if game['runner_on_3']:
            game['_score'] += 1
            game['runner_on_3'] = False
        # runner on 2nd advances to 3rd
        if game['runner_on_2']:
            game['runner_on_2'] = False
            game['runner_on_3'] = True
        # runner on 1st advances
        if game['runner_on_1']:
            game['runner_on_2'] = True
        # hitter is on 1st
        game['runner_on_1'] = True
        # reset strikes
        game['__strikes'] = 0       
    # base on balls
    elif d in [[1,6]]:
        # walked, with runner on first will advance some runners
        if game['runner_on_1']:
            # walked, with bases loaded just increments score, bases still loaded
            if game['runner_on_3'] and game['runner_on_2']:
                game['_score'] += 1
            # walked with 1st and 2nd, now loaded
            elif game['runner_on_2']:
                game['runner_on_3'] = True
            # walked with runner on 1st, now 1st and 2nd
            else:
                game['runner_on_2'] = True
        # walked, without runner on first will just put hitter on first
        else:
            game['runner_on_1'] = True   
        # reset strikes
        game['__strikes'] = 0 
    # strikes
    elif d in [[2,2,], [2,3], [2,4], [2,5]]:
        game['__strikes'] = game['__strikes'] + 1
        # if strikes = 3, add an out and reset
        if game['__strikes'] == 3:
            game['__outs'] += 1
            game['__strikes'] = 0
    # foul out - advancing bases 30 % of the time
    elif d in [[2,6]]:
        # tag up occasionally advancing runners if under 2 outs
        if random.randint(1,10) <= 3 and game['__outs'] < 2:
             # runner on 3rd scores
            if game['runner_on_3']:
                game['_score'] += 1
                game['runner_on_3'] = False
            # runner on 2nd advances
            if game['runner_on_2']:
                game['runner_on_2'] = False
                game['runner_on_3'] = True
            # runner on 1st advances
            if game['runner_on_1']:
                game['runner_on_2'] = True   
        # increment the outs
        game['__outs'] += 1
        game['__strikes'] = 0
    # out at 1st
    elif d in [[3,3], [3,4], [3,5], [3,6]]:
        # The most debated logic..., runners advance 80% of the time
        # However, third base only advances 20% of the time if bases weren't loaded
        if (game['runner_on_3'] # on third
              and not(game['runner_on_2'] and game['runner_on_1']) # but not bases loaded
              and game['__outs'] < 2):
            if random.randint(1,10) <= 2:
                game['_score'] += 1
                game['runner_on_3'] = False                
                # runner on 2nd advances
                if game['runner_on_2']:
                    game['runner_on_2'] = False
                    game['runner_on_3'] = True 
                # runner on 1st advances
                if game['runner_on_1']:
                    game['runner_on_2'] = True
        elif game['__outs'] < 2 and random.randint(1,10) <= 8: # advance all runners 80% of the time 
            # runner on 3rd scores
            if game['runner_on_3']:
                game['_score'] += 1
                game['runner_on_3'] = False
            # runner on 2nd advances
            if game['runner_on_2']:
                game['runner_on_2'] = False
                game['runner_on_3'] = True
            # runner on 1st advances
            if game['runner_on_1']:
                game['runner_on_2'] = True              
        # increment the outs
        game['__outs'] += 1
        game['__strikes'] = 0
    # fly out -- advancing bases 60 % of the time
    elif d in [[4,4], [4,5], [4,6]]:
        # tag up occasionally advancing runners if under 2 outs
        if random.randint(1,10) <= 6 and game['__outs'] < 2:
             # runner on 3rd scores
            if game['runner_on_3']:
                game['_score'] += 1
                game['runner_on_3'] = False
            # runner on 2nd advances
            if game['runner_on_2']:
                game['runner_on_2'] = False
                game['runner_on_3'] = True
            # runner on 1st advances
            if game['runner_on_1']:
                game['runner_on_2'] = True   
        # increment the outs
        game['__outs'] += 1
        game['__strikes'] = 0
    # double play
    elif d in [[5,5]]:
        # increment the outs
        game['__outs'] += 2
        game['__strikes'] = 0
    # triple
    elif d in [[5,6]]:
        # runner on 3rd scores
        if game['runner_on_3']:
            game['_score'] += 1
            game['runner_on_3'] = False
        # runner on 2nd scores
        if game['runner_on_2']:
            game['_score'] += 1
            game['runner_on_2'] = False
        # runner on 1st scores
        if game['runner_on_1']:
            game['_score'] += 1
            game['runner_on_1'] = False
        # hitter is now on 3rd
        game['runner_on_3'] = True
         # reset strikes
        game['__strikes'] = 0  
    # home run
    elif d in [[6,6]]:
        # runner on 3rd scores
        if game['runner_on_3']:
            game['_score'] += 1
            game['runner_on_3'] = False
        # runner on 2nd scores
        if game['runner_on_2']:
            game['_score'] += 1
            game['runner_on_2'] = False
        # runner on 1st scores
        if game['runner_on_1']:
            game['_score'] += 1
            game['runner_on_1'] = False
        # hitter scores
        game['_score'] += 1
         # reset strikes
        game['__strikes'] = 0  
    else:
        print 'error:', d
    
    # settle the outs/innings
    if game['__outs'] > 2:
        game['__inning'] += 1
        game['__outs'] = 0
        game['__strikes'] = 0
        game['runner_on_1'] = False
        game['runner_on_2'] = False
        game['runner_on_3'] = False
    return(game)
    

#### Now that I can simulate one roll, this function will simulate a full 9 innings.

In [346]:
def play_1_game(verbose=False):
    game = initialize_game()
    count = 0
    while game['__inning'] <= 9:
        count += 1
        d = roll_2_die(verbose=verbose)
        game = roll_once(d, game)
        if verbose:
            print 'game stats after roll {}:'.format(count), game
    return(game)  

play_1_game(verbose=True)

rolled a single
game stats after roll 1: {'__inning': 1, '__strikes': 0, '_score': 0, '__outs': 0, 'runner_on_1': True, 'runner_on_2': False, 'runner_on_3': False}
rolled a out at 1st
game stats after roll 2: {'__inning': 1, '__strikes': 0, '_score': 0, '__outs': 1, 'runner_on_1': True, 'runner_on_2': False, 'runner_on_3': False}
rolled a fly out
game stats after roll 3: {'__inning': 1, '__strikes': 0, '_score': 0, '__outs': 2, 'runner_on_1': True, 'runner_on_2': False, 'runner_on_3': False}
rolled a strike
game stats after roll 4: {'__inning': 1, '__strikes': 1, '_score': 0, '__outs': 2, 'runner_on_1': True, 'runner_on_2': False, 'runner_on_3': False}
rolled a double play
game stats after roll 5: {'__inning': 2, '__strikes': 0, '_score': 0, '__outs': 0, 'runner_on_1': False, 'runner_on_2': False, 'runner_on_3': False}
rolled a base on error
game stats after roll 6: {'__inning': 2, '__strikes': 0, '_score': 0, '__outs': 0, 'runner_on_1': True, 'runner_on_2': False, 'runner_on_3': False

{'__inning': 10,
 '__outs': 0,
 '__strikes': 0,
 '_score': 23,
 'runner_on_1': False,
 'runner_on_2': False,
 'runner_on_3': False}

#### That works, now simulate a bunch of games.  I logged the end score of each game to a list.  Then we can see what the average value is.

In [348]:
number_of_games = 1000000
scores = []
for g in range(0,number_of_games):
    game = play_1_game(verbose=False)
    scores.append(game['_score'])

print 'the average score for {} games is: {}'.format(number_of_games, float(sum(scores)) / len(scores))

the average score for 1000000 games is: 16.232338
