In [1]:
import os
import re
import json
from copy import deepcopy
from collections import defaultdict
from typing import Any, Dict, List

import sys

sys.path.insert(0, '../../pipelines/tasks/mlb/')

from common.helpers.extractors import get_pitch_events

In [2]:
def create_graph(teams):
    """Build an empty tally graph for the given teams.

    The result is nested as ``graph[team][area][outs][state]`` where

    * ``area`` is ``'home'`` or ``'away'``,
    * ``outs`` is ``'0'``, ``'1'`` or ``'2'``,
    * ``state`` is a three-character string of 0/1 flags, one per base
      (``'000'`` through ``'111'``),

    and every leaf is ``{'runs': 0, 'types': defaultdict(int)}``.

    Args:
        teams: iterable of team keys to create entries for.

    Returns:
        dict: a fresh graph; every leaf owns its own ``types`` counter.
    """
    areas = ['home', 'away']

    possible_outs = ['0', '1', '2']

    ## kept as an explicit list so the key order of the graph stays stable
    possible_states = [
        [0, 0, 0],
        [1, 0, 0],
        [0, 1, 0],
        [1, 1, 0],
        [0, 0, 1],
        [1, 0, 1],
        [0, 1, 1],
        [1, 1, 1],
    ]
    state_keys = [''.join(map(str, state)) for state in possible_states]

    graph = {}
    for team in teams:
        graph[team] = {
            area: {
                out: {
                    ## `int` instead of a local lambda: same default of 0, but the
                    ## graph stays picklable and its repr stays readable
                    state_key: {'runs': 0, 'types': defaultdict(int)}
                    for state_key in state_keys
                }
                for out in possible_outs
            }
            for area in areas
        }

    return graph

In [3]:
def correct_current_state_for_before_pitch_event(bases, moves):
    """Apply the runner moves of a before-pitch event to a base state.

    All moves are treated as simultaneous: every base a runner left is
    cleared first, then every base a runner reached is marked occupied.
    The result therefore does not depend on the order of ``moves`` (applying
    them one by one loses a runner when a trailing runner is listed before
    the runner ahead of them).

    Args:
        bases: list of three 0/1 flags ``[first, second, third]``; it is
            updated in place.
        moves: iterable of dicts with a ``'type'`` key. ``'advanced'`` and
            ``'out'`` moves also need an ``'at'`` key; other types are ignored.
            NOTE(review): for ``'out'`` moves, ``'at'`` is treated as the base
            the runner is removed from - confirm against the feed schema.

    Returns:
        The same ``bases`` list, after the update.

    Raises:
        KeyError: if ``'at'`` names a base the move type does not support
            (e.g. advancing to ``'first'`` or an out at ``'home'``).
    """
    ## index of the base a runner left, keyed by the base they advanced to
    advanced_from = {'second': 0, 'third': 1, 'home': 2}
    ## index of the base a runner now occupies; scoring occupies no base
    advanced_to = {'second': 1, 'third': 2, 'home': None}
    ## index of the base cleared by an out
    out_at = {'first': 0, 'second': 1, 'third': 2}

    vacated = []
    occupied = []
    for move in moves:
        move_type = move['type']
        if move_type == 'advanced':
            vacated.append(advanced_from[move['at']])
            destination = advanced_to[move['at']]
            if destination is not None:
                occupied.append(destination)
        elif move_type == 'out':
            vacated.append(out_at[move['at']])

    ## clear before setting so a base that one runner left and another
    ## reached ends up occupied
    for index in vacated:
        bases[index] = 0
    for index in occupied:
        bases[index] = 1

    return bases

## Tally, per team / home-or-away / outs / base state, the runs scored and the
## event types recorded in every play-by-play file of the data directory.
graph = create_graph(['ari'])

## event types that are skipped when tallying runs and attempts
info_event_types = ['after-pitch', 'before-pitch', 'sub-p', 'sub-f']

for file in os.listdir('../../data/mlb/pbp/'):

    with open(f'../../data/mlb/pbp/{file}', 'r', encoding='UTF8') as pbp_input:
        data = json.load(pbp_input)

    pitch_events = get_pitch_events(data)

    ## maps each team of this game to the side it plays on
    team_lookup = {
        data['home']: 'home',
        data['away']: 'away',
    }

    for period in data['periods']:
        ## outs and runners start from scratch in every period
        outs = 0
        bases = [0, 0, 0]

        atBat = period['atBat']
        events = period['events']

        for event in events:
            entities = event['entities']
            ## base state the event gets charged to (state before the event)
            current_state_of_bases = bases.copy()

            if entities['type'] == 'intentionally walked':
                if bases[0] == 1:
                    if bases[1] == 1:
                        bases[2] = 1 ## move 2nd up to 3rd
                    else:
                        bases[1] = 1 ## move 1st up to 2nd
                else:
                    bases[0] = 1 ## take 1st
            elif event.get('pitches'):
                ## an empty pitch list is treated like an event without pitches
                for pitch in event['pitches'][:-1]:
                    if current_state_of_bases != pitch['result']['bases']:
                        ## update for base running events while pitching
                        current_state_of_bases = pitch['result']['bases'].copy()

                last_pitch = event['pitches'][-1]

                if 'beforePitchEvent' in last_pitch['result']:
                    before_pitch_event_entities = pitch_events[last_pitch['result']['beforePitchEvent']]['entities']

                    ## default to no moves: otherwise an event that has neither
                    ## 'moves' nor type 'picked off' would hit an unbound name
                    ## or silently reuse the moves of an earlier event
                    moves = []
                    if 'moves' in before_pitch_event_entities:
                        moves = before_pitch_event_entities['moves']
                    elif before_pitch_event_entities['type'] == 'picked off':
                        moves = [{ 'type': 'out', 'at': before_pitch_event_entities['at'] }]

                    current_state_of_bases = correct_current_state_for_before_pitch_event(
                        current_state_of_bases,
                        moves
                    )

                ## state after the event, carried over to the next event
                bases = last_pitch['result']['bases'].copy()

            out_key = str(outs)
            state_key = ''.join(map(str, current_state_of_bases))

            ## teams are added lazily the first time they come up to bat
            if atBat not in graph:
                graph.update(create_graph([atBat]))

            is_info = entities['type'] in info_event_types
            if not is_info:
                area = team_lookup[atBat]
                item = graph[atBat][area][out_key][state_key]

                ## runs generated from event
                item['runs'] += entities.get('runs', 0)

                ## attempts at it
                item['types'][entities['type']] += 1

            ## outs are added after the tally so the event is charged to the
            ## out count it started with
            outs += entities.get('outs', 0)

In [4]:
def slim_graph_down(graph_to_slim):
    """Drop every base-state entry whose event counts sum to zero.

    The graph is edited in place; team, area and outs levels are kept even
    when they end up empty.

    Args:
        graph_to_slim: nested dict ``team -> area -> outs -> state -> stats``
            where ``stats['types']`` maps event types to counts.

    Returns:
        The same ``graph_to_slim`` object, after the removals.
    """
    ## collect first, delete afterwards: the dicts cannot shrink while they
    ## are being iterated
    unused = [
        (states, state_key)
        for areas in graph_to_slim.values()
        for outs in areas.values()
        for states in outs.values()
        for state_key, stats in states.items()
        if sum(stats['types'].values()) == 0
    ]

    for states, state_key in unused:
        del states[state_key]

    return graph_to_slim

## slim a copy for display: slim_graph_down edits its argument in place, and
## the shared `graph` should keep all of its states when this cell is re-run
slim_graph_down(deepcopy(graph))

{'ari': {'home': {'0': {}, '1': {}, '2': {}},
  'away': {'0': {}, '1': {}, '2': {}}},
 'lad': {'home': {'0': {}, '1': {}, '2': {}},
  'away': {'0': {'000': {'runs': 1,
     'types': defaultdict(<function __main__.create_graph.<locals>.<dictcomp>.<dictcomp>.<dictcomp>.<lambda>()>,
                 {'doubled': 2,
                  'grounded out': 3,
                  'flied out': 2,
                  'struck out': 5,
                  'walked': 3,
                  'homered': 1,
                  'popped out': 1,
                  'lined out': 2})},
    '100': {'runs': 0,
     'types': defaultdict(<function __main__.create_graph.<locals>.<dictcomp>.<dictcomp>.<dictcomp>.<lambda>()>,
                 {'walked': 2, 'wild pitch': 1})},
    '010': {'runs': 0,
     'types': defaultdict(<function __main__.create_graph.<locals>.<dictcomp>.<dictcomp>.<dictcomp>.<lambda>()>,
                 {'lined out': 1, 'grounded out': 1, 'walked': 1})},
    '110': {'runs': 1,
     'types': defaultdict(<func

In [5]:
def compute_likelihoods(graph):
    """Reduce every recorded base state to its runs-per-event ratio.

    Works on a slimmed deep copy, so ``graph`` itself is left untouched.
    Each remaining state is replaced by ``runs / number of events`` rounded
    to three decimals, or ``0.0`` when no runs were scored. The ratio is an
    average, not a probability: it exceeds 1 when a state yields more runs
    than events.

    Args:
        graph: nested dict ``team -> area -> outs -> state -> stats`` with
            ``stats['runs']`` and ``stats['types']`` (event type -> count).

    Returns:
        dict: same nesting as ``graph``, with a float in place of each
        ``stats`` dict and without the states that recorded no events.
    """
    slimmed = slim_graph_down(deepcopy(graph))

    for areas in slimmed.values():
        for outs in areas.values():
            for states in outs.values():
                ## only existing keys are reassigned, so iterating is safe
                for state_key, stats in states.items():
                    scored = stats['runs']
                    attempts = sum(stats['types'].values())
                    if scored == 0:
                        states[state_key] = 0.0
                    else:
                        states[state_key] = round(float(scored) / attempts, 3)

    return slimmed

## show runs per recorded event for every state that saw at least one event
compute_likelihoods(graph)

{'ari': {'home': {'0': {}, '1': {}, '2': {}},
  'away': {'0': {}, '1': {}, '2': {}}},
 'lad': {'home': {'0': {}, '1': {}, '2': {}},
  'away': {'0': {'000': 0.053,
    '100': 0.0,
    '010': 0.0,
    '110': 0.25,
    '011': 0.0,
    '111': 1.0},
   '1': {'000': 0.188,
    '100': 0.0,
    '010': 0.0,
    '110': 0.0,
    '101': 0.5,
    '011': 0.6,
    '111': 0.333},
   '2': {'000': 0.0,
    '100': 0.0,
    '010': 0.0,
    '110': 0.0,
    '001': 0.0,
    '111': 0.0}}},
 'min': {'home': {'0': {'000': 0.036,
    '100': 0.2,
    '010': 0.0,
    '110': 0.0,
    '011': 0.0},
   '1': {'000': 0.068,
    '100': 0.0,
    '010': 0.0,
    '110': 0.4,
    '101': 0.333,
    '011': 0.0},
   '2': {'000': 0.053,
    '100': 0.0,
    '010': 0.2,
    '110': 0.0,
    '101': 0.2,
    '011': 0.0,
    '111': 2.0}},
  'away': {'0': {'000': 0.0, '100': 0.182, '010': 1.0, '110': 0.0, '011': 0.0},
   '1': {'000': 0.0,
    '100': 0.143,
    '010': 0.0,
    '110': 0.25,
    '101': 1.0,
    '011': 1.0,
    '111': 1.0}