In [1]:
import json
from copy import deepcopy
from typing import Any, Dict, List

import sys

sys.path.insert(0, '../../pipelines/tasks/mlb/')

from common.helpers.extractors import get_pitch_events

In [2]:
def create_graph(teams):
    """Build an empty tally table for every team in ``teams``.

    The returned dict is nested as team -> out count -> base state -> counter:

    * out count is one of the strings '0', '1', '2';
    * base state is a three-digit string with one 0/1 digit per slot of a
      three-element base list (e.g. '110');
    * counter is a fresh ``{'a': 0, 'r': 0}`` dict, so no two buckets share
      state.
    """
    out_keys = ('0', '1', '2')
    state_keys = ('000', '100', '010', '110', '001', '101', '011', '111')

    graph = {}
    for team in teams:
        per_team = {}
        for out_key in out_keys:
            # A new counter dict per bucket; never reuse one object across keys.
            per_team[out_key] = {state_key: {'a': 0, 'r': 0} for state_key in state_keys}
        graph[team] = per_team

    return graph

In [3]:
def correct_current_state_for_before_pitch_event(bases, moves):
    """Apply the runner advances of a before-pitch event to ``bases``.

    Args:
        bases: three-element list of 0/1 flags, indexed 0..2 (first, second,
            third base). It is modified in place.
        moves: iterable of dicts with a 'type' and an 'at' key. Only moves whose
            'type' is 'advanced' are applied; 'at' is read as the base the
            runner reached ('second', 'third' or 'home') after advancing one
            base -- presumably matching the upstream play-by-play schema,
            TODO confirm.

    Returns:
        The same ``bases`` list, after the advances have been applied.

    Raises:
        KeyError: if an 'advanced' move has an 'at' value other than 'second',
            'third' or 'home'. Nothing is modified in that case.
    """
    # destination -> (index of the base the runner left, index of the base the
    # runner now occupies). None means the runner reached home and occupies no base.
    advance_by_destination = {
        'second': (0, 1),
        'third': (1, 2),
        'home': (2, None),
    }

    advances = [
        advance_by_destination[move['at']]
        for move in moves
        if move['type'] == 'advanced'
    ]

    # Vacate every starting base before filling any destination. Applying the
    # moves one at a time made the result depend on their order: with runners
    # on first and second and the moves listed as 'second' then 'third', the
    # runner just placed on second was cleared again by the next move.
    for vacated, _ in advances:
        bases[vacated] = 0

    for _, occupied in advances:
        if occupied is not None:
            bases[occupied] = 1

    return bases

## Game ids whose play-by-play files are tallied into `graph`.
games = [
    '401354262',
    '401354276',
    '401354291',
    '401354536'
]

graph = create_graph(['ari'])

for game in games:

    with open(f'../../data/mlb/pbp/pbp_{game}.json', 'r', encoding='UTF8') as pbp_input:
        data = json.load(pbp_input)

    ## looked up by the id stored in a pitch's 'beforePitchEvent'
    pitch_events = get_pitch_events(data)

    for period in data['periods']:
        ## outs and bases start over for every period
        outs = 0
        bases = [0, 0, 0]

        atBat = period['atBat']
        if atBat not in graph:
            ## only teams that were given a table in `graph` are tallied
            continue

        events = period['events']
        for event in events:
            entities = event['entities']

            ## base state this event is credited to; `bases` carries the state
            ## forward to the next event
            current_state_of_bases = bases.copy()

            if entities['type'] == 'intentionally walked':
                ## handled without consulting pitch data: the batter takes 1st
                ## and any forced runner moves up
                if bases[0] == 1:
                    if bases[1] == 1:
                        bases[2] = 1 ## move 2nd up to 3rd
                    else:
                        bases[1] = 1 ## move 1st up to 2nd
                else:
                    bases[0] = 1 ## take 1st
            elif event.get('pitches'):
                ## an empty 'pitches' list is treated like a missing one, since
                ## indexing [-1] on it would raise IndexError
                pitches = event['pitches']

                for pitch in pitches[:-1]:
                    if current_state_of_bases != pitch['result']['bases']:
                        ## update for base running events while pitching
                        current_state_of_bases = pitch['result']['bases'].copy()

                last_pitch = pitches[-1]

                if 'beforePitchEvent' in last_pitch['result']:
                    ## runners moved before the final pitch: fold that in
                    current_state_of_bases = correct_current_state_for_before_pitch_event(
                        current_state_of_bases,
                        pitch_events[last_pitch['result']['beforePitchEvent']]['entities']['moves']
                    )

                bases = last_pitch['result']['bases'].copy()

            ## 'after-pitch' / 'before-pitch' events are not counted as attempts
            if event.get('type') not in ('after-pitch', 'before-pitch'):
                item = graph[atBat][str(outs)][''.join(map(str, current_state_of_bases))]

                ## runs generated from event
                item['r'] += entities.get('runs', 0)

                ## attempts at it
                item['a'] += 1

            ## outs are accumulated for every event, counted or not
            outs += entities.get('outs', 0)

In [4]:
## Show the raw tallies: team -> outs -> base state -> {'a': attempts, 'r': runs}
graph

{'ari': {'0': {'000': {'a': 78, 'r': 0},
   '100': {'a': 7, 'r': 0},
   '010': {'a': 0, 'r': 0},
   '110': {'a': 0, 'r': 0},
   '001': {'a': 0, 'r': 0},
   '101': {'a': 1, 'r': 1},
   '011': {'a': 1, 'r': 1},
   '111': {'a': 0, 'r': 0}},
  '1': {'000': {'a': 32, 'r': 1},
   '100': {'a': 12, 'r': 1},
   '010': {'a': 3, 'r': 1},
   '110': {'a': 9, 'r': 5},
   '001': {'a': 2, 'r': 0},
   '101': {'a': 3, 'r': 2},
   '011': {'a': 0, 'r': 0},
   '111': {'a': 0, 'r': 0}},
  '2': {'000': {'a': 24, 'r': 1},
   '100': {'a': 6, 'r': 0},
   '010': {'a': 2, 'r': 0},
   '110': {'a': 4, 'r': 0},
   '001': {'a': 2, 'r': 0},
   '101': {'a': 3, 'r': 0},
   '011': {'a': 1, 'r': 0},
   '111': {'a': 0, 'r': 0}}}}

In [5]:
def slim_graph_down(graph_to_slim):
    """Remove, in place, every base-state bucket whose 'a' count is 0.

    ``graph_to_slim`` is nested as team -> out -> state -> counter dict. Team
    and out levels are kept even when all of their states are removed. The
    same (now slimmer) object is returned.
    """
    # Phase 1: find every empty bucket without touching the dicts being walked.
    empty_buckets = [
        (states, state_key)
        for per_out in graph_to_slim.values()
        for states in per_out.values()
        for state_key, tally in states.items()
        if tally['a'] == 0
    ]

    # Phase 2: drop them.
    for states, state_key in empty_buckets:
        del states[state_key]

    return graph_to_slim

def compute_likelihoods(graph):
    """Return runs-per-attempt for every bucket that has at least one attempt.

    Works on a deep copy of ``graph`` (the argument is left untouched), drops
    buckets with zero attempts via ``slim_graph_down``, and replaces each
    remaining ``{'a': ..., 'r': ...}`` counter with ``r / a`` rounded to three
    decimals (``0.0`` when no runs were recorded).
    """
    slimmed = slim_graph_down(deepcopy(graph))

    for per_out in slimmed.values():
        for states in per_out.values():
            # Overwriting values for existing keys keeps the dict size fixed,
            # so iterating while assigning is safe here.
            for state_key, tally in states.items():
                if tally['r'] == 0:
                    ratio = 0.0
                else:
                    ratio = round(float(tally['r']) / float(tally['a']), 3)
                states[state_key] = ratio

    return slimmed

## Display runs per attempt for every (outs, base state) bucket with at least one attempt
compute_likelihoods(graph)

{'ari': {'0': {'000': 0.0, '100': 0.0, '101': 1.0, '011': 1.0},
  '1': {'000': 0.031,
   '100': 0.083,
   '010': 0.333,
   '110': 0.556,
   '001': 0.0,
   '101': 0.667},
  '2': {'000': 0.042,
   '100': 0.0,
   '010': 0.0,
   '110': 0.0,
   '001': 0.0,
   '101': 0.0,
   '011': 0.0}}}