In [1]:
import os
import re
import json
import pandas as pd
from copy import deepcopy
from collections import defaultdict
from typing import Any, Dict, List

In [2]:
# Start from an empty mapping, then replace it with the parsed contents of the
# team event graph JSON file.
graph = {}
with open('../../data/mlb/pbp/computes/team_event_graph.json', mode='r', encoding='UTF8') as pbp_input:
    graph = json.loads(pbp_input.read())

In [3]:
def compute_likelihoods(graph):
    """Turn raw per-state tallies into a rounded runs-per-event ratio.

    ``graph`` is nested as ``team -> area -> outs -> state -> entry``, where
    every ``entry`` holds a ``'runs'`` number and a ``'types'`` mapping whose
    values are event counts.

    The input is not modified. A new nested dict with the same keys is
    returned, in which each ``entry`` is replaced by
    ``round(runs / sum(types.values()), 3)``.

    The ratio is 0.0 when ``runs`` is 0. It is also 0.0 when the event total
    is 0, so a state with runs but no recorded events yields 0.0 rather than
    a ``ZeroDivisionError``. Because the value is runs divided by events, it
    is not capped at 1.0.

    Raises:
        KeyError: if an entry lacks ``'runs'`` or ``'types'``.
    """
    def _ratio(entry):
        # One state's tally -> rounded runs-per-event value.
        runs = entry['runs']
        total = sum(entry['types'].values())
        # Guard the division: an empty or all-zero tally has no meaningful rate.
        if runs == 0 or total == 0:
            return 0.0
        return round(float(runs) / total, 3)

    # Build the result directly instead of deep-copying the whole graph and
    # then overwriting every leaf.
    return {
        team: {
            area: {
                out: {state: _ratio(entry) for state, entry in states.items()}
                for out, states in outs.items()
            }
            for area, outs in areas.items()
        }
        for team, areas in graph.items()
    }

# Reduce every state's raw tallies to a single rounded ratio; the bare name on
# the last line makes the notebook display the resulting nested dict.
likelihood_graph = compute_likelihoods(graph)
likelihood_graph

{'bal': {'away': {'0': {'000': 0.0, '010': 0.0},
   '1': {'000': 0.0, '001': 1.0, '010': 0.0, '100': 0.0},
   '2': {'000': 0.0, '010': 0.0, '100': 1.0}},
  'home': {'0': {}, '1': {}, '2': {}}},
 'cle': {'away': {'0': {'000': 0.0, '011': 0.0, '100': 0.0, '111': 0.0},
   '1': {'000': 0.0, '010': 0.0, '100': 0.0, '111': 0.0},
   '2': {'000': 0.0, '010': 0.0, '100': 0.0, '111': 0.0}},
  'home': {'0': {'000': 0.0,
    '010': 0.0,
    '011': 1.0,
    '100': 0.0,
    '110': 0.0,
    '111': 0.0},
   '1': {'000': 0.0,
    '001': 0.0,
    '010': 0.0,
    '011': 0.25,
    '100': 0.0,
    '101': 1.0,
    '110': 0.0,
    '111': 2.0},
   '2': {'000': 0.0,
    '001': 0.5,
    '010': 0.222,
    '011': 0.0,
    '100': 0.125,
    '110': 0.0,
    '111': 0.5}}},
 'col': {'away': {'0': {'000': 0.0,
    '010': 0.0,
    '100': 0.0,
    '101': 0.0,
    '110': 0.0,
    '111': 0.0},
   '1': {'000': 0.048,
    '010': 0.0,
    '011': 0.0,
    '100': 0.0,
    '101': 1.0,
    '110': 0.0,
    '111': 0.0},
   '2': {'

In [4]:
def flatten_graph(graph: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Flatten a ``team -> where -> outs -> mapping`` graph into flat records.

    One record is produced per (team, where, outs) combination. Each record
    holds those three identifiers followed by every key/value pair stored
    under that combination, so the list can be passed straight to
    ``pd.DataFrame``.

    Args:
        graph: Nested mapping keyed by team, then where, then outs. The
            innermost value must itself be a mapping.

    Returns:
        A list of new dicts in the iteration order of the nested mapping.
        The input is not modified.
    """
    records = []
    for team, by_where in graph.items():
        for where, by_outs in by_where.items():
            for outs, values in by_outs.items():
                row = {'team': team, 'where': where, 'outs': outs}
                # Inner keys are merged last, so a key named like one of the
                # identifiers above would overwrite it.
                row.update(values)
                records.append(row)

    return records

def flatten_full_graph(graph: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Flatten the raw graph into one record per (team, where, outs, bases).

    Each record holds the four identifiers followed by every key/value pair
    from that state's ``'types'`` mapping, so the list can be passed straight
    to ``pd.DataFrame``.

    Args:
        graph: Nested mapping keyed by team, then where, then outs, then
            bases. Each innermost entry must contain a ``'types'`` mapping.

    Returns:
        A list of new dicts in the iteration order of the nested mapping.
        The input is not modified.

    Raises:
        KeyError: if an innermost entry has no ``'types'`` key.
    """
    records = []
    for team, by_where in graph.items():
        for where, by_outs in by_where.items():
            for outs, by_bases in by_outs.items():
                for bases, entry in by_bases.items():
                    row = {
                        'team': team,
                        'where': where,
                        'outs': outs,
                        'bases': bases,
                    }
                    # Only the 'types' mapping is merged in; any other fields
                    # of the entry are left out of the record.
                    row.update(entry['types'])
                    records.append(row)

    return records

In [5]:
# Event counts per (team, where, outs, bases) state: order the rows by those
# four keys, then keep only the rows whose team is 'min'.
df = (
    pd.DataFrame(flatten_full_graph(graph))
    .sort_values(['team', 'where', 'outs', 'bases'])
    .loc[lambda frame: frame.team == 'min']
)
df

Unnamed: 0,team,where,outs,bases,doubled,flied out,popped out,struck out,grounded out,infield single,...,singled,hit by pitch,fielding error,grounded into double play,error,sacrifice fly,lined into double play,homered,throwing error,tripled
63,min,away,0,0,,6.0,2.0,8.0,12.0,3.0,...,5.0,,,,,,,3.0,1.0,
64,min,away,0,1,,,,,,,...,1.0,,,,,,,,,
65,min,away,0,10,,,,,,,...,,,,,,,,,,
66,min,away,0,100,1.0,,1.0,4.0,,,...,3.0,,,1.0,,,,,,
67,min,away,0,110,,,,1.0,,,...,1.0,,,1.0,,,,,,
68,min,away,0,111,,,,,,,...,,,,,,1.0,,,,
69,min,away,1,0,1.0,3.0,1.0,8.0,12.0,,...,4.0,2.0,,,,,,,,
70,min,away,1,1,,,,,,,...,1.0,,,,,,,,,
71,min,away,1,10,,,,1.0,1.0,,...,,,,,,,,,,
72,min,away,1,11,,,,,1.0,,...,,,,,,,,1.0,,


In [6]:
# Ratios per (team, where, outs) with one column per state key: keep only the
# rows whose team is 'min', then order them by the identifier columns.
df = (
    pd.DataFrame(flatten_graph(likelihood_graph))
    .loc[lambda frame: frame.team == 'min']
    .sort_values(['team', 'where', 'outs'])
)
df

Unnamed: 0,team,where,outs,000,010,001,100,011,111,110,101
18,min,away,0,0.061,0.0,1.0,0.083,,1.0,0.0,
19,min,away,1,0.0,0.0,1.0,0.133,1.5,1.0,0.25,0.0
20,min,away,2,0.03,0.375,0.5,0.312,0.0,3.0,0.0,0.0
21,min,home,0,0.0,0.25,0.0,0.25,,,0.0,
22,min,home,1,0.032,0.0,0.5,0.071,,0.0,0.5,0.667
23,min,home,2,0.0,0.0,,0.077,0.0,1.0,0.5,0.0
