In [1]:
from typing import Any, Dict, List

import os
import re
import json

import pandas as pd

from copy import deepcopy
from collections import defaultdict

In [2]:
# Read the precomputed per-team event graph from its JSON file.
graph = {}  # remains an empty dict if opening or parsing the file below raises
with open('../../data/mlb/pbp/computes/team_event_graph.json', 'r', encoding='UTF8') as pbp_input:
    graph = json.loads(pbp_input.read())

In [3]:
def compute_likelihoods(graph):
    """Reduce every (team, outs, state) entry of ``graph`` to a single rate.

    For each leaf entry the rate is ``runs / sum(types.values())`` rounded to
    three decimals, i.e. the number of runs per recorded event in that state.
    Because it is a ratio of runs to events (not a probability) it can be
    greater than 1.

    Args:
        graph: Nested mapping ``team -> outs -> state -> entry`` where each
            entry has a numeric ``'runs'`` value and a ``'types'`` mapping
            whose values are counts.

    Returns:
        A new nested dict with the same three key levels whose leaves are
        floats. The input ``graph`` is not modified.

    Raises:
        KeyError: If an entry lacks the ``'runs'`` or ``'types'`` key.
    """
    likelihoods = {}

    for team, by_outs in graph.items():
        team_rates = likelihoods[team] = {}
        for out, by_state in by_outs.items():
            out_rates = team_rates[out] = {}
            for state, entry in by_state.items():
                runs = entry['runs']
                total = sum(entry['types'].values())
                # A state with no recorded events has no meaningful rate; report 0.0
                # (the same value used for "no runs") instead of dividing by zero.
                if runs == 0 or total == 0:
                    out_rates[state] = 0.0
                else:
                    out_rates[state] = round(float(runs) / total, 3)

    return likelihoods

# Compute the per-state rates for every team in `graph`, then display the
# entry for one team ('MIN') as a spot check.
likelihood_graph = compute_likelihoods(graph)
likelihood_graph['MIN']

{'0': {'---': 0.022,
  '--3': 0.0,
  '-2-': 0.286,
  '-23': 0.5,
  '1--': 0.145,
  '1-3': 1.0,
  '12-': 0.222,
  '123': 1.5},
 '1': {'---': 0.021,
  '--3': 0.6,
  '-2-': 0.241,
  '-23': 1.167,
  '1--': 0.077,
  '1-3': 0.444,
  '12-': 0.393,
  '123': 1.0},
 '2': {'---': 0.038,
  '--3': 0.0,
  '-2-': 0.095,
  '-23': 0.0,
  '1--': 0.096,
  '1-3': 0.294,
  '12-': 0.135,
  '123': 1.0}}

In [4]:
def flatten_graph(graph):
    """Turn a ``team -> outs -> {state: value}`` mapping into a list of row dicts.

    One row is produced per (team, outs) pair. Each row starts with the
    ``'team'`` and ``'outs'`` keys and is then extended with every key/value
    pair found under ``graph[team][outs]``.

    Returns:
        A list of dicts, in the iteration order of ``graph``.
    """
    return [
        {'team': team, 'outs': outs, **per_state}
        for team, by_outs in graph.items()
        for outs, per_state in by_outs.items()
    ]

def flatten_full_graph(graph):
    """Turn the raw ``team -> outs -> bases -> entry`` graph into row dicts.

    One row is produced per (team, outs, bases) triple. Each row starts with
    the ``'team'``, ``'outs'`` and ``'bases'`` keys and is then extended with
    every key/value pair of that entry's ``'types'`` mapping.

    Returns:
        A list of dicts, in the iteration order of ``graph``.
    """
    return [
        {'team': team, 'outs': outs, 'bases': bases, **entry['types']}
        for team, by_outs in graph.items()
        for outs, by_bases in by_outs.items()
        for bases, entry in by_bases.items()
    ]

In [5]:
# One row per (team, outs, bases) with a column per key found in the 'types' mappings,
# ordered by team, then outs, then bases.
df = pd.DataFrame(flatten_full_graph(graph)).sort_values(['team', 'outs', 'bases'])
df

Unnamed: 0,team,outs,bases,Double,Flyball,Groundout,Home Run,Lineout,Popfly,Single,...,Bunt Groundout,"Double Play: Strikeout Swinging, Polanco",Reached on Interference,Arraez,Gordon,Kepler,Out Advancing,Polanco,Neuse,Picked off
0,BAL,0,---,3.0,3.0,6.0,2.0,4.0,4.0,4.0,...,,,,,,,,,,
1,BAL,0,--3,,1.0,,,,,,...,,,,,,,,,,
2,BAL,0,-2-,,,,1.0,,,1.0,...,,,,,,,,,,
3,BAL,0,-23,,,,,,,,...,,,,,,,,,,
4,BAL,0,1--,1.0,,1.0,,1.0,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,TBR,2,-2-,1.0,,1.0,,,,1.0,...,,,,,,,,,,
194,TBR,2,-23,,,1.0,,,,,...,,,,,,,,,,
195,TBR,2,1--,1.0,,1.0,,,1.0,,...,,,,,,,,,,
196,TBR,2,1-3,,,2.0,,,,,...,,,,,,,,,,


In [6]:
# One row per (team, outs) with a column per base state; states that a team
# has no entry for are shown as '-' instead of NaN.
df = (
    pd.DataFrame(flatten_graph(likelihood_graph))
    .sort_values(['team', 'outs'])
    .fillna('-')
)
df

Unnamed: 0,team,outs,---,--3,-2-,-23,1--,12-,1-3,123
0,BAL,0,0.054,1.0,1.0,1.0,0.0,0.333,-,-
1,BAL,1,0.103,-,0.0,1.0,0.0,0.0,0.5,-
2,BAL,2,0.0,0.25,0.0,-,0.25,0.0,0.0,-
3,BOS,0,0.0,-,0.0,0.333,0.286,1.0,0.5,-
4,BOS,1,0.069,1.0,1.0,0.75,0.0,0.25,0.5,-
5,BOS,2,0.0,2.0,0.0,0.5,0.0,0.0,0.0,0.0
6,CHW,0,0.1,-,0.0,-,0.0,-,-,-
7,CHW,1,0.048,0.0,0.0,1.0,0.0,-,0.0,0.0
8,CHW,2,0.0,0.5,0.0,-,0.0,0.0,0.0,0.5
9,DET,0,0.0,-,0.0,-,0.0,0.0,-,-
