In [1]:
from typing import Any, Dict, List

import os
import re
import json

import pandas as pd

from copy import deepcopy
from collections import defaultdict

In [2]:
# Load the pre-computed team event graph. Later cells index it as
# graph[team][outs][bases] and read the 'runs' and 'types' entries of each node.
# No placeholder default is assigned first: if the file is missing or malformed
# the cell should fail here instead of leaving an empty graph for later cells.
with open('../../data/mlb/pbp/computes/team_event_graph.json', 'r', encoding='UTF8') as pbp_input:
    graph = json.load(pbp_input)

In [3]:
def compute_likelihoods(graph):
    """Reduce every node of the event graph to a runs-per-event ratio.

    The input is indexed as ``graph[team][outs][state]`` and each node is a
    mapping with a ``'runs'`` value and a ``'types'`` mapping of event name to
    count. Each node is replaced by ``runs / sum(types.values())`` rounded to
    three decimals. The ratio is not capped, so it can exceed 1.0 when a node
    has more runs than recorded events.

    Args:
        graph: Nested mapping team -> outs -> state -> {'runs', 'types'}.

    Returns:
        A new nested dict with the same team/outs/state keys whose leaves are
        floats. The input graph is not modified.

    Notes:
        A node yields 0.0 when it has no runs, and also when its event counts
        sum to zero (which previously raised ZeroDivisionError if runs was
        non-zero).
    """
    def _runs_per_event(node):
        runs = node['runs']
        total = sum(node['types'].values())
        # Guard both "nothing scored" and "nothing recorded"; the second case
        # would otherwise divide by zero.
        if runs == 0 or total == 0:
            return 0.0
        return round(float(runs) / total, 3)

    # Build the result directly; only the scalar leaves are needed, so there
    # is no reason to deep-copy the per-node 'types' dictionaries first.
    return {
        team: {
            out: {
                state: _runs_per_event(node)
                for state, node in by_state.items()
            }
            for out, by_state in by_out.items()
        }
        for team, by_out in graph.items()
    }

# Reduce the loaded event graph to one runs-per-event value per
# team / outs / base-state node, then show the nested dict as the cell output.
likelihood_graph = compute_likelihoods(graph)
likelihood_graph

{'LAD': {'0': {'---': 0.0,
   '-2-': 0.0,
   '-23': 0.0,
   '1--': 0.0,
   '12-': 0.333,
   '123': 1.5},
  '1': {'---': 0.0,
   '-2-': 0.0,
   '-23': 1.0,
   '1-3': 1.0,
   '12-': 0.0,
   '123': 0.0},
  '2': {'---': 0.0,
   '--3': 0.0,
   '-2-': 0.0,
   '1--': 0.0,
   '12-': 0.0,
   '123': 0.0}},
 'MIN': {'0': {'---': 0.043, '-2-': 0.0, '-23': 0.0, '1--': 0.222, '12-': 0.0},
  '1': {'---': 0.086,
   '-2-': 0.0,
   '-23': 0.0,
   '1--': 0.0,
   '1-3': 0.333,
   '12-': 0.444},
  '2': {'---': 0.067,
   '-2-': 0.2,
   '-23': 0.0,
   '1--': 0.0,
   '1-3': 0.25,
   '12-': 0.0,
   '123': 2.0}},
 'SEA': {'0': {'---': 0.0,
   '--3': 0.0,
   '-2-': 0.0,
   '-23': 0.0,
   '1--': 0.0,
   '12-': 0.25},
  '1': {'---': 0.037, '--3': 0.0, '-23': 0.0, '1--': 0.0, '12-': 0.0},
  '2': {'---': 0.0,
   '--3': 0.333,
   '-2-': 0.4,
   '-23': 0.0,
   '1--': 0.118,
   '1-3': 0.0,
   '12-': 1.0}}}

In [4]:
def flatten_graph(graph):
    """Turn a team -> outs -> {column: value} mapping into a list of row dicts.

    One row is produced per (team, outs) pair. Each row starts with the
    'team' and 'outs' keys and is then extended with every entry stored under
    ``graph[team][outs]``.
    """
    rows = []
    for team, by_outs in graph.items():
        for outs, columns in by_outs.items():
            record = dict(team=team, outs=outs)
            # Entries of the innermost mapping become additional columns.
            record.update(columns)
            rows.append(record)
    return rows

def flatten_full_graph(graph):
    """Turn the full event graph into one row dict per team / outs / bases node.

    Each row carries the 'team', 'outs' and 'bases' keys followed by every
    entry of that node's 'types' mapping. Other entries of the node are not
    copied into the row.
    """
    rows = []
    for team, by_outs in graph.items():
        for outs, by_bases in by_outs.items():
            for bases, node in by_bases.items():
                record = dict(team=team, outs=outs, bases=bases)
                # Event-type counts become additional columns of the row.
                record.update(node['types'])
                rows.append(record)
    return rows

In [5]:
# Event-type counts in long form: one row per team / outs / bases node,
# ordered by those three keys.
df = (
    pd.DataFrame(flatten_full_graph(graph))
    .sort_values(['team', 'outs', 'bases'])
)
df

Unnamed: 0,team,outs,bases,Double,Groundout,Lineout,Popfly,Strikeout,Walk,Flyball,Single,Triple,Home Run,Fielder's Choice,Double Play,Hit By Pitch,Reached on Interference,Error,Picked off
0,LAD,0,---,1.0,1.0,2.0,1.0,2.0,2.0,,,,,,,,,,
1,LAD,0,-2-,,1.0,,,,1.0,,,,,,,,,,
2,LAD,0,-23,,,,,1.0,,,,,,,,,,,
3,LAD,0,1--,,,,,,1.0,,,,,,,,,,
4,LAD,0,12-,,,,,,1.0,1.0,1.0,,,,,,,,
5,LAD,0,123,1.0,,,,,,,1.0,,,,,,,,
6,LAD,1,---,,1.0,,,2.0,1.0,2.0,,,,,,,,,
7,LAD,1,-2-,,,,1.0,,1.0,,,,,,,,,,
8,LAD,1,-23,,,,,,,1.0,1.0,,,,,,,,
9,LAD,1,1-3,,,,,,,,1.0,,,,,,,,


In [7]:
# Runs-per-event table in wide form: one row per team / outs, one column per
# base state; combinations that never occurred are shown as '-'.
df = (
    pd.DataFrame(flatten_graph(likelihood_graph))
    .sort_values(['team', 'outs'])
    .fillna('-')
)
df

Unnamed: 0,team,outs,---,-2-,-23,1--,12-,123,1-3,--3
0,LAD,0,0.0,0.0,0.0,0.0,0.333,1.5,-,-
1,LAD,1,0.0,0.0,1.0,-,0.0,0.0,1.0,-
2,LAD,2,0.0,0.0,-,0.0,0.0,0.0,-,0.0
3,MIN,0,0.043,0.0,0.0,0.222,0.0,-,-,-
4,MIN,1,0.086,0.0,0.0,0.0,0.444,-,0.333,-
5,MIN,2,0.067,0.2,0.0,0.0,0.0,2.0,0.25,-
6,SEA,0,0.0,0.0,0.0,0.0,0.25,-,-,0.0
7,SEA,1,0.037,-,0.0,0.0,0.0,-,-,0.0
8,SEA,2,0.0,0.4,0.0,0.118,1.0,-,0.0,0.333
