In [1]:
import os
import sys
import json
from collections import defaultdict
import pandas as pd

import sys
sys.path.insert(0, '../../../pipelines/tasks/mlb/')

from common.helpers.extractors import get_game_issues

In [3]:
# Load the schedule and keep only the rows that carry a game id: a NaN id
# cannot be cast to int.
df = pd.read_csv('../../../data/mlb/schedules.csv', index_col=None)
# .copy() detaches the filtered frame from the raw one, so the column
# assignments below do not trigger SettingWithCopyWarning.
df = df[df['GAME_ID'].notna()].copy()
df['GAME_ID'] = df['GAME_ID'].astype(int)

# The first character of RESULT is the outcome letter (the sample output
# shows "W" / "L"); keep it in its own column.
df['WIN'] = df['RESULT'].map(lambda result: result[0])

import re
def get_score(score):
    """Extract the "<digits>-<digits>" score embedded in a RESULT value.

    Args:
        score: Raw RESULT cell; expected to be a string that embeds a score
            such as "10-4".

    Returns:
        The first "<digits>-<digits>" substring, or None when the value is
        not a string (e.g. NaN from a blank CSV cell) or holds no score.
    """
    # Blank CSV cells arrive as float NaN, which re.search() cannot scan.
    if not isinstance(score, str):
        return None

    match = re.search(r'\d+-\d+', score)
    # re.search() returns None when nothing matches; do not call .group() on it.
    return match.group(0) if match else None

# Pull the numeric score out of every RESULT value.
scores = df['RESULT'].map(get_score)
df['SCORE'] = scores

# Reduce the frame to a GAME_ID-indexed lookup of outcome letter and score.
df = df[['GAME_ID', 'WIN', 'SCORE']].set_index('GAME_ID')

df.head()

Unnamed: 0_level_0,WIN,SCORE
GAME_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
401427971,L,2-1
401354277,L,4-3
401354292,W,10-4
401354303,W,4-0
401354312,L,7-2


In [4]:
# Directory holding one play-by-play JSON document per game.
PBP_DIR = '../../../data/mlb/pbp/'

games = []
# os.listdir() returns names in arbitrary order; sorting keeps the order of
# `games` (and of every report derived from it) stable across runs.
for file in sorted(os.listdir(PBP_DIR)):
    path = os.path.join(PBP_DIR, file)
    # Hidden entries and sub-directories (e.g. .DS_Store, .ipynb_checkpoints)
    # are not game files; opening or parsing them would abort the load.
    if file.startswith('.') or not os.path.isfile(path):
        continue

    with open(path, 'r', encoding='UTF8') as pbp_input:
        games.append(json.load(pbp_input))

In [5]:
# Cross-check every loaded game: the runs tallied from its play-by-play
# events must match the schedule score, and the extractor must report no
# structural issues.
for game in games:

    # Runs accumulated per batting side (keyed by the period's 'atBat' value).
    runs = defaultdict(int)
    for period in game['periods']:
        runs[period['atBat']] += sum(
            # Events without a 'runs' entity contribute nothing.
            event['entities'].get('runs', 0)
            for event in period['events']
        )

    # Schedule scores in the sample output are written higher total first
    # (a loss still reads "2-1"), so order the tallies the same way.
    current_game_score = '-'.join(
        str(total) for total in sorted(runs.values(), reverse=True)
    )

    game_id = int(game['id'])
    if game_id in df.index:
        score = df.loc[game_id].SCORE
        if score != current_game_score:
            print('bad score:')
            print(game['id'], score, current_game_score)
    else:
        # A game absent from the schedule is itself a data problem; report it
        # rather than letting a KeyError abort the remaining checks.
        print('missing schedule entry:')
        print(game['id'], current_game_score)

    issues = get_game_issues(game)
    # Only print when the extractor returned at least one non-empty period entry.
    if any(issues['periods']):
        print('issues:')
        print(issues)

issues:
{'id': '401355366', 'periods': [{'id': 'top-10', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401355249', 'periods': [{'id': 'top-10', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401354277', 'periods': [{'id': 'bottom-1', 'issues': [], 'events': [{'id': 10, 'issues': ['order']}]}]}
