In [11]:
import os
import sys
import json
from collections import defaultdict
import pandas as pd

import sys
sys.path.insert(0, '../../../pipelines/tasks/mlb/')

from common.helpers.extractors import get_game_issues

In [12]:
df = pd.read_csv('../../../data/mlb/schedules.csv', index_col=None)
df = df[df.GAME_ID.isin(df.GAME_ID.dropna().tolist())]
df.GAME_ID = df.GAME_ID.astype(int)

df['WIN'] = df['RESULT'].map(lambda a: a[0])

import re
def get_score(score):
    match = re.search(r'\d+-\d+', score)
    return match.group(0)

df['SCORE'] = df['RESULT'].map(get_score)

df = df.loc[:, ['GAME_ID', 'WIN', 'SCORE']]
df = df.set_index(['GAME_ID'])
df.head()

Unnamed: 0_level_0,WIN,SCORE
GAME_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
401427971,L,2-1
401354277,L,4-3
401354292,W,10-4
401354303,W,4-0
401354312,L,7-2


In [13]:
game_lookup = {}
for index, row in df.iterrows():
    game_lookup[index] = row['SCORE']

In [14]:
games = []
for file in os.listdir('../../../data/mlb/pbp/2/'):
    with open(f'../../../data/mlb/pbp/2/{file}', 'r', encoding='UTF8') as pbp_input:
        game = json.load(pbp_input)

    games.append(game)

In [18]:
for game in games:

    runs = defaultdict(lambda: 0)

    for period in game['periods']:
        runs[period['atBat']] += period['score']['runs']
        for event in period['events']:
            if 'isInfoPlay' in event:
                continue

    key = int(game['id'])
    if key in game_lookup:
        score = game_lookup[key]
        current_game_score = '-'.join(
            list(map(str, sorted(runs.values(), reverse=True)))
        )

        if score != current_game_score:
            print('bad score:')
            print(game['id'], score, current_game_score)

    issues = get_game_issues(game)
    if any(issues['periods']):
        if issues == ['bases'] or issues == ['outs']:
            continue

        print('issues:')
        print(issues)

issues:
{'id': '401354388', 'periods': [{'id': 'top-8', 'issues': ['outs'], 'events': []}]}
issues:
{'id': '401355366', 'periods': [{'id': 'top-10', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401423181', 'periods': [{'id': 'top-10', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401355756', 'periods': [{'id': 'bottom-11', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401356377', 'periods': [{'id': 'top-15', 'issues': ['bases'], 'events': []}, {'id': 'bottom-15', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401355157', 'periods': [{'id': 'bottom-1', 'issues': ['outs'], 'events': [{'id': 12, 'issues': ['move.at']}]}]}
issues:
{'id': '401355249', 'periods': [{'id': 'top-10', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401355681', 'periods': [{'id': 'top-10', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401356112', 'periods': [{'id': 'bottom-10', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401355159', 'periods': [{'id': 'bottom-10', 

In [16]:
from copy import deepcopy


for game in games:
    if game['id'] == '401355174':
        for period in game['periods']:

            for event in period['events']:
                #if event['id'] == 35:
                t = deepcopy(event)
                if 'pitches' in t:
                    del t['pitches']
                print(t['entities'], t['desc'])


In [17]:
from copy import deepcopy


for game in games:
    if game['id'] == '401355157':
        for period in game['periods']:


            if period['id'] == 'bottom-1':
                print(period['score'])
                for event in period['events']:
                    t = deepcopy(event)
                    if 'pitches' in t:
                        del t['pitches']
                    print(t['entities'], t['desc'])


{'runs': 1, 'hits': 2, 'errors': 0, 'outs': 4}
{'player': 'D. Cease', 'type': 'sub-p', 'team': 'CHW'} D. Cease pitching for CHW
{'player': 'Reyes', 'type': 'singled', 'at': 'left'} Reyes singled to left.
{'player': 'H. Castro', 'type': 'singled', 'at': 'deep right', 'moves': [{'player': 'Reyes', 'type': 'advanced', 'at': 'third'}]} H. Castro singled to deep right, Reyes to third.
{'player': 'W. Castro', 'type': 'struck out', 'outs': 1, 'effort': 'swinging'} W. Castro struck out swinging.
{'player': 'Meadows', 'type': 'walked', 'at': 'first', 'moves': [{'player': 'H. Castro', 'type': 'advanced', 'at': 'second'}, {'player': 'Báez', 'type': 'out', 'at': 'not-available'}], 'outs': 1, 'issues': ['move.at']} Meadows walked, H. Castro to second. Báez struck out swinging.
{'player': 'Báez', 'type': 'struck out', 'outs': 1, 'effort': 'swinging'} Báez struck out swinging.
{'type': 'throwing error', 'moves': [{'player': 'Reyes', 'type': 'advanced', 'at': 'home', 'how': 'throwing error', 'by': 'Ce