In [1]:
import os
import sys
import json
from collections import defaultdict
import pandas as pd

import sys
sys.path.insert(0, '../../../pipelines/tasks/mlb/')

from common.helpers.extractors import get_game_issues

In [2]:
df = pd.read_csv('../../../data/mlb/schedules.csv', index_col=None)
df = df[df.GAME_ID.isin(df.GAME_ID.dropna().tolist())]
df.GAME_ID = df.GAME_ID.astype(int)

df['WIN'] = df['RESULT'].map(lambda a: a[0])

import re
def get_score(score):
    match = re.search(r'\d+-\d+', score)
    return match.group(0)

df['SCORE'] = df['RESULT'].map(get_score)

df = df.loc[:, ['GAME_ID', 'WIN', 'SCORE']]
df = df.set_index(['GAME_ID'])
df.head()

Unnamed: 0_level_0,WIN,SCORE
GAME_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
401354253,L,5-4
401354266,W,5-2
401354281,W,10-1
401354318,W,3-2
401354331,W,6-4


In [3]:
game_lookup = {}
for index, row in df.iterrows():
    game_lookup[index] = row['SCORE']

In [4]:
games = []
for file in os.listdir('../../../data/mlb/pbp/2/'):
    with open(f'../../../data/mlb/pbp/2/{file}', 'r', encoding='UTF8') as pbp_input:
        game = json.load(pbp_input)

    games.append(game)

In [5]:
for game in games:

    runs = defaultdict(lambda: 0)

    for period in game['periods']:
        runs[period['atBat']] += period['score']['runs']
        #runs[period['atBat']] += sum(
        #    event['entities']['runs'] if 'runs' in event['entities'] else 0
        #    for event in period['events']
        #)

        for event in period['events']:
            if 'isInfoPlay' in event:
                continue

    score = game_lookup[int(game['id'])]
    current_game_score = '-'.join(
        list(map(str, sorted(runs.values(), reverse=True)))
    )

    if score != current_game_score:
        print('bad score:')
        print(game['id'], score, current_game_score)

    issues = get_game_issues(game)
    if any(issues['periods']):
        if issues == ['bases'] or issues == ['outs']:
            continue
        print('issues:')
        print(issues)

issues:
{'id': '401355157', 'periods': [{'id': 'bottom-1', 'issues': ['outs'], 'events': []}]}
issues:
{'id': '401355159', 'periods': [{'id': 'bottom-10', 'issues': ['bases'], 'events': []}]}
bad score:
401355174 7-5 6-5
issues:
{'id': '401355249', 'periods': [{'id': 'top-10', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401355366', 'periods': [{'id': 'top-10', 'issues': ['bases'], 'events': []}]}
bad score:
401355402 6-1 5-1
issues:
{'id': '401355681', 'periods': [{'id': 'top-10', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401355756', 'periods': [{'id': 'bottom-11', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401356076', 'periods': [{'id': 'bottom-10', 'issues': ['bases'], 'events': []}]}
issues:
{'id': '401356112', 'periods': [{'id': 'bottom-10', 'issues': ['bases'], 'events': []}]}
bad score:
401356126 15-7 14-7
issues:
{'id': '401356377', 'periods': [{'id': 'top-15', 'issues': ['bases'], 'events': []}, {'id': 'bottom-15', 'issues': ['bases'], 'events': []

In [8]:
from copy import deepcopy


for game in games:
    if game['id'] == '401355174':
        for period in game['periods']:

            for event in period['events']:
                #if event['id'] == 35:
                t = deepcopy(event)
                if 'pitches' in t:
                    del t['pitches']
                print(t['entities'], t['desc'])


{'player': 'A. Gomber', 'type': 'sub-p', 'team': 'COL'} A. Gomber pitching for COL
{'outs': 1, 'player': 'Straw', 'type': 'grounded out', 'at': 'second'} Straw grounded out to second.
{'outs': 1, 'player': 'Rosario', 'type': 'flied out', 'at': 'left'} Rosario flied out to left.
{'player': 'Ramírez', 'type': 'singled', 'at': 'right'} Ramírez singled to right.
{'player': 'Gonzalez', 'type': 'struck out', 'outs': 1, 'effort': 'swinging'} Gonzalez struck out swinging.
{'player': 'K. Pilkington', 'type': 'sub-p', 'team': 'CLE'} K. Pilkington pitching for CLE
{'outs': 1, 'player': 'Joe', 'type': 'flied out', 'at': 'right'} Joe flied out to right.
{'player': 'Blackmon', 'type': 'singled', 'at': 'left center'} Blackmon singled to left center.
{'outs': 1, 'player': 'Rodgers', 'type': 'flied out', 'at': 'right'} Rodgers flied out to right.
{'outs': 1, 'player': 'Cron', 'type': 'flied out', 'at': 'center'} Cron flied out to center.
{'player': 'A. Gomber', 'type': 'sub-p', 'team': 'COL'} A. Gomber

In [9]:
from copy import deepcopy


for game in games:
    if game['id'] == '401355157':
        for period in game['periods']:


            if period['id'] == 'bottom-1':
                print(period['score'])
                for event in period['events']:
                    t = deepcopy(event)
                    if 'pitches' in t:
                        del t['pitches']
                    print(t['entities'], t['desc'])


{'runs': 1, 'hits': 2, 'errors': 0, 'outs': 4}
{'player': 'D. Cease', 'type': 'sub-p', 'team': 'CHW'} D. Cease pitching for CHW
{'player': 'Reyes', 'type': 'singled', 'at': 'left'} Reyes singled to left.
{'player': 'H. Castro', 'type': 'singled', 'at': 'deep right', 'moves': [{'player': 'Reyes', 'type': 'advanced', 'at': 'third'}]} H. Castro singled to deep right, Reyes to third.
{'player': 'W. Castro', 'type': 'struck out', 'outs': 1, 'effort': 'swinging'} W. Castro struck out swinging.
{'player': 'Meadows', 'type': 'walked', 'at': 'first', 'moves': [{'player': 'H. Castro', 'type': 'advanced', 'at': 'second'}, {'player': 'Báez', 'type': 'out', 'at': 'not-available'}], 'outs': 1} Meadows walked, H. Castro to second. Báez struck out swinging.
{'player': 'Báez', 'type': 'struck out', 'outs': 1, 'effort': 'swinging'} Báez struck out swinging.
{'type': 'throwing error', 'moves': [{'player': 'Reyes', 'type': 'advanced', 'at': 'home', 'how': 'throwing error', 'by': 'Cease'}, {'player': 'H Ca