In [7]:
import os
import sys
import json
from collections import defaultdict
import pandas as pd

import sys
sys.path.insert(0, '../../../pipelines/tasks/mlb/')

from common.helpers.extractors import get_game_issues

In [8]:
df = pd.read_csv('../../../data/mlb/schedules.csv', index_col=None)
df = df[df.GAME_ID.isin(df.GAME_ID.dropna().tolist())]
df.GAME_ID = df.GAME_ID.astype(int)

df['WIN'] = df['RESULT'].map(lambda a: a[0])

import re
def get_score(score):
    match = re.search(r'\d+-\d+', score)
    return match.group(0)

df['SCORE'] = df['RESULT'].map(get_score)

df = df.loc[:, ['GAME_ID', 'WIN', 'SCORE']]
df = df.set_index(['GAME_ID'])
df.head()

Unnamed: 0_level_0,WIN,SCORE
GAME_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
401427971,L,2-1
401354277,L,4-3
401354292,W,10-4
401354303,W,4-0
401354312,L,7-2


In [9]:
games = []
for file in os.listdir('../../../data/mlb/pbp/2/'):
    with open(f'../../../data/mlb/pbp/2/{file}', 'r', encoding='UTF8') as pbp_input:
        game = json.load(pbp_input)

    games.append(game)

In [10]:
for game in games:

    runs = defaultdict(lambda: 0)

    for period in game['periods']:
        runs[period['atBat']] += sum(
            event['entities']['runs'] if 'runs' in event['entities'] else 0
            for event in period['events']
        )

        for event in period['events']:
            if 'isInfoPlay' in event:
                continue

    score = df.loc[int(game['id'])].SCORE
    current_game_score = '-'.join(
        list(map(str, sorted(runs.values(), reverse=True)))
    )

    if score != current_game_score:
        print('bad score:')
        print(game['id'], score, current_game_score)

    issues = get_game_issues(game)
    if any(issues['periods']):
        if issues == ['bases'] or issues == ['outs']:
            continue
        print('issues:')
        print(issues)

KeyError: 401354388

In [None]:
from copy import deepcopy


for game in games:
    if game['id'] == '401356173':
        for period in game['periods']:

            for event in period['events']:
                if event['id'] == 35:
                    t = deepcopy(event)
                    if 'pitches' in t:
                        del t['pitches']
                    print(t)


{'isScoringPlay': False, 'score': {'away': 3, 'home': 0}, 'desc': 'Haggerty singled to center, Crawford safe to third, Haggerty safe at second on throwing error by shortstop Báez, ', 'id': 35, 'entities': {'player': 'Haggerty', 'type': 'singled', 'at': 'center', 'moves': [{'player': 'Crawford', 'type': 'advanced', 'at': 'third'}, {'player': 'Haggerty', 'type': 'advanced', 'at': 'second', 'how': 'throwing error', 'by': 'Báez'}]}}


In [None]:
from copy import deepcopy


for game in games:
    if game['id'] == '401355641':
        for period in game['periods']:

            if period['id'] == 'top-4':
                for event in period['events']:
                    t = deepcopy(event)
                    if 'pitches' in t:
                        del t['pitches']
                    print(t)


{'desc': 'B. Singer pitching for KC', 'isInfoPlay': True, 'id': 38, 'entities': {'player': 'B. Singer', 'type': 'sub-p', 'team': 'KC'}}
{'isScoringPlay': False, 'score': {'away': 0, 'home': 3}, 'desc': 'L. Raley struck out swinging.', 'id': 39, 'entities': {'player': 'L. Raley', 'type': 'struck out', 'outs': 1, 'effort': 'swinging'}}
{'isScoringPlay': False, 'score': {'away': 0, 'home': 3}, 'desc': 'Choi grounded out to second.', 'id': 40, 'entities': {'outs': 1, 'player': 'Choi', 'type': 'grounded out', 'at': 'second'}}
{'isScoringPlay': False, 'score': {'away': 0, 'home': 3}, 'desc': 'Walls struck out swinging.', 'id': 41, 'entities': {'player': 'Walls', 'type': 'struck out', 'outs': 1, 'effort': 'swinging'}}
