In [None]:
import json
import pandas as pd

year = 2019


def load_schedule_games(year, data_dir='../data'):
    """Load every scheduled game from March 1 through October 31 of ``year``.

    Reads ``{data_dir}/schedule/YYYY-MM-DD.json`` for each calendar day in the
    range and flattens the games listed under the first ``dates`` entry of each
    file with ``pd.json_normalize``.

    Args:
        year: Season year used to build the schedule file names.
        data_dir: Directory that contains the ``schedule`` sub-directory.

    Returns:
        A DataFrame with one row per scheduled game, or an empty DataFrame
        when no schedule file yielded any game.
    """
    frames = []
    # A real calendar range covers the 30th/31st of each month and never
    # produces an invalid date.
    for date in pd.date_range(f'{year}-03-01', f'{year}-10-31', freq='D'):
        filename = f'{data_dir}/schedule/{date:%Y-%m-%d}.json'
        try:
            with open(filename, 'r') as infile:
                data = json.load(infile)
        except FileNotFoundError:
            # No schedule was saved for this day; nothing to add.
            continue
        games = data["dates"][0]["games"] if data["dates"] else []
        if games:
            frames.append(pd.json_normalize(games))
    # Concatenate once at the end instead of growing a frame inside the loop.
    return pd.concat(frames) if frames else pd.DataFrame()


def load_game_plays(game_pks, data_dir='../data'):
    """Load the ``allPlays`` records of each game in ``game_pks``.

    Reads ``{data_dir}/games/{gamePk}.json`` for every distinct id and flattens
    its ``allPlays`` list with ``pd.json_normalize``.

    Args:
        game_pks: Iterable of game ids; repeated ids are loaded only once.
        data_dir: Directory that contains the ``games`` sub-directory.

    Returns:
        A ``(plays, skipped)`` tuple: ``plays`` is a DataFrame with one row per
        play (empty when nothing was loaded) and ``skipped`` lists the ids
        whose file was missing or could not be parsed as JSON.
    """
    frames = []
    skipped = []
    # dict.fromkeys drops repeated ids while keeping their first-seen order.
    for gamepk in dict.fromkeys(game_pks):
        filename = f'{data_dir}/games/{gamepk}.json'
        try:
            with open(filename, 'r') as infile:
                data = json.load(infile)
        except (OSError, json.JSONDecodeError):
            # Best effort: remember the game and carry on with the others.
            skipped.append(gamepk)
            continue
        plays = data.get("allPlays") or []
        if plays:
            frames.append(pd.json_normalize(plays))
    plays_frame = pd.concat(frames) if frames else pd.DataFrame()
    return plays_frame, skipped


allgames = load_schedule_games(year)

# An empty schedule frame has no "gamePk" column to iterate over.
game_pks = allgames["gamePk"] if "gamePk" in allgames.columns else []
allPlays, skipped_games = load_game_plays(game_pks)

if skipped_games:
    print(f'Skipped {len(skipped_games)} game file(s) that were missing or unreadable')
        

In [None]:
# Non-null value count of every column, grouped by the game's seriesDescription.
allgames.groupby(by="seriesDescription").count()

In [None]:
# Keep only the rows labelled 'Regular Season', then count non-null values per home team name.
is_regular_season = allgames["seriesDescription"].eq("Regular Season")
allgames.loc[is_regular_season].groupby("teams.home.team.name").count()

In [None]:
import json
import pandas as pd
from jsonpath_ng import jsonpath, parse

filename = '../data/games/564721.json'

# Read the saved game file; the handle is closed before any processing starts.
with open(filename, 'r') as infile:
    data = json.load(infile)

# Every element of the top-level "allPlays" array, as plain Python objects.
atbats = [match.value for match in parse('allPlays[*]').find(data)]

columns = [
    'gameID', 'eventNum', 'inning', 'half', 'atbat', 'balls', 'strikes',
    'outs', 'pitches', 'homeScore', 'awayScore', 'event', 'result',
]

for atbat in atbats:
    # First print the named events that occurred during the at-bat ...
    for play in atbat["playEvents"]:
        event = play["details"].get("event")
        if event:
            print(f'{atbat["about"]["inning"]} {event}')
    # ... then the at-bat's own result, with the half-inning padded to 10 characters.
    print(f'{atbat["about"]["inning"]} {atbat["about"]["halfInning"]:10s} {atbat["result"]["event"]}')


In [None]:
# `atbats` is a plain list of at-bat dicts, which has no `.groupby`; build a
# DataFrame from each at-bat's "result" record before counting rows per event.
atbat_results = pd.DataFrame([atbat["result"] for atbat in atbats])
atbat_results.groupby("event").count()

In [None]:
import json
import pandas as pd
from jsonpath_ng import jsonpath, parse

filename = '../data/schedule/2018-08-19.json'

with open(filename, 'r') as infile:
    data = json.load(infile)

# Each match holds the "games" list of one "dates" entry. A schedule with no
# "dates" entries yields no match, so fall back to an empty list instead of
# indexing into an empty result.
matches = parse('dates[*].games').find(data)
games = matches[0].value if matches else []

# Print the id of every game found in the first "dates" entry.
for game in games:
    print(game["gamePk"])



In [None]:
import feather, glob
import pandas as pd

# Every Feather file saved under the 2019 parsed-data directory.
files = glob.glob('../data/parsed/2019/*.ftr')

large = pd.DataFrame()

# Read each file into its own DataFrame, then stack them into a single frame.
li = [feather.read_dataframe(path) for path in files]

total = pd.concat(li)


In [None]:
# Distinct gamePk count for each (seriesDescription, homeTeam) pair.
# nunique() takes no column name (its first positional parameter is `dropna`);
# "gamePk" is the only non-key column left after the selection, so it is the
# column that gets counted.
games = (
    total[["homeTeam", "seriesDescription", "gamePk"]]
    .groupby(["seriesDescription", "homeTeam"])
    .nunique()
)

In [138]:
import feather
import glob
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Feather files under the 2022 parsed-data directory whose name starts with 2022-03-20.
files = glob.glob('../data/parsed/2022/2022-03-20*.ftr')

large = pd.DataFrame()

# Read each file into its own DataFrame, then stack them into a single frame.
li = [feather.read_dataframe(path) for path in files]

total = pd.concat(li)

# Full team name -> short label used in the figure text.
SHORT_NAMES = {
    'Los Angeles Angels': "LAA",
    'Houston Astros': "HOU",
    'Oakland Athletics': "OAK",
    'Toronto Blue Jays': "TOR",
    'Atlanta Braves': "ATL",
    'Milwaukee Brewers': "MIL",
    'St. Louis Cardinals': "STL",
    'Chicago Cubs': "CHC",
    'Arizona Diamondbacks': "ARI",
    'Los Angeles Dodgers': "LAD",
    'San Francisco Giants': "SF",
    'Cleveland Guardians': "CLE",
    'Cleveland Indians': "CLE",
    'Seattle Mariners': "SEA",
    'Miami Marlins': "MIA",
    'New York Mets': "NYM",
    'Washington Nationals': "WAS",
    'Baltimore Orioles': "BAL",
    'San Diego Padres': "SD",
    'Philadelphia Phillies': "PHI",
    'Pittsburgh Pirates': "PIT",
    'Texas Rangers': "TEX",
    'Tampa Bay Rays': "TB",
    'Boston Red Sox': "BOS",
    'Cincinnati Reds': "CIN",
    'Colorado Rockies': "COL",
    'Kansas City Royals': "KC",
    'Detroit Tigers': "DET",
    'Minnesota Twins': "MIN",
    'Chicago White Sox': "CHS",
    'New York Yankees': "NYY",
}

GAME_COLUMNS = ["homeTeam", "awayTeam", "gamePk"]
SCORE_COLUMNS = ["defenseScore", "offenseScore", "thrillScore"]

# Keep one row per distinct combination of the selected columns, then swap
# full team names for their short labels.
total = total[GAME_COLUMNS + SCORE_COLUMNS].drop_duplicates()
total = total.replace({"homeTeam": SHORT_NAMES, "awayTeam": SHORT_NAMES})

# Limit all three scores to the 0-10 range.
total[SCORE_COLUMNS] = total[SCORE_COLUMNS].clip(lower=0, upper=10)

games = total


In [144]:
# Marker sizes must be non-negative for Plotly; thrillScore * 8 - 10 drops below
# zero for scores under 1.25, so floor the size to keep every game drawable.
MIN_MARKER_SIZE = 4
marker_sizes = (games["thrillScore"] * 8 - 10).clip(lower=MIN_MARKER_SIZE)

# "@" reads as "away team at home team", so the visiting side goes first.
matchup_labels = games['awayTeam'] + ' @ ' + games['homeTeam']

fig = go.Figure()

# One labelled marker per game: x = defense score, y = offense score,
# marker size and colour both driven by the thrill score.
fig.add_trace(go.Scatter(
    x=games["defenseScore"],
    y=games["offenseScore"],
    mode='markers+text',
    text=matchup_labels,
    textposition="bottom center",
    marker=dict(
        size=marker_sizes,
        color=games["thrillScore"],
        line=dict(width=1, color='DarkSlateGrey'),
        colorscale='Rainbow',
        showscale=True
    )
))

fig.update_layout(width=1000, height=1000,
                  paper_bgcolor='rgb(200, 200, 200)',
                  title_font_size=30,
                  font_size=16,
                  title_text="Today's Games",
                  xaxis_title="Defense Score",
                  yaxis_title="Offense Score")

fig.update_xaxes(zeroline=False, title_font=dict(size=30), showgrid=False)
# update_yaxes returns the figure, so leaving it as the last expression renders the plot.
fig.update_yaxes(zeroline=False, title_font=dict(size=30), showgrid=False)
