# Data Parsing

Parse the raw JSONL files for the relevant information and store it in a more convenient format:

- Event data is stored in a pandas DataFrame.
- Tracking data is stored in a dict, keyed by file name (without the suffix).

In [1]:
%matplotlib notebook
import json
import pickle
import matplotlib.pyplot as plt
import pandas as pd
plt.ion()
import os
from classes import PitchFrames

In [2]:
# Parse every raw data file once and split it into
#   * df_rows:         one [fileID, pitchType, result, action] row per file
#   * tracking_frames: one PitchFrames object per file, keyed by fileID
RAW_DATA_DIR = "data/raw wisd data/"
RAW_SUFFIX = ".jsonl"

df_rows = []
tracking_frames = {} # to look up tracking data by file id
# sorted(): os.listdir returns entries in arbitrary order, which would make the
# row order of the events table differ between runs/machines.
for fname in sorted(os.listdir(RAW_DATA_DIR)):
    # Skip anything that is not a raw data file (e.g. .DS_Store, checkpoint
    # folders); those would otherwise crash open()/json.load below.
    if not fname.endswith(RAW_SUFFIX):
        continue
    # Explicit encoding so parsing does not depend on the platform default.
    # Note: json.load reads the whole file as ONE JSON document.
    with open(os.path.join(RAW_DATA_DIR, fname), "r", encoding="utf-8") as f:
        dat = json.load(f)
    # File id = file name without the suffix (previously a hard-coded [:-6]).
    fileID = fname[:-len(RAW_SUFFIX)]

    # event data
    pitch = dat["summary_acts"]["pitch"]
    result = pitch["result"]
    # An empty dict in "type"/"action" is normalised to None
    # (presumably how the raw data encodes a missing value -- TODO confirm).
    pitchType = pitch["type"]
    if pitchType == {}:
        pitchType = None
    action = pitch["action"]
    if action == {}:
        action = None
    df_rows.append([fileID, pitchType, result, action])

    # tracking data: built by PitchFrames.from_dict from the ball and bat samples
    tracking_frames[fileID] = PitchFrames.from_dict(dat["samples_ball"], dat["samples_bat"])

In [3]:
# Sanity check: show the distinct values found in each event column of df_rows
# (row layout is [fileID, pitchType, result, action]).
for heading, position in (("Pitch Types:", 1), ("Results:", 2), ("Actions:", 3)):
    print(heading, {row[position] for row in df_rows})

Pitch Types: {None, 'Curveball', 'FourSeamFastball', 'Cutter', 'Changeup', 'Sinker', 'Slider'}
Results: {'Pickoff', 'HitIntoPlay', 'Ball', 'Strike'}
Actions: {None, 'HitByPitch', 'Blocked', 'Pitchout', 'Foul', 'FoulTip', 'BallInDirt', 'ThrowToSecond', 'Called', 'ThrowToFirst'}


In [4]:
# Persist both parsed artefacts so they can be reloaded without re-parsing.
EVENT_COLUMNS = ["fileID", "pitchType", "result", "action"]
events_df = pd.DataFrame(data=df_rows, columns=EVENT_COLUMNS)
# to_csv is called with its defaults, so the row index is written as the
# first (unnamed) column of the CSV.
events_df.to_csv("data/WISD events.csv")

# Tracking data (dict of fileID -> PitchFrames) is pickled as a single object.
with open("data/WISD tracking.pickle", mode="wb") as pickle_file:
    pickle.dump(tracking_frames, pickle_file)