# Converting JSON data into a tabular, ball-by-ball format

In [5]:
import pandas as pd
import json
import os
from datetime import datetime
import copy

In [6]:
def load_data_from_json(filepath):
    dict_format = json.loads(open(filepath, "r").read())
    return dict_format

In [7]:
class MatchSituation:
    def __init__(self, match, delivery=None, init=False, previous_delivery = None, format=50):
        self.previous_delivery = previous_delivery
        self.next_delivery = None
        self.parent = match

        if init:
            self.bowlers = {
                x: {"balls_remaining": 6*format/5, "finished_spell": False, "at_crease": False, "wickets": 0, "runs": 0} for x in match.team1_players
            }
            self.batters = {
                x: {"at_crease": False, "on_strike": False, "dismissed": False, "runs": 0, "balls": 0} for x in match.team2_players
            }
            self.runs_remaining = match.match['target']['runs']
            self.wickets_remaining = 10
            self.balls_remaining = 6*format
            self.current_runs = 0
            self.current_wickets = 0
            self.current_balls = 0
            self.extras_count = 0
            self.batter_on_strike = None
            self.bowler_on_strike = None
            self.batter_off_strike = None
            self.runs = 0
        else:
            previous_delivery.next_delivery = self
            self.bowlers = copy.deepcopy(previous_delivery.bowlers)
            self.batters = copy.deepcopy(previous_delivery.batters)
            self.runs_remaining = copy.deepcopy(previous_delivery.runs_remaining)
            self.wickets_remaining = copy.deepcopy(previous_delivery.wickets_remaining)
            self.balls_remaining = copy.deepcopy(previous_delivery.balls_remaining)
            self.current_runs = copy.deepcopy(previous_delivery.current_runs)
            self.current_wickets = copy.deepcopy(previous_delivery.current_wickets)
            self.current_balls = copy.deepcopy(previous_delivery.current_balls)
            self.extras_count = copy.deepcopy(previous_delivery.extras_count)
            self.batter_on_strike = copy.deepcopy(previous_delivery.batter_on_strike)
            self.batter_off_strike = copy.deepcopy(previous_delivery.batter_off_strike)
            self.bowler_on_strike = copy.deepcopy(previous_delivery.bowler_on_strike)
            self.update(delivery)
        
    def update(self, delivery):
        # print(delivery)
        
        self.batters[delivery['batter']]['at_crease'] = True
        self.batters[delivery['batter']]['on_strike'] = True
        self.batter_on_strike = delivery['batter']
        self.batter_off_strike = delivery['non_striker']
        self.batters[delivery['non_striker']]['at_crease'] = True
        self.batters[delivery['non_striker']]['on_strike'] = False
        
        self.batters[delivery['batter']]['runs'] += delivery['runs']['batter']
        self.current_runs += delivery['runs']['total']
        self.runs_remaining -= delivery['runs']['total']

        self.runs = delivery['runs']['total']

        self.extras_count += delivery['runs']['extras']

        if (('extras' in delivery) and ('wides' in delivery['extras'] or 'noballs' in delivery['extras'])) or ('extras' not in delivery):
            self.bowlers[delivery['bowler']]['runs'] += delivery['runs']['total']

        if (('extras' in delivery) and ('wides' in delivery['extras'])) == False:

            self.batters[delivery['batter']]['balls'] += 1

            if (('extras' in delivery) and ('noballs' in delivery['extras'])) == False:

                self.bowlers[delivery['bowler']]['balls_remaining'] -= 1
                self.bowlers[delivery['bowler']]['at_crease'] = True
                self.bowler_on_strike = delivery['bowler']
                self.balls_remaining -= 1
                self.current_balls += 1

            if self.bowlers[delivery['bowler']]['balls_remaining'] % 6 == 0:
                self.bowlers[delivery['bowler']]['at_crease'] = False

            if self.bowlers[delivery['bowler']]['balls_remaining'] == 0:
                self.bowlers[delivery['bowler']]['finished_spell'] = True
        
        if 'wickets' in delivery:
            self.wickets_remaining -= 1
            self.current_wickets += 1

            self.batters[delivery['wickets'][0]['player_out']]['dismissed'] = True
            self.batters[delivery['wickets'][0]['player_out']]['on_strike'] = False
            self.batters[delivery['wickets'][0]['player_out']]['at_crease'] = False
            if delivery['wickets'][0]['kind'] in ['caught', "bowled", "lbw", "stumped"]:
                self.bowlers[delivery['bowler']]['wickets'] += 1

        

class OneDayMatch:
    def __init__(self, match_dict, matchno=None):
        self.match_date = datetime.strptime(match_dict['info']['dates'][0], "%Y-%m-%d")
        self.team1 = match_dict['innings'][0]['team']
        self.team2 = match_dict['innings'][1]['team']
        self.toss_winner = match_dict['info']['toss']['winner']
        self.ground = match_dict['info']['venue']
        self.team1_players = match_dict['info']['players'][self.team1]
        self.team2_players = match_dict['info']['players'][self.team2]
        self.match = match_dict['innings'][1]
        self.winner = match_dict['info']['outcome']['winner']
        self.matchno = matchno
        self.ball_by_ball = [MatchSituation(self, format=50, init=True)]
        self.process_match()
    
    def process_match(self):
        prev_delivery = self.ball_by_ball[-1]
        for over in self.match['overs']:
            for delivery in over['deliveries']:
                    prev_delivery = MatchSituation(self, delivery=delivery, previous_delivery=prev_delivery)


In [12]:
def dump(obj):
    result = {}
    for attr in dir(obj):
      if attr.startswith("__") == False:
        result[attr] = getattr(obj, attr)
    return result

domestic_loc = '../../data/step_02/domestic/'
domestic_matches = []

os.makedirs("../../data/step_03/matches/", exist_ok=True)
domestic_matches_file = "../../data/step_03/matches_domestic.csv"

open(domestic_matches_file, "w").close()

for filename in [x for x in os.listdir(domestic_loc) if x.endswith('.json')]:
  match_file = "../../data/step_03/matches/%s.csv" % filename[:-5]
  open(domestic_matches_file, "w").close()
  match_dict = load_data_from_json(domestic_loc + filename)
  match = OneDayMatch(match_dict, matchno=filename)
  match_dict = dump(match)
  for team in ['team1', 'team2']:
    player_count = 1
    for player in match_dict[team+'_players']:
      match_dict[team+'_player_%d' % player_count] = player
      player_count += 1
  drop_keys = ['ball_by_ball', 'match', 'update', 'process_match', 'team1_players', 'team2_players']
  for key in drop_keys:
    match_dict.pop(key, None)
  match_df = pd.DataFrame(match_dict, index=[0])
  match_df.to_csv(domestic_matches_file, mode='a', index=False, header=False)
  delivery = match.ball_by_ball[0].next_delivery
  while True:
      delivery_dict = dump(delivery)
      try:
        delivery_dict['batter_on_strike'] = delivery_dict['batters'][delivery_dict['batter_on_strike']]
      except KeyError:
        print(delivery_dict)
        break
      delivery_dict['batter_off_strike'] = delivery_dict['batters'][delivery_dict['batter_off_strike']]
      delivery_dict['bowler_on_strike'] = delivery_dict['bowlers'][delivery_dict['bowler_on_strike']]
      keys_to_drop = ['update', 'previous_delivery', 'parent', 'next_delivery', 'batters', 'bowlers']
      for key in list(delivery_dict.keys()):
          if key in keys_to_drop:
              delivery_dict.pop(key, None)
              continue
          if type(delivery_dict[key]) == dict:
              for key2 in delivery_dict[key].keys():
                  delivery_dict[key+"_"+key2] = delivery_dict[key][key2]
              delivery_dict.pop(key, None)
      round_2_keys_to_drop = ['batter_off_strike_at_crease', 'batter_off_strike_on_strike', 'batter_on_strike_at_crease', 'batter_on_strike_on_strike', 'bowler_on_strike_at_crease', 'bowler_on_strike_finished_spell']
      for key in list(delivery_dict.keys()):
          if key in round_2_keys_to_drop:
              delivery_dict.pop(key, None)
        
      delivery_df = pd.DataFrame(delivery_dict, index=[0])
      delivery_df.to_csv(match_file, mode='a', index=False, header=False)

      try:
          delivery = delivery.next_delivery
      except KeyError:
          break

international_loc = '../../data/step_02/international/'
international_matches = []

os.makedirs("../../data/step_03", exist_ok=True)
international_matches_file = "../../data/step_03/matches_international.csv"

open(international_matches_file, "w").close()

for filename in [x for x in os.listdir(international_loc) if x.endswith('.json')]:
  try:
    match_dict = load_data_from_json(international_loc + filename)
    match = OneDayMatch(match_dict, matchno=filename)
    match_dict = dump(match)
    for team in ['team1', 'team2']:
      player_count = 1
      for player in match_dict[team+'_players']:
        match_dict[team+'_player_%d' % player_count] = player
        player_count += 1
    drop_keys = ['ball_by_ball', 'match', 'update', 'process_match', 'team1_players', 'team2_players']
    for key in drop_keys:
      match_dict.pop(key, None)
    match_df = pd.DataFrame(match_dict, index=[0])
    match_df.to_csv(international_matches_file, mode='a', index=False, header=False)
  except KeyError as e:
    continue

{}
{}
{}
{}


KeyError: None

In [None]:
delivery = match.ball_by_ball[0].next_delivery

while True:
    delivery_dict = dump(delivery)
    print(delivery_dict)
    delivery_dict['batter_on_strike'] = delivery_dict['batters'][delivery_dict['batter_on_strike']]
    delivery_dict['batter_off_strike'] = delivery_dict['batters'][delivery_dict['batter_off_strike']]
    delivery_dict['bowler_on_strike'] = delivery_dict['bowlers'][delivery_dict['bowler_on_strike']]
    keys_to_drop = ['update', 'previous_delivery', 'parent', 'next_delivery', 'batters', 'bowlers']
    for key in list(delivery_dict.keys()):
        if key in keys_to_drop:
            delivery_dict.pop(key, None)
            continue
        if type(delivery_dict[key]) == dict:
            for key2 in delivery_dict[key].keys():
                delivery_dict[key+"_"+key2] = delivery_dict[key][key2]
            delivery_dict.pop(key, None)
    round_2_keys_to_drop = ['batter_off_strike_at_crease', 'batter_off_strike_on_strike', 'batter_on_strike_at_crease', 'batter_on_strike_on_strike', 'bowler_on_strike_at_crease', 'bowler_on_strike_finished_spell']
    for key in list(delivery_dict.keys()):
        if key in round_2_keys_to_drop:
            delivery_dict.pop(key, None)

    try:
        delivery = delivery.next_delivery
    except KeyError:
        break

delivery_dict

{'balls_remaining': 299, 'batter_off_strike': 'NRD Compton', 'batter_on_strike': 'ME Trescothick', 'batters': {'ME Trescothick': {'at_crease': True, 'on_strike': True, 'dismissed': False, 'runs': 0, 'balls': 1}, 'NRD Compton': {'at_crease': True, 'on_strike': False, 'dismissed': False, 'runs': 0, 'balls': 0}, 'PD Trego': {'at_crease': False, 'on_strike': False, 'dismissed': False, 'runs': 0, 'balls': 0}, 'CA Ingram': {'at_crease': False, 'on_strike': False, 'dismissed': False, 'runs': 0, 'balls': 0}, 'JC Hildreth': {'at_crease': False, 'on_strike': False, 'dismissed': False, 'runs': 0, 'balls': 0}, 'AWR Barrow': {'at_crease': False, 'on_strike': False, 'dismissed': False, 'runs': 0, 'balls': 0}, 'L Gregory': {'at_crease': False, 'on_strike': False, 'dismissed': False, 'runs': 0, 'balls': 0}, 'JG Myburgh': {'at_crease': False, 'on_strike': False, 'dismissed': False, 'runs': 0, 'balls': 0}, 'TD Groenewald': {'at_crease': False, 'on_strike': False, 'dismissed': False, 'runs': 0, 'balls': 

KeyError: 'batters'