In [None]:
import numpy as np
import os
import pandas as pd
from tqdm import tqdm
import copy
import pickle

In [None]:
# Config variables
# Relative location of the directory that the CSV inputs are read from and the
# pickle output is written to.
_CLEAN_DATA_PARTS = ("..", "clean_data")
clean_data_path = os.path.join(*_CLEAN_DATA_PARTS)

In [None]:
# Load player.csv and keep only the columns whose name does not match '^Unnamed'.
df_player = (
    pd.read_csv(os.path.join(clean_data_path, "player.csv"))
    .pipe(lambda frame: frame.loc[:, ~frame.columns.str.contains('^Unnamed')])
)

In [None]:
# Load team.csv and keep only the columns whose name does not match '^Unnamed'.
df_team = (
    pd.read_csv(os.path.join(clean_data_path, "team.csv"))
    .pipe(lambda frame: frame.loc[:, ~frame.columns.str.contains('^Unnamed')])
)

# team_name -> team_id lookup built from the two columns of df_team.
team_id_map = {
    team_name: team_id
    for team_name, team_id in zip(df_team.team_name, df_team.team_id)
}

# Additional names that share the id of a name already present in the lookup.
for alias_name, existing_name in (
    ("Delhi Capitals", "Delhi Daredevils"),
    ("Punjab Kings", "Kings XI Punjab"),
):
    team_id_map[alias_name] = team_id_map[existing_name]

In [None]:
# Load match.csv and keep only the columns whose name does not match '^Unnamed'.
df_match = (
    pd.read_csv(os.path.join(clean_data_path, "match.csv"))
    .pipe(lambda frame: frame.loc[:, ~frame.columns.str.contains('^Unnamed')])
)

# Distinct values found in the team_1 column of df_match.
teams_id = {team for team in df_match["team_1"]}

# Rows of df_team whose team_id is one of those values.
appears_as_team_1 = df_team.team_id.isin(teams_id)
teams_names = df_team.loc[appears_as_team_1]



In [None]:
# Load ball.csv and keep only the columns whose name does not match '^Unnamed'.
df_ball = (
    pd.read_csv(os.path.join(clean_data_path, "ball.csv"))
    .pipe(lambda frame: frame.loc[:, ~frame.columns.str.contains('^Unnamed')])
)


In [None]:
# Template for one team's record of one match. Every statistic carries one
# zeroed counter per phase plus a "Total"; the remaining keys are placeholders
# for per-match metadata.
_PHASE_KEYS = ("Powerplay", "Middle_overs", "Death", "Total")
_BATTING_STATS = ("runs_scored", "wickets", "num_4s", "num_6s", "dot_balls")
_BOWLING_STATS = (
    "runs_conceded",
    "wickets_taken",
    "num_4s_conceded",
    "num_6s_conceded",
    "dot_balls_bowled",
)


def _zeroed_phase_counters():
    """Return a fresh dict with every phase key set to 0."""
    return {phase_key: 0 for phase_key in _PHASE_KEYS}


empty_match_object = {
    "batting_insights": {stat: _zeroed_phase_counters() for stat in _BATTING_STATS},
    "bowling_insights": {stat: _zeroed_phase_counters() for stat in _BOWLING_STATS},
    "innings": 1,
    "match_winner": 0,
    "venue": 0,
    "opposition": 0,
    "match_date": 0,
}

In [None]:
# Accumulator filled by the aggregation loop: team -> match_id -> per-match record.
match_insight_obj = dict()

In [None]:
# Aggregate ball-by-ball rows of df_ball into match_insight_obj, keyed as
# match_insight_obj[team][match_id] -> a deep copy of empty_match_object whose
# per-phase counters are incremented for every delivery.
ball_temp=df_ball

# Phase thresholds on `ball_number`: values below POWERPLAY_END count as
# "Powerplay", values below MIDDLE_OVERS_END as "Middle_overs", the rest as "Death".
POWERPLAY_END = 6.0
MIDDLE_OVERS_END = 15.0

# Start from an empty accumulator so that re-running this cell does not add
# every delivery a second time on top of the previous run.
match_insight_obj.clear()

# match_id -> dict of the df_match fields used below. Filled lazily so that
# df_match is filtered once per match rather than once per delivery.
match_info_cache = {}


def _phase_of(ball_number):
    """Return the phase key ("Powerplay", "Middle_overs" or "Death") for a delivery.

    A single boundary chain is used so that a value sitting exactly on a
    threshold (e.g. 6.0) goes to the following phase instead of falling
    through to "Death". NaN compares false everywhere and ends up in "Death".
    """
    if ball_number < POWERPLAY_END:
        return "Powerplay"
    if ball_number < MIDDLE_OVERS_END:
        return "Middle_overs"
    return "Death"


def _match_info(match_id):
    """Return the df_match fields needed per delivery for `match_id` (cached).

    Values are read from the first df_match row whose match_id equals
    `match_id`; an IndexError is raised if there is no such row.
    """
    info = match_info_cache.get(match_id)
    if info is None:
        required_match = df_match.loc[df_match["match_id"] == match_id]
        batting_first = required_match["batting_team"].iloc[0]
        chasing = required_match["chasing_team"].iloc[0]
        try:
            match_winner = int(required_match["match_winner"].iloc[0])
        except (TypeError, ValueError, OverflowError):
            # Winner value that cannot be converted to int (e.g. NaN) -> -1.
            match_winner = -1
        info = {
            "batting_team": batting_first,
            "chasing_team": chasing,
            "match_winner": match_winner,
            "venue": required_match["venue_id"].iloc[0],
            # Characters 2..5 of the stored date value.
            # NOTE(review): presumably the year, if dates are stored like
            # "['2008-04-18']" -- confirm against match.csv.
            "match_date": required_match["match_date"].iloc[0][2:6],
        }
        match_info_cache[match_id] = info
    return info


def _tally(batting_entry, bowling_entry, batting_stat, bowling_stat, phase, amount):
    """Add `amount` to the `phase` counter and to "Total" on both sides of a delivery."""
    for counters in (
        batting_entry["batting_insights"][batting_stat],
        bowling_entry["bowling_insights"][bowling_stat],
    ):
        counters[phase] += amount
        counters["Total"] += amount


for i,row in tqdm(ball_temp.iterrows(), total=len(ball_temp)):
    match_id = int(row["match_id"])
    innings_number = int(row["innings_number"])
    info = _match_info(match_id)

    # Innings 1 is batted by df_match.batting_team; any other innings number
    # is attributed to df_match.chasing_team.
    if innings_number==1:
        batting_team, bowling_team = info["batting_team"], info["chasing_team"]
    else:
        batting_team, bowling_team = info["chasing_team"], info["batting_team"]

    # Create the team and match levels on first sight (team keys first, then
    # the per-match records, each an independent deep copy of the template).
    batting_matches = match_insight_obj.setdefault(batting_team, {})
    bowling_matches = match_insight_obj.setdefault(bowling_team, {})
    if match_id not in batting_matches:
        batting_matches[match_id] = copy.deepcopy(empty_match_object)
    if match_id not in bowling_matches:
        bowling_matches[match_id] = copy.deepcopy(empty_match_object)
    batting_entry = batting_matches[match_id]
    bowling_entry = bowling_matches[match_id]

    phase = _phase_of(row["ball_number"])
    total_runs = row["total_runs"]

    #runs scored
    _tally(batting_entry, bowling_entry, "runs_scored", "runs_conceded", phase, total_runs)

    #wickets
    # pd.notna also copes with non-numeric values, where np.isnan would raise.
    if pd.notna(row["player_dismissed"]):
        _tally(batting_entry, bowling_entry, "wickets", "wickets_taken", phase, 1)

    #number of 4's for team (i.e including 4legbyes)
    if total_runs==4:
        _tally(batting_entry, bowling_entry, "num_4s", "num_4s_conceded", phase, 1)

    #number of 6s
    if total_runs==6:
        _tally(batting_entry, bowling_entry, "num_6s", "num_6s_conceded", phase, 1)

    #number of dot balls of legal deliveries
    if total_runs==0:
        _tally(batting_entry, bowling_entry, "dot_balls", "dot_balls_bowled", phase, 1)

    # Metadata is written to the batting side's record only; the bowling
    # side's record keeps the template placeholders unless that team also
    # appears as the batting team for this match.
    batting_entry["innings"]=innings_number
    batting_entry["match_winner"]=info["match_winner"]
    batting_entry["venue"]=info["venue"]
    batting_entry["match_date"]=info["match_date"]
    batting_entry["opposition"]=bowling_team
    

In [None]:
# Persist the aggregated per-team, per-match insights next to the input CSVs.
pre_compute_path = os.path.join(clean_data_path, 'pre_compute_match.pkl')
with open(pre_compute_path, 'wb') as pkl_out:
    pickle.dump(match_insight_obj, pkl_out, protocol=pickle.HIGHEST_PROTOCOL)