In [None]:
import numpy as np
import os
import pandas as pd
from tqdm import tqdm
import copy
import pickle

In [None]:
# Directory holding the cleaned CSV/pickle inputs, one level above the notebook's working directory.
clean_data_path = os.path.join(os.pardir, "clean_data")

In [None]:
# Load the pre-computed match insights used by every function in this notebook.
# The functions below index it as match_insight_obj[team_id] and iterate over
# the per-match records in .values().
# NOTE: pickle.load can execute arbitrary code while unpickling; only load a
# file that was produced by a trusted step of this project.
with open(os.path.join(clean_data_path, 'pre_compute_match.pkl'), 'rb') as file:
    match_insight_obj = pickle.load(file)

In [None]:
# Team lookup table read from team.csv.
df_team = pd.read_csv(os.path.join(clean_data_path, "team.csv"))
# Drop every column whose name starts with "Unnamed"
# (presumably index columns left over from an earlier to_csv -- TODO confirm).
df_team = df_team.loc[:, ~df_team.columns.str.contains('^Unnamed')]
# Map team_name -> team_id.
team_id_map = dict(zip(df_team.team_name, df_team.team_id))


# Register two additional names that reuse the ids of names already in the map
# (presumably franchise renames -- TODO confirm). Raises KeyError if the
# right-hand names are missing from team.csv.
team_id_map["Delhi Capitals"] = team_id_map["Delhi Daredevils"]
team_id_map["Punjab Kings"] = team_id_map["Kings XI Punjab"]


In [None]:
# Venue lookup table read from venue.csv.
df_venue = pd.read_csv(os.path.join(clean_data_path, "venue.csv"))
# Drop every column whose name starts with "Unnamed".
# Fix: the filtered frame is assigned back to df_venue. It was previously
# assigned to df_team, which replaced the team table with venue data and left
# df_venue unfiltered.
df_venue = df_venue.loc[:, ~df_venue.columns.str.contains('^Unnamed')]
# Map venue_location -> venue_id.
venue_id_map = dict(zip(df_venue.venue_location, df_venue.venue_id))


In [None]:
# Match table read from match.csv.
df_match = pd.read_csv(os.path.join(clean_data_path, "match.csv"))
# Drop every column whose name starts with "Unnamed".
df_match = df_match.loc[:, ~df_match.columns.str.contains('^Unnamed')]


In [None]:
#General insights for batting team, output [avg 1st inning score, avg PP score, win% batting first, avg 1st score vs opp, PP score vs opp, win% batting first vs opposition]

def match_insight_1(batting_team_id, bowling_team_id, years_list, insights=None):
    """Summarise a team's batting-first record, overall and against one opponent.

    Only records of ``batting_team_id`` whose ``"innings"`` value is 1 (the
    team batted first) and whose ``"match_date"`` passes the year filter are
    counted.

    Args:
        batting_team_id: Key of the team batting first in the insights mapping.
        bowling_team_id: Id compared with each record's ``"opposition"`` to
            select the head-to-head matches.
        years_list: List of year strings to consider, e.g. ``["2019", "2020"]``.
        insights: Optional mapping ``{team_id: {match_key: record}}``. When
            omitted, the notebook-level ``match_insight_obj`` is used.

    Returns:
        A list of six values:
        ``[avg 1st-innings score, avg powerplay score, win rate batting first,
        avg 1st-innings score vs opposition, avg powerplay score vs opposition,
        win rate batting first vs opposition]``.
        Win rates are fractions (wins / matches), not multiplied by 100.
        An entry with no qualifying matches is ``NaN`` (this used to raise
        ``ZeroDivisionError``).

    Raises:
        KeyError: If ``batting_team_id`` is not a key of the insights mapping.
    """
    if insights is None:
        insights = match_insight_obj  # pre-computed data loaded by the notebook

    def _mean(numerator, count):
        # NaN marks "no qualifying matches" instead of dividing by zero.
        return numerator / count if count else float("nan")

    # NOTE(review): the year filter is a substring test of str(match_date)
    # against the "|"-joined years; it acts as a membership test only if
    # match_date holds a bare year -- TODO confirm against the pickle.
    years_string = "|".join(years_list)

    match_count = h2h = 0                    # all matches / head-to-head matches
    total = pp = wins = 0                    # overall tallies
    total_opp = pp_opp = wins_opp = 0        # tallies against the opposition

    for match in insights[batting_team_id].values():
        if str(match["match_date"]) not in years_string or match["innings"] != 1:
            continue
        runs = match["batting_insights"]["runs_scored"]
        won = match["match_winner"] == batting_team_id

        match_count += 1
        total += runs["Total"]
        pp += runs["Powerplay"]
        if won:
            wins += 1

        if match["opposition"] == bowling_team_id:
            h2h += 1
            total_opp += runs["Total"]
            pp_opp += runs["Powerplay"]
            if won:
                wins_opp += 1

    return [
        _mean(total, match_count),
        _mean(pp, match_count),
        _mean(wins, match_count),
        _mean(total_opp, h2h),
        _mean(pp_opp, h2h),
        _mean(wins_opp, h2h),
    ]
            

In [None]:
# Calling match insights 1



In [None]:
# Venue based stats for batting first 
def match_insights_venue1(venue_id, years_list, insights=None):
    """Summarise first-innings results at one venue across all teams.

    Every team's records are scanned; a record counts when its
    ``"match_date"`` passes the year filter, its ``"venue"`` equals
    ``venue_id`` and its ``"innings"`` value is 1 (that team batted first).

    Args:
        venue_id: Venue id compared with each record's ``"venue"``.
        years_list: List of year strings to consider, e.g. ``["2019", "2020"]``.
        insights: Optional mapping ``{team_id: {match_key: record}}``. When
            omitted, the notebook-level ``match_insight_obj`` is used.

    Returns:
        A list of three values:
        ``[avg first-innings score, avg powerplay score, win rate batting first]``.
        The win rate is a fraction (wins / matches), not multiplied by 100.
        All three are ``NaN`` when no match qualifies (this used to raise
        ``ZeroDivisionError``).
    """
    if insights is None:
        insights = match_insight_obj  # pre-computed data loaded by the notebook

    # NOTE(review): the year filter is a substring test of str(match_date)
    # against the "|"-joined years; it acts as a membership test only if
    # match_date holds a bare year -- TODO confirm against the pickle.
    years_string = "|".join(years_list)

    match_count = 0
    total_runs_batting_first = 0
    PP_score_batting_first = 0
    wins_batting_first = 0  # wins for the team batting first at this venue

    for team_id, team_matches in insights.items():
        for match in team_matches.values():
            if str(match["match_date"]) not in years_string:
                continue
            if match["venue"] != venue_id or match["innings"] != 1:
                continue
            runs = match["batting_insights"]["runs_scored"]
            match_count += 1
            total_runs_batting_first += runs["Total"]
            PP_score_batting_first += runs["Powerplay"]
            if match["match_winner"] == team_id:
                wins_batting_first += 1

    if match_count == 0:
        # No qualifying match: report NaN instead of dividing by zero.
        nan = float("nan")
        return [nan, nan, nan]

    return [
        total_runs_batting_first / match_count,
        PP_score_batting_first / match_count,
        wins_batting_first / match_count,
    ]

In [None]:
#Calling match_insights_venue


In [None]:
 # bowling team insights while bowling first
def match_insight_2(bowling_team_id, years_list, insights=None):
    """Summarise a team's record when it bowls first.

    Only records of ``bowling_team_id`` whose ``"innings"`` value is 2 (the
    team batted second, i.e. bowled first) and whose ``"match_date"`` passes
    the year filter are counted.

    Args:
        bowling_team_id: Key of the team bowling first in the insights mapping.
        years_list: List of year strings to consider, e.g. ``["2019", "2020"]``.
        insights: Optional mapping ``{team_id: {match_key: record}}``. When
            omitted, the notebook-level ``match_insight_obj`` is used.

    Returns:
        A list of three values:
        ``[avg runs conceded, avg powerplay runs conceded, win rate bowling first]``.
        The win rate is a fraction (wins / matches), not multiplied by 100.
        All three are ``NaN`` when no match qualifies (this used to raise
        ``ZeroDivisionError``).

    Raises:
        KeyError: If ``bowling_team_id`` is not a key of the insights mapping.
    """
    if insights is None:
        insights = match_insight_obj  # pre-computed data loaded by the notebook

    # NOTE(review): the year filter is a substring test of str(match_date)
    # against the "|"-joined years; it acts as a membership test only if
    # match_date holds a bare year -- TODO confirm against the pickle.
    years_string = "|".join(years_list)

    match_count = 0
    total_runs_conceded = 0
    PP_runs_conceded = 0
    wins_bowling_first = 0  # wins for the team when it bowled first

    for match in insights[bowling_team_id].values():
        if str(match["match_date"]) not in years_string or match["innings"] != 2:
            continue
        conceded = match["bowling_insights"]["runs_conceded"]
        match_count += 1
        total_runs_conceded += conceded["Total"]
        PP_runs_conceded += conceded["Powerplay"]
        if match["match_winner"] == bowling_team_id:
            wins_bowling_first += 1

    if match_count == 0:
        # No qualifying match: report NaN instead of dividing by zero.
        nan = float("nan")
        return [nan, nan, nan]

    return [
        total_runs_conceded / match_count,
        PP_runs_conceded / match_count,
        wins_bowling_first / match_count,
    ]

In [None]:
#Calling for match_insights_2


In [None]:
#Innings break insight 
def match_insight_3(batting_team_id, bowling_team_id, years_list, total, insights=None):
    """Innings-break insight: win percentages around a given first-innings total.

    Two independent figures are computed:

    * for ``batting_team_id``: among its batting-first records
      (``"innings" == 1``) where it scored ``total`` or fewer, the percentage
      it won;
    * for ``bowling_team_id``: among its bowling-first records
      (``"innings" == 2``) where it conceded ``total`` or more, the percentage
      it won.

    Args:
        batting_team_id: Key of the team batting first in the insights mapping.
        bowling_team_id: Key of the team bowling first in the insights mapping.
        years_list: List of year strings to consider, e.g. ``["2019", "2020"]``.
        total: First-innings score used as the threshold.
        insights: Optional mapping ``{team_id: {match_key: record}}``. When
            omitted, the notebook-level ``match_insight_obj`` is used.

    Returns:
        ``[batting-team win %, bowling-team win %]``, both on a 0-100 scale.
        An entry with no qualifying matches is ``NaN`` (this used to raise
        ``ZeroDivisionError``).

    Raises:
        KeyError: If either team id is not a key of the insights mapping.
    """
    if insights is None:
        insights = match_insight_obj  # pre-computed data loaded by the notebook

    def _percentage(win_count, count):
        # NaN marks "no qualifying matches" instead of dividing by zero.
        return win_count * 100 / count if count else float("nan")

    # NOTE(review): the year filter is a substring test of str(match_date)
    # against the "|"-joined years; it acts as a membership test only if
    # match_date holds a bare year -- TODO confirm against the pickle.
    years_string = "|".join(years_list)

    # Batting team defending `total` or less.
    match_count_batting = 0
    wins_batting_team = 0
    for match in insights[batting_team_id].values():
        if str(match["match_date"]) not in years_string:
            continue
        if match["innings"] == 1 and match["batting_insights"]["runs_scored"]["Total"] <= total:
            match_count_batting += 1
            if match["match_winner"] == batting_team_id:
                wins_batting_team += 1

    # Bowling-first team chasing `total` or more.
    match_count_bowling = 0
    wins_bowling_team = 0
    for match in insights[bowling_team_id].values():
        if str(match["match_date"]) not in years_string:
            continue
        if match["innings"] == 2 and match["bowling_insights"]["runs_conceded"]["Total"] >= total:
            match_count_bowling += 1
            if match["match_winner"] == bowling_team_id:
                wins_bowling_team += 1

    return [
        _percentage(wins_batting_team, match_count_batting),
        _percentage(wins_bowling_team, match_count_bowling),
    ]

In [None]:
# Function call for match_insight_3



In [None]:
# Innings break venue based results
def match_insight_venue2(venue_id, years_list, total, insights=None):
    """Innings-break venue insight: how often a first-innings total is defended.

    Every team's records are scanned; a record counts when its
    ``"match_date"`` passes the year filter, its ``"venue"`` equals
    ``venue_id``, its ``"innings"`` value is 1 (that team batted first) and it
    scored ``total`` or fewer.

    Args:
        venue_id: Venue id compared with each record's ``"venue"``.
        years_list: List of year strings to consider, e.g. ``["2019", "2020"]``.
        total: First-innings score used as the upper threshold.
        insights: Optional mapping ``{team_id: {match_key: record}}``. When
            omitted, the notebook-level ``match_insight_obj`` is used.

    Returns:
        A tuple ``(win_rate, match_count)``. ``win_rate`` is the fraction of
        the qualifying matches won by the team batting first (not multiplied
        by 100). When no match qualifies the result is ``(NaN, 0)`` (this used
        to raise ``ZeroDivisionError``).
    """
    if insights is None:
        insights = match_insight_obj  # pre-computed data loaded by the notebook

    # NOTE(review): the year filter is a substring test of str(match_date)
    # against the "|"-joined years; it acts as a membership test only if
    # match_date holds a bare year -- TODO confirm against the pickle.
    years_string = "|".join(years_list)

    match_count = 0
    wins_batting_team = 0
    for team_id, team_matches in insights.items():
        for match in team_matches.values():
            if str(match["match_date"]) not in years_string:
                continue
            if match["venue"] != venue_id:
                continue
            if match["innings"] == 1 and match["batting_insights"]["runs_scored"]["Total"] <= total:
                match_count += 1
                if match["match_winner"] == team_id:
                    wins_batting_team += 1

    if match_count == 0:
        # No qualifying match: report NaN instead of dividing by zero.
        return float("nan"), 0
    return wins_batting_team / match_count, match_count

In [None]:
#Inputs
# Scenario under analysis: team batting first, team bowling first and venue.
batting_team= "Chennai Super Kings"
bowling_team = "Mumbai Indians"
venue_location="Chennai"
# Years to include, as strings (the insight functions join them into one string).
years=["2020","2019","2018","2017","2016"]
# First-innings score used as the threshold by the innings-break insights.
total = 160
# Translate the names into ids via the lookup maps; raises KeyError for an unknown name.
venue_id=venue_id_map[venue_location]
batting_team_id = team_id_map[batting_team]
bowling_team_id = team_id_map[bowling_team]

In [None]:
#during toss
# Batting-first summary for the batting team, overall and against the bowling team.
# NOTE: the "win%" entries are computed as wins / match_count, i.e. fractions,
# not values multiplied by 100.
output = match_insight_1(batting_team_id, bowling_team_id, years)
print("[avg 1st inning score, avg PP score, win% batting first, avg 1st score vs opp, PP score vs opp, win% batting first vs opposition]")
print(output)

# First-innings summary at the venue across all teams; the third value is a
# win fraction (wins / match_count), not a win count.
output1 = match_insights_venue1(venue_id, years)
print("List of 3 values at the venue while batting first[first innings avg score, PP avg score, wins while batting first]")
print(output1)

# Bowling-first summary for the bowling team; the "win%" entry is a fraction.
output2 = match_insight_2(bowling_team_id, years)
print("List of 3 values for bowling team while bowling first: [avg runs conceded, avg runs in PP, win% while bowling first]")
print(output2)




In [None]:
#during mid innings break

# Team-based innings-break insight; match_insight_3 multiplies by 100, so these
# two values are percentages.
output3 = match_insight_3(batting_team_id, bowling_team_id, years, total)
print("win % for batting team defending that total or less and win % for bowling first team to chase that total or more")
print(output3)

# Venue-based innings-break insight; match_insight_venue2 returns a tuple
# (wins / match_count, match_count), so the first value is a fraction and the
# second is the number of matches it is based on.
output4 = match_insight_venue2(venue_id, years, total)
print(" win % for team batting first at that venue defending that total or less")
print(output4)
