In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

In [2]:
data = pd.read_csv('IPL_Matches_2008_2022.csv')
data.head()
data = data[data['Date'] > '2013-01-01']

data = data[['ID', 'Venue', 'City', 'Team1', 'Team2', 'TossWinner', 'TossDecision', 'WinningTeam', 'WonBy', "Margin"]]
data.head()

data_2023 = pd.read_csv('each_match_records_2023.csv')
data_2023 = data_2023[['team1', 'team2', 'winner']]
data_2023 = data_2023.rename(columns={'team1':'Team1', 'team2':'Team2', 'winner':'WinningTeam'})

data = data[['Team1', 'Team2', 'WinningTeam']]
df = pd.concat([data, data_2023])
df = df.replace({'Delhi Daredevils': 'Delhi Capitals',\
                  'Kings XI Punjab': 'Punjab Kings',
                  'Rising Pune Supergiant': 'Rising Pune Supergiants'})
df = df.drop(df[df.apply(lambda row: 'Pune Warriors' in row.values, axis=1)].index)
df = df.drop(df[df.apply(lambda row: 'Gujarat Lions' in row.values, axis=1)].index)
df = df.drop(df[df.apply(lambda row: 'Rising Pune Supergiants' in row.values, axis=1)].index)

df_copy = df.copy()
df_copy = df_copy.rename(columns = {'Team1':'Team2', 'Team2':'Team1'})
df = pd.concat([df, df_copy])

In [3]:

# Group the dataframe by team pairs and calculate the win/loss percentages for each team pair
team_pairs = df.groupby(['Team1', 'Team2']).size().reset_index(name='TotalGames')
team_wins = df[(df['WinningTeam'] == df['Team1'])].groupby(['Team1', 'Team2']).size().reset_index(name='Wins')
team_pairs = pd.merge(team_pairs, team_wins, on=['Team1', 'Team2'], how='left')
team_pairs['Losses'] = team_pairs['TotalGames'] - team_pairs['Wins']
team_pairs['WinPercentage'] = (team_pairs['Wins'] / team_pairs['TotalGames']) * 100
team_pairs['LossPercentage'] = (team_pairs['Losses'] / team_pairs['TotalGames']) * 100
team_pairs[team_pairs['Team1'] == 'Chennai Super Kings']

# Round the win/loss percentages to two decimal places
team_pairs['WinPercentage'] = team_pairs['WinPercentage'].round(2)
team_pairs['LossPercentage'] = team_pairs['LossPercentage'].round(2)

# Pivot the data to create a new dataframe with teams as rows and columns, and fill missing values with 0
win_loss_ratio = team_pairs.pivot_table(values='WinPercentage', index='Team1', columns='Team2', fill_value=0)
# Convert win/loss ratio dataframe to node-link JSON format
node_link_data = {
    'nodes': [],
    'links': []
}
teams = {}
# Create nodes
for idx, team in enumerate(win_loss_ratio.columns):
    teams[team] = idx + 1
    node = {'id': idx + 1, 'team': team}
    node_link_data['nodes'].append(node)

# Create links
for i, team1 in enumerate(win_loss_ratio.index):
    for j, team2 in enumerate(win_loss_ratio.columns):
        if i != j:
            win_ratio = win_loss_ratio.iloc[i, j]
            link = {'source': teams[team1], 'target': teams[team2], 'win_ratio': win_ratio}
            node_link_data['links'].append(link)

unique_data = []
unique_combinations = set()

for entry in node_link_data['links']:
    source = entry['source']
    target = entry['target']
    reverse_combination = (target, source)

    if (source, target) not in unique_combinations and reverse_combination not in unique_combinations:
        unique_data.append(entry)
        unique_combinations.add((source, target))

node_link_data['links'] = unique_data

In [4]:
# Save the node-link JSON data to a file
import json
with open('node_link_data.json', 'w') as f:
    json.dump(node_link_data, f, indent=4)

In [5]:
data_2023 = pd.read_csv('each_match_records_2023.csv')
data_2023 = data_2023[['team1', 'team2', 'winner_runs','winner', 'winner_wickets']]
data_2023 = data_2023.rename(columns={'team1':'Team1', 'team2':'Team2', 'winner':'WinningTeam'})
data_2023 = data_2023.groupby('WinningTeam').mean().rename(columns={'winner_runs':'Runs', 'winner_wickets':'Wickets'}).reset_index()

In [6]:
data = pd.read_csv('IPL_Matches_2008_2022.csv')
data = data[data['WonBy'] != 'SuperOver']
data = data[data['Date'] > '2013-01-01']

data = data[['Team1', 'Team2', 'WinningTeam', 'WonBy', "Margin"]]
data = data.replace({'Delhi Daredevils': 'Delhi Capitals',\
                  'Kings XI Punjab': 'Punjab Kings',
                  'Rising Pune Supergiant': 'Rising Pune Supergiants'})
data = data.pivot_table(index = 'WinningTeam', columns='WonBy', values = 'Margin', aggfunc='mean').reset_index()
data = data.replace({'Delhi Daredevils': 'Delhi Capitals',\
                  'Kings XI Punjab': 'Punjab Kings',
                  'Rising Pune Supergiant': 'Rising Pune Supergiants'})
data = data.drop(data[data.apply(lambda row: 'Pune Warriors' in row.values, axis=1)].index)
data = data.drop(data[data.apply(lambda row: 'Gujarat Lions' in row.values, axis=1)].index)
data = data.drop(data[data.apply(lambda row: 'Rising Pune Supergiants' in row.values, axis=1)].index)
data.iloc[0]

WonBy
WinningTeam    Chennai Super Kings
Runs                      36.53125
Wickets                   5.738095
Name: 0, dtype: object

In [7]:
tabular = []
for i in range(len(data)):
    stuff = data.iloc[i]
    team = stuff['WinningTeam']
    runs = stuff['Runs'].round(2)
    wickets = stuff['Wickets'].round(2)
    tabular.append({'Team':team, 'Runs':runs, 'Wickets':wickets})

with open("tabular.json", "w") as final:
   json.dump(tabular, final)

In [8]:
# map stuff
data = pd.read_csv('IPL_Matches_2008_2022.csv')
data_2023 = pd.read_csv('each_match_records_2023.csv')
print(data_2023.columns)
data = data[['City', 'Season', 'Team1', 'Team2','TossWinner','TossDecision', 'WinningTeam']]

data_2023 = data_2023[['season', 'location', 'team1', 'team2', 'toss_won', 'toss_decision', 'winner']]
data_2023 = data_2023.rename(columns = {'season': "Season", 'location':'City', 'team1':'Team1', 'team2':'Team2', 'toss_won':'TossWinner', 'toss_decision':'TossDecision', 'winner':'WinningTeam'})
data = pd.concat([data, data_2023])

Index(['season', 'date', 'match_number', 'match_type', 'venue', 'location',
       'team1', 'team2', 'toss_won', 'toss_decision', 'umpire1', 'umpire2',
       'reserve_umpire', 'match_referee', 'winner', 'winner_runs',
       'winner_wickets', 'man_of_match'],
      dtype='object')


In [11]:
def isBatWin(df):
    df['isBatWin'] = ((df['TossDecision'] == 'bat') & (df['WinningTeam'] == df['TossWinner']) | (df['TossDecision'] == 'field') & (df['WinningTeam'] != df['TossWinner']))
    df['isFieldWin'] = df['isBatWin'] == False
    return df

data = isBatWin(data)
cities = data['City'].unique()
city_to_states = {'Ahmedabad': 'Gujarat',
 'Kolkata': 'West Bengal',
 'Mumbai': 'Maharashtra',
 'Navi Mumbai': 'Maharashtra',
 'Pune': 'Maharashtra',
 'Delhi': 'Delhi',
 'Chennai': 'Tamil Nadu',
 'Hyderabad': 'Telangana',
 'Visakhapatnam': 'Andhra Pradesh',
 'Chandigarh': 'Punjab',
 'Bengaluru': 'Karnataka',
 'Jaipur': 'Rajasthan',
 'Indore': 'Madhya Pradesh',
 'Bangalore': 'Karnataka',
 'Kanpur': 'Uttar Pradesh',
 'Rajkot': 'Gujarat',
 'Raipur': 'Chattisgarh',
 'Ranchi': 'Jarkhand',
 'Cuttack': 'Odisha',
 'Dharamsala': 'Himachal Pradesh',
 'Kochi': 'Kerala',
 'Nagpur': 'Maharashtra',
 'Lucknow': 'Uttar Pradesh',
 'Guwahati': 'Assam'}
data['City'].replace(city_to_states, inplace = True)
data['City'].unique()

drop_cities = ['Dubai', 'Sharjah',
       'Abu Dhabi', np.nan, 'Johannesburg',
       'Centurion', 'Durban', 'Bloemfontein', 'Port Elizabeth',
       'Kimberley', 'East London', 'Cape Town']

data = data[~data['City'].isin(drop_cities)]
data = data.replace({True:1.0, False:0.0})

In [12]:
map_dataset = []
for i in range(len(data)):
    map_data = {}
    map_data['State'] = data.iloc[i]['City']
    map_data['Season'] = data.iloc[i]['Season']
    map_data['isBatWin'] = data.iloc[i]['isBatWin']
    map_data['isFieldWin'] = data.iloc[i]['isFieldWin']
    map_dataset.append(map_data)


In [14]:
with open("map_dataset.json", "w") as final:
   json.dump(map_dataset, final)

In [21]:
data[data['City'] == 'Maharashtra'].groupby(['Season', 'City']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,isBatWin,isFieldWin
Season,City,Unnamed: 2_level_1,Unnamed: 3_level_1
2023,Maharashtra,0.285714,0.714286
2007/08,Maharashtra,0.3,0.7
2009/10,Maharashtra,0.625,0.375
2011,Maharashtra,0.3125,0.6875
2012,Maharashtra,0.529412,0.470588
2013,Maharashtra,0.75,0.25
2014,Maharashtra,0.428571,0.571429
2015,Maharashtra,0.733333,0.266667
2016,Maharashtra,0.125,0.875
2017,Maharashtra,0.4,0.6
