In [None]:
import pandas as pd # type: ignore
import numpy as np #type: ignore
import matplotlib as plot # type: ignore

In [None]:
# Load the match-level dataset from disk and preview its first rows.

matches_path = "matches.csv"
df = pd.read_csv(matches_path)
df.head()

In [None]:
# tally the values of the super_over column (how often a super over was played in the IPL)

super_over_flags = df["super_over"]
super_over_flags.value_counts()

In [None]:
# Treat "Rising Pune Supergiant" and "Rising Pune Supergiants" as the same team.
#
# The rename has to cover every column that stores a team name: the later cells
# compare `toss_winner` and `winner` against the names taken from `team1`, so
# leaving the old spelling in those two columns would silently drop rows from
# the per-team toss and win counts.
team_name_columns = ['team1', 'team2', 'toss_winner', 'winner']

# Scalar replace matches whole cell values only; missing values are left as they are.
df[team_name_columns] = df[team_name_columns].replace(
    "Rising Pune Supergiant", 'Rising Pune Supergiants'
)

In [None]:
# the ten players named player of the match most often

award_counts = df['player_of_match'].value_counts()
award_counts.iloc[:10]

In [None]:
# how many matches of each match type have been played

match_type_column = df['match_type']
match_type_column.value_counts()

In [None]:
# teams that have played in final with their counts

final_matches = df[df["match_type"] == "Final"]

# Each final contributes both of its sides to the tally.
teams_in_final = pd.concat([final_matches["team1"], final_matches["team2"]])
final_teams_count = teams_in_final.value_counts()

# Every team seen in either team column, so that teams without a final show up as 0.
all_teams = set(df["team1"]).union(set(df["team2"]))

# A set has no defined order, so reindex against a sorted list of labels;
# otherwise the row order (and therefore the order of tied teams below) can
# change from one kernel run to the next. key=str keeps the sort well-defined
# even if a non-string value is present in the team columns.
final_teams_count = final_teams_count.reindex(sorted(all_teams, key=str), fill_value=0)

# A stable sort keeps teams with equal counts in a reproducible order.
final_teams_count_sorted = final_teams_count.sort_values(ascending=False, kind="stable")

print(final_teams_count_sorted)

In [None]:
# the five venue names that occur most often
venue_counts = df['venue'].value_counts()
venue_counts.iloc[:5]

In [None]:
# preview the matches whose venue is recorded exactly as "Wankhede Stadium"
at_wankhede = df['venue'].eq("Wankhede Stadium")
df.loc[at_wankhede].head()

In [None]:
# preview the matches whose venue is recorded exactly as "Wankhede Stadium, Mumbai"
at_wankhede_mumbai = df['venue'].eq("Wankhede Stadium, Mumbai")
df.loc[at_wankhede_mumbai].head()

In [None]:
# Show the raw season labels first. A bare expression in the middle of a cell
# is not displayed by Jupyter, so print it explicitly.
print(df['season'].value_counts())

# replace years in wrong format
#
# Some seasons are written as a two-year span (e.g. '2007/08'). Map each span
# to a single year and then convert the whole column to integers, so the column
# does not end up holding a mix of strings and ints.
# NOTE(review): assumes every remaining season label is a plain year — confirm.
season_fixes = {'2009/10': '2010', '2007/08': '2008', '2020/21': '2020'}

# astype(str) first makes the cell safe to re-run after the column is already numeric.
df['season'] = df['season'].astype(str).replace(season_fixes).astype(int)

In [None]:
# umpires who stood in the most matches won by Mumbai Indians

mumbai_wins = df.loc[df['winner'].eq('Mumbai Indians')]

# ten most frequent first umpires; printed so both tables appear in the cell output
print(mumbai_wins['umpire1'].value_counts().iloc[:10])
# ten most frequent second umpires; shown as the cell's result
mumbai_wins['umpire2'].value_counts().iloc[:10]

In [None]:
# to check how much does winning the toss convert to victory

# Every team that appears in either team column, in first-seen order.
all_teams = pd.concat([df["team1"], df["team2"]]).dropna().unique().tolist()

d = {}  # NOTE(review): never populated in this cell — confirm whether a later cell still needs it

# Column width for aligned output. It is computed here because the only other
# definition lives in a later cell, which made this cell fail with a NameError
# on a fresh kernel.
max_team_name_length = max((len(team) for team in all_teams), default=0)

for team in all_teams:
    toss_won_mask = df["toss_winner"] == team
    # win the toss and win the match
    count_win = int((toss_won_mask & (df["winner"] == team)).sum())
    # win the toss but do not win the match (a missing winner also lands here,
    # because a missing value never compares equal to the team name)
    count_loss = int((toss_won_mask & (df["winner"] != team)).sum())
    tosses_won = count_win + count_loss
    # ratio of games won when toss is won against total games when toss is won;
    # a team that never won a toss reports 0 instead of dividing by zero
    conversion = round(count_win / tosses_won, 2) if tosses_won else 0
    print(f"{team.ljust(max_team_name_length)} : {conversion}")

In [None]:
# rank teams by the share of their matches in which they won the toss

total_matches_per_team = {}
total_toss_wins_per_team = {}

for team in all_teams:
    # a team plays a match when it is listed on either side of the fixture
    played_mask = df['team1'].eq(team) | df['team2'].eq(team)
    matches_played = len(df[played_mask])
    tosses_won = len(df[df['toss_winner'].eq(team)])
    total_matches_per_team[team] = matches_played
    total_toss_wins_per_team[team] = tosses_won

# highest toss-win share first
sorted_teams = sorted(
    total_toss_wins_per_team.items(),
    key=lambda pair: pair[1] / total_matches_per_team[pair[0]],
    reverse=True,
)
# width of the longest team name, used to align the printed column
max_team_name_length = max(map(len, total_matches_per_team))

for team, toss_wins in sorted_teams:
    toss_win_ratio = toss_wins / total_matches_per_team[team]
    print(f'{team:<{max_team_name_length}} : {toss_win_ratio:.3f}')

In [None]:
# teams ordered by number of matches played, most first
ranked_by_matches = sorted(
    total_matches_per_team.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
total_matches_per_team = dict(ranked_by_matches)
# width of the longest team name, used to align the printed column
max_team_name_length = max(map(len, total_matches_per_team))

for team, matches_played in total_matches_per_team.items():
    print(f'{team:<{max_team_name_length}} : {matches_played}')


In [None]:
# for each team, count the toss decisions recorded in the matches where it won the toss
team_toss_decisions = {}

for team in all_teams:
    decisions_when_toss_won = df.loc[df['toss_winner'].eq(team), 'toss_decision']
    team_toss_decisions[team] = decisions_when_toss_won.value_counts()

# print each team name followed by its decision counts
for team, toss_decisions in team_toss_decisions.items():
    print(team, toss_decisions, sep="\n")


In [None]:
# determine relationship between winning the toss and winning/losing

# Only matches with a recorded winner can be won or lost. A missing `winner`
# never equals the toss winner, so without this filter such rows would be
# counted as losses for the toss winner.
decided = df[df['winner'].notna()]

w = int((decided['toss_winner'] == decided['winner']).sum())
print(f'number of matches where toss winner wins the match {w}\n')

l = int((decided['toss_winner'] != decided['winner']).sum())
print(f'number of matches where toss winner loses the match {l}')

In [None]:
# For each team: the share of its toss wins that it also converted into a match win.
team_to_victory_ratio = {}

for team in all_teams:
    won_toss = df['toss_winner'] == team
    # Series.sum() counts the True values in one vectorised pass; the builtin
    # sum() would iterate over the Series element by element.
    toss_won = int(won_toss.sum())
    match_and_toss_won = int((won_toss & (df['winner'] == team)).sum())

    if toss_won != 0:  # to avoid division by zero
        ratio = round(match_and_toss_won / toss_won, 3)
    else:
        ratio = 0

    team_to_victory_ratio[team] = ratio

# highest conversion ratio first
team_to_victory_ratio_sorted = dict(sorted(team_to_victory_ratio.items(), key=lambda item: item[1], reverse=True))

# max_team_name_length (column width for alignment) is defined by an earlier cell
for team, ratio in team_to_victory_ratio_sorted.items():
    print(f'{team.ljust(max_team_name_length)} : {ratio:.3f}')


In [None]:
# matches with the largest result margins, biggest first
# NOTE(review): result_margin presumably mixes run and wicket margins — confirm
# that they are meant to be ranked together.

margin_view = ['result_margin', 'team1', 'team2', 'winner', 'date']
sorted_df = df.sort_values('result_margin', ascending=False)

sorted_df.loc[:, margin_view].head()
