In [None]:
# Import necessary libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Tally how often each venue appears in match_info and keep the ten most common
venue_counts = match_info['venue'].value_counts()
top_venues = venue_counts.iloc[:10]

# Horizontal bar chart: venue names on the y-axis, their counts on the x-axis
plt.figure(figsize=(12, 6))
venue_ax = sns.barplot(x=top_venues.values, y=top_venues.index)
venue_ax.set_title('Top 10 Most Used Venues')
venue_ax.set_xlabel('Matches Played')
venue_ax.set_ylabel('Venue')

# Count how often each name appears in the player_of_match column and keep
# the ten most frequent
award_counts = match_info['player_of_match'].value_counts()
top_players = award_counts.iloc[:10]

# Horizontal bar chart: player names on the y-axis, their counts on the x-axis
plt.figure(figsize=(12, 6))
players_ax = sns.barplot(x=top_players.values, y=top_players.index)
players_ax.set(
    title='Top Player of the Match Winners',
    xlabel='Matches Won',
    ylabel='Players',
)

# One histogram (30 bins, with a KDE overlay) per win-margin column.
# Each entry is (column in match_info, figure title, x-axis label).
win_margin_plots = (
    ('win_by_runs', 'Distribution of Matches Won by Runs', 'Runs'),
    ('win_by_wickets', 'Distribution of Matches Won by Wickets', 'Wickets'),
)

for margin_column, margin_title, margin_label in win_margin_plots:
    # Every distribution gets its own figure so the plots do not overlap
    plt.figure(figsize=(10, 6))
    sns.histplot(match_info[margin_column], bins=30, kde=True)
    plt.title(margin_title)
    plt.xlabel(margin_label)
    plt.ylabel('Frequency')

# Number of rows in match_info for each distinct toss decision
toss_decision = match_info['toss_decision'].value_counts()

# Vertical bar chart: one bar per decision, height = number of occurrences
plt.figure(figsize=(8, 6))
toss_ax = sns.barplot(x=toss_decision.index, y=toss_decision.values)
toss_ax.set(
    title='Toss Decision Frequency',
    xlabel='Toss Decision',
    ylabel='Frequency',
)

# Pairwise correlations between the numeric columns of match_info;
# numeric_only=True leaves non-numeric columns out of the computation
correlation_matrix = match_info.corr(numeric_only=True)

# Annotated heatmap of the matrix using a diverging colour map
heatmap_options = {'annot': True, 'cmap': 'coolwarm'}
plt.figure(figsize=(10, 6))
sns.heatmap(correlation_matrix, **heatmap_options)
plt.title('Correlation Heatmap')

# Sum runs_off_bat for every striker in match_data, then keep the ten
# highest totals (largest first)
runs_by_striker = match_data.groupby('striker')['runs_off_bat'].sum()
batsmen_total_runs = runs_by_striker.sort_values(ascending=False).head(10)

# Horizontal bar chart: striker names on the y-axis, run totals on the x-axis
plt.figure(figsize=(12, 6))
sns.barplot(x=batsmen_total_runs.values, y=batsmen_total_runs.index)
# Title previously read "Batsmens"; "Batsmen" is already the plural form
plt.title('Top 10 Batsmen in ODIs')
plt.xlabel('Scores')
plt.ylabel('Batsman')

# Keep only the deliveries that recorded a wicket_type, count them per bowler,
# and retain the ten highest counts (largest first).
# NOTE(review): every non-null wicket_type is credited to the bowler here;
# confirm whether dismissal kinds not normally attributed to a bowler
# (e.g. run outs) should be excluded.
wicket_deliveries = match_data[match_data['wicket_type'].notna()]
top_bowlers_wickets = (
    wicket_deliveries
    .groupby('bowler')['wicket_type']
    .count()
    .sort_values(ascending=False)
    .head(10)
)

# Horizontal bar chart: bowler names on the y-axis, wicket counts on the x-axis
plt.figure(figsize=(12, 6))
bowlers_ax = sns.barplot(x=top_bowlers_wickets.values, y=top_bowlers_wickets.index)
bowlers_ax.set_title('Top 10 Wicket Takers in ODIs')
bowlers_ax.set_xlabel('Wickets')
bowlers_ax.set_ylabel('Bowler')

# Plain Python lists of the names that index the two top-10 Series above
top_10_batsmen = list(batsmen_total_runs.index)
top_10_bowlers = list(top_bowlers_wickets.index)


# Team pairs whose head-to-head records are compared
# (for example: India vs Pakistan, Australia vs England)
team_rivarlies = [('India', 'Pakistan'), ('Australia', 'England')]

# Maps "<team1> vs <team2>" to {'Wins': [team1 wins, team2 wins]}
rivarly_performance = {}

for team1, team2 in team_rivarlies:
    # Select matches between the two sides regardless of which one is
    # stored in the team1 column and which in the team2 column
    listed_in_order = (match_info['team1'] == team1) & (match_info['team2'] == team2)
    listed_reversed = (match_info['team1'] == team2) & (match_info['team2'] == team1)
    head_to_head_matches = match_info[listed_in_order | listed_reversed]

    # Count the selected matches whose 'winner' is each side; rows with any
    # other 'winner' value are counted for neither team
    team1_wins = len(head_to_head_matches[head_to_head_matches['winner'] == team1])
    team2_wins = len(head_to_head_matches[head_to_head_matches['winner'] == team2])

    rivarly_performance[f'{team1} vs {team2}'] = {'Wins': [team1_wins, team2_wins]}

# One row per rivalry, with a single 'Wins' column holding the two-item list
rivarly_df = pd.DataFrame(rivarly_performance).transpose()

# Split the two-item list into one numeric column per side
rivarly_df['Team1 Wins'] = [wins[0] for wins in rivarly_df['Wins']]
rivarly_df['Team2 Wins'] = [wins[1] for wins in rivarly_df['Wins']]

# Stacked bars: one bar per rivalry, split into the two sides' win counts
rivalry_ax = rivarly_df[['Team1 Wins', 'Team2 Wins']].plot.bar(stacked=True, figsize=(12, 6))
rivalry_ax.set_title('Head-to-Head Performance Between Teams')
rivalry_ax.set_xlabel('Rivalry')
rivalry_ax.set_ylabel('Number of Wins')
plt.xticks(rotation=0)  # keep the rivalry labels horizontal
rivalry_ax.legend(title='Wins', loc='best')

# Per-match toss decision and winner columns.
# Note: this rebinds toss_decision from the value counts computed earlier
# to the raw column.
toss_decision = match_info['toss_decision']
winner = match_info['winner']

total_matches = len(toss_decision)

# Row-wise conditions combined below.
# NOTE(review): a 'bat' decision is counted as a win only when team1 won, and
# a 'field' decision only when team2 won — presumably this assumes a fixed
# relationship between team1/team2 and the toss winner; confirm against the
# dataset (e.g. a toss_winner column, if one exists).
chose_bat = toss_decision == 'bat'
chose_field = toss_decision == 'field'
team1_won = winner == match_info['team1']
team2_won = winner == match_info['team2']

bat_wins = sum(chose_bat & team1_won)
field_wins = sum(chose_field & team2_won)

# Both percentages are taken over ALL matches, not over the matches in which
# that particular decision was made
bat_win_percentage = (bat_wins / total_matches) * 100
field_win_percentage = (field_wins / total_matches) * 100

# Inputs for the pie chart
win_percentages = [bat_win_percentage, field_win_percentage]
labels = ['Bat', 'Field']
colors = ['#66b3ff', '#ff9999']  # Optional color palette

# Pie chart of the two values, wedges outlined in black
wedge_style = {'edgecolor': 'black'}
plt.figure(figsize=(8, 6))
plt.pie(
    win_percentages,
    labels=labels,
    autopct='%1.1f%%',
    colors=colors,
    startangle=90,
    wedgeprops=wedge_style,
)
plt.title('Win Percentage by Toss Decision')
plt.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle
plt.show()

# Columns used for the date-based analysis below
selected_columns = ['date', 'venue', 'result']

# Take an explicit copy: assigning new columns onto a slice of match_info
# raises pandas' SettingWithCopyWarning, whereas a copy is safe to modify
subset_data = match_info[selected_columns].copy()

# Parse the dates, then compute the time to the NEXT row's date, in hours.
# The last row has no following row, so its value is NaN.
# NOTE(review): despite the column name and plot labels, this is the gap
# between consecutive rows' dates rather than the length of a single match,
# and it can be negative if the rows are not sorted by date — confirm that
# this is the intended metric.
subset_data['date'] = pd.to_datetime(subset_data['date'])
next_row_date = subset_data['date'].shift(-1)
subset_data['match_duration_hours'] = (
    (next_row_date - subset_data['date']).dt.total_seconds() / 3600
)

# Histogram (20 bins, with a KDE overlay) of the computed hour values
plt.figure(figsize=(10, 6))
sns.histplot(subset_data['match_duration_hours'], bins=20, kde=True, color='skyblue')
plt.title('Distribution of Match Durations')
plt.xlabel('Match Duration (hours)')
plt.ylabel('Frequency')

