# NumPy and Pandas Notebook
Use this notebook to experiment with NumPy arrays and Pandas DataFrames.

## IPL Dataset

We load the IPL `matches` and `deliveries` datasets directly from GitHub and use them for all analyses below.

In [None]:
import pandas as pd

# Raw CSV locations of the two IPL tables on GitHub.
deliveries_url = "https://raw.githubusercontent.com/anirudh027/IPL-Analysis/master/deliveries.csv"
matches_url = "https://raw.githubusercontent.com/anirudh027/IPL-Analysis/master/matches.csv"

# Download and parse both files (requires network access); `matches` is read
# first, then `deliveries`.
matches, deliveries = (pd.read_csv(url) for url in (matches_url, deliveries_url))

# Show the first rows of the matches table as the cell's output.
matches.head()

## IPL Questions 1–18 (Analysis)

Run the cell below to compute answers for Questions 1–18 using the loaded IPL dataset.

In [None]:
# Q1: How many matches were played in the 2008 season?
is_2008 = matches["season"].eq(2008)
matches_2008 = matches.loc[is_2008]
print("Q1 - Total matches in 2008:", len(matches_2008))

# Q2 & Q3: Number of matches per city, plus the cities with the most and the
# fewest matches. value_counts() leaves out rows whose city is missing
# (pandas default dropna=True).
city_counts = matches["city"].value_counts()
print("\nQ2 & Q3 - City-wise match counts:")
print(city_counts)

# idxmax/idxmin return the city label; max/min return the matching count.
# When several cities share an extreme count, only the first label is reported.
max_city, max_city_matches = city_counts.idxmax(), city_counts.max()
min_city, min_city_matches = city_counts.idxmin(), city_counts.min()
print("\nCity with maximum matches:", max_city, "(", max_city_matches, ")")
print("City with minimum matches:", min_city, "(", min_city_matches, ")")

# Q4: How many times each toss winner chose each toss decision
print("\nQ4 - Toss decisions by team (bat/field):")

# Step 1: one count per (toss_winner, toss_decision) pair.
toss_pair_counts = matches.groupby(["toss_winner", "toss_decision"]).size()

# Step 2: pivot the decisions into columns. A team that never made a given
# choice gets 0 instead of NaN; rows are ordered by team name.
toss_decisions = toss_pair_counts.unstack(fill_value=0).sort_index()
print(toss_decisions)

# Q5: Number of matches for each value of the `result` column
print("\nQ5 - Match result counts (normal/tie/etc.):")
result_counts = matches["result"].value_counts()
print(result_counts)

# Q6: Every team that appears as team1 or team2 in a match whose result is "tie"
print("\nQ6 - Teams involved in tied matches:")
tie_matches = matches.loc[matches["result"].eq("tie")]
# A set removes duplicates when a team took part in more than one tie.
teams_in_ties = set(tie_matches["team1"]) | set(tie_matches["team2"])
print(teams_in_ties)

# Q7: Team(s) which won by the highest and lowest number of runs (run wins only).
# Rows with win_by_runs == 0 are excluded so they cannot be picked as the
# "lowest run win".
run_wins = matches[matches["win_by_runs"] > 0]
max_run = run_wins["win_by_runs"].max()
min_run = run_wins["win_by_runs"].min()

# Several matches can share the same margin, so keep every tied row rather
# than silently reporting only the first one.
max_run_matches = run_wins[run_wins["win_by_runs"] == max_run]
min_run_matches = run_wins[run_wins["win_by_runs"] == min_run]

# First tied row of each extreme, kept under the original names so any later
# code that reads max_run_match / min_run_match still works.
max_run_match = max_run_matches.iloc[0]
min_run_match = min_run_matches.iloc[0]

# Distinct winners at each extreme margin, alphabetised for stable output.
max_run_winners = ", ".join(sorted(max_run_matches["winner"].dropna().unique()))
min_run_winners = ", ".join(sorted(min_run_matches["winner"].dropna().unique()))
print("\nQ7 - Highest run win:", max_run, "runs by", max_run_winners)
print("Q7 - Lowest run win:", min_run, "runs by", min_run_winners)

# Q8: Mean, median and standard deviation of the win_by_runs column.
# NOTE: computed over the whole column, i.e. not restricted to the
# win_by_runs > 0 subset used for Q7.
win_by_runs = matches["win_by_runs"]
mean_runs, median_runs, std_runs = (
    win_by_runs.mean(),
    win_by_runs.median(),
    win_by_runs.std(),
)

print("\nQ8 - win_by_runs stats:")
for label, value in (("Mean:", mean_runs), ("Median:", median_runs), ("Std dev:", std_runs)):
    print(label, value)

# Q9: Venue(s) of the highest and lowest run wins.
# run_wins, max_run and min_run come from the Q7 step. Every match that shares
# the extreme margin is considered, so tied matches played at different venues
# are all reported instead of only the first one.
max_run_venues = run_wins.loc[run_wins["win_by_runs"] == max_run, "venue"]
min_run_venues = run_wins.loc[run_wins["win_by_runs"] == min_run, "venue"]

# Distinct venue names joined into one string, alphabetised for stable output.
max_run_venue = ", ".join(sorted(max_run_venues.dropna().unique()))
min_run_venue = ", ".join(sorted(min_run_venues.dropna().unique()))
print("\nQ9 - Venue of highest run win:", max_run_venue)
print("Q9 - Venue of lowest run win:", min_run_venue)

# Q10: Players named Player of the Match more than three times
print("\nQ10 - Players with Player of the Match awards > 3:")
pom_counts = matches["player_of_match"].value_counts()
pom_more_than_3 = pom_counts.loc[pom_counts.gt(3)]
print(pom_more_than_3)

# Q11: Deliveries on which batsman_runs equals 6 (only the first rows are shown)
print("\nQ11 - Sample of deliveries where batsman scored a six:")
sixes = deliveries.loc[deliveries["batsman_runs"].eq(6)]
print(sixes.head())

# Q12: Average of the per-match run totals, grouped by venue
print("\nQ12 - Average total runs per match for each venue:")

# Sum total_runs over all deliveries of a match -> one row per match_id.
match_totals = deliveries.groupby("match_id")["total_runs"].sum()
runs_per_match = match_totals.rename("match_total_runs").reset_index()

# `matches` identifies a match by "id"; rename it so both frames share the
# "match_id" key used for the join.
match_info = matches.loc[:, ["id", "venue"]].rename(columns={"id": "match_id"})

# Left join keeps every match that has deliveries, even if no venue row matches.
runs_with_venue = pd.merge(runs_per_match, match_info, on="match_id", how="left")

# Mean match total per venue, highest first.
venue_means = runs_with_venue.groupby("venue")["match_total_runs"].mean()
avg_runs_by_venue = venue_means.sort_values(ascending=False)
print(avg_runs_by_venue)

# Q13: Which umpire(s) appear most often across the umpire1/umpire2 columns
print("\nQ13 - Umpire match counts (top few and max):")

# Stack both umpire columns into a single Series so an appearance in either
# slot counts once, and discard missing names before counting.
umpires = pd.concat(
    [matches["umpire1"], matches["umpire2"]],
    ignore_index=True,
).dropna()
umpire_counts = umpires.value_counts()

# Keep every umpire tied at the maximum count, not just the first one.
max_umpire_count = umpire_counts.max()
top_umpires = umpire_counts.loc[umpire_counts.eq(max_umpire_count)]

print("All umpire counts (head):")
print(umpire_counts.head())
print("\nUmpire(s) with max matches:")
print(top_umpires)

# Q14: Number of matches in each season, in chronological order
print("\nQ14 - Matches per season:")
season_match_counts = matches["season"].value_counts()
matches_per_season = season_match_counts.sort_index()
print(matches_per_season)

# Q15: Total runs in each season.
# Reuses runs_per_match (one row per match_id with match_total_runs) from Q12.
print("\nQ15 - Total runs per season:")
season_info = matches.loc[:, ["id", "season"]].rename(columns={"id": "match_id"})

# Attach the season to each match total, then add the totals up per season.
runs_with_season = pd.merge(runs_per_match, season_info, on="match_id", how="left")
season_run_totals = runs_with_season.groupby("season")["match_total_runs"].sum()
runs_per_season = season_run_totals.sort_index()
print(runs_per_season)

# Q16: Total batsman_runs for every batsman; the ten highest are shown
print("\nQ16 - Top 10 batsmen by total runs:")
batsman_run_totals = deliveries.groupby("batsman")["batsman_runs"].sum()
runs_by_batsman = batsman_run_totals.sort_values(ascending=False)
print(runs_by_batsman.head(10))

# Q17: Wickets per bowler; the twenty highest counts are shown
print("\nQ17 - Wickets by bowler:")

# Only deliveries on which a player was dismissed are relevant.
dismissals = deliveries.dropna(subset=["player_dismissed"])

# Dismissal kinds that this analysis does not credit to the bowler.
non_bowler_dismissals = ["run out", "retired hurt", "obstructing the field"]
credited_to_bowler = ~dismissals["dismissal_kind"].isin(non_bowler_dismissals)
bowler_wickets = dismissals.loc[credited_to_bowler]

wickets_by_bowler = bowler_wickets["bowler"].value_counts()
print(wickets_by_bowler.head(20))

# Q18: Batting averages (top 10)
# Batting average = total batsman_runs / number of times the batsman was out.
print("\nQ18 - Top 10 batting averages (min 1 dismissal):")
runs_per_batsman = deliveries.groupby("batsman")["batsman_runs"].sum()

# Rows with dismissal_kind "retired hurt" carry a player_dismissed value, but
# the batsman is not out, so they must not be counted as dismissals; counting
# them would understate the average.
not_out_kinds = ["retired hurt"]
was_out = (
    deliveries["player_dismissed"].notna()
    & ~deliveries["dismissal_kind"].isin(not_out_kinds)
)
outs = (
    deliveries[was_out]
    .groupby("player_dismissed")["player_dismissed"]
    .count()
    .rename("outs")
)

# Align runs and outs on player name. A player missing from one side gets 0;
# players with no dismissal are dropped because their average is undefined.
# Built as one chain so no column is assigned onto a filtered frame.
batting = (
    pd.concat([runs_per_batsman, outs], axis=1)
    .fillna(0)
    .loc[lambda frame: frame["outs"] > 0]
    .assign(average=lambda frame: frame["batsman_runs"] / frame["outs"])
)

top_10_averages = batting.sort_values("average", ascending=False).head(10)
print(top_10_averages)