In [None]:
import pandas as pd
import numpy as np
import json

In [None]:
# Read the raw player-performance JSON file and parse it into `data`.
with open("player_performance.json", mode="r", encoding="utf-8") as json_file:
    data = json.loads(json_file.read())

# Report how many top-level entries were loaded, then preview the first ten keys.
print(f"{len(data)} players found.")
[*data.keys()][:10]

In [None]:
# Flatten the nested mapping into one row per (player, season) pair.
# Each entry of `data` maps a player key to a mapping of season key -> stats;
# the season key is converted to int and the stats are merged into the row
# (a stats key named "player_name" or "season" would overwrite those fields).
rows = []
for player, pdata in data.items():
    for season, stats in pdata.items():
        row = {"player_name": player, "season": int(season)}
        row.update(stats)
        rows.append(row)

df = pd.DataFrame(rows)

# `info()` prints its summary itself, so it can run mid-cell. `head()` only
# renders when it is the cell's final expression, so it must come last;
# placed before `info()` its result was silently discarded.
df.info()
df.head()

In [None]:
# Keep only rows whose season lies in the 2023-2025 window (both ends inclusive).
df = df.loc[df["season"].ge(2023) & df["season"].le(2025)]
# Show which seasons remain after the filter.
print(pd.unique(df["season"]))

In [None]:
# Drop rows with no matches played: keep only rows where Total_Matches is
# strictly positive (rows with a missing Total_Matches are dropped as well,
# since a missing value never compares greater than zero).
df = df.loc[df["Total_Matches"].gt(0)]
df.shape

In [None]:
# Replace missing values in every numeric column with 0.
# Non-numeric columns are left untouched.
numeric_cols = df.select_dtypes(include=np.number).columns
df[numeric_cols] = df.loc[:, numeric_cols].fillna(value=0)

In [None]:
# Normalise player names: trim surrounding whitespace, then title-case.
# Note that title-casing lowercases everything after the first letter of each
# word, so all-caps initials such as "RG" become "Rg".
df = df.assign(player_name=df["player_name"].str.strip().str.title())

In [None]:
# Spot-check: show every row whose player_name contains "Rg Sharma".
df.loc[df["player_name"].str.contains('Rg Sharma')]

In [None]:
# Write the cleaned frame to CSV, omitting the row index from the output file.
df.to_csv(
    path_or_buf="final_clean_ipl_2023_2025.csv",
    index=False,
)