In [3]:
# Imports and Paths

import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from matplotlib.ticker import FuncFormatter

# Input folder that is scanned for match JSON files, and the CSV this
# notebook writes its parsed rows to.
JSON_DIR = "../data/raw/IPL_JSON"
OUT_CSV = "../data/processed/ipl_matches_raw.csv"

# Derive the output folder from OUT_CSV itself so the directory that gets
# created can never drift from the path that is later written to.
os.makedirs(os.path.dirname(OUT_CSV), exist_ok=True)

In [8]:
# Parse All JSON Files
#
# Walk every *.json file in JSON_DIR and keep one {"season", "winner"} row
# for each file that has both a date and a declared winner.

records = []      # one {"season": int, "winner": str} dict per kept match
skipped = 0       # readable files lacking a date or a winner
errors = 0        # files that raised while being read or parsed
error_files = []  # (file name, error text) for every failure counted in `errors`

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore the saved CSV) identical from run to run.
for file in sorted(os.listdir(JSON_DIR)):
    if not file.endswith(".json"):
        continue
    try:
        with open(os.path.join(JSON_DIR, file), "r", encoding="utf-8") as f:
            match = json.load(f)
        info = match.get("info", {})
        dates = info.get("dates", [])
        # Season = first four characters of the first listed date
        # (assumes dates are year-first, e.g. YYYY-MM-DD -- TODO confirm).
        season = int(str(dates[0])[:4]) if dates else None

        outcome = info.get("outcome", {})
        winner = outcome.get("winner")

        if season and winner:
            records.append({"season": season, "winner": winner})
        else:
            # No date or no "winner" key in the outcome: not usable here.
            skipped += 1
    except Exception as exc:
        # Best-effort: one bad file must not abort the whole run, but keep
        # enough detail to find and fix it afterwards.
        errors += 1
        error_files.append((file, f"{type(exc).__name__}: {exc}"))

# Explicit columns so the frame has the expected schema even when no file
# produced a row (an empty list would otherwise give a column-less frame).
df = pd.DataFrame(records, columns=["season", "winner"])
print("Rows:", len(df), "| Skipped:", skipped, "| Errors:", errors)
for bad_file, reason in error_files:
    print("  failed:", bad_file, "->", reason)
df.head()

Rows: 1146 | Skipped: 23 | Errors: 0


Unnamed: 0,season,winner
0,2017,Sunrisers Hyderabad
1,2017,Rising Pune Supergiant
2,2017,Kolkata Knight Riders
3,2017,Kings XI Punjab
4,2017,Royal Challengers Bangalore


In [9]:
# Write the parsed match rows to OUT_CSV, leaving the row index out of the
# file, then echo the destination path.

df.to_csv(path_or_buf=OUT_CSV, index=False)
print("Saved:", OUT_CSV)

Saved: ../data/processed/ipl_matches_raw.csv


In [10]:
# Show the column labels of the parsed frame as a quick schema check.
df.columns

Index(['season', 'winner'], dtype='object')