In [37]:
import os
import json
import glob
import pandas as pd
import polars as pl

In [2]:
def get_json_files(input_folder):
    """
    Returns a sorted list of paths to all JSON files in the specified folder.

    Only entries directly inside the folder whose names match "*.json" are
    returned; subfolders are not searched.

    Args:
        input_folder (str): Path to the folder to search for JSON files

    Returns:
        list: Full paths to the JSON files, sorted so the order is the same
            on every run. Empty if the folder does not exist, is not a
            folder, or contains no JSON files.
    """
    # Make sure the path is normalized
    input_folder = os.path.normpath(input_folder)

    # Check if folder exists
    if not os.path.exists(input_folder):
        print(f"Error: Folder '{input_folder}' does not exist")
        return []

    # A regular file would pass the existence check but can never contain JSON files
    if not os.path.isdir(input_folder):
        print(f"Error: '{input_folder}' is not a folder")
        return []

    # Escape the folder part so characters such as '[' or '*' in its name are
    # matched literally instead of being interpreted as glob wildcards.
    pattern = os.path.join(glob.escape(input_folder), "*.json")

    # glob returns matches in arbitrary order; sort for reproducible runs
    json_files = sorted(glob.glob(pattern))

    print(f"Found {len(json_files)} JSON files in '{input_folder}'")
    return json_files

In [3]:
def load_json_file(file_path):
    """
    Loads a JSON file into a Python object.

    The file is always decoded as UTF-8 rather than with the platform's
    default encoding, so non-ASCII text loads the same way on every machine.

    Args:
        file_path (str): Path to the JSON file

    Returns:
        dict or list: The loaded JSON data
        None: If there was an error loading the file (the reason is printed)
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found")
        return None
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON in '{file_path}': {e}")
        return None
    except Exception as e:
        # Deliberate best-effort: any other failure (permissions, bad bytes,
        # ...) is reported and signalled to the caller with None.
        print(f"Error loading '{file_path}': {e}")
        return None

In [44]:
# Build one flat record (dict) per match from the 'info' section of every JSON
# file in ipl_json_2025 and collect the records in match_df_list.

match_df_list = []

file_paths = get_json_files("ipl_json_2025")

for file_path in file_paths:
    match = load_json_file(file_path)
    if match is None:
        # load_json_file already printed why the file could not be read;
        # skip it instead of failing on match['info'] below.
        continue

    match_info = match['info']

    match_details = {}
    # basename works with any path separator, unlike splitting on "/"
    match_details['path_name'] = os.path.basename(file_path)
    # presumably some files carry no 'city'; record None rather than raising
    match_details['city'] = match_info.get('city')
    match_details['date'] = match_info['dates'][0]

    outcome = match_info.get('outcome', {})
    if 'winner' in outcome:
        match_details['winner'] = outcome['winner']
        margin = outcome.get('by')
        if isinstance(margin, dict):
            match_details['winning_margin'] = margin
            # The margin is recorded under either 'runs' or 'wickets'
            if 'runs' in margin:
                match_details['winning_margin_runs'] = margin['runs']
            elif 'wickets' in margin:
                match_details['winning_margin_wickets'] = margin['wickets']
    else:
        # No outright winner: fall back to the 'eliminator' entry when there
        # is one (None otherwise) and print the file so it can be inspected.
        match_details['winner'] = outcome.get('eliminator')
        print(file_path)

    # 'player_of_match' is a list; take its first entry, or None when the
    # field is missing or empty.
    players_of_match = match_info.get('player_of_match') or [None]
    match_details['player_of_match'] = players_of_match[0]
    match_details['venue'] = match_info['venue']
    match_details['team1'] = match_info['teams'][0]
    match_details['team2'] = match_info['teams'][1]
    match_details['toss_winner'] = match_info['toss']['winner']
    match_details['toss_decision'] = match_info['toss']['decision']
    match_df_list.append(match_details)

Found 33 JSON files in 'ipl_json_2025'
ipl_json_2025/1473469.json


In [45]:
# Turn the per-match records into a table and persist it as CSV and Parquet.
match_df = pd.DataFrame(match_df_list)

# to_csv / to_parquet do not create missing parent folders, so make sure the
# output folder exists before writing.
os.makedirs("data_2025", exist_ok=True)
match_df.to_csv("data_2025/match_info.csv", index=False)
match_df.to_parquet("data_2025/match_info.parquet", index=False)

# Kept as the last expression so the notebook actually renders the preview.
match_df.head()

In [None]:
# file_path = file_paths[-1]

# NOTE(review): this cell has no execution count and repeats the setup lines
# of the extraction cell. Running it resets match_df_list to an empty list
# and rebinds file_paths to a fresh get_json_files() listing.
match_df_list = []

file_paths = get_json_files("ipl_json_2025")


In [36]:
# Display the match_details dict currently held by the kernel, i.e. whichever
# record the extraction loop built last.
# NOTE(review): relies on leftover kernel state — confirm the loop cell has run.
match_details

{'path_name': '1473469.json',
 'city': 'Delhi',
 'date': '2025-04-16',
 'winner': 'Delhi Capitals',
 'player_of_match': 'MA Starc',
 'venue': 'Arun Jaitley Stadium, Delhi',
 'team1': 'Delhi Capitals',
 'team2': 'Rajasthan Royals',
 'toss_winner': 'Rajasthan Royals',
 'toss_decision': 'field'}

In [28]:
# Look up the 'eliminator' entry in the outcome of the current match_info.
# NOTE(review): match_info is leftover kernel state, and this raises KeyError
# when the outcome has no 'eliminator' key.
match_info['outcome']['eliminator']

'Delhi Capitals'

In [35]:
# Drill step by step into the first innings of `match` to see how overs and
# deliveries are nested. Only the value of the last expression is rendered by
# the notebook; the earlier lines are evaluated and their results discarded.
# NOTE(review): `match` is leftover kernel state — presumably the last file
# loaded by the extraction loop; confirm before relying on it.
match['innings'][0].keys()
# match['innings'][0]['team']
match['innings'][0]['overs'][0].keys()
match['innings'][0]['overs'][0]['deliveries']
match['innings'][0]['overs'][0]['deliveries'][0].keys()
# Run breakdown recorded for the first delivery of the first over
match['innings'][0]['overs'][0]['deliveries'][0]['runs']

{'batter': 0, 'extras': 1, 'total': 1}