In [None]:
import json
import pandas as pd
import os

# --- Configuration ---
# Input: JSON export that is read and parsed by the processing step below.
json_file_path = '/Users/milangabriel/Downloads/gab-score-default-rtdb-export.json'
# Output: CSV destination. A relative path such as this one is resolved against
# the current working directory; use an absolute path to pin the location.
csv_output_path = 'firebase_deliveries_output_cleaned.csv'

# --- Columns ---
# Columns dropped from the DataFrame before the final column selection.
columns_to_remove = [
    'localTimestamp',
    'timestamp',
    'match_id_from_key',
    'firebase_delivery_key',
    'outcomeSymbol',
]
# Exact column layout of the exported CSV, in output order.
desired_column_order = [
    'matchId',
    'date',
    'battingTeam',
    'bowlingTeam',
    'deliveryInningsSerial',
    'over',
    'ball',
    'striker',
    'nonStriker',
    'bowler',
    'runsOffBat',
    'extrasRuns',
    'extrasType',
    'isNoBall',
    'isWide',
    'isWicket',
    'wicketType',
    'playerDismissed',
]

# --- Main Processing Logic ---
def flatten_deliveries(match_deliveries):
    """Flatten a ``{match_id: {delivery_key: record}}`` mapping into a list.

    Each returned record is a shallow copy of the source record with two
    extra keys, ``'match_id_from_key'`` and ``'firebase_delivery_key'``,
    holding the outer and inner mapping keys it was found under. The input
    mapping and its records are not modified.

    Args:
        match_deliveries: Mapping of match id to a mapping of delivery key
            to delivery record (a dict).

    Returns:
        A list of dicts, one per delivery record, in iteration order.
        Matches whose value is not a dict, and deliveries whose value is
        not a dict, are skipped silently.
    """
    records = []
    for match_id, deliveries in match_deliveries.items():
        if not isinstance(deliveries, dict):
            # Skipped on purpose without a warning.
            continue
        records.extend(
            {
                **delivery,
                'match_id_from_key': match_id,
                'firebase_delivery_key': delivery_key,
            }
            for delivery_key, delivery in deliveries.items()
            if isinstance(delivery, dict)
        )
    return records


def clean_deliveries_frame(frame, drop_columns, column_order):
    """Return a copy of ``frame`` restricted to ``column_order``.

    Args:
        frame: DataFrame of raw delivery records.
        drop_columns: Column names to discard first; names that are not
            present in ``frame`` are ignored.
        column_order: Column names of the result, in output order. Columns
            that are missing after the drop are added and filled with None.

    Returns:
        A new DataFrame whose columns are exactly ``column_order``. Any other
        column of ``frame`` is left out. ``frame`` itself is not modified.
    """
    # drop() returns a new object, so the column assignments below never
    # touch the caller's DataFrame.
    cleaned = frame.drop(columns=list(drop_columns), errors='ignore')
    for name in column_order:
        if name not in cleaned.columns:
            cleaned[name] = None
    return cleaned[list(column_order)]


all_deliveries_list = []
df = pd.DataFrame()  # Stays empty unless deliveries are loaded successfully

try:
    # --- Load the JSON data ---
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # --- Extract and Flatten Deliveries ---
    # Guard the top-level type as well: valid JSON may be a list, string,
    # number or null, none of which can carry the 'matchDeliveries' key.
    match_deliveries = data.get('matchDeliveries') if isinstance(data, dict) else None
    if isinstance(match_deliveries, dict):
        all_deliveries_list = flatten_deliveries(match_deliveries)
    else:
        print("ERROR: Top-level key 'matchDeliveries' not found or is not a dictionary in the JSON file.")

    # --- Create, Clean and Reorder the DataFrame ---
    # With no deliveries, df is left as the empty DataFrame and nothing is
    # reported here.
    if all_deliveries_list:
        df = clean_deliveries_frame(
            pd.DataFrame(all_deliveries_list),
            columns_to_remove,
            desired_column_order,
        )

except FileNotFoundError:
    print(f"ERROR: JSON file not found at '{json_file_path}'. Please check the path.")
except json.JSONDecodeError:
    print(f"ERROR: Could not decode JSON from '{json_file_path}'. Is the file valid JSON?")
except Exception as e:
    # Top-level boundary of the script: report the problem and carry on with
    # whatever df holds (the empty DataFrame unless cleaning completed).
    print(f"An unexpected error occurred during data loading or processing: {e}")

# --- Save to CSV ---
# An empty DataFrame is skipped silently: no CSV file is written and nothing
# is printed.
if not df.empty:
    try:
        # index=False prevents pandas from writing the DataFrame index as a column
        df.to_csv(csv_output_path, index=False, encoding='utf-8')
    except Exception as e:
        print(f"ERROR: Could not save DataFrame to CSV '{csv_output_path}': {e}")
    else:
        # Reported outside the try body so that only a failed write is ever
        # labeled as a save error.
        print(f"Successfully saved cleaned DataFrame to CSV: {csv_output_path}")

# End of script. The 'df' variable holds the final DataFrame in memory if needed,
# but the primary goal was to save the CSV.

Successfully saved cleaned DataFrame to CSV: firebase_deliveries_output_cleaned.csv
