In [2]:
# notebooks/1_Compendium_Data_Exploration.ipynb

import json
import pandas as pd
import os

# --- Configuration ---
# Path is relative to the notebook's location in the 'notebooks' folder.
file_path = "../data/compendium/data/COMPENDIUM.json"


def split_info(raw):
    """Separate the optional 'info' metadata from the compendium entries.

    Args:
        raw: The top-level dictionary loaded from the JSON file.

    Returns:
        A ``(info, entries)`` tuple. ``info`` is the value stored under the
        'info' key (``None`` when the key is absent); ``entries`` is a new
        dictionary holding every other key, in the original order.

    ``raw`` is left untouched so the loaded data stays complete and the cell
    can be re-run against the same object with identical results.
    """
    entries = {key: value for key, value in raw.items() if key != 'info'}
    return raw.get('info'), entries


def entries_to_records(entries):
    """Convert ``{entry_name: details}`` into a list of row dictionaries.

    Each row is a shallow copy of ``details`` whose 'name' field is set to the
    dictionary key (the key wins if ``details`` already has a 'name'). The
    input dictionaries are not modified.
    """
    return [{**details, 'name': name} for name, details in entries.items()]


print("--- Exploring Compendium Data ---")
print(f"Attempting to load file from: {file_path}\n")

# --- Step 1: Check if the file exists ---
if not os.path.exists(file_path):
    print(f"ERROR: File not found at '{file_path}'.")
    print("Please ensure you have downloaded the data and placed it in the correct directory.")
else:
    print("File found. Proceeding with analysis.\n")

    # --- Step 2: Load the raw JSON data and check its type ---
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            raw_data = json.load(f)

        print("Successfully loaded JSON data.")
        data_type = type(raw_data)
        print(f"The top-level structure of the JSON is a: {data_type}\n")

        # --- Step 3: Inspect the structure (Dictionary of Dictionaries) ---
        if isinstance(raw_data, dict):
            print(f"The file contains a dictionary with {len(raw_data)} top-level keys.")

            # Separate the 'info' key from the actual entries (non-destructively)
            info_data, entries_dict = split_info(raw_data)

            if info_data:
                print("\n--- Info Section ---")
                print(json.dumps(info_data, indent=2))

            print(f"\nFound {len(entries_dict)} compendium entries.")

            if entries_dict:
                # Get the first entry to inspect its structure
                first_entry_name = next(iter(entries_dict))
                first_entry_data = entries_dict[first_entry_name]

                # f-string so the entry name is actually interpolated into the header
                print(f"\n--- Structure of the First Entry ('{first_entry_name}') ---")
                print(json.dumps(first_entry_data, indent=2))

            # --- Step 4: Load into Pandas for a summary ---
            # Kept inside the dict branch: entries_dict only exists when the
            # top-level JSON value is a dictionary.
            print("\n\n--- Pandas DataFrame Summary ---")
            try:
                # One row per entry, with the entry's key copied into 'name'
                entries_list = entries_to_records(entries_dict)

                # Now create the DataFrame from the list
                df = pd.DataFrame(entries_list)

                print("DataFrame Info:")
                df.info()
                print("\nDataFrame Head (first 5 rows):")
                print(df.head())

            except Exception as e:
                print(f"An error occurred while creating the Pandas DataFrame: {e}")
        else:
            print("The top-level JSON value is not a dictionary; skipping entry inspection and DataFrame summary.")

    except json.JSONDecodeError:
        print("ERROR: The file is not a valid JSON file.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")





--- Exploring Compendium Data ---
Attempting to load file from: ../data/compendium/data/COMPENDIUM.json

File found. Proceeding with analysis.

Successfully loaded JSON data.
The top-level structure of the JSON is a: <class 'dict'>

The file contains a dictionary with 627 top-level keys.

--- Info Section ---
{
  "changes": "Added recipies"
}

Found 626 compendium entries.

--- Structure of the First Entry ('{first_entry_name}') ---
{
  "name": "Honeyvore Bear",
  "description": "This king among animals is dangerous game for even the most seasoned hunters. They'll attack anyone who wanders into their territory regardless of the wanderer's weaponry. As their name implies, they have a natural love of honey. Extreme caution is advised when you spot one, but if you're sneaky enough and maybe just a little bit crazy...you just may be able to ride one.",
  "image": "https://raw.githubusercontent.com/BustyBee/TotK-Compendium-DATA/main/images/Animal_Bear_A_Detail.png",
  "thumbnail": "https://