# Install Required Libraries
Use pip to install the necessary libraries, such as matplotlib and pandas.

In [None]:
# Install Required Libraries
!pip install matplotlib pandas

# Import Required Libraries
Import the necessary libraries, including pandas and matplotlib.

In [None]:
# Import Required Libraries

import pandas as pd  # Importing pandas for data manipulation and analysis
import matplotlib.pyplot as plt  # Importing matplotlib for data visualization

# Load Data Files
Load every CSV file in the `data/processed/disney_lorcana` directory into its own pandas DataFrame, keyed by file name (other file types are skipped).

In [None]:
import os  # Importing os for directory and file operations

# Define the directory containing the data files
data_directory = 'data/processed/disney_lorcana'

# List the directory contents in sorted order: os.listdir() returns entries in
# arbitrary order, and sorting makes the load order (and the order of every
# later per-DataFrame loop) reproducible between runs and machines
data_files = sorted(os.listdir(data_directory))

# Dictionary mapping each CSV file's base name to its loaded DataFrame
data_frames = {}

# Load each CSV file into a pandas DataFrame and store it in the dictionary
for file in data_files:
    if not file.endswith('.csv'):  # Skip anything that is not a CSV file
        continue
    file_path = os.path.join(data_directory, file)
    # splitext() strips only the final extension, so dotted names such as
    # 'cards.v2.csv' keep their full stem ('cards.v2') instead of collapsing
    # to 'cards' and overwriting another file's DataFrame
    df_name = os.path.splitext(file)[0]
    data_frames[df_name] = pd.read_csv(file_path)  # Read the CSV file into a DataFrame

# Display the names of the loaded DataFrames
data_frames.keys()

# Display Basic Information
Display the first few rows and basic information (e.g., column names, data types) for each DataFrame.

In [None]:
# Display Basic Information

# For every loaded DataFrame, show a preview of its rows followed by its schema
for name, df in data_frames.items():
    print(f"DataFrame: {name}")  # Label the output with the DataFrame's name
    display(df.head())  # Render the first few rows as a rich table
    # DataFrame.info() prints its report directly and returns None, so it must
    # not be wrapped in display() -- doing so renders a stray "None" afterwards
    df.info()
    print("\n")  # Blank line to separate consecutive DataFrames

# Summary Statistics
Generate summary statistics for each DataFrame, including mean, median, and standard deviation.

In [None]:
# Summary Statistics

# Build one summary table per DataFrame: describe() supplies count, mean, std,
# min, quartiles and max; an explicit median column is appended for convenience
for name, df in data_frames.items():
    print(f"Summary Statistics for DataFrame: {name}")  # Label the output

    # Transpose so each row is a column of the source data
    summary_stats = df.describe().T

    # numeric_only=True is required: without it, pandas 2.x raises TypeError as
    # soon as the DataFrame contains a string column. The assignment aligns on
    # the index, so rows without a numeric median simply receive NaN.
    summary_stats['median'] = df.median(numeric_only=True)

    display(summary_stats)  # Render the summary table
    print("\n")  # Blank line to separate consecutive DataFrames

# Visualize Data
Create basic visualizations (histograms and box plots) to explore the distribution of the numeric columns in each DataFrame.

In [None]:
# Visualize Data

# Number of box plots placed side by side in each row of the grid
box_plot_columns = 3

# Draw histograms and box plots of the numeric columns of every DataFrame
for name, df in data_frames.items():
    print(f"Visualizations for DataFrame: {name}")  # Label the output

    # Only numeric columns can be plotted; both hist() and plot(kind='box')
    # raise when there are none, which would abort the loop for every
    # remaining DataFrame, so such frames are skipped explicitly
    numeric_df = df.select_dtypes(include='number')
    numeric_count = numeric_df.shape[1]
    if numeric_count == 0:
        print("No numeric columns to plot.")
        print("\n")
        continue

    # Create histograms for each numeric column
    numeric_df.hist(figsize=(10, 10))
    plt.suptitle(f'Histograms for {name}', fontsize=16)
    plt.show()

    # Size the grid to the data: a fixed 3x3 layout raises ValueError once a
    # DataFrame has more than nine numeric columns. At least three rows are
    # kept so smaller frames render exactly as they did with the fixed grid.
    box_plot_rows = max(3, -(-numeric_count // box_plot_columns))  # ceiling division
    numeric_df.plot(
        kind='box',
        subplots=True,
        layout=(box_plot_rows, box_plot_columns),
        # Grow the figure height in proportion to the number of rows (10 for 3 rows)
        figsize=(15, 10 * box_plot_rows / 3),
    )
    # The figure title is set once here; passing title= to plot() as well
    # would only set the same suptitle twice
    plt.suptitle(f'Box Plots for {name}', fontsize=16)
    plt.show()

    print("\n")  # Blank line to separate consecutive DataFrames