# Importing Necessary Libraries

In [None]:
# Import Necessary Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

: 

# Dataset Loading and Preprocessing

Load the dataset from the CSV file and preprocess it. This includes:
1. Converting the pixel data into numerical arrays.
2. Mapping the integer emotion codes to their names (e.g., 0 → "Angry", 1 → "Disgust").

In [None]:
# Load the Dataset
def load_dataset(path):
    """Read the CSV file at ``path`` and return its contents as a DataFrame."""
    dataset = pd.read_csv(path)
    return dataset

# Preprocess the Dataset
def preprocess_dataset(df):
    """Return a preprocessed copy of the raw dataset.

    Each entry of the ``pixels`` column is split on whitespace and parsed
    into a list of ints. Each entry of the ``emotion`` column is treated as
    an integer code 0-6 and replaced by the matching name; values that are
    not one of those codes become NaN (the behaviour of ``Series.map``).

    Args:
        df: DataFrame with a ``pixels`` column of whitespace-separated
            integer strings and an ``emotion`` column of integer codes.

    Returns:
        A new DataFrame with both columns converted. ``df`` itself is left
        untouched, so the function can be re-run on the same raw frame.
    """
    # Work on a copy so the caller's frame is not modified in place;
    # otherwise a second call would try to .split() already-parsed lists.
    df = df.copy()

    # Convert pixel data into numerical arrays
    df['pixels'] = df['pixels'].apply(lambda x: [int(p) for p in x.split()])

    # The position of each name in this list is its integer emotion code.
    emotion_labels = ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]
    df['emotion'] = df['emotion'].map(dict(enumerate(emotion_labels)))

    return df


# Visualizations

Visualize different aspects of the dataset:
1. **Emotion Distribution**: The count of each emotion in the dataset.
2. **Pixel Intensity Distribution**: The distribution of pixel intensities across the dataset.
3. **Sample Images**: A few sample images from the dataset along with their emotion labels.

In [None]:
# Plot Emotion Distribution
def plot_emotion_distribution(df):
    """Draw a bar chart of how many rows of ``df`` carry each emotion name."""
    # Explicit bar order, passed to seaborn so the x-axis layout is fixed.
    emotion_order = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

    plt.figure(figsize=(10, 6))
    sns.countplot(x='emotion', data=df, order=emotion_order)

    plt.xlabel('Emotion')
    plt.ylabel('Count')
    plt.title('Emotion Distribution')
    plt.show()

# Plot Image Pixel Intensity Distribution
def plot_pixel_intensity_distribution(df):
    """Show a 50-bin histogram, with KDE overlay, of all pixel values in ``df``."""
    # Join the per-row pixel sequences end to end into a single array.
    pixel_rows = df['pixels'].values
    flat_pixels = np.concatenate(pixel_rows)

    plt.figure(figsize=(10, 6))
    sns.histplot(flat_pixels, kde=True, bins=50, color='blue')

    plt.xlabel('Pixel Intensity')
    plt.ylabel('Frequency')
    plt.title('Pixel Intensity Distribution')
    plt.show()

# Plot Sample Images from the Dataset
def plot_sample_images(df, num_samples=5, image_shape=(48, 48)):
    """Plot randomly sampled images in one row, each titled with its emotion.

    Args:
        df: DataFrame whose ``pixels`` column holds flat pixel sequences and
            whose ``emotion`` column holds the title shown above each image.
        num_samples: Number of rows to draw with ``DataFrame.sample``.
        image_shape: ``(height, width)`` each pixel sequence is reshaped to.
            Defaults to ``(48, 48)``, the size that was previously hard-coded,
            so existing calls behave exactly as before.
    """
    sample_data = df.sample(num_samples)
    plt.figure(figsize=(10, 10))

    # Only the row contents are needed; the DataFrame index label is ignored.
    for i, (_, row) in enumerate(sample_data.iterrows()):
        image = np.array(row['pixels']).reshape(image_shape)
        # Subplot positions are 1-based, hence i + 1.
        plt.subplot(1, num_samples, i + 1)
        plt.imshow(image, cmap='gray')
        plt.title(row['emotion'])
        plt.axis('off')

    plt.tight_layout()
    plt.show()


# Main Execution

Run the full pipeline: load the raw dataset, preprocess it, save the result, and visualize various aspects of the data.
1. Load the raw dataset from the CSV file.
2. Preprocess the data to convert pixel data and assign emotion labels.
3. Save the processed dataset to a new file.
4. Visualize the emotion distribution, pixel intensity distribution, and sample images.

In [None]:
# Main Execution
if __name__ == "__main__":
    # Imported here so this cell brings its own dependency into scope.
    from pathlib import Path

    # Define paths to the raw input and the processed output
    dataset_path = r"data/raw/fer2013.csv"
    processed_path = Path(r"data/processed/processed_fer2013.csv")

    # Load the dataset
    raw_df = load_dataset(dataset_path)

    # Perform preprocessing
    processed_df = preprocess_dataset(raw_df)

    # Save processed data to a new file. to_csv does not create missing
    # directories, so make sure the output folder exists first.
    processed_path.parent.mkdir(parents=True, exist_ok=True)
    processed_df.to_csv(processed_path, index=False)

    # Plot and visualize the emotion distribution
    plot_emotion_distribution(processed_df)

    # Plot and visualize the pixel intensity distribution
    plot_pixel_intensity_distribution(processed_df)

    # Plot sample images from the dataset
    plot_sample_images(processed_df)

    # Check Dataset Info (Optional)
    processed_df.info()