In [1]:
import os
import shutil  #perform high-level file operations, like copying files.
from sklearn.model_selection import train_test_split #split the data into training and test sets.
import pandas as pd

# Load the label table. The steps below read two columns from it:
# 'image' (file name inside source_dir) and 'emotion' (class label).
csv_path = 'legend.csv'  # Update with the correct path if needed
data = pd.read_csv(csv_path)

# Fail early with a readable message instead of a bare KeyError further down.
missing_columns = {'image', 'emotion'} - set(data.columns)
if missing_columns:
    raise KeyError(f"{csv_path} is missing required column(s): {sorted(missing_columns)}")

# Labels are used both as directory names and as stratification keys, so they
# must be canonical: labels that differ only by case or surrounding whitespace
# would otherwise become separate class folders on a case-sensitive filesystem
# (and be merged silently on a case-insensitive one). Rows without a file name
# or label cannot be placed in any class folder and are dropped.
# NOTE(review): assumes labels are meant to be case-insensitive — confirm
# against the contents of legend.csv.
data = (
    data
    .dropna(subset=['image', 'emotion'])
    .assign(emotion=lambda frame: frame['emotion'].astype(str).str.strip().str.lower())
)

# Specify the source directory where images are stored
source_dir = 'images/'  # Update this to the folder where the images are located

# Destination layout: data_split/{train,test}/<emotion>/<image>
base_dir = 'data_split'
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')

# Create one subfolder per (normalized) emotion in both splits;
# exist_ok=True keeps the cell re-runnable.
emotions = data['emotion'].unique()
for emotion in emotions:
    os.makedirs(os.path.join(train_dir, emotion), exist_ok=True)
    os.makedirs(os.path.join(test_dir, emotion), exist_ok=True)

# Stratified 80/20 split so each emotion keeps its proportion in both sets;
# the fixed random_state makes the split reproducible across runs.
train_data, test_data = train_test_split(data, test_size=0.2, stratify=data['emotion'], random_state=42)

# Copy (not move) each image of a split into <destination_dir>/<emotion>/
def move_images(dataframe, destination_dir, src_dir=None):
    """Copy the images listed in ``dataframe`` into per-emotion folders.

    Despite the name, files are copied with ``shutil.copy``; the originals
    stay in the source directory.

    Args:
        dataframe: Table with an 'image' column (file name relative to the
            source directory) and an 'emotion' column (class folder name).
        destination_dir: Root folder of the split; each image ends up at
            ``destination_dir/<emotion>/<image>``.
        src_dir: Folder holding the original images. When omitted, the
            module-level ``source_dir`` is used, as before.

    Returns:
        int: Number of images that were copied. Images whose source file is
        missing are reported on stdout and skipped.
    """
    if src_dir is None:
        src_dir = source_dir  # fall back to the notebook-level setting

    copied = 0
    # Iterating the two columns directly avoids building a Series per row.
    for image_name, emotion in zip(dataframe['image'], dataframe['emotion']):
        src_path = os.path.join(src_dir, image_name)

        # isfile (rather than exists) also rejects directories, which
        # shutil.copy cannot handle.
        if not os.path.isfile(src_path):
            print(f"Image {image_name} not found in {src_dir}.")
            continue

        dest_path = os.path.join(destination_dir, emotion, image_name)
        # Do not rely on the caller having created the class folder.
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
        shutil.copy(src_path, dest_path)
        copied += 1

    return copied

# Populate the train folder first, then the test folder, from their
# respective halves of the split.
for split_frame, split_root in ((train_data, train_dir), (test_data, test_dir)):
    move_images(split_frame, split_root)

print("Images have been organized into train and test folders.")


Images have been organized into train and test folders.


In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Directories produced by the split cell (one subfolder per emotion)
train_dir = 'data_split/train'
test_dir = 'data_split/test'

# Image size and batch settings
img_size = (224, 224)   # every image is resized to this (height, width)
batch_size = 32

# Training pipeline: scale pixels to [0, 1] and apply light augmentation
# (small rotations, shifts and mirroring) to reduce overfitting.
train_datagen = ImageDataGenerator(
    rescale=1./255,             # Normalize pixel values
    rotation_range=20,          # Random rotation of up to 20 degrees
    width_shift_range=0.2,      # Random horizontal shift (fraction of width)
    height_shift_range=0.2,     # Random vertical shift (fraction of height)
    horizontal_flip=True
)
# Evaluation pipeline: rescale only — augmenting test data would distort metrics.
test_datagen = ImageDataGenerator(rescale=1./255)

# Load images from folders; class labels are inferred from subfolder names.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',  # For multi-class classification (one-hot labels)
    seed=42                    # Reproducible shuffling/augmentation, matching the split's random_state
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    # flow_from_directory shuffles by default; keeping the test set in file
    # order means predictions line up with test_generator.classes/.filenames
    # when building a confusion matrix or classification report.
    shuffle=False
)


Found 10947 images belonging to 8 classes.
Found 2738 images belonging to 8 classes.
