# Imports and Config

In [67]:
# imports

import os
import shutil
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping

In [68]:
# config
IMG_SIZE = (224,224)
BATCH_SIZE = 64 
EPOCHS = 10 # testing purposes
NUM_CLASSES = 4 # remapped to 4 instead of 7 classes
CLASS_LABELS = ["Negative", "Positive"]  

# dataset directory
train_dir = "./data/train"
val_dir = "./data/val"
test_dir = "./data/test"


In [59]:
# After training
model_path = "suicide_mood_model.keras"

# Delete existing model if it exists
if os.path.exists(model_path):
    os.remove(model_path)
    print(f"Removed existing model file: {model_path}")

Removed existing model file: suicide_mood_model.keras


# Data Preprocessing

In [70]:
# create validation split
os.makedirs(val_dir, exist_ok=True)
split_ratio = 0.2

for class_name in os.listdir(train_dir):
    class_path = os.path.join(train_dir, class_name)
    images = os.listdir(class_path)
    val_count = int(len(images) * split_ratio)
    val_images = images[:val_count]
    train_images = images[val_count:]

    os.makedirs(os.path.join(val_dir, class_name), exist_ok=True)

    for img in val_images:
        src = os.path.join(class_path, img)
        dst = os.path.join(val_dir, class_name)
        os.makedirs(dst, exist_ok=True)
        shutil.copy(src, dst)

In [71]:
# label remapping for binary classification
def remap_labels(ori_label):
    mapping = {
        "fearful": "Negative",  
        "sad": "Negative",      
        "happy": "Positive"     
    }
    return mapping.get(ori_label, "Negative")  

In [72]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

def relabel_generator(generator):
    while True:
        batch = next(generator)
        if len(batch) != 2:  # Check if batch is properly structured
            raise ValueError(f"Expected batch to be (x,y) but got: {batch}")
            
        x, y = batch
        y_idx = np.argmax(y, axis=1)
        new_labels = []
        
        for idx in y_idx:
            try:
                ori_label = list(generator.class_indices.keys())[idx]
                new_label = remap_labels(ori_label)
                new_labels.append(CLASS_LABELS.index(new_label))
            except Exception as e:
                print(f"Error processing label idx {idx}: {e}")
                # Default to neutral class if there's an error
                new_labels.append(CLASS_LABELS.index("Neutral"))
                
        y_new = tf.keras.utils.to_categorical(new_labels, num_classes=NUM_CLASSES)
        yield x, y_new

# Recreate generators
train_relabeled = relabel_generator(train_generator)
val_relabeled = relabel_generator(val_generator)



Found 16142 images belonging to 3 classes.
Found 3228 images belonging to 3 classes.
Found 4045 images belonging to 3 classes.


In [73]:
# Save model configuration for reference
model_config = {
    "input_shape": IMG_SIZE + (3,),
    "num_classes": NUM_CLASSES,
    "class_labels": CLASS_LABELS
}

# Save to a text file
with open("model_config.txt", "w") as f:
    for key, value in model_config.items():
        f.write(f"{key}: {value}\n")
    
print("Model configuration saved to model_config.txt")

Model configuration saved to model_config.txt


# Model init + training


In [74]:
# Update the output layer for binary classification
base_model = MobileNetV2(input_shape=IMG_SIZE + (3,), include_top=False, weights='imagenet', alpha=0.35)
base_model.trainable = False

inputs = Input(shape=IMG_SIZE + (3,))
x = base_model(inputs, training=False)
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.4)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)  

model = Model(inputs, outputs)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

## helper functions

In [65]:
from PIL import Image
import glob

def find_corrupted_images(directory):
    corrupted = []
    
    for class_dir in os.listdir(directory):
        class_path = os.path.join(directory, class_dir)
        if os.path.isdir(class_path):
            for img_file in os.listdir(class_path):
                img_path = os.path.join(class_path, img_file)
                try:
                    with Image.open(img_path) as img:
                        # Try to load the image and verify it
                        img.verify()
                        # If you want to make sure it can be loaded as an array too:
                        img = Image.open(img_path).convert('RGB')
                except Exception as e:
                    corrupted.append((img_path, str(e)))
    
    return corrupted

# Check all directories
print("Checking train directory...")
train_corrupted = find_corrupted_images(train_dir)
print("Checking validation directory...")
val_corrupted = find_corrupted_images(val_dir)
print("Checking test directory...")
test_corrupted = find_corrupted_images(test_dir)

# Print results
if train_corrupted or val_corrupted or test_corrupted:
    print(f"Found {len(train_corrupted) + len(val_corrupted) + len(test_corrupted)} corrupted images:")
    for path, err in train_corrupted + val_corrupted + test_corrupted:
        print(f"- {path}: {err}")
else:
    print("No corrupted images found. Issue might be with file access or permissions.")

Checking train directory...
Checking validation directory...
Checking test directory...
No corrupted images found. Issue might be with file access or permissions.


In [35]:
def remove_corrupted_files(file_list):
    for file_path, _ in file_list:
        try:
            os.remove(file_path)
            print(f"Removed: {file_path}")
        except Exception as e:
            print(f"Failed to remove {file_path}: {e}")
            
remove_corrupted_files(train_corrupted + val_corrupted + test_corrupted)

NameError: name 'train_corrupted' is not defined

In [32]:
# Add this code to check image counts
def count_images_in_directory(directory):
    total = 0
    for class_name in os.listdir(directory):
        class_path = os.path.join(directory, class_name)
        if os.path.isdir(class_path):
            files = [f for f in os.listdir(class_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
            total += len(files)
            print(f"Class {class_name}: {len(files)} images")
    print(f"Total images in {directory}: {total}")
    return total

print("\nCounting images in directories:")
train_count = count_images_in_directory(train_dir)
val_count = count_images_in_directory(val_dir)
test_count = count_images_in_directory(test_dir)


Counting images in directories:
Class angry: 3995 images
Class disgusted: 436 images
Class fearful: 4097 images
Class happy: 7215 images
Class neutral: 4965 images
Class sad: 4830 images
Class surprised: 3171 images
Total images in ./data/train: 28709
Class angry: 799 images
Class disgusted: 87 images
Class fearful: 819 images
Class happy: 1443 images
Class neutral: 993 images
Class sad: 965 images
Class surprised: 634 images
Total images in ./data/val: 5740
Class angry: 958 images
Class disgusted: 111 images
Class fearful: 1024 images
Class happy: 1774 images
Class neutral: 1233 images
Class sad: 1247 images
Class surprised: 831 images
Total images in ./data/test: 7178


## model training

In [75]:
# Training Model
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1
)
early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

callbacks = [early_stopping, lr_scheduler]


history = model.fit(
    train_relabeled,
    steps_per_epoch=len(train_generator),
    validation_data=val_relabeled,
    validation_steps=len(val_generator),
    epochs=EPOCHS,
    callbacks=callbacks
)

# Delete existing model if it exists
if os.path.exists("suicide_mood_model.keras"):
    os.remove("suicide_mood_model.keras")
    print(f"Removed existing model file: suicide_mood_model.keras")

# Save model
model.save("suicide_mood_model.keras")
print("Model saved successfully.")

Epoch 1/10
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m472s[0m 2s/step - accuracy: 0.6147 - loss: 0.6981 - val_accuracy: 0.7175 - val_loss: 0.5767 - learning_rate: 0.0010
Epoch 2/10
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 1s/step - accuracy: 0.6941 - loss: 0.5860 - val_accuracy: 0.7714 - val_loss: 0.5179 - learning_rate: 0.0010
Epoch 3/10
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m299s[0m 1s/step - accuracy: 0.7236 - loss: 0.5556 - val_accuracy: 0.7785 - val_loss: 0.4846 - learning_rate: 0.0010
Epoch 4/10
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m298s[0m 1s/step - accuracy: 0.7343 - loss: 0.5321 - val_accuracy: 0.7878 - val_loss: 0.4620 - learning_rate: 0.0010
Epoch 5/10
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m294s[0m 1s/step - accuracy: 0.7345 - loss: 0.5301 - val_accuracy: 0.7890 - val_loss: 0.4772 - learning_rate: 0.0010
Epoch 6/10
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m