In [1]:
import cv2
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
from facenet_pytorch import MTCNN
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm
import face_alignment
#import dlib
import requests

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.features = models.vgg16(pretrained=True)
        # Modify the first layer to accept 1 channel input (for grayscale spectrograms)
        self.features.features[0] = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # Modify the final layer to output desired feature size
        self.features.classifier[6] = nn.Linear(self.features.classifier[6].in_features, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.features(x)
        x = self.softmax(x)
        return x

In [3]:
def extract_frame(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    mid_frame_index = frame_count // 2  # Index of the frame in the middle of the video
    cap.set(cv2.CAP_PROP_POS_FRAMES, mid_frame_index)
    ret, frame = cap.read()
    if ret:
        cap.release()
        return frame
    else:
        cap.release()
        return None

In [4]:
def detect_face(frame):
    mtcnn = MTCNN()
    boxes, _ = mtcnn.detect(frame)
    if boxes is not None:
        # Assuming only one face in the frame
        box = boxes[0]
        x1, y1, x2, y2 = box
        # Crop the frame to the detected face
        cropped_frame = frame[int(y1):int(y2), int(x1):int(x2)]
        return cropped_frame
    else:
        return None

In [5]:
import cv2
import face_alignment
import requests
import os

# Function to download the pretrained face alignment model if it doesn't exist
def download_face_alignment_model(url, save_path):
    if not os.path.exists(save_path):
        print("Downloading pretrained face alignment model...")
        response = requests.get(url)
        with open(save_path, 'wb') as f:
            f.write(response.content)
        print("Download complete.")

# Specify the URL of the pretrained face alignment model
face_alignment_model_url = "https://github.com/1adrianb/face-alignment-models/releases/download/2.0.1/2DFAN4-11f355bf06.pth.tar"
device = 'cpu' 
# Download the pretrained face alignment model if it doesn't exist
face_alignment_model_path = os.path.abspath("2DFAN4-11f355bf06.pth.tar")
download_face_alignment_model(face_alignment_model_url, face_alignment_model_path)

# Initialize face alignment model
fa = face_alignment.FaceAlignment(2,device=device, flip_input=False)  # 2 corresponds to 2D landmarks

def align_face(frame):
    # Perform face alignment
    aligned_faces = fa.get_landmarks(frame)
    if aligned_faces is not None:
        aligned_face = aligned_faces[0]  # Assuming only one face in the frame
        return aligned_face
    else:
        return None


In [6]:
def preprocess_image(frame):
    # Convert the frame to a PIL Image
    frame_pil = Image.fromarray(frame.astype('uint8'))

    # Convert the image to RGB by duplicating channels
    frame_pil = frame_pil.convert('RGB')

    # Resize and normalize the frame
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Standard normalization for RGB
    ])
    img_tensor = transform(frame_pil)
    return img_tensor

In [7]:
def load_dataset(input_folder):
    X = []
    y = []
    video_files = [file for file in os.listdir(input_folder) if file.endswith(".flv")]
    for video_file in tqdm(video_files):
        video_path = os.path.join(input_folder, video_file)
        frame = extract_frame(video_path)
        if frame is not None:
            cropped_face = detect_face(frame)
            if cropped_face is not None:
                preprocessed_face = preprocess_image(cropped_face)
                X.append(preprocessed_face)
                label = video_file.split("_")[2].split(".")[0]  # Adjusted to handle different file extensions
                if label == "HAP":
                    y.append(0)
                elif label == "SAD":
                    y.append(1)
                elif label == "ANG":
                    y.append(2)
                elif label == "DIS":
                    y.append(3)
                elif label == "FEA":
                    y.append(4)
                elif label == "NEU":
                    y.append(5)
            else:
                print(f"No face detected in {video_file}. Skipping.")
        else:
            print(f"Failed to extract frame from {video_file}. Skipping.")
    return X, y

In [8]:
def train_model(model, criterion, optimizer, train_loader, device):
    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    for inputs, labels in tqdm(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_preds += (predicted == labels).sum().item()
        total_preds += labels.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    accuracy = correct_preds / total_preds
    return epoch_loss, accuracy

In [9]:
def test_model(model, criterion, test_loader, device):
    model.eval()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_preds += (predicted == labels).sum().item()
            total_preds += labels.size(0)
    epoch_loss = running_loss / len(test_loader.dataset)
    accuracy = correct_preds / total_preds
    return epoch_loss, accuracy

In [10]:
if __name__ == "__main__":
    # Check if input arguments are provided
    # if len(sys.argv) != 2:
    #     print("Usage: python video_to_features_cnn.py input_folder")
    #     sys.exit(1)

    # input_folder = sys.argv[1]
    input_folder = 'videos_fullscale'
    # Check if input folder exists
    if not os.path.exists(input_folder):
        print("Input folder does not exist.")
        sys.exit(1)

    # Load dataset and split into train and test sets
    X = np.load('X.npy', mmap_mode='r')
    y = np.load('y.npy', mmap_mode='r')
    X_rgb = np.repeat(X, 3, axis=1)
    print(f"Total number of samples: {len(X_rgb)}")
    X_train, X_test, y_train, y_test = train_test_split(X_rgb, y, test_size=0.3, random_state=42)
    print(f"Number of train samples: {len(X_train)}", f"Number of test samples: {len(X_test)}")
    # Initialize the model
    model = CNN(num_classes=6)  # 3 classes for HAPPY, SAD, ANGRY
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Define loss function and optimizer
    _lr = 0.00001
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=_lr)

    # Create data loaders
    _bs = 32
    train_loader = torch.utils.data.DataLoader(list(zip(X_train, y_train)), batch_size=_bs, shuffle=True)
    test_loader = torch.utils.data.DataLoader(list(zip(X_test, y_test)), batch_size=_bs)
    print(f"Batch size: {_bs}", f"lr: {_lr}")
    # Training loop
    num_epochs = 50
    for epoch in range(num_epochs):
        print("Epoch " + str(epoch))
        train_loss, train_accuracy = train_model(model, criterion, optimizer, train_loader, device)
        test_loss, test_accuracy = test_model(model, criterion, test_loader, device)
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Total number of samples: 7442
Number of train samples: 5209 Number of test samples: 2233




Batch size: 32 lr: 1e-05
Epoch 0


100%|██████████| 163/163 [00:41<00:00,  3.93it/s]
100%|██████████| 70/70 [00:05<00:00, 12.30it/s]


Epoch 1/50, Train Loss: 1.7697, Train Accuracy: 0.2231, Test Loss: 1.6882, Test Accuracy: 0.3439
Epoch 1


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 2/50, Train Loss: 1.6410, Train Accuracy: 0.3863, Test Loss: 1.6074, Test Accuracy: 0.4147
Epoch 2


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 3/50, Train Loss: 1.5745, Train Accuracy: 0.4594, Test Loss: 1.5454, Test Accuracy: 0.4890
Epoch 3


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.29it/s]


Epoch 4/50, Train Loss: 1.5204, Train Accuracy: 0.5176, Test Loss: 1.5189, Test Accuracy: 0.5132
Epoch 4


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 5/50, Train Loss: 1.4761, Train Accuracy: 0.5627, Test Loss: 1.4790, Test Accuracy: 0.5580
Epoch 5


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.31it/s]


Epoch 6/50, Train Loss: 1.4522, Train Accuracy: 0.5890, Test Loss: 1.4597, Test Accuracy: 0.5728
Epoch 6


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 7/50, Train Loss: 1.4023, Train Accuracy: 0.6427, Test Loss: 1.4431, Test Accuracy: 0.5925
Epoch 7


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 8/50, Train Loss: 1.3763, Train Accuracy: 0.6694, Test Loss: 1.4393, Test Accuracy: 0.5996
Epoch 8


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 9/50, Train Loss: 1.3484, Train Accuracy: 0.6978, Test Loss: 1.4106, Test Accuracy: 0.6274
Epoch 9


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 10/50, Train Loss: 1.3230, Train Accuracy: 0.7237, Test Loss: 1.4086, Test Accuracy: 0.6265
Epoch 10


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 11/50, Train Loss: 1.2973, Train Accuracy: 0.7495, Test Loss: 1.3975, Test Accuracy: 0.6350
Epoch 11


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 12/50, Train Loss: 1.2849, Train Accuracy: 0.7608, Test Loss: 1.3914, Test Accuracy: 0.6467
Epoch 12


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 13/50, Train Loss: 1.2635, Train Accuracy: 0.7854, Test Loss: 1.3828, Test Accuracy: 0.6529
Epoch 13


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 14/50, Train Loss: 1.2506, Train Accuracy: 0.7982, Test Loss: 1.3675, Test Accuracy: 0.6753
Epoch 14


100%|██████████| 163/163 [00:41<00:00,  3.97it/s]
100%|██████████| 70/70 [00:05<00:00, 12.31it/s]


Epoch 15/50, Train Loss: 1.2296, Train Accuracy: 0.8176, Test Loss: 1.3751, Test Accuracy: 0.6610
Epoch 15


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.31it/s]


Epoch 16/50, Train Loss: 1.2179, Train Accuracy: 0.8309, Test Loss: 1.3705, Test Accuracy: 0.6659
Epoch 16


100%|██████████| 163/163 [00:41<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 17/50, Train Loss: 1.2057, Train Accuracy: 0.8418, Test Loss: 1.3595, Test Accuracy: 0.6731
Epoch 17


100%|██████████| 163/163 [00:41<00:00,  3.97it/s]
100%|██████████| 70/70 [00:05<00:00, 12.31it/s]


Epoch 18/50, Train Loss: 1.1953, Train Accuracy: 0.8543, Test Loss: 1.3554, Test Accuracy: 0.6892
Epoch 18


100%|██████████| 163/163 [00:41<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 19/50, Train Loss: 1.1828, Train Accuracy: 0.8654, Test Loss: 1.3488, Test Accuracy: 0.6973
Epoch 19


100%|██████████| 163/163 [00:41<00:00,  3.97it/s]
100%|██████████| 70/70 [00:05<00:00, 12.31it/s]


Epoch 20/50, Train Loss: 1.1768, Train Accuracy: 0.8706, Test Loss: 1.3586, Test Accuracy: 0.6785
Epoch 20


100%|██████████| 163/163 [00:41<00:00,  3.97it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 21/50, Train Loss: 1.1730, Train Accuracy: 0.8735, Test Loss: 1.3460, Test Accuracy: 0.6914
Epoch 21


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 22/50, Train Loss: 1.1567, Train Accuracy: 0.8906, Test Loss: 1.3344, Test Accuracy: 0.7089
Epoch 22


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 23/50, Train Loss: 1.1523, Train Accuracy: 0.8959, Test Loss: 1.3749, Test Accuracy: 0.6592
Epoch 23


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 24/50, Train Loss: 1.1464, Train Accuracy: 0.9025, Test Loss: 1.3332, Test Accuracy: 0.7067
Epoch 24


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 25/50, Train Loss: 1.1413, Train Accuracy: 0.9059, Test Loss: 1.3308, Test Accuracy: 0.7080
Epoch 25


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 26/50, Train Loss: 1.1335, Train Accuracy: 0.9136, Test Loss: 1.3497, Test Accuracy: 0.6879
Epoch 26


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.30it/s]


Epoch 27/50, Train Loss: 1.1328, Train Accuracy: 0.9142, Test Loss: 1.3456, Test Accuracy: 0.6937
Epoch 27


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 28/50, Train Loss: 1.1245, Train Accuracy: 0.9217, Test Loss: 1.3370, Test Accuracy: 0.7035
Epoch 28


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 29/50, Train Loss: 1.1216, Train Accuracy: 0.9240, Test Loss: 1.3356, Test Accuracy: 0.7013
Epoch 29


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 30/50, Train Loss: 1.1229, Train Accuracy: 0.9226, Test Loss: 1.3347, Test Accuracy: 0.7067
Epoch 30


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 31/50, Train Loss: 1.1238, Train Accuracy: 0.9238, Test Loss: 1.3372, Test Accuracy: 0.7044
Epoch 31


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 32/50, Train Loss: 1.1256, Train Accuracy: 0.9213, Test Loss: 1.3394, Test Accuracy: 0.6986
Epoch 32


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 33/50, Train Loss: 1.1140, Train Accuracy: 0.9326, Test Loss: 1.3412, Test Accuracy: 0.6986
Epoch 33


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 34/50, Train Loss: 1.1151, Train Accuracy: 0.9311, Test Loss: 1.3378, Test Accuracy: 0.7076
Epoch 34


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 35/50, Train Loss: 1.1114, Train Accuracy: 0.9351, Test Loss: 1.3280, Test Accuracy: 0.7129
Epoch 35


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 36/50, Train Loss: 1.1071, Train Accuracy: 0.9386, Test Loss: 1.3471, Test Accuracy: 0.6906
Epoch 36


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 37/50, Train Loss: 1.1069, Train Accuracy: 0.9388, Test Loss: 1.3307, Test Accuracy: 0.7098
Epoch 37


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 38/50, Train Loss: 1.1054, Train Accuracy: 0.9403, Test Loss: 1.3282, Test Accuracy: 0.7156
Epoch 38


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 39/50, Train Loss: 1.1082, Train Accuracy: 0.9374, Test Loss: 1.3437, Test Accuracy: 0.6968
Epoch 39


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.34it/s]


Epoch 40/50, Train Loss: 1.1096, Train Accuracy: 0.9355, Test Loss: 1.3417, Test Accuracy: 0.6986
Epoch 40


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 41/50, Train Loss: 1.1051, Train Accuracy: 0.9418, Test Loss: 1.3395, Test Accuracy: 0.7013
Epoch 41


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 42/50, Train Loss: 1.1031, Train Accuracy: 0.9418, Test Loss: 1.3313, Test Accuracy: 0.7080
Epoch 42


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 43/50, Train Loss: 1.0973, Train Accuracy: 0.9486, Test Loss: 1.3334, Test Accuracy: 0.7067
Epoch 43


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 44/50, Train Loss: 1.0927, Train Accuracy: 0.9528, Test Loss: 1.3250, Test Accuracy: 0.7120
Epoch 44


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 45/50, Train Loss: 1.0958, Train Accuracy: 0.9491, Test Loss: 1.3357, Test Accuracy: 0.7022
Epoch 45


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.33it/s]


Epoch 46/50, Train Loss: 1.0921, Train Accuracy: 0.9524, Test Loss: 1.3335, Test Accuracy: 0.7031
Epoch 46


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.32it/s]


Epoch 47/50, Train Loss: 1.0884, Train Accuracy: 0.9560, Test Loss: 1.3209, Test Accuracy: 0.7232
Epoch 47


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.31it/s]


Epoch 48/50, Train Loss: 1.0907, Train Accuracy: 0.9541, Test Loss: 1.3248, Test Accuracy: 0.7143
Epoch 48


100%|██████████| 163/163 [00:40<00:00,  3.98it/s]
100%|██████████| 70/70 [00:05<00:00, 12.30it/s]


Epoch 49/50, Train Loss: 1.0981, Train Accuracy: 0.9459, Test Loss: 1.3277, Test Accuracy: 0.7147
Epoch 49


100%|██████████| 163/163 [00:41<00:00,  3.97it/s]
100%|██████████| 70/70 [00:05<00:00, 12.30it/s]

Epoch 50/50, Train Loss: 1.0952, Train Accuracy: 0.9495, Test Loss: 1.3337, Test Accuracy: 0.7040





In [11]:
torch.save(model.state_dict(), 'vgg16_video_'+str(num_epochs)+'_'+str(_bs)+'_'+str(_lr))