In [1]:
import cv2
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
from facenet_pytorch import MTCNN
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm
import face_alignment
#import dlib
import requests

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from torchvision import models

class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.features = models.vgg16(pretrained=True)
        # Modify the first layer to accept 2 channel input
        self.features.features[0] = nn.Conv2d(2, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Modify the final layer to output desired feature size
        self.features.classifier[6] = nn.Linear(self.features.classifier[6].in_features, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.features(x)
        x = self.softmax(x)
        return x

In [3]:
def extract_frame(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    mid_frame_index = frame_count // 2  # Index of the frame in the middle of the video
    cap.set(cv2.CAP_PROP_POS_FRAMES, mid_frame_index)
    ret, frame = cap.read()
    if ret:
        cap.release()
        return frame
    else:
        cap.release()
        return None

In [4]:
def detect_face(frame):
    mtcnn = MTCNN()
    boxes, _ = mtcnn.detect(frame)
    if boxes is not None:
        # Assuming only one face in the frame
        box = boxes[0]
        x1, y1, x2, y2 = box
        # Crop the frame to the detected face
        cropped_frame = frame[int(y1):int(y2), int(x1):int(x2)]
        return cropped_frame
    else:
        return None

In [5]:
# Function to download the pretrained face alignment model if it doesn't exist
def download_face_alignment_model(url, save_path):
    if not os.path.exists(save_path):
        print("Downloading pretrained face alignment model...")
        response = requests.get(url)
        with open(save_path, 'wb') as f:
            f.write(response.content)
        print("Download complete.")
device='cpu'
# Specify the URL of the pretrained face alignment model
face_alignment_model_url = "https://github.com/1adrianb/face-alignment-models/releases/download/2.0.1/2DFAN4-11f355bf06.pth.tar"

# Download the pretrained face alignment model if it doesn't exist
face_alignment_model_path = os.path.abspath("2DFAN4-11f355bf06.pth.tar")
download_face_alignment_model(face_alignment_model_url, face_alignment_model_path)

# Initialize face alignment model
fa = face_alignment.FaceAlignment(2, device=device,flip_input=False)  # 2 corresponds to 2D landmarks

def align_face(frame):
    # Perform face alignment
    aligned_faces = fa.get_landmarks(frame)
    if aligned_faces is not None:
        aligned_face = aligned_faces[0]  # Assuming only one face in the frame
        return aligned_face
    else:
        return None


In [6]:
def preprocess_image(frame):
    # Convert the frame to a PIL Image
    frame_pil = Image.fromarray(frame.astype('uint8'))

    # Convert the image to grayscale
    frame_pil = frame_pil.convert('L')

    # Resize and normalize the frame
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485], std=[0.229]),  # For grayscale images, only 1 channel
    ])
    img_tensor = transform(frame_pil)
    return img_tensor

In [7]:
def preprocess_spectrogram(image_path):
    img = Image.open(image_path).convert('L')  # Convert to grayscale
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize to match ResNet input size
        transforms.ToTensor(),           # Convert to tensor
    ])
    img_tensor = transform(img)
    # img_tensor = img_tensor.unsqueeze(0)  # Add batch dimension
    return img_tensor

In [8]:
def load_spectrogram_dataset(input_folder):
    X = []
    y = []
    # List all files in the input folder
    files = os.listdir(input_folder)
    # Iterate over files in the folder
    for filename in files:
        if filename.endswith(".png"):  # Assuming mel spectrograms are stored as PNG files
            input_path = os.path.join(input_folder, filename)
            img_tensor = preprocess_spectrogram(input_path)
            X.append(img_tensor)
            # Extract label from filename (assuming filename is in format "abc_IEO_label_xyz.png")
            label = filename.split("_")[2]
            if label == "HAP":
                y.append(0)
            elif label == "SAD":
                y.append(1)
            elif label == "ANG":
                y.append(2)
            elif label == "DIS":
                y.append(3)
            elif label == "FEA":
                y.append(4)
            elif label == "NEU":
                y.append(5)
    return X, y

In [9]:
def load_dataset(input_folder):
    X = []
    y = []
    video_files = [file for file in os.listdir(input_folder) if file.endswith(".flv")]
    for video_file in tqdm(video_files):
        video_path = os.path.join(input_folder, video_file)
        frame = extract_frame(video_path)
        if frame is not None:
            cropped_face = detect_face(frame)
            if cropped_face is not None:
                preprocessed_face = preprocess_image(cropped_face)
                X.append(preprocessed_face)
                label = video_file.split("_")[2].split(".")[0]  # Adjusted to handle different file extensions
                if label == "HAP":
                    y.append(0)
                elif label == "SAD":
                    y.append(1)
                elif label == "ANG":
                    y.append(2)
                elif label == "DIS":
                    y.append(3)
                elif label == "FEA":
                    y.append(4)
                elif label == "NEU":
                    y.append(5)
            else:
                print(f"No face detected in {video_file}. Skipping.")
        else:
            print(f"Failed to extract frame from {video_file}. Skipping.")
    return X, y

In [10]:
# Define the ConcatDataset class to concatenate video frame and spectrogram tensors
class ConcatDataset(torch.utils.data.Dataset):
    def __init__(self, X1, X2, y, modality='multimodal', fullscale=False):
        self.X1 = X1
        self.X2 = X2
        self.y = y
        self.modality = modality
        self.fullscale = fullscale
    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        if not self.fullscale:
            img1 = torch.from_numpy(self.X1[idx]).float()  # Convert numpy array to torch tensor
            img2 = torch.from_numpy(self.X2[idx]).float()  # Convert numpy array to torch tensor
            label = torch.tensor(self.y[idx])  # Convert numpy array to torch tensor
        else:
            img1 = torch.from_numpy(self.X1[idx]).float()  # Convert numpy array to torch tensor
            img2 = torch.from_numpy(self.X2[idx]).float()  # Convert numpy array to torch tensor
            label = torch.tensor(self.y[idx])  # Convert numpy array to torch tensor

        concatenated_img = torch.cat((img1, img2), dim=0)  # Concatenate along 0 dimension
        if self.modality == 'visual':
            return img1, label
        if self.modality == 'audio':
            return img2, label
        return concatenated_img, label # concatenate modalities

In [11]:
def train_model(model, criterion, optimizer, train_loader, device):
    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    for inputs, labels in tqdm(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_preds += (predicted == labels).sum().item()
        total_preds += labels.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    accuracy = correct_preds / total_preds
    return epoch_loss, accuracy

In [12]:
def test_model(model, criterion, test_loader, device):
    model.eval()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_preds += (predicted == labels).sum().item()
            total_preds += labels.size(0)
    epoch_loss = running_loss / len(test_loader.dataset)
    accuracy = correct_preds / total_preds
    return epoch_loss, accuracy

In [13]:
_fullscale = True # Run fullscale experiment?

# Define input_folder and input_folder_spec
if _fullscale:
  input_folder = 'videos_fullscale'
  input_folder_spec = 'melspec_fullscale'
else:
  input_folder = '/content/drive/MyDrive/csci535_aashi/videos'
  input_folder_spec = '/content/drive/MyDrive/csci535_aashi/melspec'

# Check if input folder exists
if not os.path.exists(input_folder):
    print("Input folder does not exist.")
    sys.exit(1)
# Check if input folder exists
if not os.path.exists(input_folder_spec):
    print("Input folder does not exist.")
    sys.exit(1)

# Load dataset and split into train and test sets

if not _fullscale:
  X, y = load_dataset(input_folder)
  X_spec, y_spec = load_spectrogram_dataset(input_folder_spec)

else:
  # Load numpy arrays with memory-mapping
  X = np.load('X.npy', mmap_mode='r')
  y = np.load('y.npy', mmap_mode='r')
  X_spec = np.load('X_spec.npy', mmap_mode='r')
  y_spec = np.load('y_spec.npy', mmap_mode='r')

# Split the data into train and test sets
print(f"Total number of samples: {len(X)}")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print(f"Number of train samples (video): {len(X_train)}", f"Number of test samples: {len(X_test)}")
X_train_spec, X_test_spec, y_train_spec, y_test_spec = train_test_split(X_spec, y_spec, test_size=0.3, random_state=42)
print(f"Number of train samples (audio): {len(X_train_spec)}", f"Number of test samples: {len(X_test_spec)}")

Total number of samples: 7442
Number of train samples (video): 5209 Number of test samples: 2233
Number of train samples (audio): 5231 Number of test samples: 2242


In [14]:
# Initialize the model
model = CNN(num_classes=6)  # 3 classes for HAPPY, SAD, ANGRY
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define loss function and optimizer
_lr = 0.0001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=_lr)

# Concatenate datasets
train_dataset = ConcatDataset(X_train, X_train_spec, y_train)
test_dataset = ConcatDataset(X_test, X_test_spec, y_test)

# Create data loaders
_bs = 32
# train_loader = torch.utils.data.DataLoader(list(zip(X_train, y_train)), batch_size=_bs, shuffle=True)
# test_loader = torch.utils.data.DataLoader(list(zip(X_test, y_test)), batch_size=_bs)

# Create data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=_bs, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=_bs)


print(f"Batch size: {_bs}", f"lr: {_lr}")



Batch size: 32 lr: 0.0001


In [15]:
# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    print("Epoch " + str(epoch))
    train_loss, train_accuracy = train_model(model, criterion, optimizer, train_loader, device)
    test_loss, test_accuracy = test_model(model, criterion, test_loader, device)
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 0


100%|██████████| 163/163 [00:19<00:00,  8.32it/s]
100%|██████████| 70/70 [00:02<00:00, 33.29it/s]


Epoch 1/50, Train Loss: 1.7913, Train Accuracy: 0.1818, Test Loss: 1.7916, Test Accuracy: 0.1711
Epoch 1


100%|██████████| 163/163 [00:15<00:00, 10.60it/s]
100%|██████████| 70/70 [00:02<00:00, 33.16it/s]


Epoch 2/50, Train Loss: 1.7835, Train Accuracy: 0.1814, Test Loss: 1.7635, Test Accuracy: 0.2262
Epoch 2


100%|██████████| 163/163 [00:15<00:00, 10.62it/s]
100%|██████████| 70/70 [00:02<00:00, 33.30it/s]


Epoch 3/50, Train Loss: 1.7613, Train Accuracy: 0.2283, Test Loss: 1.7176, Test Accuracy: 0.2911
Epoch 3


100%|██████████| 163/163 [00:15<00:00, 10.62it/s]
100%|██████████| 70/70 [00:02<00:00, 33.27it/s]


Epoch 4/50, Train Loss: 1.7516, Train Accuracy: 0.2448, Test Loss: 1.6470, Test Accuracy: 0.3609
Epoch 4


100%|██████████| 163/163 [00:15<00:00, 10.61it/s]
100%|██████████| 70/70 [00:02<00:00, 33.29it/s]


Epoch 5/50, Train Loss: 1.6836, Train Accuracy: 0.3194, Test Loss: 1.7052, Test Accuracy: 0.2897
Epoch 5


100%|██████████| 163/163 [00:15<00:00, 10.61it/s]
100%|██████████| 70/70 [00:02<00:00, 33.25it/s]


Epoch 6/50, Train Loss: 1.6307, Train Accuracy: 0.3864, Test Loss: 1.6092, Test Accuracy: 0.4102
Epoch 6


100%|██████████| 163/163 [00:15<00:00, 10.58it/s]
100%|██████████| 70/70 [00:02<00:00, 33.17it/s]


Epoch 7/50, Train Loss: 1.6184, Train Accuracy: 0.4053, Test Loss: 1.5734, Test Accuracy: 0.4505
Epoch 7


100%|██████████| 163/163 [00:15<00:00, 10.56it/s]
100%|██████████| 70/70 [00:02<00:00, 33.21it/s]


Epoch 8/50, Train Loss: 1.5946, Train Accuracy: 0.4289, Test Loss: 1.5494, Test Accuracy: 0.4734
Epoch 8


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.32it/s]


Epoch 9/50, Train Loss: 1.5779, Train Accuracy: 0.4415, Test Loss: 1.5941, Test Accuracy: 0.4281
Epoch 9


100%|██████████| 163/163 [00:15<00:00, 10.54it/s]
100%|██████████| 70/70 [00:02<00:00, 33.30it/s]


Epoch 10/50, Train Loss: 1.5399, Train Accuracy: 0.4845, Test Loss: 1.5367, Test Accuracy: 0.4948
Epoch 10


100%|██████████| 163/163 [00:15<00:00, 10.53it/s]
100%|██████████| 70/70 [00:02<00:00, 33.26it/s]


Epoch 11/50, Train Loss: 1.5211, Train Accuracy: 0.5085, Test Loss: 1.5201, Test Accuracy: 0.5159
Epoch 11


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.26it/s]


Epoch 12/50, Train Loss: 1.5018, Train Accuracy: 0.5329, Test Loss: 1.4907, Test Accuracy: 0.5468
Epoch 12


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.27it/s]


Epoch 13/50, Train Loss: 1.4827, Train Accuracy: 0.5546, Test Loss: 1.4625, Test Accuracy: 0.5764
Epoch 13


100%|██████████| 163/163 [00:15<00:00, 10.54it/s]
100%|██████████| 70/70 [00:02<00:00, 33.26it/s]


Epoch 14/50, Train Loss: 1.4544, Train Accuracy: 0.5805, Test Loss: 1.4902, Test Accuracy: 0.5455
Epoch 14


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.31it/s]


Epoch 15/50, Train Loss: 1.4469, Train Accuracy: 0.5903, Test Loss: 1.4904, Test Accuracy: 0.5464
Epoch 15


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.32it/s]


Epoch 16/50, Train Loss: 1.4352, Train Accuracy: 0.6005, Test Loss: 1.4309, Test Accuracy: 0.6055
Epoch 16


100%|██████████| 163/163 [00:15<00:00, 10.54it/s]
100%|██████████| 70/70 [00:02<00:00, 33.31it/s]


Epoch 17/50, Train Loss: 1.4391, Train Accuracy: 0.5969, Test Loss: 1.4672, Test Accuracy: 0.5719
Epoch 17


100%|██████████| 163/163 [00:15<00:00, 10.54it/s]
100%|██████████| 70/70 [00:02<00:00, 33.23it/s]


Epoch 18/50, Train Loss: 1.4476, Train Accuracy: 0.5949, Test Loss: 1.4470, Test Accuracy: 0.5867
Epoch 18


100%|██████████| 163/163 [00:15<00:00, 10.54it/s]
100%|██████████| 70/70 [00:02<00:00, 33.28it/s]


Epoch 19/50, Train Loss: 1.4244, Train Accuracy: 0.6139, Test Loss: 1.4407, Test Accuracy: 0.6001
Epoch 19


100%|██████████| 163/163 [00:15<00:00, 10.53it/s]
100%|██████████| 70/70 [00:02<00:00, 33.25it/s]


Epoch 20/50, Train Loss: 1.4145, Train Accuracy: 0.6268, Test Loss: 1.4236, Test Accuracy: 0.6158
Epoch 20


100%|██████████| 163/163 [00:15<00:00, 10.54it/s]
100%|██████████| 70/70 [00:02<00:00, 33.25it/s]


Epoch 21/50, Train Loss: 1.3694, Train Accuracy: 0.6711, Test Loss: 1.5017, Test Accuracy: 0.5378
Epoch 21


100%|██████████| 163/163 [00:15<00:00, 10.54it/s]
100%|██████████| 70/70 [00:02<00:00, 33.29it/s]


Epoch 22/50, Train Loss: 1.4114, Train Accuracy: 0.6281, Test Loss: 1.4676, Test Accuracy: 0.5705
Epoch 22


100%|██████████| 163/163 [00:15<00:00, 10.54it/s]
100%|██████████| 70/70 [00:02<00:00, 33.26it/s]


Epoch 23/50, Train Loss: 1.3796, Train Accuracy: 0.6600, Test Loss: 1.4321, Test Accuracy: 0.6010
Epoch 23


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.17it/s]


Epoch 24/50, Train Loss: 1.3568, Train Accuracy: 0.6823, Test Loss: 1.4571, Test Accuracy: 0.5808
Epoch 24


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.28it/s]


Epoch 25/50, Train Loss: 1.3565, Train Accuracy: 0.6819, Test Loss: 1.4052, Test Accuracy: 0.6364
Epoch 25


100%|██████████| 163/163 [00:15<00:00, 10.54it/s]
100%|██████████| 70/70 [00:02<00:00, 33.29it/s]


Epoch 26/50, Train Loss: 1.3434, Train Accuracy: 0.6982, Test Loss: 1.4498, Test Accuracy: 0.5911
Epoch 26


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.33it/s]


Epoch 27/50, Train Loss: 1.3379, Train Accuracy: 0.7036, Test Loss: 1.4157, Test Accuracy: 0.6238
Epoch 27


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.26it/s]


Epoch 28/50, Train Loss: 1.3739, Train Accuracy: 0.6660, Test Loss: 1.4399, Test Accuracy: 0.6023
Epoch 28


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.29it/s]


Epoch 29/50, Train Loss: 1.3853, Train Accuracy: 0.6560, Test Loss: 1.4623, Test Accuracy: 0.5768
Epoch 29


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.25it/s]


Epoch 30/50, Train Loss: 1.3799, Train Accuracy: 0.6615, Test Loss: 1.4378, Test Accuracy: 0.6014
Epoch 30


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.29it/s]


Epoch 31/50, Train Loss: 1.3485, Train Accuracy: 0.6934, Test Loss: 1.4434, Test Accuracy: 0.5974
Epoch 31


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.31it/s]


Epoch 32/50, Train Loss: 1.3486, Train Accuracy: 0.6936, Test Loss: 1.4242, Test Accuracy: 0.6135
Epoch 32


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.30it/s]


Epoch 33/50, Train Loss: 1.3410, Train Accuracy: 0.7001, Test Loss: 1.4216, Test Accuracy: 0.6198
Epoch 33


100%|██████████| 163/163 [00:15<00:00, 10.56it/s]
100%|██████████| 70/70 [00:02<00:00, 33.31it/s]


Epoch 34/50, Train Loss: 1.3319, Train Accuracy: 0.7088, Test Loss: 1.4147, Test Accuracy: 0.6274
Epoch 34


100%|██████████| 163/163 [00:15<00:00, 10.56it/s]
100%|██████████| 70/70 [00:02<00:00, 33.28it/s]


Epoch 35/50, Train Loss: 1.3086, Train Accuracy: 0.7333, Test Loss: 1.4134, Test Accuracy: 0.6288
Epoch 35


100%|██████████| 163/163 [00:15<00:00, 10.56it/s]
100%|██████████| 70/70 [00:02<00:00, 33.30it/s]


Epoch 36/50, Train Loss: 1.3309, Train Accuracy: 0.7120, Test Loss: 1.4086, Test Accuracy: 0.6323
Epoch 36


100%|██████████| 163/163 [00:15<00:00, 10.56it/s]
100%|██████████| 70/70 [00:02<00:00, 33.33it/s]


Epoch 37/50, Train Loss: 1.3807, Train Accuracy: 0.6602, Test Loss: 1.4418, Test Accuracy: 0.6001
Epoch 37


100%|██████████| 163/163 [00:15<00:00, 10.56it/s]
100%|██████████| 70/70 [00:02<00:00, 33.35it/s]


Epoch 38/50, Train Loss: 1.3701, Train Accuracy: 0.6713, Test Loss: 1.4257, Test Accuracy: 0.6158
Epoch 38


100%|██████████| 163/163 [00:15<00:00, 10.55it/s]
100%|██████████| 70/70 [00:02<00:00, 33.24it/s]


Epoch 39/50, Train Loss: 1.3930, Train Accuracy: 0.6483, Test Loss: 1.4333, Test Accuracy: 0.6068
Epoch 39


100%|██████████| 163/163 [00:15<00:00, 10.57it/s]
100%|██████████| 70/70 [00:02<00:00, 33.34it/s]


Epoch 40/50, Train Loss: 1.3926, Train Accuracy: 0.6491, Test Loss: 1.4796, Test Accuracy: 0.5607
Epoch 40


100%|██████████| 163/163 [00:15<00:00, 10.58it/s]
100%|██████████| 70/70 [00:02<00:00, 33.29it/s]


Epoch 41/50, Train Loss: 1.3861, Train Accuracy: 0.6552, Test Loss: 1.4185, Test Accuracy: 0.6216
Epoch 41


100%|██████████| 163/163 [00:15<00:00, 10.57it/s]
100%|██████████| 70/70 [00:02<00:00, 33.22it/s]


Epoch 42/50, Train Loss: 1.3987, Train Accuracy: 0.6448, Test Loss: 1.4924, Test Accuracy: 0.5499
Epoch 42


100%|██████████| 163/163 [00:15<00:00, 10.57it/s]
100%|██████████| 70/70 [00:02<00:00, 33.09it/s]


Epoch 43/50, Train Loss: 1.3842, Train Accuracy: 0.6581, Test Loss: 1.4911, Test Accuracy: 0.5513
Epoch 43


100%|██████████| 163/163 [00:15<00:00, 10.56it/s]
100%|██████████| 70/70 [00:02<00:00, 33.33it/s]


Epoch 44/50, Train Loss: 1.3868, Train Accuracy: 0.6543, Test Loss: 1.4741, Test Accuracy: 0.5683
Epoch 44


100%|██████████| 163/163 [00:15<00:00, 10.58it/s]
100%|██████████| 70/70 [00:02<00:00, 33.20it/s]


Epoch 45/50, Train Loss: 1.4273, Train Accuracy: 0.6147, Test Loss: 1.4535, Test Accuracy: 0.5889
Epoch 45


100%|██████████| 163/163 [00:15<00:00, 10.57it/s]
100%|██████████| 70/70 [00:02<00:00, 33.33it/s]


Epoch 46/50, Train Loss: 1.4293, Train Accuracy: 0.6134, Test Loss: 1.4859, Test Accuracy: 0.5553
Epoch 46


100%|██████████| 163/163 [00:15<00:00, 10.57it/s]
100%|██████████| 70/70 [00:02<00:00, 33.27it/s]


Epoch 47/50, Train Loss: 1.4442, Train Accuracy: 0.5982, Test Loss: 1.5043, Test Accuracy: 0.5374
Epoch 47


100%|██████████| 163/163 [00:15<00:00, 10.57it/s]
100%|██████████| 70/70 [00:02<00:00, 33.31it/s]


Epoch 48/50, Train Loss: 1.4698, Train Accuracy: 0.5736, Test Loss: 1.5748, Test Accuracy: 0.4680
Epoch 48


100%|██████████| 163/163 [00:15<00:00, 10.58it/s]
100%|██████████| 70/70 [00:02<00:00, 33.30it/s]


Epoch 49/50, Train Loss: 1.5134, Train Accuracy: 0.5287, Test Loss: 1.6223, Test Accuracy: 0.4201
Epoch 49


100%|██████████| 163/163 [00:15<00:00, 10.57it/s]
100%|██████████| 70/70 [00:02<00:00, 33.32it/s]

Epoch 50/50, Train Loss: 1.6099, Train Accuracy: 0.4329, Test Loss: 1.5713, Test Accuracy: 0.4716





In [16]:
torch.save(model.state_dict(), 'vgg16_audio_video_'+str(num_epochs)+'_'+str(_bs)+'_'+str(_lr))