In [1]:
import os
gpu_ids = [4]
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_ids))
import random
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import cv2
from torch.utils.data import Dataset, DataLoader, Subset
from transformers import VideoMAEFeatureExtractor, VideoMAEModel
from sklearn.metrics import f1_score, recall_score, accuracy_score
from tqdm import tqdm

# ---- SETTINGS ----

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

clip_dir = "/data/home/huixian/Documents/Homeworks/535_project/MOSEI/Clip/Clips_16frames"
mapping_csv = "/data/home/huixian/Documents/Homeworks/535_project/MOSEI/Clip/clip_sentiment_split.csv"

batch_size = 16
clip_len = 16
num_epochs = 20

# ---- DATASET ----
class VideoClipDataset(Dataset):
    def __init__(self, clip_dir, csv_path, feature_extractor):
        self.clip_dir = clip_dir
        self.df = pd.read_csv(csv_path)
        self.feature_extractor = feature_extractor
        self.samples = list(self.df.itertuples(index=False))  # FIXED


    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        row = self.samples[idx]
        clip_path = os.path.join(self.clip_dir, row.clip_filename)

        cap = cv2.VideoCapture(clip_path)
        frames = []
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame[:, :, ::-1])  # BGR to RGB
        cap.release()

        if len(frames) < clip_len:
            frames += [frames[-1]] * (clip_len - len(frames))
        frames = frames[:clip_len]

        inputs = self.feature_extractor(images=frames, return_tensors="pt")["pixel_values"].squeeze(0)
        return inputs, torch.tensor(row.sentiment_score, dtype=torch.float32)

# ---- LOSS ----
class CenteredWeightedMSELoss(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, preds, targets):
        ideal = torch.zeros_like(targets)
        ideal[targets < -0.3] = -3.0
        ideal[targets > 0.3] = 3.0
        ideal[(-0.3 <= targets) & (targets <= 0.3)] = 0.0

        weights = torch.ones_like(targets)
        weights[targets < -0.3] = 1.3
        weights[targets > 0.3] = 1.3
        weights[(-0.3 <= targets) & (targets <= 0.3)] = 1.0

        mse = (preds - ideal) ** 2
        return (weights * mse).mean()

# ---- MODEL ----
class SentimentRegressor(nn.Module):
    def __init__(self, feature_dim):
        super().__init__()
        self.regressor = nn.Sequential(
            nn.Linear(feature_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 1)
        )

    def forward(self, x):
        return self.regressor(x).squeeze(1)

# ---- TRAINING UTILS ----
def run_epoch(model, loader, optimizer, is_train=True):
    model.train() if is_train else model.eval()
    total_preds, total_labels = [], []
    total_loss = 0

    for clips, targets in tqdm(loader, leave=False):
        clips, targets = clips.to(device), targets.to(device)

        with torch.set_grad_enabled(is_train):
            features = video_mae(clips).last_hidden_state.mean(dim=1)
            preds = model(features)
            loss = loss_fn(preds, targets)

            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        total_loss += loss.item()
        total_preds.extend(preds.detach().cpu().numpy())
        total_labels.extend(targets.detach().cpu().numpy())

    return total_loss / len(loader), np.array(total_preds), np.array(total_labels)

def evaluate(preds, labels):
    def to_label(x):
        return "Negative" if x < -0.3 else "Positive" if x > 0.3 else "Neutral"
    preds_label = [to_label(p) for p in preds]
    labels_label = [to_label(l) for l in labels]

    macro_f1 = f1_score(labels_label, preds_label, average="macro")
    micro_f1 = f1_score(labels_label, preds_label, average="micro")
    recall = recall_score(labels_label, preds_label, average=None, labels=["Negative", "Neutral", "Positive"])
    acc = accuracy_score(labels_label, preds_label)
    return macro_f1, micro_f1, recall, acc

# ---- FEATURE EXTRACTOR ----
feature_extractor = VideoMAEFeatureExtractor.from_pretrained("MCG-NJU/videomae-base")
video_mae = VideoMAEModel.from_pretrained("MCG-NJU/videomae-base").to(device)
video_mae.eval()
for param in video_mae.parameters():
    param.requires_grad = False

# ---- LOAD DATASET AND SPLIT BASED ON CSV ----
full_dataset = VideoClipDataset(clip_dir, mapping_csv, feature_extractor)

# Load the CSV again to fetch split info per clip
df = pd.read_csv(mapping_csv)

# Extract clip-level split indices
train_indices = df[df['split'] == 'train'].index.tolist()
val_indices   = df[df['split'] == 'val'].index.tolist()
test_indices  = df[df['split'] == 'test'].index.tolist()

# Create datasets using clip-level splits
train_dataset = Subset(full_dataset, train_indices)
val_dataset   = Subset(full_dataset, val_indices)
test_dataset  = Subset(full_dataset, test_indices)


train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

# ---- MODEL SETUP ----
regressor = SentimentRegressor(feature_dim=768).to(device)
loss_fn = CenteredWeightedMSELoss()  # or use CenteredWeightedMSELoss()
optimizer = optim.Adam(regressor.parameters(), lr=2e-4)

# ---- TRAIN LOOP ----
best_macro_f1 = -np.inf
for epoch in range(num_epochs):
    print(f"\nEpoch {epoch}")
    train_loss, _, _ = run_epoch(regressor, train_loader, optimizer, is_train=True)
    val_loss, val_preds, val_labels = run_epoch(regressor, val_loader, optimizer, is_train=False)

    macro_f1, micro_f1, recall, acc = evaluate(val_preds, val_labels)
    print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
    print(f"Macro-F1: {macro_f1:.4f} | Micro-F1: {micro_f1:.4f} | Acc: {acc:.4f} | Recall: {recall}")

    if macro_f1 > best_macro_f1:
        best_macro_f1 = macro_f1
        torch.save(regressor.state_dict(), "best_regressor_Customized_Loss_final.pth")
        print(f"✅ Best model saved at epoch {epoch} with Macro-F1={macro_f1:.4f}")

# ---- TEST EVALUATION ----
test_loss, test_preds, test_labels = run_epoch(regressor, test_loader, optimizer, is_train=False)
macro_f1, micro_f1, recall, acc = evaluate(test_preds, test_labels)
print("\n----- TEST RESULTS -----")
print(f"Macro-F1: {macro_f1:.4f} | Micro-F1: {micro_f1:.4f} | Acc: {acc:.4f} | Recall: {recall}")





Epoch 0


                                                 

Train Loss: 8.2192 | Val Loss: 8.0449
Macro-F1: 0.1570 | Micro-F1: 0.3080 | Acc: 0.3080 | Recall: [0. 1. 0.]
✅ Best model saved at epoch 0 with Macro-F1=0.1570

Epoch 1


                                                 

Train Loss: 8.1263 | Val Loss: 7.9876
Macro-F1: 0.2618 | Micro-F1: 0.3415 | Acc: 0.3415 | Recall: [0.17985612 0.86610879 0.03088803]
✅ Best model saved at epoch 1 with Macro-F1=0.2618

Epoch 2


                                                 

Train Loss: 8.0016 | Val Loss: 8.0122
Macro-F1: 0.2536 | Micro-F1: 0.3015 | Acc: 0.3015 | Recall: [0.04316547 0.65271967 0.25482625]

Epoch 3


                                                 

Train Loss: 7.9029 | Val Loss: 7.9657
Macro-F1: 0.3136 | Micro-F1: 0.3608 | Acc: 0.3608 | Recall: [0.48561151 0.55230126 0.05019305]
✅ Best model saved at epoch 3 with Macro-F1=0.3136

Epoch 4


                                                 

Train Loss: 7.7921 | Val Loss: 7.9426
Macro-F1: 0.3468 | Micro-F1: 0.3518 | Acc: 0.3518 | Recall: [0.26978417 0.53556485 0.27027027]
✅ Best model saved at epoch 4 with Macro-F1=0.3468

Epoch 5


                                                 

Train Loss: 7.6486 | Val Loss: 8.0491
Macro-F1: 0.3291 | Micro-F1: 0.3750 | Acc: 0.3750 | Recall: [0.64748201 0.36820084 0.08880309]

Epoch 6


                                                 

Train Loss: 7.5810 | Val Loss: 8.1350
Macro-F1: 0.3379 | Micro-F1: 0.3505 | Acc: 0.3505 | Recall: [0.17625899 0.38493724 0.50579151]

Epoch 7


                                                 

Train Loss: 7.4459 | Val Loss: 8.0161
Macro-F1: 0.3607 | Micro-F1: 0.3686 | Acc: 0.3686 | Recall: [0.48561151 0.35564854 0.25482625]
✅ Best model saved at epoch 7 with Macro-F1=0.3607

Epoch 8


                                                 

Train Loss: 7.3174 | Val Loss: 8.0823
Macro-F1: 0.3622 | Micro-F1: 0.3621 | Acc: 0.3621 | Recall: [0.38129496 0.34728033 0.35521236]
✅ Best model saved at epoch 8 with Macro-F1=0.3622

Epoch 9


                                                 

Train Loss: 7.2261 | Val Loss: 8.1720
Macro-F1: 0.3567 | Micro-F1: 0.3570 | Acc: 0.3570 | Recall: [0.32733813 0.33891213 0.40540541]

Epoch 10


                                                 

Train Loss: 7.1289 | Val Loss: 8.1605
Macro-F1: 0.3572 | Micro-F1: 0.3621 | Acc: 0.3621 | Recall: [0.45323741 0.29288703 0.32818533]

Epoch 11


                                                 

Train Loss: 7.0683 | Val Loss: 8.3930
Macro-F1: 0.3271 | Micro-F1: 0.3634 | Acc: 0.3634 | Recall: [0.65827338 0.22175732 0.17760618]

Epoch 12


                                                 

Train Loss: 6.9861 | Val Loss: 8.3131
Macro-F1: 0.3662 | Micro-F1: 0.3686 | Acc: 0.3686 | Recall: [0.35611511 0.30962343 0.43629344]
✅ Best model saved at epoch 12 with Macro-F1=0.3662

Epoch 13


                                                 

Train Loss: 6.8725 | Val Loss: 8.7071
Macro-F1: 0.3393 | Micro-F1: 0.3647 | Acc: 0.3647 | Recall: [0.20503597 0.23849372 0.65250965]

Epoch 14


                                                 

Train Loss: 6.8216 | Val Loss: 8.4818
Macro-F1: 0.3588 | Micro-F1: 0.3686 | Acc: 0.3686 | Recall: [0.32374101 0.24686192 0.52895753]

Epoch 15


                                                 

Train Loss: 6.7497 | Val Loss: 8.3761
Macro-F1: 0.3642 | Micro-F1: 0.3686 | Acc: 0.3686 | Recall: [0.4028777  0.27196653 0.42084942]

Epoch 16


                                                 

Train Loss: 6.6786 | Val Loss: 8.4171
Macro-F1: 0.3698 | Micro-F1: 0.3737 | Acc: 0.3737 | Recall: [0.38848921 0.28451883 0.44015444]
✅ Best model saved at epoch 16 with Macro-F1=0.3698

Epoch 17


                                                 

Train Loss: 6.5705 | Val Loss: 8.3892
Macro-F1: 0.3498 | Micro-F1: 0.3608 | Acc: 0.3608 | Recall: [0.5        0.23430962 0.32818533]

Epoch 18


                                                 

Train Loss: 6.5754 | Val Loss: 8.5695
Macro-F1: 0.3677 | Micro-F1: 0.3763 | Acc: 0.3763 | Recall: [0.33093525 0.26359833 0.52895753]

Epoch 19


                                                 

Train Loss: 6.4604 | Val Loss: 8.5989
Macro-F1: 0.3651 | Micro-F1: 0.3750 | Acc: 0.3750 | Recall: [0.3381295  0.24686192 0.53281853]


                                               


----- TEST RESULTS -----
Macro-F1: 0.3856 | Micro-F1: 0.4154 | Acc: 0.4154 | Recall: [0.36       0.17410714 0.66192171]


