# inference.py

Self-Contained Inference Script for Video Sentiment Classification
-------------------------------------------------------------------
This script loads a pretrained CNN+LSTM model checkpoint (cnn_lstm_best_val.pth)
from the Model/ folder, scans all videos in TestSet/Happy and TestSet/Angry,
samples 10 frames from the last 5 seconds of each clip, runs inference,
and writes:
  1) A multi-page PDF (test_results_with_title.pdf) with a title page and one page per clip showing the first 5 of the 10 sampled frames plus the True | Pred labels.
  2) Individual PNG snapshots for each clip under test_results_png/.

Usage:
  1) Place this file alongside the following folder structure:
       Video-Analysis/
       ├── Model/
       │   └── cnn_lstm_best_val.pth
       ├── TestSet/
       │   ├── Happy/    (contains .mp4/.avi happy video clips)
       │   └── Angry/    (contains .mp4/.avi angry video clips)
       ├── inference.py  (this script)
  2) In your terminal or Jupyter environment, `cd` into Video-Analysis/
  3) Run with Python 3 (requires numpy, pandas, torch, torchvision, OpenCV (cv2), Pillow (PIL) and matplotlib):
       python inference.py
  4) Outputs:
       - test_results_with_title.pdf
       - test_results_png/clip_*.png


In [2]:
import os
import sys
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import cv2
from PIL import Image
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# -----------------------------------------------------------------------------
# 1. Verify working directory and required folders
# -----------------------------------------------------------------------------
# Every input and output path is resolved against the directory the script is
# launched from, so report it up front.
cwd = os.getcwd()
print(f"Current working directory: {cwd}")

# The checkpoint must be present at Model/cnn_lstm_best_val.pth; abort otherwise.
CHECKPOINT_NAME = "cnn_lstm_best_val.pth"
MODEL_DIR = os.path.join(cwd, "Model")
best_model_path = os.path.join(MODEL_DIR, CHECKPOINT_NAME)
checkpoint_present = os.path.isfile(best_model_path)
if not checkpoint_present:
    print(f"ERROR: Model checkpoint not found at: {best_model_path}")
    sys.exit(1)

# Both class folders must exist under TestSet/; abort if either is missing.
TESTSET_DIR = os.path.join(cwd, "TestSet")
HAPPY_DIR, ANGRY_DIR = (
    os.path.join(TESTSET_DIR, class_folder) for class_folder in ("Happy", "Angry")
)
if not (os.path.isdir(HAPPY_DIR) and os.path.isdir(ANGRY_DIR)):
    print(f"ERROR: Expecting subfolders 'Happy' and 'Angry' under {TESTSET_DIR}")
    sys.exit(1)

# -----------------------------------------------------------------------------
# 2. Define the CNN+LSTM model class (must match training-time definition)
# -----------------------------------------------------------------------------
class CNN_LSTM_Classifier(nn.Module):
    """
    Frame-wise CNN encoder followed by an LSTM and a linear classification head.

    Each frame of a clip is encoded by the convolutional part of torchvision's
    MobileNetV2, the per-frame feature vectors are passed through an LSTM in
    temporal order, and the LSTM output at the final time step is projected to
    class logits.
    """

    def __init__(self, n_classes=2, hidden_dim=256, n_layers=1, pretrained=False):
        """
        n_classes:  number of logits produced per clip
        hidden_dim: hidden size of the LSTM
        n_layers:   number of stacked LSTM layers
        pretrained: forwarded unchanged to models.mobilenet_v2
        """
        super().__init__()

        # Only the convolutional feature stack of MobileNetV2 is kept.
        mobilenet = models.mobilenet_v2(pretrained=pretrained)
        self.feature_extractor = mobilenet.features
        # Collapses each spatial feature map to a single value per channel.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Channel count of the backbone output (1280 for MobileNetV2).
        self.feat_dim = mobilenet.last_channel

        # The backbone is frozen: none of its weights receive gradients.
        for weight in self.feature_extractor.parameters():
            weight.requires_grad = False

        # Temporal model over the sequence of per-frame feature vectors.
        self.lstm = nn.LSTM(
            input_size=self.feat_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            batch_first=True,
        )

        # Classification head: hidden_dim -> n_classes.
        self.fc = nn.Linear(hidden_dim, n_classes)

    def forward(self, x):
        """
        x: tensor of shape (batch_size, n_frames, channels, height, width)
        returns: logits of shape (batch_size, n_classes)
        """
        batch, steps, channels, height, width = x.shape

        # Fold time into the batch axis so the CNN sees individual frames.
        frames = x.view(batch * steps, channels, height, width)
        feature_maps = self.feature_extractor(frames)

        # Pool away the spatial axes and restore the (batch, time) layout.
        sequence = self.avgpool(feature_maps).view(batch, steps, self.feat_dim)

        # Only the per-step outputs are needed; the final (h, c) state is unused.
        step_outputs, _ = self.lstm(sequence)

        # Classify from the LSTM output at the last time step.
        return self.fc(step_outputs[:, -1, :])

# -----------------------------------------------------------------------------
# 3. Instantiate model, load checkpoint, set to eval
# -----------------------------------------------------------------------------
# Prefer the GPU when one is available, otherwise run on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted
# source. The loaded object is used directly as a state_dict.
checkpoint = torch.load(best_model_path, map_location=device)

# The constructor arguments have to describe the same architecture the
# checkpoint was saved from, otherwise load_state_dict rejects it.
model = CNN_LSTM_Classifier(n_classes=2, hidden_dim=256, n_layers=1, pretrained=False)
model.load_state_dict(checkpoint)
model = model.to(device).eval()
print("Loaded model and set to evaluation mode.")

# -----------------------------------------------------------------------------
# 4. Build a DataFrame of test clips
# -----------------------------------------------------------------------------
# File extensions (compared case-insensitively) that count as video clips.
VIDEO_EXTS = (".mp4", ".avi")

# One record per clip: happy clips first, then angry, each sorted by file name.
rows = []
for label_name, folder in (("happy", HAPPY_DIR), ("angry", ANGRY_DIR)):
    clip_names = [
        entry for entry in sorted(os.listdir(folder))
        if entry.lower().endswith(VIDEO_EXTS)
    ]
    rows.extend(
        # Paths are normalised to forward slashes.
        {"filepath": os.path.join(folder, entry).replace("\\", "/"), "label": label_name}
        for entry in clip_names
    )

df_testser = pd.DataFrame(rows)
if df_testser.empty:
    # Nothing to classify: stop before building the dataset.
    print("ERROR: No video files found in TestSet/Happy or TestSet/Angry.")
    sys.exit(1)

# Short summary of what was found.
print("Test DataFrame:")
print(df_testser.head(), "\nCounts per label:")
print(df_testser["label"].value_counts())

# -----------------------------------------------------------------------------
# 5. Define VideoDatasetInference class
# -----------------------------------------------------------------------------
class VideoDatasetInference(Dataset):
    """
    Samples exactly n_frames from the last sample_last_secs seconds of each video.
    Returns a tensor of shape (n_frames, 3, 224, 224) and the label index.

    Videos that cannot be opened, or from which no frame can be decoded, yield
    an all-zero clip (and a printed warning) instead of raising, so one broken
    file does not abort a whole run.
    """

    def __init__(self, df, n_frames=10, sample_last_secs=5, transform=None, labels=None):
        """
        df:               DataFrame with a "filepath" and a "label" column
        n_frames:         number of frames returned per clip
        sample_last_secs: length (seconds) of the window at the end of the clip
        transform:        callable applied to each PIL frame; when falsy,
                          transforms.ToTensor() is used instead
        labels:           optional explicit class order. When None, the order is
                          the sorted set of labels found in df (original
                          behaviour), which shifts the indices if a class is
                          missing from df. NOTE(review): the order presumably
                          has to match the one used at training time - confirm.

        Raises ValueError if `labels` is given but df contains a label that is
        not listed in it.
        """
        self.df = df.reset_index(drop=True)
        self.n_frames = n_frames
        self.sample_last_secs = sample_last_secs
        self.transform = transform

        present = sorted(self.df["label"].unique())
        if labels is None:
            self.labels = present                      # e.g. ["angry", "happy"]
        else:
            self.labels = list(labels)
            unknown = [lbl for lbl in present if lbl not in self.labels]
            if unknown:
                raise ValueError(f"Labels in df missing from `labels`: {unknown}")
        self.label2idx = {lbl: idx for idx, lbl in enumerate(self.labels)}

    def __len__(self):
        """Number of clips in the dataset."""
        return len(self.df)

    def _dummy_clip(self):
        """All-zero placeholder clip with the documented (n_frames, 3, 224, 224) shape."""
        return torch.zeros((self.n_frames, 3, 224, 224))

    def _frame_indices(self, total_frames, fps):
        """Return n_frames evenly spaced frame indices covering the trailing window."""
        window_len = int(fps * self.sample_last_secs)
        start_frame = max(0, total_frames - window_len)
        if total_frames > 0 and start_frame < total_frames:
            return np.linspace(start_frame, total_frames - 1, num=self.n_frames).astype(int)
        # Unknown or empty frame count: fall back to repeatedly reading frame 0.
        return np.zeros(self.n_frames, dtype=int)

    def _read_frames(self, cap):
        """Decode and transform the sampled frames from an opened capture."""
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Missing / invalid FPS metadata: assume 30 frames per second.
            fps = 30.0

        frames = []
        for fid in self._frame_indices(total_frames, fps):
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(fid))
            ret, frame = cap.read()
            if not ret:
                # Stop at the first failed read; the caller pads the clip.
                break
            # OpenCV decodes to BGR; convert to RGB before building the PIL image.
            img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            if self.transform:
                img = self.transform(img)
            else:
                img = transforms.ToTensor()(img)
            frames.append(img)
        return frames

    def __getitem__(self, idx):
        """Return (clip_tensor, label_idx) for the clip at position idx."""
        row = self.df.iloc[idx]
        filepath = row["filepath"]
        label_idx = self.label2idx[row["label"]]

        cap = cv2.VideoCapture(filepath)
        try:
            if not cap.isOpened():
                print(f"WARNING: Could not open video, using an all-zero clip: {filepath}")
                return self._dummy_clip(), label_idx
            frames = self._read_frames(cap)
        finally:
            # Always free the capture handle, even if decoding or the transform raises.
            cap.release()

        if not frames:
            print(f"WARNING: No frames decoded, using an all-zero clip: {filepath}")
            return self._dummy_clip(), label_idx

        # Pad with last valid frame if fewer than n_frames read
        while len(frames) < self.n_frames:
            frames.append(frames[-1].clone())

        clip_tensor = torch.stack(frames, dim=0)   # shape: (n_frames, 3, 224, 224)
        return clip_tensor, label_idx

# -----------------------------------------------------------------------------
# 6. Create DataLoader for inference
# -----------------------------------------------------------------------------
# Per-channel normalisation constants applied after the frame is converted to a tensor.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Preprocessing for every sampled frame: resize to 224x224, convert to a
# tensor, then normalise each channel.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
]
transform = transforms.Compose(preprocessing_steps)

# One clip per batch, in DataFrame order, loaded in the main process.
inference_dataset = VideoDatasetInference(
    df_testser,
    n_frames=10,
    sample_last_secs=5,
    transform=transform,
)
inference_loader = DataLoader(
    inference_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=0,
)

# -----------------------------------------------------------------------------
# 7. Run inference, build PDF with title page, and save individual PNGs
# -----------------------------------------------------------------------------
# Output locations: one multi-page PDF plus a folder of per-clip PNG files.
pdf_path = os.path.join(cwd, "test_results_with_title.pdf")
png_folder = os.path.join(cwd, "test_results_png")
os.makedirs(png_folder, exist_ok=True)

# Constants used to undo transforms.Normalize before a frame is displayed.
display_std = np.array([0.229, 0.224, 0.225])
display_mean = np.array([0.485, 0.456, 0.406])

with PdfPages(pdf_path) as pdf:
    # --- Title Page ---
    fig_title = plt.figure(figsize=(11.69, 8.27))  # A4 landscape
    fig_title.patch.set_facecolor('white')
    title_ax = fig_title.add_subplot(1, 1, 1)
    title_ax.axis('off')

    title_str = "Proof‐of‐Concept:\nVideo Sentiment Classification\n"
    title_ax.text(
        0.5, 0.5, title_str,
        ha='center', va='center',
        fontsize=24, weight='bold',
    )

    pdf.savefig(fig_title)
    plt.close(fig_title)
    # --- End Title Page ---

    # --- Inference Loop ---
    # The loader yields one clip per batch, so every iteration is one page.
    with torch.no_grad():
        for clip_no, (batch, target) in enumerate(inference_loader, start=1):
            batch = batch.to(device)                      # (1, n_frames, 3, 224, 224)
            class_names = inference_dataset.labels
            true_label = class_names[target.item()]

            # Forward pass: the predicted class is the arg-max logit.
            scores = model(batch)
            pred_label = class_names[scores.argmax(dim=1).item()]

            # Only the first (up to) 5 sampled frames are shown on the page.
            clip_frames = batch.cpu().squeeze(0)          # (n_frames, 3, 224, 224)
            n_plot = min(5, clip_frames.shape[0])

            # Channels-last layout for imshow, then undo the normalisation.
            shown = clip_frames[:n_plot].permute(0, 2, 3, 1).numpy()
            shown = np.clip(shown * display_std + display_mean, 0, 1)

            fig = plt.figure(figsize=(12, 3))
            for col in range(n_plot):
                panel = fig.add_subplot(1, n_plot, col + 1)
                panel.imshow(shown[col])
                panel.axis("off")

            page_title = f"Clip {clip_no}: True = {true_label.upper()}  |  Pred = {pred_label.upper()}"
            fig.suptitle(page_title, fontsize=14)
            # Leave the top 8% of the figure free for the suptitle.
            fig.tight_layout(rect=[0, 0, 1, 0.92])

            # The same figure goes into the PDF and into its own PNG file.
            pdf.savefig(fig)
            png_name = f"clip_{clip_no}_true_{true_label}_pred_{pred_label}.png"
            fig.savefig(os.path.join(png_folder, png_name))
            plt.close(fig)

print(f"Saved multi‐page PDF with title page here:\n  {pdf_path}")
print(f"Saved individual PNGs in folder:\n  {png_folder}")


Current working directory: C:\Users\IAGhe\OneDrive\Documents\Learning\portfolio\Video
Loaded model and set to evaluation mode.
Test DataFrame:
                                            filepath  label
0  C:/Users/IAGhe/OneDrive/Documents/Learning/por...  happy
1  C:/Users/IAGhe/OneDrive/Documents/Learning/por...  happy
2  C:/Users/IAGhe/OneDrive/Documents/Learning/por...  happy
3  C:/Users/IAGhe/OneDrive/Documents/Learning/por...  happy
4  C:/Users/IAGhe/OneDrive/Documents/Learning/por...  happy 
Counts per label:
label
angry    7
happy    6
Name: count, dtype: int64




Saved multi‐page PDF with title page here:
  C:\Users\IAGhe\OneDrive\Documents\Learning\portfolio\Video\test_results_with_title.pdf
Saved individual PNGs in folder:
  C:\Users\IAGhe\OneDrive\Documents\Learning\portfolio\Video\test_results_png
