# Few-Shot Training Workflow

## Frame Sampling & Embedding

### Input:
- 7-second clips (goal/non-goal).

### Action:
1. Uniformly sample **N frames**:
   - **16 frames** for ResNet-50.
   - **32 frames** for R(2+1)D.
2. Preprocess frames:
   - Resize and normalize.
3. Pass frames through a pretrained backbone:
   - **ResNet-50** → 2048-dimensional feature vectors.
   - **R(2+1)D-18** → 512-dimensional feature vectors.

### Output:
- Per-frame feature tensors.
- Mean-pooled into one **2048-D** or **512-D** clip embedding saved as `.npy`.

---

## Prototype Construction

### Input:
- All training clip embeddings per class.

### Action:
1. Compute class “prototype”:
   - Average all embeddings for each class.
   - L2-normalize the resulting vectors.

### Output:
- Two normalized vectors:
  - `proto_goal`
  - `proto_nongoal`

---

## Few-Shot Classification

### Input:
- Test clip embedding.
- Class prototypes (`proto_goal`, `proto_nongoal`).

### Action:
1. L2-normalize the test embedding.
2. Compute cosine similarity to each prototype.
3. Compare the difference in similarity to a threshold (Δ).

### Output:
- Predicted label: **"Goal"** or **"Non-Goal"**.

---

## Evaluation

### Input:
- All test predictions with true labels.

### Action:
1. Compute:
   - Confusion matrix.
   - Precision, recall, F₁ score.
   - Overall accuracy.
2. Sweep Δ to find the best threshold.

### Output:
- Numeric metrics.
- Per-clip similarity scores printed in the console.

---

## Few-Shot Approach

This is a **prototype-based few-shot method**:
- Each class is represented by a **mean feature vector**.
- New clips are classified by their **cosine similarity** to these few-shot class prototypes.

### Same frame-level preprocessing, taken from `monday.ipynb`

In [None]:
import os
import cv2
import numpy as np

def process_video(input_path, output_path):
    """
    Convert a video to grayscale while masking out grass and audience regions.

    For every frame:
    1. A grayscale copy and an HSV copy are computed.
    2. A "grass" mask (green HSV range) and an "audience" mask (saturated,
       bright pixels of any hue) mark the regions to remove.
    3. A "ball" mask (bright, low-saturation pixels in HSV, OR-ed with
       grayscale pixels above 200, then cleaned with a 3x3 morphological
       close followed by open) marks pixels that are always kept.
    4. The grayscale frame is blacked out wherever it is grass or audience
       and not ball, and the result is written to the output video.

    Args:
        input_path (str): Path to the input video file.
        output_path (str): Path of the processed (single-channel) video to write.

    Returns:
        None. If the input cannot be opened, an error is printed and nothing
        is written.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video {input_path}")
        cap.release()
        return

    # The colour bounds and the morphology kernel never change, so they are
    # built once instead of once per frame.
    lower_green = np.array([35, 40, 40])
    upper_green = np.array([85, 255, 255])
    lower_ball_hsv = np.array([0, 0, 200])
    upper_ball_hsv = np.array([180, 30, 255])
    lower_audience = np.array([0, 100, 150])
    upper_audience = np.array([180, 255, 255])
    kernel = np.ones((3, 3), np.uint8)

    out = None
    try:
        # Keep the fractional frame rate (e.g. 29.97): truncating it to an int
        # makes the output play at a slightly different speed than the input.
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # mp4v for better compatibility
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height), isColor=False)

        print(f"Processing {total_frames} frames from {input_path}...")

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            if frame_count % 100 == 0:
                print(f"Processed {frame_count}/{total_frames} frames")

            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # HSV separates hue from brightness, which makes the colour
            # ranges below easier to express than in BGR.
            hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

            grass_mask = cv2.inRange(hsv, lower_green, upper_green)

            # Ball candidates: white/light pixels found either in HSV or by a
            # plain brightness threshold on the grayscale frame.
            ball_mask_hsv = cv2.inRange(hsv, lower_ball_hsv, upper_ball_hsv)
            _, ball_mask_gray = cv2.threshold(gray_frame, 200, 255, cv2.THRESH_BINARY)
            ball_mask = cv2.bitwise_or(ball_mask_hsv, ball_mask_gray)
            ball_mask = cv2.morphologyEx(ball_mask, cv2.MORPH_CLOSE, kernel)
            ball_mask = cv2.morphologyEx(ball_mask, cv2.MORPH_OPEN, kernel)

            # Audience candidates, excluding anything already flagged as ball.
            audience_mask = cv2.inRange(hsv, lower_audience, upper_audience)
            audience_mask = cv2.bitwise_and(audience_mask, cv2.bitwise_not(ball_mask))

            # Keep everything that is neither grass nor audience, plus the ball.
            combined_mask = cv2.bitwise_or(grass_mask, audience_mask)
            keep_mask = cv2.bitwise_not(combined_mask)
            keep_mask = cv2.bitwise_or(keep_mask, ball_mask)

            processed_frame = cv2.bitwise_and(gray_frame, gray_frame, mask=keep_mask)
            out.write(processed_frame)
    finally:
        # Release the handles even if a frame fails to process. No GUI window
        # is ever opened here, so cv2.destroyAllWindows() is not needed.
        cap.release()
        if out is not None:
            out.release()

    print(f"Processed video saved as: {output_path}")

def process_all_videos(input_folder, output_folder):
    """
    Run ``process_video`` on every ``.mp4`` file found directly in a folder.

    The output folder is created if needed, and each processed video keeps
    the file name of its input. The extension check is case-insensitive so
    that files such as ``clip.MP4`` are not silently skipped.

    Args:
        input_folder (str): Path to the folder containing input videos.
        output_folder (str): Path to the folder to save processed videos.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    for video_file in os.listdir(input_folder):
        if not video_file.lower().endswith(".mp4"):
            continue

        input_path = os.path.join(input_folder, video_file)
        if os.path.isfile(input_path):
            process_video(input_path, os.path.join(output_folder, video_file))

if __name__ == "__main__":
    # Raw clips live under base_path; the masked grayscale videos are written
    # to the sibling "p1" folders.
    base_path = "F:/AIM Lab/Experiment/Clips"
    goal_input_folder = os.path.join(base_path, "goal")
    no_goal_input_folder = os.path.join(base_path, "no goal")
    goal_output_folder = os.path.join(base_path, "Goal p1")
    no_goal_output_folder = os.path.join(base_path, "NoGoal p1")

    # Announce, then process, each class folder in turn.
    jobs = (
        ("Processing videos in the 'goal' folder...", goal_input_folder, goal_output_folder),
        ("Processing videos in the 'no goal' folder...", no_goal_input_folder, no_goal_output_folder),
    )
    for banner, source_dir, target_dir in jobs:
        print(banner)
        process_all_videos(source_dir, target_dir)

### The cell right below was not used

In [3]:
import os
import cv2
import numpy as np

def extract_and_process_frames(video_path, output_folder, fps=2, target_size=(224, 224)):
    """
    Extract frames from a video at roughly ``fps`` frames per second, resize
    them, and save them as numbered JPEG files.

    Every ``frame_interval``-th frame is kept, where ``frame_interval`` is the
    video's reported frame rate divided by ``fps`` (truncated). The interval
    is clamped to at least 1 so that a video whose frame rate is lower than
    the requested ``fps`` (or is reported as 0) keeps every frame instead of
    failing with a division by zero.

    Args:
        video_path (str): Path to the input video file.
        output_folder (str): Path to the folder to save processed frames.
        fps (int): Frames per second to extract.
        target_size (tuple): Target size for resizing (width, height).
    """
    os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video {video_path}")
        cap.release()
        return

    saved_frame_count = 0
    try:
        video_fps = int(cap.get(cv2.CAP_PROP_FPS))
        # Clamp: int(video_fps / fps) is 0 whenever video_fps < fps, and
        # `frame_count % 0` would raise ZeroDivisionError.
        frame_interval = max(1, int(video_fps / fps))

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                resized_frame = cv2.resize(frame, target_size)
                frame_filename = os.path.join(output_folder, f"frame_{saved_frame_count:04d}.jpg")
                cv2.imwrite(frame_filename, resized_frame)
                saved_frame_count += 1

            frame_count += 1
    finally:
        # Release the capture even if resizing or writing a frame fails.
        cap.release()

    print(f"Processed {saved_frame_count} frames from {video_path} and saved to {output_folder}")

def process_all_videos(input_folder, output_folder, fps=2, target_size=(224, 224)):
    """
    Extract frames from every ``.mp4`` file found directly in a folder.

    Each video gets its own sub-folder of ``output_folder``, named after the
    video file without its extension. The extension check is
    case-insensitive so that files such as ``clip.MP4`` are not skipped.

    Args:
        input_folder (str): Path to the folder containing input videos.
        output_folder (str): Path to the folder to save processed frames.
        fps (int): Frames per second to extract.
        target_size (tuple): Target size for resizing (width, height).
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    for video_file in os.listdir(input_folder):
        if not video_file.lower().endswith(".mp4"):
            continue

        input_path = os.path.join(input_folder, video_file)
        if not os.path.isfile(input_path):
            continue

        video_output_folder = os.path.join(output_folder, os.path.splitext(video_file)[0])
        extract_and_process_frames(input_path, video_output_folder, fps, target_size)

if __name__ == "__main__":
    # The "p1" folders hold the preprocessed videos; extracted frames are
    # written to the matching "p2" folders.
    base_path = "F:/AIM Lab/Experiment/Clips"
    goal_input_folder = os.path.join(base_path, "Goal p1")
    no_goal_input_folder = os.path.join(base_path, "NoGoal p1")
    goal_output_folder = os.path.join(base_path, "Goal p2")
    no_goal_output_folder = os.path.join(base_path, "NoGoal p2")

    # Announce, then process, each class folder in turn.
    jobs = (
        ("Processing videos in the 'Goal p1' folder...", goal_input_folder, goal_output_folder),
        ("Processing videos in the 'NoGoal p1' folder...", no_goal_input_folder, no_goal_output_folder),
    )
    for banner, source_dir, target_dir in jobs:
        print(banner)
        process_all_videos(source_dir, target_dir, fps=2, target_size=(224, 224))

Processing videos in the 'Goal p1' folder...
Processed 15 frames from F:/AIM Lab/Experiment/Clips\Goal p1\g1.mp4 and saved to F:/AIM Lab/Experiment/Clips\Goal p2\g1
Processed 15 frames from F:/AIM Lab/Experiment/Clips\Goal p1\g10.mp4 and saved to F:/AIM Lab/Experiment/Clips\Goal p2\g10
Processed 15 frames from F:/AIM Lab/Experiment/Clips\Goal p1\g11.mp4 and saved to F:/AIM Lab/Experiment/Clips\Goal p2\g11
Processed 15 frames from F:/AIM Lab/Experiment/Clips\Goal p1\g12.mp4 and saved to F:/AIM Lab/Experiment/Clips\Goal p2\g12
Processed 15 frames from F:/AIM Lab/Experiment/Clips\Goal p1\g13.mp4 and saved to F:/AIM Lab/Experiment/Clips\Goal p2\g13
Processed 15 frames from F:/AIM Lab/Experiment/Clips\Goal p1\g14.mp4 and saved to F:/AIM Lab/Experiment/Clips\Goal p2\g14
Processed 15 frames from F:/AIM Lab/Experiment/Clips\Goal p1\g15.mp4 and saved to F:/AIM Lab/Experiment/Clips\Goal p2\g15
Processed 15 frames from F:/AIM Lab/Experiment/Clips\Goal p1\g16.mp4 and saved to F:/AIM Lab/Experiment

### The cell above was not used

### Using ResNet-50

In [None]:
import os
import cv2
import numpy as np
import torch
import torchvision
from typing import List

# Load ResNet-50 model
# Module-level feature extractor shared by embed_clip(): a torchvision
# ResNet-50 created with pretrained weights.
# NOTE(review): `pretrained=True` is deprecated in recent torchvision releases
# in favour of the `weights=` argument — confirm against the installed version.
model = torchvision.models.resnet50(pretrained=True)
model.fc = torch.nn.Identity()  # Replace the final classifier with a pass-through to get 2048-d features
model.eval()  # Set the model to evaluation mode (inference only, no training here)

# Preprocessing pipeline applied to each RGB frame before it is fed to `model`.
# The mean/std values are the ones passed to Normalize below; Resize runs on
# the already-normalized tensor.
preprocess = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),                    # H×W×C → C×H×W, [0,1]
    torchvision.transforms.Normalize(mean=[.485, .456, .406], std=[.229, .224, .225]),
    torchvision.transforms.Resize((224, 224)),
])

def embed_clip(frames: List[np.ndarray]):
    """
    Embed each frame with the module-level ResNet-50 ``model``.

    Frames are handled one at a time: converted from BGR to RGB, run through
    the module-level ``preprocess`` pipeline, and passed to ``model`` with
    gradient tracking disabled.

    Args:
        frames (List[np.ndarray]): Frames in OpenCV layout (H×W×C, BGR).

    Returns:
        np.ndarray: One embedding row per input frame, stacked along axis 0
        (N×2048 per the model setup in this cell).
    """
    def _embed_one(bgr_frame):
        # OpenCV yields BGR; the preprocessing pipeline is fed RGB.
        rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
        batch = preprocess(rgb_frame).unsqueeze(0)  # add a batch dimension
        with torch.no_grad():
            output = model(batch)
        return output.squeeze(0).cpu().numpy()

    return np.stack([_embed_one(frame) for frame in frames], axis=0)

def sample_frames_from_video(video_path, num_samples=16):
    """
    Uniformly sample up to ``num_samples`` frames from a video.

    If the video reports fewer frames than ``num_samples``, every frame is
    read. Frames that fail to decode are skipped with a warning, so the
    returned list may be shorter than requested.

    Args:
        video_path (str): Path to the video file.
        num_samples (int): Number of frames to sample.

    Returns:
        list: Sampled BGR frames (numpy arrays); empty if the video cannot
        be opened or reports no frames.
    """
    print(f"Opening video: {video_path}")
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not cap.isOpened():
            print(f"Error: Cannot open video {video_path}")
            return []

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print(f"Total frames in video: {total_frames}")
        # <= 0 rather than == 0: treat a negative reported count as "no frames" too.
        if total_frames <= 0:
            print(f"Error: No frames found in video {video_path}")
            return []

        if total_frames < num_samples:
            # Fewer frames than samples: just read them all.
            indices = list(range(total_frames))
        else:
            # Uniformly spaced frame indices, first and last frame included.
            indices = np.linspace(0, total_frames - 1, num=num_samples, dtype=int)

        for idx in indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame = cap.read()
            if not ret:
                print(f"Warning: Failed to read frame {idx} from {video_path}")
                continue
            frames.append(frame)
    finally:
        # Always release the capture; the early returns above previously
        # leaked the handle when the video reported zero frames.
        cap.release()

    if not frames:
        print(f"Error: No frames sampled from video {video_path}")
    else:
        print(f"Successfully sampled {len(frames)} frames from {video_path}")
    return frames

def process_folder(input_folder, output_folder, num_samples=16):
    """
    Embed every ``.mp4`` clip in a folder and store one vector per clip.

    For each clip, frames are sampled with ``sample_frames_from_video``,
    embedded with ``embed_clip``, averaged over the frame axis, and saved as
    ``<clip name>.npy`` in ``output_folder``.

    Args:
        input_folder (str): Path to the folder containing input videos.
        output_folder (str): Path to save embeddings.
        num_samples (int): Number of frames to sample per video.
    """
    print(f"Processing folder: {input_folder}")
    if not os.path.exists(input_folder):
        print(f"Error: Folder does not exist: {input_folder}")
        return

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for entry in os.listdir(input_folder):
        # Guard clause: anything that is not an .mp4 (any letter case) is skipped.
        if not entry.lower().endswith('.mp4'):
            print(f"Skipping non-video file: {entry}")
            continue

        clip_path = os.path.join(input_folder, entry)
        print(f"Processing video: {clip_path}")

        sampled = sample_frames_from_video(clip_path, num_samples=num_samples)
        if not sampled:
            print(f"Error: No frames sampled from {entry}")
            continue

        # Per-frame embeddings -> a single clip vector via the mean over frames.
        per_frame = embed_clip(sampled)
        pooled = np.mean(per_frame, axis=0)

        stem = os.path.splitext(entry)[0]
        target = os.path.join(output_folder, f"{stem}.npy")
        np.save(target, pooled)
        print(f"Saved embedding to {target}")

if __name__ == "__main__":
    # Preprocessed test clips and the folders that receive their embeddings.
    goal_folder = "F:/AIM Lab/Experiment/Clips/Goal p1/Test"
    nongoal_folder = "F:/AIM Lab/Experiment/Clips/NoGoal p1/Test"
    goal_output_folder = "F:/AIM Lab/Experiment/Clips/Goal Embeddings/Test"
    nongoal_output_folder = "F:/AIM Lab/Experiment/Clips/NoGoal Embeddings/Test"

    # Goal clips first, then non-goal clips, 16 sampled frames per clip.
    jobs = (
        ("Processing goal clips:", goal_folder, goal_output_folder),
        ("Processing non-goal clips:", nongoal_folder, nongoal_output_folder),
    )
    for banner, source_dir, target_dir in jobs:
        print(banner)
        process_folder(source_dir, target_dir, num_samples=16)



Processing goal clips:
Processing folder: F:/AIM Lab/Experiment/Clips/Goal p1/Test
Processing video: F:/AIM Lab/Experiment/Clips/Goal p1/Test\g31.mp4
Opening video: F:/AIM Lab/Experiment/Clips/Goal p1/Test\g31.mp4
Total frames in video: 175
Successfully sampled 16 frames from F:/AIM Lab/Experiment/Clips/Goal p1/Test\g31.mp4
Saved embedding to F:/AIM Lab/Experiment/Clips/Goal Embeddings/Test\g31.npy
Processing video: F:/AIM Lab/Experiment/Clips/Goal p1/Test\g32.mp4
Opening video: F:/AIM Lab/Experiment/Clips/Goal p1/Test\g32.mp4
Total frames in video: 175
Successfully sampled 16 frames from F:/AIM Lab/Experiment/Clips/Goal p1/Test\g32.mp4
Saved embedding to F:/AIM Lab/Experiment/Clips/Goal Embeddings/Test\g32.npy
Processing video: F:/AIM Lab/Experiment/Clips/Goal p1/Test\g33.mp4
Opening video: F:/AIM Lab/Experiment/Clips/Goal p1/Test\g33.mp4
Total frames in video: 175
Successfully sampled 16 frames from F:/AIM Lab/Experiment/Clips/Goal p1/Test\g33.mp4
Saved embedding to F:/AIM Lab/Experi

In [35]:
def l2_normalize(vec):
    """
    Scale a vector to unit L2 norm.

    Args:
        vec (np.ndarray): Input vector.

    Returns:
        np.ndarray: ``vec`` divided by its L2 norm. A zero vector has no
        direction, so it is returned unchanged instead of being divided by
        zero (which would produce NaNs).
    """
    norm = np.linalg.norm(vec)
    if norm == 0:
        return vec
    return vec / norm

def build_prototypes(goal_folder, nongoal_folder):
    """
    Build one prototype vector per class from saved clip embeddings.

    Every ``.npy`` file in each folder is loaded, the vectors of a class are
    averaged, and the mean is L2-normalized with ``l2_normalize``.

    Args:
        goal_folder (str): Path to the folder containing goal clip embeddings.
        nongoal_folder (str): Path to the folder containing non-goal clip embeddings.

    Returns:
        tuple: ``(proto_goal, proto_nongoal)``, the L2-normalized class means.

    Raises:
        ValueError: If either folder contains no ``.npy`` embeddings.
            Averaging an empty list would otherwise yield a NaN prototype
            that silently breaks every later similarity comparison.
    """
    def _load_embeddings(folder):
        # Load every .npy file found directly in `folder`.
        return [
            np.load(os.path.join(folder, fname))
            for fname in os.listdir(folder)
            if fname.endswith(".npy")
        ]

    goal_vecs = _load_embeddings(goal_folder)
    nongoal_vecs = _load_embeddings(nongoal_folder)

    if not goal_vecs or not nongoal_vecs:
        raise ValueError("No embeddings found in one or both folders!")

    # Class prototype = normalized mean of that class's embeddings.
    proto_goal = l2_normalize(np.mean(goal_vecs, axis=0))
    proto_nongoal = l2_normalize(np.mean(nongoal_vecs, axis=0))

    return proto_goal, proto_nongoal

def classify_clip(new_clip_vec, proto_goal, proto_nongoal, delta=0.04):
    """
    Classify a clip by comparing its similarity to the two class prototypes.

    The clip vector is L2-normalized here, so callers may pass a raw
    embedding. The prototypes are used as given; the dot products are
    cosine similarities only if the prototypes are already unit length.

    Args:
        new_clip_vec (np.ndarray): Embedding of the clip to classify.
        proto_goal (np.ndarray): Prototype for the goal class.
        proto_nongoal (np.ndarray): Prototype for the non-goal class.
        delta (float): Margin by which the goal similarity must exceed the
            non-goal similarity for the clip to be labelled "Goal".

    Returns:
        str: Predicted label ("Goal" or "Non-Goal").
    """
    v = l2_normalize(new_clip_vec)

    sim_goal = np.dot(v, proto_goal)
    sim_nongoal = np.dot(v, proto_nongoal)

    # "Goal" only when the goal similarity wins by more than the margin.
    return "Goal" if sim_goal > (sim_nongoal + delta) else "Non-Goal"

if __name__ == "__main__":
    # Embedding folders for the train and test splits of each class.
    goal_train_folder = "F:/AIM Lab/Experiment/Clips/Goal Embeddings/Train"
    nongoal_train_folder = "F:/AIM Lab/Experiment/Clips/NoGoal Embeddings/Train"
    goal_test_folder = "F:/AIM Lab/Experiment/Clips/Goal Embeddings/Test"
    nongoal_test_folder = "F:/AIM Lab/Experiment/Clips/NoGoal Embeddings/Test"

    # Prototypes come from the training embeddings only.
    print("Building prototypes...")
    proto_goal, proto_nongoal = build_prototypes(goal_train_folder, nongoal_train_folder)

    # Classify every test embedding (delta 0.00) and print it next to its true label.
    print("Classifying test clips...")
    labelled_test_sets = ((goal_test_folder, "Goal"), (nongoal_test_folder, "Non-Goal"))
    for test_folder, label in labelled_test_sets:
        for fname in os.listdir(test_folder):
            if not fname.endswith(".npy"):
                continue
            test_vec = np.load(os.path.join(test_folder, fname))
            predicted_label = classify_clip(test_vec, proto_goal, proto_nongoal, 0.00)
            print(f"File: {fname}, True Label: {label}, Predicted Label: {predicted_label}")


Building prototypes...
Classifying test clips...
File: g31.npy, True Label: Goal, Predicted Label: Goal
File: g32.npy, True Label: Goal, Predicted Label: Goal
File: g33.npy, True Label: Goal, Predicted Label: Goal
File: g34.npy, True Label: Goal, Predicted Label: Goal
File: g35.npy, True Label: Goal, Predicted Label: Goal
File: g36.npy, True Label: Goal, Predicted Label: Goal
File: g37.npy, True Label: Goal, Predicted Label: Goal
File: ng31.npy, True Label: Non-Goal, Predicted Label: Non-Goal
File: ng32.npy, True Label: Non-Goal, Predicted Label: Non-Goal
File: ng33.npy, True Label: Non-Goal, Predicted Label: Non-Goal
File: ng34.npy, True Label: Non-Goal, Predicted Label: Goal
File: ng35.npy, True Label: Non-Goal, Predicted Label: Non-Goal
File: ng36.npy, True Label: Non-Goal, Predicted Label: Goal
File: ng37.npy, True Label: Non-Goal, Predicted Label: Non-Goal


In [34]:
import numpy as np
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

def classify_clip2(new_clip_vec, proto_goal, proto_nongoal, delta=0.04):
    """
    Classify a clip against the two prototypes and report both similarities.

    Returns:
        tuple: ``(label, similarity_scores)`` where ``label`` is "Goal" or
        "Non-Goal" and ``similarity_scores`` holds ``sim_goal`` and
        ``sim_nongoal``.
    """
    unit_vec = l2_normalize(new_clip_vec)

    # Dot products of the normalized clip vector with each prototype.
    sim_goal = np.dot(unit_vec, proto_goal)
    sim_nongoal = np.dot(unit_vec, proto_nongoal)

    # The goal similarity must beat the non-goal one by more than delta.
    if sim_goal > (sim_nongoal + delta):
        label = "Goal"
    else:
        label = "Non-Goal"

    return label, {"sim_goal": sim_goal, "sim_nongoal": sim_nongoal}

def evaluate_classification_with_metrics(goal_test_folder: str, 
                                         nongoal_test_folder: str,
                                         proto_goal: np.ndarray,
                                         proto_nongoal: np.ndarray,
                                         threshold: float = 0.0) -> dict:
    """
    Classify every test embedding and report confusion matrix, precision,
    recall, and F₁ score, with "Goal" as the positive class (label 1).

    Args:
        goal_test_folder: Folder with ``.npy`` embeddings of goal clips.
        nongoal_test_folder: Folder with ``.npy`` embeddings of non-goal clips.
        proto_goal: Goal class prototype.
        proto_nongoal: Non-goal class prototype.
        threshold: Margin passed to ``classify_clip2`` as its ``delta``.

    Returns:
        dict: ``confusion_matrix`` (2×2, rows = true label, columns =
        predicted label, in the order [Non-Goal, Goal]), ``precision``,
        ``recall`` and ``f1_score``.
    """
    y_true = []
    y_pred = []

    # (banner, folder, true label) for each class; goal clips are tested first.
    test_sets = (
        ("\nTesting goal clips...", goal_test_folder, 1),
        ("\nTesting non-goal clips...", nongoal_test_folder, 0),
    )
    for banner, folder, true_label in test_sets:
        print(banner)
        for fname in os.listdir(folder):
            if not fname.endswith(".npy"):
                continue
            test_embedding = np.load(os.path.join(folder, fname))
            predicted_label, _ = classify_clip2(test_embedding, proto_goal, proto_nongoal, threshold)

            y_true.append(true_label)
            y_pred.append(1 if predicted_label == "Goal" else 0)

    # Fix the label order explicitly: without `labels`, the matrix shrinks to
    # 1×1 whenever only one class appears in both y_true and y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)

    print("\nConfusion Matrix:")
    print(cm)
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F₁ Score: {f1:.4f}")

    return {
        "confusion_matrix": cm,
        "precision": precision,
        "recall": recall,
        "f1_score": f1
    }

# Evaluate the prototype classifier on the test embeddings.
# NOTE: goal_test_folder, nongoal_test_folder, proto_goal and proto_nongoal
# are the names assigned in the classification cell's __main__ block, so that
# cell must have been run before this one.
results = evaluate_classification_with_metrics(
    goal_test_folder=goal_test_folder, 
    nongoal_test_folder=nongoal_test_folder, 
    proto_goal=proto_goal, 
    proto_nongoal=proto_nongoal, 
    threshold=0.0  # Adjust the threshold as needed
)



Testing goal clips...

Testing non-goal clips...

Confusion Matrix:
[[5 2]
 [0 7]]
Precision: 0.7778
Recall: 1.0000
F₁ Score: 0.8750


## Cosine Similarity-Based Classification Using I3D

In [None]:
import os
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from torchvision import transforms
import sys
sys.path.append(r'F:\AIM Lab\Experiment\kinetics-i3d')
import i3d
from typing import List, Tuple
import random

class I3DFeatureExtractor:
    """
    Clip-level feature extractor built on ``i3d.InceptionI3d``.

    The network's ``logits`` layer is replaced with an identity so that the
    forward pass returns features rather than class scores. Frames are
    resized to 224×224 and normalized with the mean/std given in
    ``self.transform`` before being stacked into a video tensor.
    """

    def __init__(self, model_path=None, device='cuda' if torch.cuda.is_available() else 'cpu'):
        """
        Initialize I3D model for feature extraction.

        Args:
            model_path: Path to pretrained I3D model (optional)
            device: Device to run the model on
        """
        self.device = device

        # Initialize I3D model
        self.model = i3d.InceptionI3d(400, in_channels=3)

        # Load pretrained weights if available
        if model_path and os.path.exists(model_path):
            checkpoint = torch.load(model_path, map_location=device)
            self.model.load_state_dict(checkpoint)
        elif model_path:
            # A path was given but is missing: say so instead of silently
            # continuing without the requested weights.
            print(f"Warning: I3D weights not found at {model_path}; no pretrained weights were loaded")

        # Remove the final classification layer to get features
        self.model.logits = torch.nn.Identity()
        self.model.to(device)
        self.model.eval()

        # Preprocessing transforms
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                               std=[0.229, 0.224, 0.225])
        ])

    @staticmethod
    def _fit_frame_count(frames: List[np.ndarray], num_frames: int) -> List[np.ndarray]:
        """
        Return a new list holding exactly ``num_frames`` frames.

        Longer inputs are subsampled uniformly; shorter inputs are padded by
        repeating the last frame. The caller's list is never modified.

        Raises:
            ValueError: If ``frames`` is empty (there is no frame to repeat).
        """
        if not frames:
            raise ValueError("Cannot preprocess an empty list of frames")
        if len(frames) > num_frames:
            indices = np.linspace(0, len(frames) - 1, num_frames, dtype=int)
            return [frames[i] for i in indices]
        return list(frames) + [frames[-1]] * (num_frames - len(frames))

    def preprocess_frames(self, frames: List[np.ndarray], num_frames: int = 16) -> torch.Tensor:
        """
        Preprocess frames for I3D input.

        Args:
            frames: List of frames (H×W×C, BGR format)
            num_frames: Number of frames to use for I3D

        Returns:
            Preprocessed tensor of shape (1, 3, num_frames, H, W)

        Raises:
            ValueError: If ``frames`` is empty.
        """
        # Work on a copy so padding does not append to the caller's list.
        frames = self._fit_frame_count(frames, num_frames)

        processed_frames = []
        for frame in frames:
            # Convert BGR to RGB, then resize / tensorize / normalize
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            processed_frames.append(self.transform(frame_rgb))

        # Stacking the (3, H, W) frame tensors along dim=1 puts time after
        # the channel axis: (3, num_frames, H, W).
        video_tensor = torch.stack(processed_frames, dim=1)
        video_tensor = video_tensor.unsqueeze(0)  # Add batch dimension

        return video_tensor

    def extract_features(self, frames: List[np.ndarray]) -> np.ndarray:
        """
        Extract I3D features from video frames.

        Args:
            frames: List of video frames

        Returns:
            Feature vector of shape (1024,) or similar
        """
        video_tensor = self.preprocess_frames(frames)
        video_tensor = video_tensor.to(self.device)

        with torch.no_grad():
            features = self.model(video_tensor)
            # Global average pooling over every axis after (batch, channel).
            # A plain mean works for any rank > 2, whereas adaptive_avg_pool3d
            # only accepts 4-D/5-D input and would treat a 4-D tensor as an
            # unbatched sample. For 5-D input the result is the same.
            if features.dim() > 2:
                features = features.mean(dim=tuple(range(2, features.dim())))
            features = features.squeeze()

        return features.cpu().numpy()

def sample_frames_from_video(video_path: str, num_samples: int = 32) -> List[np.ndarray]:
    """
    Uniformly sample frames from a video.

    If the video reports fewer frames than ``num_samples``, every frame is
    read. Frames that fail to decode are skipped with a warning, so the
    returned list may be shorter than requested.

    Args:
        video_path: Path to video file
        num_samples: Number of frames to sample

    Returns:
        List of sampled frames; empty if the video cannot be opened or
        reports no frames
    """
    print(f"Opening video: {video_path}")
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not cap.isOpened():
            print(f"Error: Cannot open video {video_path}")
            return []

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print(f"Total frames in video: {total_frames}")

        # <= 0 rather than == 0: treat a negative reported count as "no frames" too.
        if total_frames <= 0:
            print(f"Error: No frames found in video {video_path}")
            return []

        if total_frames < num_samples:
            indices = list(range(total_frames))
        else:
            # Uniformly spaced frame indices, first and last frame included.
            indices = np.linspace(0, total_frames - 1, num=num_samples, dtype=int)

        for idx in indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame = cap.read()
            if not ret:
                print(f"Warning: Failed to read frame {idx} from {video_path}")
                continue
            frames.append(frame)
    finally:
        # Always release the capture; the early returns above previously
        # leaked the handle when the video reported zero frames.
        cap.release()

    if not frames:
        print(f"Error: No frames sampled from video {video_path}")
    else:
        print(f"Successfully sampled {len(frames)} frames from {video_path}")

    return frames

def process_videos_to_i3d_embeddings(input_folder: str, output_folder: str, 
                                   feature_extractor: I3DFeatureExtractor,
                                   num_frames: int = 32):
    """
    Generate and save an I3D embedding for every ``.mp4`` clip in a folder.

    Each clip's frames are sampled with ``sample_frames_from_video`` and
    passed to ``feature_extractor.extract_features``; the result is stored
    as ``<clip name>.npy`` in ``output_folder``. A failure on one clip is
    reported and does not stop the remaining clips.

    Args:
        input_folder: Folder containing input videos
        output_folder: Folder to save embeddings
        feature_extractor: I3D feature extractor instance
        num_frames: Number of frames to sample from each video
    """
    print(f"Processing folder: {input_folder}")
    if not os.path.exists(input_folder):
        print(f"Error: Folder does not exist: {input_folder}")
        return

    os.makedirs(output_folder, exist_ok=True)

    for entry in os.listdir(input_folder):
        # Guard clause: anything that is not an .mp4 (any letter case) is skipped.
        if not entry.lower().endswith('.mp4'):
            print(f"Skipping non-video file: {entry}")
            continue

        clip_path = os.path.join(input_folder, entry)
        print(f"Processing video: {clip_path}")

        sampled = sample_frames_from_video(clip_path, num_samples=num_frames)
        if not sampled:
            print(f"Error: No frames sampled from {entry}")
            continue

        # Best effort per clip: report the error and move on to the next file.
        try:
            features = feature_extractor.extract_features(sampled)

            stem = os.path.splitext(entry)[0]
            target = os.path.join(output_folder, f"{stem}.npy")
            np.save(target, features)
            print(f"Saved I3D embedding to {target} (shape: {features.shape})")
        except Exception as e:
            print(f"Error processing {entry}: {e}")

def l2_normalize(vec: np.ndarray) -> np.ndarray:
    """
    Scale a vector to unit L2 norm.

    Args:
        vec: Input vector

    Returns:
        ``vec`` divided by its L2 norm, or ``vec`` itself when the norm is zero
    """
    magnitude = np.linalg.norm(vec)
    # A zero vector cannot be scaled to unit length; hand it back untouched.
    return vec if magnitude == 0 else vec / magnitude

def cosine_similarity(vec1: np.ndarray, vec2: np.ndarray) -> float:
    """
    Compute the cosine similarity of two vectors.

    Both inputs are passed through ``l2_normalize`` first, so they do not
    need to be unit length.

    Args:
        vec1: First vector
        vec2: Second vector

    Returns:
        Dot product of the two normalized vectors
    """
    unit_a, unit_b = l2_normalize(vec1), l2_normalize(vec2)
    return np.dot(unit_a, unit_b)

def build_i3d_prototypes(goal_folder: str, nongoal_folder: str) -> Tuple[np.ndarray, np.ndarray]:
    """
    Build the goal and non-goal class prototypes from saved I3D embeddings.

    Every ``.npy`` file in each folder is loaded (and reported); each class's
    vectors are averaged and the mean is passed through ``l2_normalize``.

    Args:
        goal_folder: Path to folder containing goal clip embeddings
        nongoal_folder: Path to folder containing non-goal clip embeddings

    Returns:
        Tuple of (goal_prototype, nongoal_prototype)

    Raises:
        ValueError: If either folder yields no embeddings
    """
    def _load_class(folder, tag):
        # Load and report every .npy embedding found directly in `folder`.
        print(f"Loading {tag} embeddings...")
        loaded = []
        for name in os.listdir(folder):
            if not name.endswith(".npy"):
                continue
            embedding = np.load(os.path.join(folder, name))
            loaded.append(embedding)
            print(f"Loaded {tag} embedding: {name} (shape: {embedding.shape})")
        return loaded

    goal_vecs = _load_class(goal_folder, "goal")
    nongoal_vecs = _load_class(nongoal_folder, "non-goal")

    if not (goal_vecs and nongoal_vecs):
        raise ValueError("No embeddings found in one or both folders!")

    # Prototype = normalized mean embedding of the class.
    proto_goal = l2_normalize(np.mean(goal_vecs, axis=0))
    proto_nongoal = l2_normalize(np.mean(nongoal_vecs, axis=0))

    print(f"Goal prototype shape: {proto_goal.shape}")
    print(f"Non-goal prototype shape: {proto_nongoal.shape}")
    print(f"Built prototypes from {len(goal_vecs)} goal and {len(nongoal_vecs)} non-goal samples")

    return proto_goal, proto_nongoal

def classify_i3d_clip(clip_embedding: np.ndarray, 
                     proto_goal: np.ndarray, 
                     proto_nongoal: np.ndarray, 
                     threshold: float = 0.0) -> Tuple[str, dict]:
    """
    Label a clip by comparing its similarity to the two class prototypes.

    The clip is labelled "Goal" only when its similarity to the goal prototype
    exceeds its similarity to the non-goal prototype by strictly more than
    ``threshold``; otherwise it is labelled "Non-Goal".

    Args:
        clip_embedding: I3D embedding of the clip to classify
        proto_goal: Goal class prototype
        proto_nongoal: Non-goal class prototype
        threshold: Margin the similarity difference must exceed for "Goal"

    Returns:
        Tuple of (predicted_label, similarity_scores), where similarity_scores
        has the keys 'sim_goal', 'sim_nongoal', 'difference' and 'threshold'
    """
    unit_clip = l2_normalize(clip_embedding)

    # Similarity of the clip to each class prototype
    sim_goal = cosine_similarity(unit_clip, proto_goal)
    sim_nongoal = cosine_similarity(unit_clip, proto_nongoal)
    margin = sim_goal - sim_nongoal

    if margin > threshold:
        predicted_label = "Goal"
    else:
        predicted_label = "Non-Goal"

    return predicted_label, {
        'sim_goal': sim_goal,
        'sim_nongoal': sim_nongoal,
        'difference': margin,
        'threshold': threshold
    }

def evaluate_i3d_classification(goal_test_folder: str, 
                               nongoal_test_folder: str,
                               proto_goal: np.ndarray,
                               proto_nongoal: np.ndarray,
                               threshold: float = 0.0) -> dict:
    """
    Classify every test embedding and tally accuracy overall and per class.

    Goal clips are processed first, then non-goal clips. Each ``.npy`` file is
    classified with ``classify_i3d_clip`` and its result is printed and recorded.

    Args:
        goal_test_folder: Folder containing goal test embeddings
        nongoal_test_folder: Folder containing non-goal test embeddings
        proto_goal: Goal class prototype
        proto_nongoal: Non-goal class prototype
        threshold: Decision threshold forwarded to the classifier

    Returns:
        Dictionary with the raw counters ('correct', 'total', 'goal_correct',
        'goal_total', 'nongoal_correct', 'nongoal_total'), the per-clip
        'predictions' list, and 'overall_accuracy', 'goal_accuracy',
        'nongoal_accuracy' (each 0 when its denominator is 0)
    """
    results = {
        'correct': 0,
        'total': 0,
        'goal_correct': 0,
        'goal_total': 0,
        'nongoal_correct': 0,
        'nongoal_total': 0,
        'predictions': []
    }

    # (folder, true label, counter-key prefix, wording used in the progress line)
    class_specs = (
        (goal_test_folder, "Goal", "goal", "goal"),
        (nongoal_test_folder, "Non-Goal", "nongoal", "non-goal"),
    )

    for folder, true_label, key, wording in class_specs:
        print(f"\nTesting {wording} clips...")
        for fname in os.listdir(folder):
            if not fname.endswith(".npy"):
                continue

            embedding = np.load(os.path.join(folder, fname))
            predicted_label, sim_scores = classify_i3d_clip(
                embedding, proto_goal, proto_nongoal, threshold
            )
            is_correct = predicted_label == true_label

            results[f'{key}_total'] += 1
            results['total'] += 1
            if is_correct:
                results[f'{key}_correct'] += 1
                results['correct'] += 1

            results['predictions'].append({
                'file': fname,
                'true_label': true_label,
                'predicted_label': predicted_label,
                'correct': is_correct,
                'similarities': sim_scores
            })

            print(f"File: {fname}")
            print(f"  True: {true_label}, Predicted: {predicted_label}")
            print(f"  Sim Goal: {sim_scores['sim_goal']:.4f}, Sim Non-Goal: {sim_scores['sim_nongoal']:.4f}")
            print(f"  Difference: {sim_scores['difference']:.4f}, Correct: {is_correct}")

    def _ratio(hits: int, count: int):
        # Guard against empty test folders.
        return hits / count if count > 0 else 0

    overall_accuracy = _ratio(results['correct'], results['total'])
    goal_accuracy = _ratio(results['goal_correct'], results['goal_total'])
    nongoal_accuracy = _ratio(results['nongoal_correct'], results['nongoal_total'])

    rule = '=' * 50
    print(f"\n{rule}")
    print("EVALUATION RESULTS")
    print(rule)
    print(f"Overall Accuracy: {overall_accuracy:.4f} ({results['correct']}/{results['total']})")
    print(f"Goal Accuracy: {goal_accuracy:.4f} ({results['goal_correct']}/{results['goal_total']})")
    print(f"Non-Goal Accuracy: {nongoal_accuracy:.4f} ({results['nongoal_correct']}/{results['nongoal_total']})")

    results['overall_accuracy'] = overall_accuracy
    results['goal_accuracy'] = goal_accuracy
    results['nongoal_accuracy'] = nongoal_accuracy

    return results

def split_embeddings_train_test(input_folder: str, train_folder: str, test_folder: str, 
                               train_ratio: float = 0.7, seed=None):
    """
    Copy embeddings into disjoint train and test folders.

    The ``.npy`` files of ``input_folder`` are sorted, shuffled, and cut at
    ``int(len(files) * train_ratio)``; the first part is copied to
    ``train_folder`` and the rest to ``test_folder``. When a file is placed on
    one side, a leftover copy with the same name on the other side (from an
    earlier run with a different shuffle) is deleted, so repeated runs never
    leave the same clip in both sets.

    Args:
        input_folder: Folder containing all embeddings
        train_folder: Output folder for training embeddings
        test_folder: Output folder for test embeddings
        train_ratio: Ratio of data to use for training
        seed: Optional seed for a reproducible split. When None, the global
            ``random`` module state is used.
    """
    import shutil

    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    # Sort first: os.listdir returns entries in arbitrary order, which would
    # otherwise make even a seeded shuffle non-reproducible.
    embedding_files = sorted(f for f in os.listdir(input_folder) if f.endswith('.npy'))
    if seed is None:
        random.shuffle(embedding_files)
    else:
        random.Random(seed).shuffle(embedding_files)

    # Split files
    split_idx = int(len(embedding_files) * train_ratio)
    train_files = embedding_files[:split_idx]
    test_files = embedding_files[split_idx:]

    def _place(files, dst_folder, other_folder):
        # Copy each file into dst_folder and drop any stale twin in other_folder.
        for file in files:
            src = os.path.join(input_folder, file)
            stale = os.path.join(other_folder, file)
            # Never delete the source itself if a caller reuses the input folder.
            if os.path.isfile(stale) and not os.path.samefile(stale, src):
                os.remove(stale)
            shutil.copy2(src, os.path.join(dst_folder, file))

    _place(train_files, train_folder, test_folder)
    _place(test_files, test_folder, train_folder)

    print(f"Split {len(embedding_files)} files: {len(train_files)} train, {len(test_files)} test")

def main():
    """
    Drive the I3D few-shot experiment end to end.

    Steps, in order: create the feature extractor, write I3D embeddings for the
    goal and non-goal videos, copy them into train/test folders with a 0.7
    train ratio, build the class prototypes from the training folders, then
    evaluate the test folders at several thresholds and print the threshold
    with the highest overall accuracy.
    """
    # Configuration
    clips_root = "F:/AIM Lab/Experiment/Clips"
    embed_root = os.path.join(clips_root, "I3D_Embeddings")

    # Source videos, one folder per class
    goal_video_folder = os.path.join(clips_root, "Goal p1")
    nongoal_video_folder = os.path.join(clips_root, "NoGoal p1")

    # Per-class embedding folders
    goal_i3d_folder = os.path.join(embed_root, "Goal")
    nongoal_i3d_folder = os.path.join(embed_root, "NoGoal")

    # Train/test copies of the embeddings
    goal_train_folder = os.path.join(embed_root, "Goal_Train")
    goal_test_folder = os.path.join(embed_root, "Goal_Test")
    nongoal_train_folder = os.path.join(embed_root, "NoGoal_Train")
    nongoal_test_folder = os.path.join(embed_root, "NoGoal_Test")

    print("Initializing I3D feature extractor...")
    extractor = I3DFeatureExtractor()

    # Step 1: one embedding per video
    print("\nStep 1: Generating I3D embeddings...")
    print("Processing goal videos...")
    process_videos_to_i3d_embeddings(goal_video_folder, goal_i3d_folder, extractor)

    print("Processing non-goal videos...")
    process_videos_to_i3d_embeddings(nongoal_video_folder, nongoal_i3d_folder, extractor)

    # Step 2: train/test split per class
    print("\nStep 2: Splitting embeddings into train/test sets...")
    split_embeddings_train_test(goal_i3d_folder, goal_train_folder, goal_test_folder, train_ratio=0.7)
    split_embeddings_train_test(nongoal_i3d_folder, nongoal_train_folder, nongoal_test_folder, train_ratio=0.7)

    # Step 3: prototypes come from the training folders only
    print("\nStep 3: Building prototypes from training data...")
    try:
        proto_goal, proto_nongoal = build_i3d_prototypes(goal_train_folder, nongoal_train_folder)
    except Exception as e:
        print(f"Error building prototypes: {e}")
        return

    # Step 4: threshold sweep on the test folders
    print("\nStep 4: Evaluating classification performance...")
    best_accuracy = 0
    best_threshold = 0

    for threshold in [0.0, 0.01, 0.02, 0.05, 0.1]:
        print(f"\nTesting with threshold: {threshold}")
        accuracy = evaluate_i3d_classification(
            goal_test_folder, nongoal_test_folder,
            proto_goal, proto_nongoal, threshold
        )['overall_accuracy']

        # Strict comparison: on a tie the earliest threshold is kept.
        if accuracy > best_accuracy:
            best_accuracy, best_threshold = accuracy, threshold

    rule = '=' * 50
    print(f"\n{rule}")
    print("BEST RESULTS")
    print(rule)
    print(f"Best Threshold: {best_threshold}")
    print(f"Best Accuracy: {best_accuracy:.4f}")

if __name__ == "__main__":
    main()

Initializing I3D feature extractor...


TypeError: InceptionI3d.__init__() got an unexpected keyword argument 'in_channels'

## Cosine Similarity-Based Classification Using R(2+1)D

In [38]:
import os
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from torchvision import transforms
from torchvision.models.video import r2plus1d_18
from typing import List, Tuple
import random
import shutil
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

class R2Plus1DFeatureExtractor:
    """
    Clip-level feature extractor built on torchvision's ``r2plus1d_18``.

    The network is loaded with ``pretrained=True``, its ``fc`` layer is replaced
    by ``torch.nn.Identity`` so a forward pass yields features rather than class
    scores, and it is kept in eval mode on the chosen device.
    """

    def __init__(self, device='cuda' if torch.cuda.is_available() else 'cpu'):
        """
        Initialize R(2+1)D model for feature extraction.
        
        Args:
            device: Device to run the model on
        """
        self.device = device
        
        # NOTE(review): newer torchvision releases prefer a `weights=` argument
        # over `pretrained=True` — confirm against the installed version.
        self.model = r2plus1d_18(pretrained=True)
        
        # Remove the final classification layer to get features
        self.model.fc = torch.nn.Identity()
        self.model.to(device)
        self.model.eval()
        
        # Per-frame preprocessing: resize to 112x112, convert to tensor, normalize
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((112, 112)),  # R(2+1)D typically uses 112x112
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.43216, 0.394666, 0.37645], 
                               std=[0.22803, 0.22145, 0.216989])  # Kinetics normalization
        ])
    
    @staticmethod
    def _select_frames(frames: List[np.ndarray], num_frames: int) -> List[np.ndarray]:
        """
        Return a new list of ``num_frames`` frames taken from ``frames``.

        Longer inputs are subsampled at evenly spaced indices; shorter inputs
        are padded by repeating the last frame. The input list is never
        modified.

        Raises:
            ValueError: If ``frames`` is empty
        """
        if not frames:
            raise ValueError("Cannot preprocess an empty list of frames")
        if len(frames) > num_frames:
            indices = np.linspace(0, len(frames) - 1, num_frames, dtype=int)
            return [frames[i] for i in indices]
        # Build the padded copy instead of appending to the caller's list.
        return list(frames) + [frames[-1]] * (num_frames - len(frames))
    
    def preprocess_frames(self, frames: List[np.ndarray], num_frames: int = 16) -> torch.Tensor:
        """
        Preprocess frames for R(2+1)D input.
        
        Args:
            frames: List of frames (H×W×C, BGR format)
            num_frames: Number of frames to use for R(2+1)D
            
        Returns:
            Preprocessed tensor of shape (1, 3, num_frames, H, W)

        Raises:
            ValueError: If ``frames`` is empty
        """
        selected = self._select_frames(frames, num_frames)
        
        # OpenCV frames are BGR; convert to RGB before the torchvision transforms
        processed_frames = [
            self.transform(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            for frame in selected
        ]
        
        # Stacking on dim=1 puts time after channels: (3, num_frames, H, W)
        video_tensor = torch.stack(processed_frames, dim=1)
        video_tensor = video_tensor.unsqueeze(0)  # Add batch dimension
        
        return video_tensor
    
    def extract_features(self, frames: List[np.ndarray]) -> np.ndarray:
        """
        Extract R(2+1)D features from video frames.

        Frames go through ``preprocess_frames`` with its default frame count,
        so longer inputs are subsampled before the forward pass.
        
        Args:
            frames: List of video frames
            
        Returns:
            Feature vector of shape (512,) for R(2+1)D-18
        """
        # Preprocess frames
        video_tensor = self.preprocess_frames(frames)
        video_tensor = video_tensor.to(self.device)
        
        # Extract features
        with torch.no_grad():
            features = self.model(video_tensor)
            # Global average pooling if needed
            if len(features.shape) > 2:
                features = F.adaptive_avg_pool3d(features, 1).squeeze()
            else:
                features = features.squeeze()
        
        return features.cpu().numpy()

def sample_frames_from_video(video_path: str, num_samples: int = 32) -> List[np.ndarray]:
    """
    Uniformly sample frames from a video.

    When the video reports fewer frames than ``num_samples``, every frame is
    read; otherwise ``num_samples`` evenly spaced frame indices are read.
    Frames that fail to decode are skipped with a warning, so the result may
    hold fewer frames than requested. The capture is released on every path.
    
    Args:
        video_path: Path to video file
        num_samples: Number of frames to sample
        
    Returns:
        List of sampled frames; empty if the video cannot be opened or reports
        no frames
    """
    print(f"Opening video: {video_path}")
    cap = cv2.VideoCapture(video_path)
    frames = []

    # try/finally so the capture handle is released on the early returns too.
    try:
        if not cap.isOpened():
            print(f"Error: Cannot open video {video_path}")
            return []

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print(f"Total frames in video: {total_frames}")

        # A non-positive count is treated the same as an empty video.
        if total_frames <= 0:
            print(f"Error: No frames found in video {video_path}")
            return []

        if total_frames < num_samples:
            indices = list(range(total_frames))
        else:
            indices = np.linspace(0, total_frames - 1, num=num_samples, dtype=int)

        for idx in indices:
            # Seek to the requested frame index before reading it.
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame = cap.read()
            if not ret:
                print(f"Warning: Failed to read frame {idx} from {video_path}")
                continue
            frames.append(frame)
    finally:
        cap.release()

    if not frames:
        print(f"Error: No frames sampled from video {video_path}")
    else:
        print(f"Successfully sampled {len(frames)} frames from {video_path}")

    return frames

def process_videos_to_r2plus1d_embeddings(input_folder: str, output_folder: str, 
                                         feature_extractor: R2Plus1DFeatureExtractor,
                                         num_frames: int = 32):
    """
    Write one R(2+1)D embedding file per ``.mp4`` video found in a folder.

    Each video is sampled with ``sample_frames_from_video``, passed to
    ``feature_extractor.extract_features``, and the result is saved as
    ``<video name>.npy`` in ``output_folder``. A failure on one video is
    printed and does not stop the remaining videos.

    Args:
        input_folder: Folder containing input videos
        output_folder: Folder to save embeddings (created if missing)
        feature_extractor: R(2+1)D feature extractor instance
        num_frames: Number of frames to sample from each video
    """
    print(f"Processing folder: {input_folder}")
    if not os.path.exists(input_folder):
        print(f"Error: Folder does not exist: {input_folder}")
        return

    os.makedirs(output_folder, exist_ok=True)

    for fname in os.listdir(input_folder):
        # Only .mp4 files (any letter case) are treated as videos.
        is_mp4 = fname.lower().endswith('.mp4')
        if not is_mp4:
            print(f"Skipping non-video file: {fname}")
            continue

        video_path = os.path.join(input_folder, fname)
        print(f"Processing video: {video_path}")

        frames = sample_frames_from_video(video_path, num_samples=num_frames)
        if not frames:
            print(f"Error: No frames sampled from {fname}")
            continue

        try:
            features = feature_extractor.extract_features(frames)

            # Same base name as the video, with a .npy extension
            stem = os.path.splitext(fname)[0]
            output_path = os.path.join(output_folder, f"{stem}.npy")
            np.save(output_path, features)
            print(f"Saved R(2+1)D embedding to {output_path} (shape: {features.shape})")
        except Exception as e:
            print(f"Error processing {fname}: {e}")

def l2_normalize(vec: np.ndarray) -> np.ndarray:
    """
    Scale a vector so that its L2 norm is 1.

    Args:
        vec: Input vector

    Returns:
        ``vec`` divided by its norm, or ``vec`` itself (the same object) when
        the norm is zero
    """
    magnitude = np.linalg.norm(vec)
    # A zero vector cannot be scaled; hand it back untouched.
    return vec / magnitude if magnitude != 0 else vec

def cosine_similarity(vec1: np.ndarray, vec2: np.ndarray) -> float:
    """
    Return the cosine similarity of two vectors.

    Both inputs are passed through ``l2_normalize`` and the dot product of the
    results is returned.

    Args:
        vec1: First vector
        vec2: Second vector

    Returns:
        Cosine similarity value
    """
    unit_a = l2_normalize(vec1)
    unit_b = l2_normalize(vec2)
    return np.dot(unit_a, unit_b)

def build_r2plus1d_prototypes(goal_folder: str, nongoal_folder: str) -> Tuple[np.ndarray, np.ndarray]:
    """
    Create one L2-normalized prototype per class from saved R(2+1)D embeddings.

    Every ``.npy`` file found in a class folder is loaded, the loaded arrays are
    averaged along axis 0, and the mean is L2-normalized.

    Args:
        goal_folder: Directory holding the goal-class ``.npy`` embeddings
        nongoal_folder: Directory holding the non-goal-class ``.npy`` embeddings

    Returns:
        Tuple of (goal_prototype, nongoal_prototype)

    Raises:
        ValueError: If either folder yields no ``.npy`` file
    """
    def _read_folder(folder: str, tag: str) -> list:
        # Load each .npy file in `folder`, printing its name and shape.
        print(f"Loading {tag} embeddings...")
        collected = []
        for fname in os.listdir(folder):
            if not fname.endswith(".npy"):
                continue
            vec = np.load(os.path.join(folder, fname))
            collected.append(vec)
            print(f"Loaded {tag} embedding: {fname} (shape: {vec.shape})")
        return collected

    goal_vecs = _read_folder(goal_folder, "goal")
    nongoal_vecs = _read_folder(nongoal_folder, "non-goal")

    # Both classes must contribute at least one embedding.
    if not (goal_vecs and nongoal_vecs):
        raise ValueError("No embeddings found in one or both folders!")

    # Class prototype = normalized element-wise mean of the class embeddings.
    goal_mean = np.mean(goal_vecs, axis=0)
    nongoal_mean = np.mean(nongoal_vecs, axis=0)
    proto_goal = l2_normalize(goal_mean)
    proto_nongoal = l2_normalize(nongoal_mean)

    print(f"Goal prototype shape: {proto_goal.shape}")
    print(f"Non-goal prototype shape: {proto_nongoal.shape}")
    print(f"Built prototypes from {len(goal_vecs)} goal and {len(nongoal_vecs)} non-goal samples")

    return proto_goal, proto_nongoal

def classify_r2plus1d_clip(clip_embedding: np.ndarray, 
                          proto_goal: np.ndarray, 
                          proto_nongoal: np.ndarray, 
                          threshold: float = 0.0) -> Tuple[str, dict]:
    """
    Label a clip by comparing its cosine similarity to the two class prototypes.

    The clip is labelled "Goal" only when its similarity to the goal prototype
    exceeds its similarity to the non-goal prototype by strictly more than
    ``threshold``; otherwise it is labelled "Non-Goal".

    Args:
        clip_embedding: R(2+1)D embedding of the clip to classify
        proto_goal: Goal class prototype
        proto_nongoal: Non-goal class prototype
        threshold: Margin the similarity difference must exceed for "Goal"

    Returns:
        Tuple of (predicted_label, similarity_scores), where similarity_scores
        has the keys 'sim_goal', 'sim_nongoal', 'difference' and 'threshold'
    """
    unit_clip = l2_normalize(clip_embedding)

    # Similarity of the clip to each class prototype
    sim_goal = cosine_similarity(unit_clip, proto_goal)
    sim_nongoal = cosine_similarity(unit_clip, proto_nongoal)
    margin = sim_goal - sim_nongoal

    if margin > threshold:
        predicted_label = "Goal"
    else:
        predicted_label = "Non-Goal"

    return predicted_label, {
        'sim_goal': sim_goal,
        'sim_nongoal': sim_nongoal,
        'difference': margin,
        'threshold': threshold
    }

def evaluate_r2plus1d_classification(goal_test_folder: str, 
                                    nongoal_test_folder: str,
                                    proto_goal: np.ndarray,
                                    proto_nongoal: np.ndarray,
                                    threshold: float = 0.0) -> dict:
    """
    Classify every test embedding and tally accuracy overall and per class.

    Goal clips are processed first, then non-goal clips. Each ``.npy`` file is
    classified with ``classify_r2plus1d_clip`` and its result is printed and
    recorded.

    Args:
        goal_test_folder: Folder containing goal test embeddings
        nongoal_test_folder: Folder containing non-goal test embeddings
        proto_goal: Goal class prototype
        proto_nongoal: Non-goal class prototype
        threshold: Decision threshold forwarded to the classifier

    Returns:
        Dictionary with the raw counters ('correct', 'total', 'goal_correct',
        'goal_total', 'nongoal_correct', 'nongoal_total'), the per-clip
        'predictions' list, and 'overall_accuracy', 'goal_accuracy',
        'nongoal_accuracy' (each 0 when its denominator is 0)
    """
    results = {
        'correct': 0,
        'total': 0,
        'goal_correct': 0,
        'goal_total': 0,
        'nongoal_correct': 0,
        'nongoal_total': 0,
        'predictions': []
    }

    # (folder, true label, counter-key prefix, wording used in the progress line)
    class_specs = (
        (goal_test_folder, "Goal", "goal", "goal"),
        (nongoal_test_folder, "Non-Goal", "nongoal", "non-goal"),
    )

    for folder, true_label, key, wording in class_specs:
        print(f"\nTesting {wording} clips...")
        for fname in os.listdir(folder):
            if not fname.endswith(".npy"):
                continue

            embedding = np.load(os.path.join(folder, fname))
            predicted_label, sim_scores = classify_r2plus1d_clip(
                embedding, proto_goal, proto_nongoal, threshold
            )
            is_correct = predicted_label == true_label

            results[f'{key}_total'] += 1
            results['total'] += 1
            if is_correct:
                results[f'{key}_correct'] += 1
                results['correct'] += 1

            results['predictions'].append({
                'file': fname,
                'true_label': true_label,
                'predicted_label': predicted_label,
                'correct': is_correct,
                'similarities': sim_scores
            })

            print(f"File: {fname}")
            print(f"  True: {true_label}, Predicted: {predicted_label}")
            print(f"  Sim Goal: {sim_scores['sim_goal']:.4f}, Sim Non-Goal: {sim_scores['sim_nongoal']:.4f}")
            print(f"  Difference: {sim_scores['difference']:.4f}, Correct: {is_correct}")

    def _ratio(hits: int, count: int):
        # Guard against empty test folders.
        return hits / count if count > 0 else 0

    overall_accuracy = _ratio(results['correct'], results['total'])
    goal_accuracy = _ratio(results['goal_correct'], results['goal_total'])
    nongoal_accuracy = _ratio(results['nongoal_correct'], results['nongoal_total'])

    rule = '=' * 50
    print(f"\n{rule}")
    print("EVALUATION RESULTS")
    print(rule)
    print(f"Overall Accuracy: {overall_accuracy:.4f} ({results['correct']}/{results['total']})")
    print(f"Goal Accuracy: {goal_accuracy:.4f} ({results['goal_correct']}/{results['goal_total']})")
    print(f"Non-Goal Accuracy: {nongoal_accuracy:.4f} ({results['nongoal_correct']}/{results['nongoal_total']})")

    results['overall_accuracy'] = overall_accuracy
    results['goal_accuracy'] = goal_accuracy
    results['nongoal_accuracy'] = nongoal_accuracy

    return results

def evaluate_with_sklearn_metrics(goal_test_folder: str, 
                                 nongoal_test_folder: str,
                                 proto_goal: np.ndarray,
                                 proto_nongoal: np.ndarray,
                                 threshold: float = 0.0) -> dict:
    """
    Evaluate classification performance using sklearn metrics.

    Every ``.npy`` file in the two test folders is classified with
    ``classify_r2plus1d_clip``; "Goal" is encoded as 1 and "Non-Goal" as 0.
    Precision, recall and F1 use sklearn's default positive label (1), i.e.
    they describe the Goal class.

    Args:
        goal_test_folder: Folder containing goal test embeddings
        nongoal_test_folder: Folder containing non-goal test embeddings
        proto_goal: Goal class prototype
        proto_nongoal: Non-goal class prototype
        threshold: Decision threshold forwarded to the classifier

    Returns:
        Dictionary with 'confusion_matrix' (always 2x2, rows = true label,
        columns = predicted label, both ordered [Non-Goal, Goal]),
        'precision', 'recall' and 'f1_score'
    """
    y_true = []
    y_pred = []

    def _score_folder(folder: str, true_value: int) -> None:
        # Classify each embedding in `folder` and record the label pair.
        for fname in os.listdir(folder):
            if fname.endswith(".npy"):
                test_embedding = np.load(os.path.join(folder, fname))
                predicted_label, _ = classify_r2plus1d_clip(
                    test_embedding, proto_goal, proto_nongoal, threshold
                )
                y_true.append(true_value)
                y_pred.append(1 if predicted_label == "Goal" else 0)

    # Test goal clips (true label 1)
    print("\nTesting goal clips...")
    _score_folder(goal_test_folder, 1)

    # Test non-goal clips (true label 0)
    print("Testing non-goal clips...")
    _score_folder(nongoal_test_folder, 0)

    # Pin the label set: without it the matrix collapses to 1x1 whenever only
    # one class appears among the true and predicted labels.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)

    print("\nConfusion Matrix:")
    print(cm)
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F₁ Score: {f1:.4f}")

    return {
        "confusion_matrix": cm,
        "precision": precision,
        "recall": recall,
        "f1_score": f1
    }

def split_embeddings_train_test(input_folder: str, train_folder: str, test_folder: str, 
                               train_ratio: float = 0.7, seed=None):
    """
    Copy embeddings into disjoint train and test folders.

    The ``.npy`` files of ``input_folder`` are sorted, shuffled, and cut at
    ``int(len(files) * train_ratio)``; the first part is copied to
    ``train_folder`` and the rest to ``test_folder``. When a file is placed on
    one side, a leftover copy with the same name on the other side (from an
    earlier run with a different shuffle) is deleted, so repeated runs never
    leave the same clip in both sets.

    Args:
        input_folder: Folder containing all embeddings
        train_folder: Output folder for training embeddings
        test_folder: Output folder for test embeddings
        train_ratio: Ratio of data to use for training
        seed: Optional seed for a reproducible split. When None, the global
            ``random`` module state is used.
    """
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    # Sort first: os.listdir returns entries in arbitrary order, which would
    # otherwise make even a seeded shuffle non-reproducible.
    embedding_files = sorted(f for f in os.listdir(input_folder) if f.endswith('.npy'))
    if seed is None:
        random.shuffle(embedding_files)
    else:
        random.Random(seed).shuffle(embedding_files)

    # Split files
    split_idx = int(len(embedding_files) * train_ratio)
    train_files = embedding_files[:split_idx]
    test_files = embedding_files[split_idx:]

    def _place(files, dst_folder, other_folder):
        # Copy each file into dst_folder and drop any stale twin in other_folder.
        for file in files:
            src = os.path.join(input_folder, file)
            stale = os.path.join(other_folder, file)
            # Never delete the source itself if a caller reuses the input folder.
            if os.path.isfile(stale) and not os.path.samefile(stale, src):
                os.remove(stale)
            shutil.copy2(src, os.path.join(dst_folder, file))

    _place(train_files, train_folder, test_folder)
    _place(test_files, test_folder, train_folder)

    print(f"Split {len(embedding_files)} files: {len(train_files)} train, {len(test_files)} test")

def main():
    """
    Run the R(2+1)D few-shot classification pipeline end to end.

    Steps: generate embeddings for the goal / non-goal train and test video
    folders, build the two class prototypes from the training embeddings,
    evaluate the test embeddings at several thresholds, then print detailed
    metrics for the threshold with the highest overall accuracy. Returns
    early (after printing the error) if building the prototypes raises.
    """
    # Root folder of the raw clips, and the folder that receives embeddings
    clips_root = "F:/AIM Lab/Experiment/Clips"
    embeddings_root = os.path.join(clips_root, "R2Plus1D_Embeddings")

    # Embedding folders, one per class/split; reused by steps 2-4 below
    goal_train_folder = os.path.join(embeddings_root, "Goal_Train")
    goal_test_folder = os.path.join(embeddings_root, "Goal_Test")
    nongoal_train_folder = os.path.join(embeddings_root, "NoGoal_Train")
    nongoal_test_folder = os.path.join(embeddings_root, "NoGoal_Test")

    # (progress message, source video folder, embedding output folder)
    embedding_jobs = [
        ("Processing goal training videos...",
         os.path.join(clips_root, "Goal p1", "Train"), goal_train_folder),
        ("Processing goal test videos...",
         os.path.join(clips_root, "Goal p1", "Test"), goal_test_folder),
        ("Processing non-goal training videos...",
         os.path.join(clips_root, "NoGoal p1", "Train"), nongoal_train_folder),
        ("Processing non-goal test videos...",
         os.path.join(clips_root, "NoGoal p1", "Test"), nongoal_test_folder),
    ]

    print("Initializing R(2+1)D feature extractor...")
    extractor = R2Plus1DFeatureExtractor()

    # Step 1: one embedding pass per class/split, sharing a single extractor
    print("\nStep 1: Generating R(2+1)D embeddings...")
    for message, videos_dir, embeddings_dir in embedding_jobs:
        print(message)
        process_videos_to_r2plus1d_embeddings(videos_dir, embeddings_dir, extractor)

    # Step 2: prototypes come from the training embeddings only
    print("\nStep 2: Building prototypes from training data...")
    try:
        proto_goal, proto_nongoal = build_r2plus1d_prototypes(goal_train_folder, nongoal_train_folder)
    except Exception as err:
        print(f"Error building prototypes: {err}")
        return

    # Step 3: threshold sweep; the strict ">" keeps the earliest threshold on ties
    print("\nStep 3: Evaluating classification performance...")
    best_accuracy = 0
    best_threshold = 0
    best_results = None  # not read again below
    for candidate in [0.0, 0.01, 0.02, 0.05, 0.1]:
        print(f"\nTesting with threshold: {candidate}")
        results = evaluate_r2plus1d_classification(
            goal_test_folder, nongoal_test_folder,
            proto_goal, proto_nongoal, candidate
        )
        if results['overall_accuracy'] > best_accuracy:
            best_accuracy, best_threshold, best_results = (
                results['overall_accuracy'], candidate, results
            )

    # Step 4: re-evaluate at the winning threshold to get precision/recall/F1
    print(f"\nStep 4: Detailed evaluation with best threshold ({best_threshold})...")
    sklearn_metrics = evaluate_with_sklearn_metrics(
        goal_test_folder, nongoal_test_folder,
        proto_goal, proto_nongoal, best_threshold
    )

    # Final summary
    banner = '=' * 50
    print(f"\n{banner}")
    print("FINAL RESULTS")
    print(f"{banner}")
    print(f"Best Threshold: {best_threshold}")
    print(f"Best Accuracy: {best_accuracy:.4f}")
    print(f"Precision: {sklearn_metrics['precision']:.4f}")
    print(f"Recall: {sklearn_metrics['recall']:.4f}")
    print(f"F₁ Score: {sklearn_metrics['f1_score']:.4f}")

# Entry point: run the full pipeline only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Initializing R(2+1)D feature extractor...





Step 1: Generating R(2+1)D embeddings...
Processing goal training videos...
Processing folder: F:/AIM Lab/Experiment/Clips\Goal p1\Train
Processing video: F:/AIM Lab/Experiment/Clips\Goal p1\Train\g1.mp4
Opening video: F:/AIM Lab/Experiment/Clips\Goal p1\Train\g1.mp4
Total frames in video: 175
Successfully sampled 32 frames from F:/AIM Lab/Experiment/Clips\Goal p1\Train\g1.mp4
Saved R(2+1)D embedding to F:/AIM Lab/Experiment/Clips\R2Plus1D_Embeddings\Goal_Train\g1.npy (shape: (512,))
Processing video: F:/AIM Lab/Experiment/Clips\Goal p1\Train\g10.mp4
Opening video: F:/AIM Lab/Experiment/Clips\Goal p1\Train\g10.mp4
Total frames in video: 175
Successfully sampled 32 frames from F:/AIM Lab/Experiment/Clips\Goal p1\Train\g10.mp4
Saved R(2+1)D embedding to F:/AIM Lab/Experiment/Clips\R2Plus1D_Embeddings\Goal_Train\g10.npy (shape: (512,))
Processing video: F:/AIM Lab/Experiment/Clips\Goal p1\Train\g11.mp4
Opening video: F:/AIM Lab/Experiment/Clips\Goal p1\Train\g11.mp4
Total frames in video