# Set the Working Directory

In [None]:
import os

# Every later path in this notebook is anchored to the directory the kernel
# was started in, so capture it once here.
working_dir = os.getcwd()

# Guard clause: stop immediately if that directory is not usable.
if not os.path.isdir(working_dir):
    raise ValueError("Working directory does not exist")

print("Working directory is ready!")

# Set the Trace Directory

In [None]:
import yaml

# Load the notebook configuration from <working_dir>/configuration.yaml.
# The encoding is pinned so parsing does not depend on the platform default.
config_path = os.path.join(working_dir, "configuration.yaml")
with open(config_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Fail with a readable message (instead of an opaque TypeError/KeyError) when
# the file is empty or lacks the one key this cell needs.
if not isinstance(config, dict) or "working_trace" not in config:
    raise KeyError(f"'working_trace' is missing from {config_path}")

# Resolve trace directory: <working_dir>/data/<working_trace>
trace_dir = os.path.join(working_dir, "data", config["working_trace"])

if os.path.isdir(trace_dir):
    print(f"✅ Trace directory ready: {trace_dir}")
else:
    raise FileNotFoundError(f"❌ Trace directory not found: {trace_dir}\n")

In [None]:
from utils import read_file_ids

# Read file IDs from the trace directory. These IDs drive both embedding
# loops below (one <file_id>.npy is produced per ID).
# NOTE(review): overwrite=False presumably reuses a previously cached ID list
# rather than rebuilding it — confirm against utils.read_file_ids.
file_ids = read_file_ids(trace_dir=trace_dir, overwrite=False)

print(f"📁 Total video files found: {len(file_ids)}")

# Change to the `ImageBind` Directory

In [None]:
# IPython magic: switch the kernel's current directory to the ImageBind
# checkout under <working_dir>/models. `{working_dir}` is expanded by IPython
# from the variable set in the first cell.
# NOTE(review): presumably required so that `from imagebind import ...` in the
# next cell resolves and ImageBind's relative resource paths work — confirm.
# NOTE: this changes os.getcwd(); re-running the first cell after this one
# would rebind working_dir to the ImageBind directory.
%cd {working_dir}/models/ImageBind

# Generate Video Visual/Audio Embeddings

In [None]:
import numpy as np
import torch

from imagebind import data
from imagebind.models import imagebind_model
from imagebind.models.imagebind_model import ModalityType

# Select computation device: the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"✅ Using device: {device}")

# Load the pretrained model, put it in evaluation mode (the cells below only
# run inference), and move it to the selected device.
model = imagebind_model.imagebind_huge(pretrained=True)
model.eval()
model.to(device)

print("✅ Model initialized successfully.")

## Generate Video Visual Embeddings

In [None]:
# Compute one ImageBind vision embedding per video and cache it as
# <trace_dir>/features/video_visual_embedding/<file_id>.npy.
# IDs whose .npy already exists are skipped, so the cell can be re-run to
# resume an interrupted job.
feature_dir = os.path.join(trace_dir, "features", "video_visual_embedding")
os.makedirs(feature_dir, exist_ok=True)

total = len(file_ids)

for idx, file_id in enumerate(file_ids, start=1):
    output_file = os.path.join(feature_dir, f"{file_id}.npy")

    if os.path.exists(output_file):
        print(f"✅ Embedding already exists: {file_id}.npy")
        continue

    video_path = os.path.join(trace_dir, "videos", f"{file_id}.mp4")
    print(f"🛠️ [{idx}/{total}] Generating: {file_id}.npy")

    # Load and transform video
    inputs = {
        ModalityType.VISION: data.load_and_transform_video_data([video_path], device)
    }

    # Generate embeddings (inference only, so no gradient tracking)
    with torch.no_grad():
        embedding = model(inputs)

    # Extract the vision output and move it to host memory for saving
    video_embedding = embedding[ModalityType.VISION].detach().cpu().numpy()

    # Write to a temporary file and rename it into place. The skip check above
    # trusts any existing <file_id>.npy, so a partially written file left by an
    # interrupted run must never appear under the final name.
    # An open file handle is passed so np.save does not append ".npy" to the
    # temporary name.
    tmp_file = f"{output_file}.tmp"
    with open(tmp_file, "wb") as f:
        np.save(f, video_embedding)
    os.replace(tmp_file, output_file)

    print(f"📦 Saved embedding: {file_id}.npy (shape: {video_embedding.shape})")

## Generate Video Audio Embeddings

In [None]:
# Compute one ImageBind audio embedding per audio track and cache it as
# <trace_dir>/features/video_audio_embedding/<file_id>.npy.
# IDs whose .npy already exists are skipped, so the cell can be re-run to
# resume an interrupted job.
feature_dir = os.path.join(trace_dir, "features", "video_audio_embedding")
os.makedirs(feature_dir, exist_ok=True)

total = len(file_ids)

for idx, file_id in enumerate(file_ids, start=1):
    output_file = os.path.join(feature_dir, f"{file_id}.npy")

    if os.path.exists(output_file):
        print(f"✅ Embedding already exists: {file_id}.npy")
        continue

    audio_path = os.path.join(trace_dir, "audios", f"{file_id}.mp3")
    print(f"🛠️ [{idx}/{total}] Generating: {file_id}.npy")

    # Load and transform audio
    inputs = {
        ModalityType.AUDIO: data.load_and_transform_audio_data([audio_path], device)
    }

    # Generate embeddings (inference only, so no gradient tracking)
    with torch.no_grad():
        embedding = model(inputs)

    # Extract the audio output and move it to host memory for saving
    audio_embedding = embedding[ModalityType.AUDIO].detach().cpu().numpy()

    # Write to a temporary file and rename it into place. The skip check above
    # trusts any existing <file_id>.npy, so a partially written file left by an
    # interrupted run must never appear under the final name.
    # An open file handle is passed so np.save does not append ".npy" to the
    # temporary name.
    tmp_file = f"{output_file}.tmp"
    with open(tmp_file, "wb") as f:
        np.save(f, audio_embedding)
    os.replace(tmp_file, output_file)

    print(f"📦 Saved embedding: {file_id}.npy (shape: {audio_embedding.shape})")