## Fusion Model Implementation

### Loading Libraries and Setting Up the Environment

## 

In [143]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from fastapi import APIRouter, HTTPException
from fastapi.responses import JSONResponse
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset


### Cross-Attention Block

In [144]:
class CrossAttentionBlock(nn.Module):
    """Cross-attention sub-layer followed by a feed-forward sub-layer.

    ``query`` attends over ``key_value`` (used as both keys and values). Each
    sub-layer is wrapped in a residual connection, and ``LayerNorm`` is applied
    after the addition.

    Args:
        embed_dim: Size of the last dimension of the inputs and the output.
        num_heads: Number of heads passed to ``nn.MultiheadAttention``.
        dropout: Dropout probability used inside the attention module and
            between the two feed-forward linear layers.
    """

    def __init__(self, embed_dim, num_heads=4, dropout=0.1):
        super().__init__()
        # batch_first=True: tensors are laid out as (batch, sequence, embed_dim).
        self.attn = nn.MultiheadAttention(
            embed_dim,
            num_heads,
            dropout=dropout,
            batch_first=True,
        )
        self.norm = nn.LayerNorm(embed_dim)
        feed_forward_layers = [
            nn.Linear(embed_dim, embed_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(embed_dim, embed_dim),
        ]
        self.ff = nn.Sequential(*feed_forward_layers)
        self.ff_norm = nn.LayerNorm(embed_dim)

    def forward(self, query, key_value):
        """Return ``query`` refined by attending over ``key_value``.

        The result has the same shape as ``query``.
        """
        # The attention module returns (output, weights); only the output is used.
        attended = self.attn(query, key_value, key_value)[0]
        residual = self.norm(query + attended)
        return self.ff_norm(residual + self.ff(residual))


### Adaptive Modality Selector

In [145]:

class AdaptiveSelector(nn.Module):
    """Turns a set of modality embeddings into softmax mixing weights.

    Args:
        embed_dim: Width of each individual modality embedding.
        num_modalities: How many embeddings are concatenated, and therefore
            how many weights are produced per sample.
    """

    def __init__(self, embed_dim, num_modalities=3):
        super().__init__()
        concat_dim = embed_dim * num_modalities
        self.fc = nn.Linear(concat_dim, num_modalities)

    def forward(self, embeddings):
        """Return one weight per modality; weights sum to 1 along the last axis.

        ``embeddings`` is a sequence of tensors that are concatenated along
        their last dimension before the linear projection.
        """
        joined = torch.cat(embeddings, dim=-1)
        modality_logits = self.fc(joined)
        return torch.softmax(modality_logits, dim=-1)



### Fusion Model

In [146]:

class FusionModel(nn.Module):
    def __init__(self, embed_dim, num_heads=4, num_slots=24):
        super(FusionModel, self).__init__()
        self.user_content_attn = CrossAttentionBlock(embed_dim, num_heads)
        self.user_context_attn = CrossAttentionBlock(embed_dim, num_heads)
        self.content_context_attn = CrossAttentionBlock(embed_dim, num_heads)
        self.selector = AdaptiveSelector(embed_dim, num_modalities=3)
        self.fc_out = nn.Sequential(
            nn.Linear(embed_dim, embed_dim),
            nn.ReLU(),
            nn.Linear(embed_dim, num_slots)
        )

    def forward(self, user_emb, content_emb, context_emb):
        user = user_emb.unsqueeze(1)
        content = content_emb.unsqueeze(1)
        context = context_emb.unsqueeze(1)

        user_refined = self.user_content_attn(user, content) + self.user_context_attn(user, context)
        content_refined = self.user_content_attn(content, user) + self.content_context_attn(content, context)
        context_refined = self.user_context_attn(context, user) + self.content_context_attn(context, content)

        user_refined = user_refined.squeeze(1)
        content_refined = content_refined.squeeze(1)
        context_refined = context_refined.squeeze(1)

        weights = self.selector([user_refined, content_refined, context_refined])
        fused_emb = (
            weights[:, 0:1] * user_refined +
            weights[:, 1:2] * content_refined +
            weights[:, 2:3] * context_refined
        )

        slot_scores = self.fc_out(fused_emb)
        heatmap = torch.sigmoid(slot_scores)
        return heatmap


### Custom Dataset

In [147]:
class FusionDataset(Dataset):
    """Row-aligned user / video / metadata embeddings with a slot label.

    Row ``i`` of the three CSV files is treated as one sample.

    Feature columns are chosen by name when possible: if a file has columns
    whose names start with ``embedding_``, exactly those columns are used, so
    identifier columns such as ``channel_id`` or a stray index column are
    never cast to float. Files without such columns fall back to positional
    slicing (everything after the first column for the user and metadata
    files, the first ``embed_dim`` columns for the video file).

    Args:
        user_file: CSV with one user embedding per row.
        video_file: CSV with one video embedding per row and a ``slot_id``
            column holding the integer target.
        metadata_file: CSV with one metadata embedding per row.
        embed_dim: Number of leading columns read from ``video_file`` when it
            has no ``embedding_*`` columns.

    Raises:
        AssertionError: If the three files do not have the same number of rows.
        KeyError: If ``video_file`` has no ``slot_id`` column.
    """

    _EMBEDDING_PREFIX = "embedding_"

    def __init__(self, user_file, video_file, metadata_file, embed_dim=384):
        # Load input embeddings
        df_input_vid = pd.read_csv(video_file)
        df_input_user = pd.read_csv(user_file)
        self.user_embeddings = self._embedding_matrix(df_input_user, slice(1, None))
        self.video_embeddings = self._embedding_matrix(df_input_vid, slice(None, embed_dim))
        self.slot_ids = df_input_vid["slot_id"].values.astype(np.int64)

        # Load metadata embeddings (fixed per row)
        df_metadata = pd.read_csv(metadata_file)
        self.metadata_embeddings = self._embedding_matrix(df_metadata, slice(1, None))

        # Ensure alignment (row i in every file describes the same sample)
        n_rows = len(self.user_embeddings)
        assert self.metadata_embeddings.shape[0] == n_rows, "Metadata and input row count mismatch"
        assert self.video_embeddings.shape[0] == n_rows, (
            f"Video and user row count mismatch: {self.video_embeddings.shape[0]} vs {n_rows}"
        )

    @classmethod
    def _embedding_matrix(cls, frame, fallback_columns):
        """Return the feature columns of ``frame`` as a float32 array."""
        named = [col for col in frame.columns if str(col).startswith(cls._EMBEDDING_PREFIX)]
        selected = frame[named] if named else frame.iloc[:, fallback_columns]
        return selected.values.astype(np.float32)

    def __len__(self):
        return len(self.user_embeddings)

    def __getitem__(self, idx):
        """Return ``(user_emb, video_emb, metadata_emb, slot_id)`` tensors for row ``idx``."""
        user_emb = torch.tensor(self.user_embeddings[idx])
        video_emb = torch.tensor(self.video_embeddings[idx])
        metadata_emb = torch.tensor(self.metadata_embeddings[idx])
        slot_id = torch.tensor(self.slot_ids[idx])
        return user_emb, video_emb, metadata_emb, slot_id


In [148]:
# df_meta = pd.read_csv('metadata_embeddings.csv')
# df_meta

In [149]:
# # Preprocessing the csv file

# import pandas as pd
# import ast

# # Load the CSV
# df = pd.read_csv("channel_embedding_results.csv")

# # Convert the string list in embedding_response back to a Python list
# df["embedding_response"] = df["embedding_response"].apply(ast.literal_eval)

# # Expand the list into separate columns
# embeddings_df = pd.DataFrame(df["embedding_response"].tolist())

# # Rename columns as embedding_0, embedding_1, ...
# embeddings_df = embeddings_df.add_prefix("embedding_")

# # # Concatenate channel_id with the expanded embeddings
# # final_df = pd.concat([df["channel_id"], embeddings_df], axis=1)

# # Save to new CSV
# embeddings_df.to_csv("channel_embeddings_expanded.csv", index=False)

# print("Expanded CSV saved as channel_embeddings_expanded.csv")


In [150]:
# Load the expanded channel embeddings.
df = pd.read_csv('channel_embeddings_expanded.csv')

# First row, selected with a list so it stays a one-row DataFrame.
row_to_duplicate = df.iloc[[0]]

# Stack 100 copies of that row on a fresh 0..99 index.
duplicated_rows = pd.concat(
    [row_to_duplicate for _ in range(100)],
    ignore_index=True,
)

# Original rows first, then the copies, re-indexed from 0.
df_extended = pd.concat((df, duplicated_rows), ignore_index=True)


In [151]:
# New frame with the rows of df_extended plus a constant slot_id column;
# df_extended itself is left untouched.
df_vids_emb = df_extended.assign(slot_id=45)
df_vids_emb

Unnamed: 0,channel_id,embedding_0,embedding_1,embedding_2,embedding_3,embedding_4,embedding_5,embedding_6,embedding_7,embedding_8,...,embedding_375,embedding_376,embedding_377,embedding_378,embedding_379,embedding_380,embedding_381,embedding_382,embedding_383,slot_id
0,UCAo_wAxH1WT6rFmK5yCd1Cg,0.047816,0.066386,-0.03279,0.017043,-0.019379,0.012575,0.140922,0.0582,0.011558,...,0.014132,-0.038559,0.04619,-0.046468,0.093365,0.088916,-0.026842,-0.068403,0.104807,45
1,UCAo_wAxH1WT6rFmK5yCd1Cg,0.047816,0.066386,-0.03279,0.017043,-0.019379,0.012575,0.140922,0.0582,0.011558,...,0.014132,-0.038559,0.04619,-0.046468,0.093365,0.088916,-0.026842,-0.068403,0.104807,45
2,UCAo_wAxH1WT6rFmK5yCd1Cg,0.047816,0.066386,-0.03279,0.017043,-0.019379,0.012575,0.140922,0.0582,0.011558,...,0.014132,-0.038559,0.04619,-0.046468,0.093365,0.088916,-0.026842,-0.068403,0.104807,45
3,UCAo_wAxH1WT6rFmK5yCd1Cg,0.047816,0.066386,-0.03279,0.017043,-0.019379,0.012575,0.140922,0.0582,0.011558,...,0.014132,-0.038559,0.04619,-0.046468,0.093365,0.088916,-0.026842,-0.068403,0.104807,45
4,UCAo_wAxH1WT6rFmK5yCd1Cg,0.047816,0.066386,-0.03279,0.017043,-0.019379,0.012575,0.140922,0.0582,0.011558,...,0.014132,-0.038559,0.04619,-0.046468,0.093365,0.088916,-0.026842,-0.068403,0.104807,45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,UCAo_wAxH1WT6rFmK5yCd1Cg,0.047816,0.066386,-0.03279,0.017043,-0.019379,0.012575,0.140922,0.0582,0.011558,...,0.014132,-0.038559,0.04619,-0.046468,0.093365,0.088916,-0.026842,-0.068403,0.104807,45
97,UCAo_wAxH1WT6rFmK5yCd1Cg,0.047816,0.066386,-0.03279,0.017043,-0.019379,0.012575,0.140922,0.0582,0.011558,...,0.014132,-0.038559,0.04619,-0.046468,0.093365,0.088916,-0.026842,-0.068403,0.104807,45
98,UCAo_wAxH1WT6rFmK5yCd1Cg,0.047816,0.066386,-0.03279,0.017043,-0.019379,0.012575,0.140922,0.0582,0.011558,...,0.014132,-0.038559,0.04619,-0.046468,0.093365,0.088916,-0.026842,-0.068403,0.104807,45
99,UCAo_wAxH1WT6rFmK5yCd1Cg,0.047816,0.066386,-0.03279,0.017043,-0.019379,0.012575,0.140922,0.0582,0.011558,...,0.014132,-0.038559,0.04619,-0.046468,0.093365,0.088916,-0.026842,-0.068403,0.104807,45


In [152]:
# df_vids_emb.to_csv("vid_embs_expanded.csv", index=False)
# Only the user-side frame is written; the video-side export above is disabled,
# so "vid_embs_expanded.csv" must already exist on disk for the dataset cell.
df_extended.to_csv(path_or_buf="user_embs_expanded.csv", index=False)

In [153]:


# ---------------------------
# Hyperparameters
# ---------------------------
BATCH_SIZE = 128  # samples per DataLoader batch
NUM_EPOCHS = 10  # passes over the training split
LEARNING_RATE = 1e-4  # learning rate given to the Adam optimizer
EMBED_DIM = 384  # width of each embedding; the frames above carry embedding_0..embedding_383
NUM_HEADS = 4  # attention heads per CrossAttentionBlock; nn.MultiheadAttention needs EMBED_DIM % NUM_HEADS == 0
NUM_SLOTS = 168  # number of slot classes scored by the model; presumably 24 hours x 7 days — TODO confirm


In [154]:

# ---------------------------
# Load Dataset
# ---------------------------
dataset = FusionDataset(
    user_file="user_embs_expanded.csv",
    video_file="vid_embs_expanded.csv",
    metadata_file="metadata_embeddings_expanded.csv",
)

# Split dataset (80/10/10): hold out 20 %, then halve the hold-out into
# validation and test. Both splits use a fixed seed.
train_idx, temp_idx = train_test_split(np.arange(len(dataset)), test_size=0.2, random_state=42)
val_idx, test_idx = train_test_split(temp_idx, test_size=0.5, random_state=42)

train_subset, val_subset, test_subset = (
    torch.utils.data.Subset(dataset, split_idx)
    for split_idx in (train_idx, val_idx, test_idx)
)

# Only the training loader reshuffles between epochs.
train_loader = DataLoader(train_subset, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_subset, shuffle=False, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_subset, shuffle=False, batch_size=BATCH_SIZE)


AssertionError: Metadata and input row count mismatch

In [None]:

# ---------------------------
# Initialize Model
# ---------------------------

model = FusionModel(embed_dim=EMBED_DIM, num_heads=NUM_HEADS, num_slots=NUM_SLOTS)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# ---------------------------
# Loss and Optimizer
# ---------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# FusionModel.forward ends in a sigmoid, so the model emits per-slot
# probabilities in (0, 1). nn.CrossEntropyLoss applies log-softmax itself and
# expects unbounded logits; given probabilities it can barely separate the
# classes. The sigmoid is therefore inverted with torch.logit before the loss
# and the metrics. The eps clamp keeps saturated outputs from becoming +/-inf.
LOGIT_EPS = 1e-6

# ---------------------------
# Training Loop
# ---------------------------
for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0
    for user_emb, video_emb, metadata_emb, slot_id in train_loader:
        user_emb, video_emb, metadata_emb, slot_id = user_emb.to(device), video_emb.to(device), metadata_emb.to(device), slot_id.to(device)

        optimizer.zero_grad()
        slot_probs = model(user_emb, video_emb, metadata_emb)
        slot_scores = torch.logit(slot_probs, eps=LOGIT_EPS)
        loss = criterion(slot_scores, slot_id)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Validation
    model.eval()
    val_loss, top1_correct, top3_correct = 0, 0, 0
    with torch.no_grad():
        for user_emb, video_emb, metadata_emb, slot_id in val_loader:
            user_emb, video_emb, metadata_emb, slot_id = user_emb.to(device), video_emb.to(device), metadata_emb.to(device), slot_id.to(device)
            slot_probs = model(user_emb, video_emb, metadata_emb)
            slot_scores = torch.logit(slot_probs, eps=LOGIT_EPS)
            val_loss += criterion(slot_scores, slot_id).item()

            # Class distribution over slots, consistent with the loss above.
            probs = torch.softmax(slot_scores, dim=-1)
            top1_correct += (probs.argmax(dim=-1) == slot_id).sum().item()
            top3_correct += (torch.topk(probs, k=3, dim=-1).indices == slot_id.unsqueeze(1)).any(dim=1).sum().item()

    # Loss is averaged per batch, accuracy per sample.
    val_loss /= len(val_loader)
    top1_acc = top1_correct / len(val_subset)
    top3_acc = top3_correct / len(val_subset)
    print(f"Epoch {epoch+1}/{NUM_EPOCHS} | Train Loss: {running_loss/len(train_loader):.4f} | Val Loss: {val_loss:.4f} | Top1 Acc: {top1_acc:.4f} | Top3 Acc: {top3_acc:.4f}")


In [None]:
# Persist only the learned parameters (the state_dict), not the module object.
fusion_state = model.state_dict()
torch.save(fusion_state, "fusion_model.pth")
print("Model saved to fusion_model.pth")


Model saved to fusion_model.pth


In [None]:
import mlflow

In [None]:
# Point the client at the tracking server first: set_experiment looks up or
# creates the experiment in whichever tracking store is active when it runs.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("FusionModelExperiment_3")

# FusionModel.forward returns sigmoid probabilities, while nn.CrossEntropyLoss
# expects unbounded logits; the sigmoid is inverted with torch.logit before
# the loss and metrics. The eps clamp avoids +/-inf on saturated outputs.
LOGIT_EPS = 1e-6

# NOTE: this cell does not create `model`, `optimizer` or `criterion`; it
# continues training from whatever state the earlier cells left them in.
with mlflow.start_run(run_name="FusionModelRun_3"):
    mlflow.log_params({
        "batch_size": BATCH_SIZE,
        "num_epochs": NUM_EPOCHS,
        "learning_rate": LEARNING_RATE,
        "embed_dim": EMBED_DIM,
        "num_heads": NUM_HEADS,
        "num_slots": NUM_SLOTS,
    })

    for epoch in range(NUM_EPOCHS):
        model.train()
        running_loss = 0
        for user_emb, video_emb, metadata_emb, slot_id in train_loader:
            user_emb, video_emb, metadata_emb, slot_id = user_emb.to(device), video_emb.to(device), metadata_emb.to(device), slot_id.to(device)

            optimizer.zero_grad()
            slot_probs = model(user_emb, video_emb, metadata_emb)
            slot_scores = torch.logit(slot_probs, eps=LOGIT_EPS)
            loss = criterion(slot_scores, slot_id)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Validation
        model.eval()
        val_loss, top1_correct, top3_correct = 0, 0, 0
        with torch.no_grad():
            for user_emb, video_emb, metadata_emb, slot_id in val_loader:
                user_emb, video_emb, metadata_emb, slot_id = user_emb.to(device), video_emb.to(device), metadata_emb.to(device), slot_id.to(device)
                slot_probs = model(user_emb, video_emb, metadata_emb)
                slot_scores = torch.logit(slot_probs, eps=LOGIT_EPS)
                val_loss += criterion(slot_scores, slot_id).item()

                probs = torch.softmax(slot_scores, dim=-1)
                top1_correct += (probs.argmax(dim=-1) == slot_id).sum().item()
                top3_correct += (torch.topk(probs, k=3, dim=-1).indices == slot_id.unsqueeze(1)).any(dim=1).sum().item()

        # Loss is averaged per batch, accuracy per sample.
        val_loss /= len(val_loader)
        top1_acc = top1_correct / len(val_subset)
        top3_acc = top3_correct / len(val_subset)

        mlflow.log_metric("train_loss", running_loss/len(train_loader), step=epoch)
        mlflow.log_metric("val_loss", val_loss, step=epoch)
        mlflow.log_metric("top1_acc", top1_acc, step=epoch)
        mlflow.log_metric("top3_acc", top3_acc, step=epoch)

        print(f"Epoch {epoch+1}/{NUM_EPOCHS} | Train Loss: {running_loss/len(train_loader):.4f} | Val Loss: {val_loss:.4f} | Top1 Acc: {top1_acc:.4f} | Top3 Acc: {top3_acc:.4f}")

    # Log the model after the loop so the stored artifact contains the weights
    # produced by this run rather than the pre-training state.
    mlflow.pytorch.log_model(model, "fusion_model")

2025/09/30 14:41:55 INFO mlflow.tracking.fluent: Experiment with name 'FusionModelExperiment_2' does not exist. Creating a new experiment.


🏃 View run FusionModelRun_2 at: http://127.0.0.1:5000/#/experiments/870371538631987082/runs/dfebb4ef5f5f4d01975f28b175bc50de
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/870371538631987082


NameError: name 'model' is not defined