<a href="https://colab.research.google.com/github/blue-sketch/motifgnn-stylelink/blob/main/mgnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler

# 1. Load Data
df = pd.read_csv('user_features.csv')

# 2. Extract and Normalize Features
# Drop user_id to get just the 387 features
feature_frame = df.drop(columns=['user_id'])

# The CSV contains missing values. StandardScaler ignores NaNs while fitting
# but keeps them in the transformed output, so they would flow straight into
# the encoder and turn whole embedding rows into NaN. Impute each gap with
# its column mean first; a column that is entirely NaN has no mean, so it
# falls back to 0.
feature_frame = feature_frame.fillna(feature_frame.mean()).fillna(0.0)

feature_data = feature_frame.values
scaler = StandardScaler()
x_scaled = scaler.fit_transform(feature_data)
x_tensor = torch.tensor(x_scaled, dtype=torch.float32)

# 3. Define StyleLink Encoder (The "Vibe" Compressor)
# Input: 387 dims -> Output: 64 dims
class StyleLinkEncoder(nn.Module):
    """One-layer feature compressor: Linear -> BatchNorm1d -> ReLU.

    Args:
        input_dim: Number of input features per row.
        output_dim: Width of the embedding produced for each row.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        projection = nn.Linear(input_dim, output_dim)
        normalizer = nn.BatchNorm1d(output_dim)  # keeps activations on a stable scale
        activation = nn.ReLU()                   # non-linearity; negatives become 0
        self.encoder = nn.Sequential(projection, normalizer, activation)

    def forward(self, x):
        """Run ``x`` through the encoder stack and return the result."""
        encoded = self.encoder(x)
        return encoded

# 4. Generate the Vectors
# The encoder is freshly initialised (random weights, no training yet) and is
# put into eval mode so BatchNorm uses its running statistics.
input_dim = x_tensor.shape[1]
model = StyleLinkEncoder(input_dim, 64).eval()

with torch.no_grad():
    vibe_vectors = model(x_tensor)

# 5. Show results
print(f"Input Shape: {x_tensor.shape}")
print(f"Output 'Vibe Vector' Shape: {vibe_vectors.shape}")
print("\nFirst User's Vibe Vector (First 10 values):")
print(vibe_vectors[0][:10])

# Tabular view of the embeddings, with user_id as the leading column
vibe_columns = [f'vibe_{i}' for i in range(64)]
vibe_df = pd.DataFrame(vibe_vectors.numpy(), columns=vibe_columns)
vibe_df.insert(0, 'user_id', df['user_id'])
print("\nPreview of the 64-dimensional vectors:")
print(vibe_df.head())

Input Shape: torch.Size([388, 387])
Output 'Vibe Vector' Shape: torch.Size([388, 64])

First User's Vibe Vector (First 10 values):
tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.6950, 1.0652, 0.2325,
        0.9271])

Preview of the 64-dimensional vectors:
   user_id    vibe_0    vibe_1    vibe_2    vibe_3    vibe_4    vibe_5  \
0       19  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
1       58  0.000000  0.352296  0.000000  0.715390  0.822862  0.252960   
2       60  0.000000  0.000000  0.647893  1.498623  0.672642  0.000000   
3       76  0.313097  0.000000  0.561914  0.000000  0.076767  0.000000   
4      101  0.548518  0.079172  1.001337  0.000000  0.000000  0.909625   

     vibe_6    vibe_7    vibe_8  ...   vibe_54   vibe_55   vibe_56   vibe_57  \
0  0.694984  1.065150  0.232513  ...  0.000000  0.138096  0.839219  0.000000   
1  0.011015  1.045413  0.000000  ...  0.000000  0.000000  0.000000  0.495057   
2  0.000000  0.000000  0.000000  ...  0.000000 

In [None]:
import torch
import pandas as pd
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
import torch.nn as nn

print("üèóÔ∏è Building the VibeMatch Graph...")

# ==========================================
# STEP 1: RE-GENERATE VIBE VECTORS (64-DIM)
# ==========================================
# (We re-run this to ensure variables are fresh)
df_features = pd.read_csv('user_features.csv')
feature_frame = df_features.drop(columns=['user_id', 'mapped_id'], errors='ignore')

# Impute missing values before scaling. StandardScaler keeps NaNs in its
# output, and the encoder below would then spread them into the node
# features. Each gap gets its column mean; an all-NaN column (no mean)
# falls back to 0.
raw_x = feature_frame.fillna(feature_frame.mean()).fillna(0.0).values

# Normalize
scaler = StandardScaler()
x_scaled = scaler.fit_transform(raw_x)
x_tensor = torch.tensor(x_scaled, dtype=torch.float32)

# Run StyleLink Encoder
class StyleLink(nn.Module):
    """Feature encoder made of Linear -> BatchNorm1d -> ReLU.

    Args:
        input_dim: Number of input features per row.
        output_dim: Embedding width (defaults to 64).
    """

    def __init__(self, input_dim, output_dim=64):
        super().__init__()
        stages = [
            nn.Linear(input_dim, output_dim),
            nn.BatchNorm1d(output_dim),
            nn.ReLU(),
        ]
        self.encoder = nn.Sequential(*stages)

    def forward(self, x):
        """Return the encoder output for ``x``."""
        return self.encoder(x)

# Size the encoder from the data rather than hard-coding 387, so the cell
# keeps working if the feature set changes.
style_model = StyleLink(x_tensor.shape[1], 64)
# Inference only: eval() makes BatchNorm use its running statistics instead
# of per-batch statistics (and stops it updating them), which matches how the
# encoder is run in the other cells.
style_model.eval()
with torch.no_grad():
    vibe_vectors = style_model(x_tensor)

print(f"‚úÖ Nodes Ready: {vibe_vectors.shape[0]} Users with {vibe_vectors.shape[1]}-dim Vibe Vectors.")

# ==========================================
# STEP 2: MAP INTERACTIONS TO NODES
# ==========================================
df_interactions = pd.read_csv('interactions_final.csv')

# create map: {User_ID -> Index 0..N}
unique_users = df_features['user_id'].unique()
user_map = {uid: i for i, uid in enumerate(unique_users)}

# Filter & Map Edges: keep only interactions whose both endpoints are known users
valid_edges = df_interactions[
    df_interactions['source'].isin(user_map) &
    df_interactions['target'].isin(user_map)
].copy()

source_idx = valid_edges['source'].map(user_map).values
target_idx = valid_edges['target'].map(user_map).values

# Convert each index array on its own and stack them. Passing a Python list
# of ndarrays to torch.tensor goes through a slow element-wise path.
edge_index = torch.stack([
    torch.tensor(source_idx, dtype=torch.long),
    torch.tensor(target_idx, dtype=torch.long),
])

print(f"‚úÖ Edges Ready: {edge_index.shape[1]} Interactions mapped.")

# ==========================================
# STEP 3: CREATE THE FINAL GRAPH OBJECT
# ==========================================
vibe_graph = Data(x=vibe_vectors, edge_index=edge_index)

print("\n=== üéØ VibeMatch Graph Object ===")
print(vibe_graph)
print("-------------------------------")
print(f"1. x (Features): {vibe_graph.x.shape}  <-- This is your StyleLink Output")
print(f"2. edge_index:   {vibe_graph.edge_index.shape}  <-- This is your MGNN Input")

üèóÔ∏è Building the VibeMatch Graph...
‚úÖ Nodes Ready: 388 Users with 64-dim Vibe Vectors.
‚úÖ Edges Ready: 1585 Interactions mapped.

=== üéØ VibeMatch Graph Object ===
Data(x=[388, 64], edge_index=[2, 1585])
-------------------------------
1. x (Features): torch.Size([388, 64])  <-- This is your StyleLink Output
2. edge_index:   torch.Size([2, 1585])  <-- This is your MGNN Input


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.utils import to_undirected, negative_sampling, add_self_loops
from sklearn.metrics import roc_auc_score
import numpy as np

# ==========================================
# 1. SETUP & SAFETY CHECKS
# ==========================================
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f"üöÄ Setting up on {device}...")

# Node features come from the previous cell; zero out any NaNs before training
has_nan_features = torch.isnan(vibe_graph.x).any()
if has_nan_features:
    print("‚ö†Ô∏è Warning: NaNs found in Input Features! Fixing them...")
    vibe_graph.x = torch.nan_to_num(vibe_graph.x, nan=0.0)

# Place features and edges on the selected device (GPU when available)
x, edge_index = vibe_graph.x.to(device), vibe_graph.edge_index.to(device)

# ==========================================
# 2. GENERATE MOTIFS (Structure Views)
# ==========================================
def get_dual_motifs(edge_index, num_nodes):
    """Build the two structural views ("motifs") of a directed edge list.

    Args:
        edge_index: Tensor of shape (2, E); row 0 holds sources, row 1 targets.
        num_nodes: Number of nodes, used when adding self-loops.

    Returns:
        ``[motif_recip, motif_cluster]``, both moved to the module-level
        ``device``:
        * ``motif_recip`` - the edges (u, v) whose reverse (v, u) is also
          present; shape (2, 0) when there are none.
        * ``motif_cluster`` - the input edges plus one self-loop per node,
          made undirected.
    """
    sources, targets = edge_index

    # Motif A: Reciprocal Matches (The "Dating" Signal)
    directed_pairs = set(zip(sources.tolist(), targets.tolist()))
    mutual_pairs = [[a, b] for a, b in directed_pairs if (b, a) in directed_pairs]

    if mutual_pairs:
        motif_recip = torch.tensor(mutual_pairs, dtype=torch.long).t().to(device)
    else:
        motif_recip = torch.empty((2, 0), dtype=torch.long).to(device)

    # Motif B: Clusters (The "Community" Signal)
    # Self-loops guarantee every node has at least one connection.
    with_loops, _ = add_self_loops(edge_index, num_nodes=num_nodes)
    motif_cluster = to_undirected(with_loops).to(device)

    return [motif_recip, motif_cluster]

# Split Train/Test: each edge lands in the training set with probability 0.85
num_edges = edge_index.size(1)
mask = torch.rand(num_edges) < 0.85
train_edge_index, test_edge_index = edge_index[:, mask], edge_index[:, ~mask]

# Motifs are derived from the training edges only
train_motifs = get_dual_motifs(train_edge_index, num_nodes=x.size(0))

recip_motif, cluster_motif = train_motifs
print(f"‚úÖ Motifs Generated: {recip_motif.size(1)} Matches, {cluster_motif.size(1)} Cluster Edges.")

# ==========================================
# 3. DEFINE MGNN MODEL (NaN-Safe Version)
# ==========================================
class MotifConv(nn.Module):
    """One GCNConv per motif graph, summed, then BatchNorm1d and ReLU.

    Args:
        in_channels: Input feature width.
        out_channels: Output feature width.
        num_motifs: Number of motif graphs (and GCNConv branches).
    """

    def __init__(self, in_channels, out_channels, num_motifs=2):
        super().__init__()
        branches = []
        for _ in range(num_motifs):
            branches.append(GCNConv(in_channels, out_channels))
        self.convs = nn.ModuleList(branches)
        self.bn = nn.BatchNorm1d(out_channels)  # normalises the summed branch outputs

    def forward(self, x, motif_graphs):
        """Combine the per-motif convolutions of ``x``.

        A branch whose motif graph is missing or has no edges contributes a
        zero tensor. Because ReLU is applied last, the output is non-negative.
        """
        available = len(motif_graphs)
        per_motif = []
        for idx, conv in enumerate(self.convs):
            has_edges = idx < available and motif_graphs[idx].size(1) > 0
            if not has_edges:
                per_motif.append(torch.zeros(x.size(0), conv.out_channels).to(x.device))
                continue
            per_motif.append(conv(x, motif_graphs[idx]))

        # Sum the branch outputs element-wise
        combined = torch.stack(per_motif).sum(dim=0)
        return F.relu(self.bn(combined))

class VibeMatchMGNN(nn.Module):
    """Two stacked MotifConv layers: in_dim -> hidden_dim -> out_dim.

    The input features are expected to be the encoder's vibe vectors.
    """

    def __init__(self, in_dim, hidden_dim, out_dim):
        super().__init__()
        self.mgnn1 = MotifConv(in_dim, hidden_dim)
        self.mgnn2 = MotifConv(hidden_dim, out_dim)

    def forward(self, x, motif_graphs):
        """Apply both layers using the same list of motif graphs."""
        hidden = self.mgnn1(x, motif_graphs)
        return self.mgnn2(hidden, motif_graphs)

# ==========================================
# 4. TRAINING LOOP
# ==========================================
# Link prediction: the score of a pair is the dot product of its two node
# embeddings, trained with binary cross-entropy against sampled non-edges.
model = VibeMatchMGNN(in_dim=64, hidden_dim=64, out_dim=32).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005) # Safe Learning Rate
criterion = torch.nn.BCEWithLogitsLoss()

print(f"\nüöÄ Training Started...")
print(f"{'Epoch':<10} | {'Loss':<10} | {'Test AUC':<10}")
print("-" * 40)

for epoch in range(201):
    model.train()
    optimizer.zero_grad()

    # 1. Forward Pass
    z = model(x, train_motifs)

    # 2. Loss Calculation
    pos_src, pos_dst = train_edge_index
    pos_scores = (z[pos_src] * z[pos_dst]).sum(dim=1)

    neg_src, neg_dst = negative_sampling(train_edge_index, num_nodes=x.size(0))
    neg_scores = (z[neg_src] * z[neg_dst]).sum(dim=1)

    # CLAMPING: Prevent Infinity/NaN in loss
    pos_scores = torch.clamp(pos_scores, -10, 10)
    neg_scores = torch.clamp(neg_scores, -10, 10)

    scores = torch.cat([pos_scores, neg_scores])
    labels = torch.cat([torch.ones_like(pos_scores), torch.zeros_like(neg_scores)])

    loss = criterion(scores, labels)

    if torch.isnan(loss):
        print("‚ö†Ô∏è Loss went NaN! Skipping step.")
        optimizer.zero_grad()
        continue

    loss.backward()

    # CLIPPING: Prevent Exploding Gradients
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

    optimizer.step()

    # 3. Evaluate
    if epoch % 20 == 0:
        model.eval()
        with torch.no_grad():
            z = model(x, train_motifs)

            src, dst = test_edge_index
            pos = (z[src] * z[dst]).sum(dim=1).cpu()

            # Sample evaluation negatives against the FULL edge set. Sampling
            # against test_edge_index alone lets known training edges be
            # drawn as "negatives", which mislabels true links and distorts
            # the AUC. num_neg_samples keeps the classes balanced.
            n_src, n_dst = negative_sampling(
                edge_index,
                num_nodes=x.size(0),
                num_neg_samples=test_edge_index.size(1),
            )
            neg = (z[n_src] * z[n_dst]).sum(dim=1).cpu()

            # Safety check for Eval NaNs
            if torch.isnan(pos).any() or torch.isnan(neg).any():
                auc = 0.5 # Default fallback
            else:
                auc = roc_auc_score(
                    torch.cat([torch.ones_like(pos), torch.zeros_like(neg)]),
                    torch.cat([pos, neg])
                )

            print(f"{epoch:<10} | {loss.item():.4f}     | {auc:.4f}")

print("\n‚úÖ MGNN Model Trained Successfully.")

üöÄ Setting up on cuda...
‚úÖ Motifs Generated: 716 Matches, 2318 Cluster Edges.

üöÄ Training Started...
Epoch      | Loss       | Test AUC  
----------------------------------------
0          | 0.6931     | 0.5000
20         | 0.6931     | 0.5000
40         | 0.6931     | 0.5000
60         | 0.6931     | 0.5000
80         | 0.6931     | 0.5000
100        | 0.6931     | 0.5000
120        | 0.6931     | 0.5000
140        | 0.6931     | 0.5000
160        | 0.6931     | 0.5000
180        | 0.6931     | 0.5000
200        | 0.6931     | 0.5000

‚úÖ MGNN Model Trained Successfully.


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler

# 1. Load Data
df = pd.read_csv('user_features.csv')
print(f"Data Loaded. Shape: {df.shape}")

# 2. Look for constant feature columns (variance == 0), ignoring ID columns
raw_data = df.drop(columns=['user_id', 'mapped_id'], errors='ignore')
variances = raw_data.var()
zero_var_cols = variances.index[variances == 0].tolist()
print(f"Columns with Zero Variance: {len(zero_var_cols)}")
if zero_var_cols:
    print(f"Examples: {zero_var_cols[:5]}")
    # Constant columns carry no signal, so remove them
    raw_data = raw_data.drop(columns=zero_var_cols)

# 3. Count missing and infinite values in the raw features
nan_total = raw_data.isna().sum().sum()
inf_total = np.isinf(raw_data).sum().sum()
print(f"NaNs in raw data: {nan_total}")
print(f"Infs in raw data: {inf_total}")

# 4. Normalize
# The counts printed below show whether scaling itself introduced any
# NaN/Inf values, or whether they were already present in the input.
scaler = StandardScaler()
x_scaled = scaler.fit_transform(raw_data.values)

print(f"Scaled Data Shape: {x_scaled.shape}")
print(f"NaNs in Scaled Data: {np.isnan(x_scaled).sum()}")
print(f"Infs in Scaled Data: {np.isinf(x_scaled).sum()}")

# 5. Run StyleLink (Encoder)
# Recreate the exact encoder logic
class StyleLink(nn.Module):
    """Linear -> BatchNorm1d -> ReLU encoder used for the diagnostics run.

    Args:
        input_dim: Number of input features per row.
        output_dim: Embedding width (defaults to 64).
    """

    def __init__(self, input_dim, output_dim=64):
        super().__init__()
        linear = nn.Linear(input_dim, output_dim)
        batch_norm = nn.BatchNorm1d(output_dim)
        self.encoder = nn.Sequential(linear, batch_norm, nn.ReLU())

    def forward(self, x):
        """Return the encoder output for ``x``."""
        result = self.encoder(x)
        return result

input_dim = x_scaled.shape[1]
model = StyleLink(input_dim, 64)

# Explicit initialisation: He (Kaiming) normal for the linear weights,
# identity scale and zero shift for the batch-norm layer.
for m in model.modules():
    if isinstance(m, nn.BatchNorm1d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

x_tensor = torch.tensor(x_scaled, dtype=torch.float32)

# Inference pass in eval mode, without gradient tracking
model.eval()
with torch.no_grad():
    vibe_vectors = model(x_tensor)

# Health summary of the embeddings
nan_count = torch.isnan(vibe_vectors).sum().item()
zero_count = (vibe_vectors == 0).sum().item()
print(f"Vibe Vectors NaNs: {nan_count}")
print(f"Vibe Vectors Max: {vibe_vectors.max().item()}")
print(f"Vibe Vectors Min: {vibe_vectors.min().item()}")
print(f"Vibe Vectors Mean: {vibe_vectors.mean().item()}")
print(f"Count of Exact Zeros: {zero_count} / {vibe_vectors.numel()}")

# 6. Collapse check: spread of embedding dimension 0 across all users
first_dim_std = vibe_vectors[:, 0].std().item()
print(f"Std Dev across users (Feature 0): {first_dim_std}")

Data Loaded. Shape: (388, 388)
Columns with Zero Variance: 0
NaNs in raw data: 228
Infs in raw data: 0
Scaled Data Shape: (388, 387)
NaNs in Scaled Data: 228
Infs in Scaled Data: 0
Vibe Vectors NaNs: 64
Vibe Vectors Max: nan
Vibe Vectors Min: nan
Vibe Vectors Mean: nan
Count of Exact Zeros: 12385 / 24832
Std Dev across users (Feature 0): nan


In [None]:
# ==========================================
# FINAL "VIBEMATCH" REPAIR & TRAIN SCRIPT
# ==========================================
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torch_geometric.utils import to_undirected, negative_sampling, add_self_loops
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score

print("üîß Starting Full Repair Pipeline...")

# ==========================================
# 1. LOAD & CLEAN DATA (The NaN Fix)
# ==========================================
df_features = pd.read_csv('user_features.csv')
print(f"   - Raw Data Shape: {df_features.shape}")

# Keep only the feature columns (IDs removed when present)
raw_x = df_features.drop(columns=['user_id', 'mapped_id'], errors='ignore')

# CRITICAL FIX: replace every missing value with its column mean so NaNs
# cannot travel through scaling into the embeddings
column_means = raw_x.mean()
raw_x = raw_x.fillna(column_means)
remaining_nans = raw_x.isna().sum().sum()
print(f"   - NaNs fixed. Remaining NaNs: {remaining_nans}")

# Normalize (StandardScaler) and convert to a float32 tensor
scaler = StandardScaler()
x_scaled = scaler.fit_transform(raw_x.values)
x_tensor = torch.tensor(x_scaled, dtype=torch.float32)

# ==========================================
# 2. RUN STYLELINK (Generate Vibe Vectors)
# ==========================================
class StyleLink(nn.Module):
    """Encoder that maps raw features to vibe vectors.

    The stack is Linear -> BatchNorm1d -> ReLU.

    Args:
        input_dim: Number of input features per row.
        output_dim: Embedding width (defaults to 64).
    """

    def __init__(self, input_dim, output_dim=64):
        super().__init__()
        layers = []
        layers.append(nn.Linear(input_dim, output_dim))
        layers.append(nn.BatchNorm1d(output_dim))  # normalises the projection
        layers.append(nn.ReLU())
        self.encoder = nn.Sequential(*layers)

    def forward(self, x):
        """Return the encoder output for ``x``."""
        return self.encoder(x)

# Initialize and run StyleLink
# The encoder is untrained here; eval() makes BatchNorm use its running
# statistics for this inference-only pass.
style_model = StyleLink(input_dim=x_tensor.shape[1], output_dim=64)
style_model.eval() # Inference mode
with torch.no_grad():
    vibe_vectors = style_model(x_tensor)

print(f"‚úÖ Vibe Vectors Generated. Shape: {vibe_vectors.shape}")
print(f"   - Vibe NaNs: {torch.isnan(vibe_vectors).sum().item()} (Should be 0)")

# ==========================================
# 3. BUILD GRAPH (Map Interactions)
# ==========================================
df_interactions = pd.read_csv('interactions_final.csv')

# Create Mapping: User ID -> Index 0..N
unique_users = df_features['user_id'].unique()
user_map = {uid: i for i, uid in enumerate(unique_users)}
num_nodes = len(unique_users)

# Filter edges: keep only interactions whose both endpoints are known users
valid_edges = df_interactions[
    df_interactions['source'].isin(user_map) &
    df_interactions['target'].isin(user_map)
].copy()

src = valid_edges['source'].map(user_map).values
dst = valid_edges['target'].map(user_map).values

# Convert each index array on its own and stack them. Passing a Python list
# of ndarrays to torch.tensor goes through a slow element-wise path.
edge_index = torch.stack([
    torch.tensor(src, dtype=torch.long),
    torch.tensor(dst, dtype=torch.long),
])

print(f"‚úÖ Graph Built: {num_nodes} Nodes, {edge_index.shape[1]} Edges.")

# ==========================================
# 4. DEFINE MGNN MODEL (No ReLU Trap)
# ==========================================
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class MotifConv(nn.Module):
    """One GCNConv per motif graph, summed, batch-normalised, optional ReLU.

    Args:
        in_channels: Input feature width.
        out_channels: Output feature width.
        num_motifs: Number of motif graphs (and GCNConv branches).
        act: When True the output goes through ReLU; when False the
            batch-normalised sum is returned unchanged.
    """

    def __init__(self, in_channels, out_channels, num_motifs=2, act=True):
        super().__init__()
        branches = [GCNConv(in_channels, out_channels) for _ in range(num_motifs)]
        self.convs = nn.ModuleList(branches)
        self.bn = nn.BatchNorm1d(out_channels)
        self.act = act  # activation toggle

    def forward(self, x, motif_graphs):
        """Combine the per-motif convolutions of ``x``.

        A branch whose motif graph is missing or has no edges contributes a
        zero tensor.
        """
        outs = [
            conv(x, motif_graphs[i])
            if i < len(motif_graphs) and motif_graphs[i].size(1) > 0
            else torch.zeros(x.size(0), conv.out_channels).to(x.device)
            for i, conv in enumerate(self.convs)
        ]
        normalized = self.bn(torch.stack(outs).sum(dim=0))

        # Skipping ReLU lets this layer emit negative values, so dot-product
        # scores built from its output can be negative as well.
        return F.relu(normalized) if self.act else normalized

class VibeMatchMGNN(nn.Module):
    """Two stacked MotifConv layers: in_dim -> hidden_dim -> out_dim.

    The hidden layer applies ReLU; the output layer does not, so the final
    embeddings may contain negative values.
    """

    def __init__(self, in_dim, hidden_dim, out_dim):
        super().__init__()
        self.mgnn1 = MotifConv(in_dim, hidden_dim, act=True)    # hidden layer, with ReLU
        self.mgnn2 = MotifConv(hidden_dim, out_dim, act=False)  # output layer, no ReLU

    def forward(self, x, motif_graphs):
        """Apply both layers using the same list of motif graphs."""
        hidden = self.mgnn1(x, motif_graphs)
        return self.mgnn2(hidden, motif_graphs)

# ==========================================
# 5. TRAIN LOOP
# ==========================================
# Prepare Data
# Node features are the StyleLink vibe vectors; both tensors are moved to the
# device chosen above. Note that `edge_index` is rebound to its on-device copy.
x = vibe_vectors.to(device)
edge_index = edge_index.to(device)

# Generate Motifs
def get_dual_motifs(edge_index, num_nodes):
    """Build the two structural views ("motifs") of a directed edge list.

    Args:
        edge_index: Tensor of shape (2, E); row 0 holds sources, row 1 targets.
        num_nodes: Number of nodes, used when adding self-loops.

    Returns:
        ``[motif_recip, motif_cluster]``, both moved to the module-level
        ``device``. ``motif_recip`` holds the edges whose reverse is also
        present (shape (2, 0) when there are none); ``motif_cluster`` is the
        input plus one self-loop per node, made undirected.
    """
    heads, tails = edge_index

    # Motif 1: Reciprocal - keep (u, v) only when (v, u) exists as well
    seen_pairs = set(zip(heads.tolist(), tails.tolist()))
    recip_edges = [[u, v] for u, v in seen_pairs if (v, u) in seen_pairs]
    if recip_edges:
        motif_recip = torch.tensor(recip_edges, dtype=torch.long).t().to(device)
    else:
        motif_recip = torch.empty((2, 0), dtype=torch.long).to(device)

    # Motif 2: Cluster - self-looped, undirected version of the input edges
    looped_edges, _ = add_self_loops(edge_index, num_nodes=num_nodes)
    motif_cluster = to_undirected(looped_edges).to(device)

    return [motif_recip, motif_cluster]

# Split & Motifs
# Each edge goes to the training set with probability 0.85; motifs are built
# from the training edges only.
mask = torch.rand(edge_index.size(1)) < 0.85
train_edge_index = edge_index[:, mask]
test_edge_index = edge_index[:, ~mask]
train_motifs = get_dual_motifs(train_edge_index, num_nodes)

# Initialize Model
model = VibeMatchMGNN(in_dim=64, hidden_dim=64, out_dim=32).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
criterion = torch.nn.BCEWithLogitsLoss()

print(f"\nüöÄ Training Started on {device}...")
print(f"{'Epoch':<10} | {'Loss':<10} | {'Test AUC':<10}")
print("-" * 40)

for epoch in range(301):
    model.train()
    optimizer.zero_grad()

    z = model(x, train_motifs)

    # Loss: dot-product scores for real training edges vs. sampled non-edges
    pos_src, pos_dst = train_edge_index
    neg_src, neg_dst = negative_sampling(train_edge_index, num_nodes=num_nodes)

    pos_scores = (z[pos_src] * z[pos_dst]).sum(dim=1)
    neg_scores = (z[neg_src] * z[neg_dst]).sum(dim=1)

    # Clamp for safety
    pos_scores = torch.clamp(pos_scores, -10, 10)
    neg_scores = torch.clamp(neg_scores, -10, 10)

    loss = criterion(torch.cat([pos_scores, neg_scores]),
                     torch.cat([torch.ones_like(pos_scores), torch.zeros_like(neg_scores)]))

    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    optimizer.step()

    # Eval
    if epoch % 50 == 0:
        model.eval()
        with torch.no_grad():
            z = model(x, train_motifs)
            src, dst = test_edge_index
            pos = (z[src] * z[dst]).sum(dim=1).cpu()

            # Sample evaluation negatives against the FULL edge set. Sampling
            # against test_edge_index alone lets known training edges be
            # drawn as "negatives", which mislabels true links and distorts
            # the AUC. num_neg_samples keeps the classes balanced.
            n_src, n_dst = negative_sampling(
                edge_index,
                num_nodes=num_nodes,
                num_neg_samples=test_edge_index.size(1),
            )
            neg = (z[n_src] * z[n_dst]).sum(dim=1).cpu()

            auc = roc_auc_score(torch.cat([torch.ones_like(pos), torch.zeros_like(neg)]),
                                torch.cat([pos, neg]))
            print(f"{epoch:<10} | {loss.item():.4f}     | {auc:.4f}")

print("\n‚úÖ DONE! If AUC > 0.5, your VibeMatch Engine is alive!")

üîß Starting Full Repair Pipeline...
   - Raw Data Shape: (388, 388)
   - NaNs fixed. Remaining NaNs: 0
‚úÖ Vibe Vectors Generated. Shape: torch.Size([388, 64])
   - Vibe NaNs: 0 (Should be 0)
‚úÖ Graph Built: 388 Nodes, 1585 Edges.

üöÄ Training Started on cuda...
Epoch      | Loss       | Test AUC  
----------------------------------------
0          | 1.3155     | 0.6730
50         | 0.5605     | 0.8133
100        | 0.5002     | 0.8720
150        | 0.4710     | 0.8873
200        | 0.4347     | 0.8684
250        | 0.4275     | 0.8636
300        | 0.4275     | 0.8763

‚úÖ DONE! If AUC > 0.5, your VibeMatch Engine is alive!


In [None]:
# ==========================================
# 6. VIBEMATCH INFERENCE ENGINE
# ==========================================
import torch

# 1. Select a Test User (Pick any ID from your data)
# Let's pick the first user in the list
test_user_idx = 0
real_user_id = unique_users[test_user_idx]

print(f"üíò Generating Matches for User ID: {real_user_id}")

# 2. Get Final Embeddings from the Trained Model
model.eval()
with torch.no_grad():
    # Pass the features and graph through the trained MGNN
    embeddings = model(x, train_motifs)

# 3. Calculate "Compatibility Scores"
# (Dot product of Test User vs Everyone Else)
user_embedding = embeddings[test_user_idx]
scores = (embeddings @ user_embedding).cpu().numpy()

# 4. Filter Recommendations
# We don't want to recommend people they already swiped on!
# Targets of the test user's outgoing edges, kept in a set so the membership
# check below is a hash lookup rather than an array scan per candidate.
existing_edges = set(edge_index[1, edge_index[0] == test_user_idx].tolist())

# Everyone except the user themselves and their existing connections
candidates = [
    (other_idx, scores[other_idx])
    for other_idx in range(num_nodes)
    if other_idx != test_user_idx and other_idx not in existing_edges
]

# 5. Sort by Vibe Score (Highest First)
candidates.sort(key=lambda pair: pair[1], reverse=True)
top_matches = candidates[:5]

# 6. Display Results
print(f"\nüèÜ Top 5 Recommendations for User {real_user_id}:")
print(f"{'Rank':<5} | {'User ID':<10} | {'Vibe Score':<10}")
print("-" * 35)

for rank, (idx, score) in enumerate(top_matches, 1):
    match_id = unique_users[idx]
    print(f"#{rank:<4} | {match_id:<10} | {score:.4f}")

print("\n(Note: A high positive score means a strong predicted match!)")

üíò Generating Matches for User ID: 19

üèÜ Top 5 Recommendations for User 19:
Rank  | User ID    | Vibe Score
-----------------------------------
#1    | 6771       | 2.3066
#2    | 483        | 2.2237
#3    | 12766      | 2.1783
#4    | 1886       | 2.0066
#5    | 13222      | 1.8476

(Note: A high positive score means a strong predicted match!)


In [None]:
import torch
import pandas as pd
from google.colab import files

print("üíæ Saving VibeMatch Artifacts...")

# 1. Save the Trained Model (The Brain)
# NOTE: the state dict keeps the device the model was trained on; pass
# map_location to torch.load when restoring it on a machine without that device.
torch.save(model.state_dict(), 'vibematch_model.pth')
print("   - Model saved as 'vibematch_model.pth'")

# 2. Save the User Embeddings (The Data)
# We run the model one last time to get the freshest vectors
model.eval()
with torch.no_grad():
    final_embeddings = model(x, train_motifs)
# Save a CPU copy: the file is downloaded to the user's own machine, and a
# tensor saved from the GPU cannot be loaded on a CPU-only machine without
# extra arguments.
torch.save(final_embeddings.cpu(), 'vibe_embeddings.pt')
print("   - Embeddings saved as 'vibe_embeddings.pt'")

# 3. Save the Top Recommendations to CSV
# `candidates` comes from the inference cell: (node index, score) pairs for
# the selected test user, already sorted from best to worst.
rec_df = pd.DataFrame(candidates, columns=['user_idx', 'score'])
rec_df['user_id'] = rec_df['user_idx'].map(lambda i: unique_users[i])
rec_df = rec_df[['user_id', 'score']].head(50) # Top 50 matches
rec_df.to_csv('top_recommendations.csv', index=False)
print("   - Top 50 matches saved as 'top_recommendations.csv'")

# 4. Trigger Download (for Colab)
print("\n‚¨áÔ∏è Downloading files to your computer...")
files.download('vibematch_model.pth')
files.download('vibe_embeddings.pt')
files.download('top_recommendations.csv')

üíæ Saving VibeMatch Artifacts...
   - Model saved as 'vibematch_model.pth'
   - Embeddings saved as 'vibe_embeddings.pt'
   - Top 50 matches saved as 'top_recommendations.csv'

‚¨áÔ∏è Downloading files to your computer...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>