In [2]:
# !pip install torch

In [None]:
from torch_geometric.data import Data
import numpy as np
import pandas as pd
import torch
import os

In [4]:
# Data locations. The defaults are the original author's local checkout; set
# FASHION_OUTPUT_DIR / FASHION_IMAGE_DIR in the environment to run the notebook
# on another machine without editing this cell.
output_dir = os.environ.get(
    "FASHION_OUTPUT_DIR",
    '/Users/ayeshamendoza/repos/fashion-recommender/data/output',
)
image_dir = os.environ.get(
    "FASHION_IMAGE_DIR",
    '/Users/ayeshamendoza/repos/fashion-recommender/data/images/zara',
)

In [None]:


# Read the precomputed CLIP image embeddings (one row per image file).
clip_csv_path = os.path.join(output_dir, "mvp_clip_image_embeddings.csv")
clip_df = pd.read_csv(clip_csv_path)

# Every column except 'filename' is kept as embedding data, as a numpy array.
clip_embeddings = clip_df.drop(columns=['filename']).to_numpy()
print(clip_embeddings.shape)



(52, 512)


✅ 2. Normalize for cosine similarity / FAISS

In [None]:
# Project-local retrieval helpers; their implementations are not part of this notebook.
from retrieval import normalize, fusion_retrieve, faiss_retrieve

# Normalize the CLIP matrix once; every later retrieval call reuses this array.
# NOTE(review): presumably row-wise L2 normalization so that dot products equal
# cosine similarity — confirm against retrieval.normalize.
clip_embeddings_norm = normalize(clip_embeddings)



✅ 3. Run Fusion Recommender

Load graph data and get `item_nodes`

In [23]:


# Deserialize the saved graph object.
# weights_only=False runs the full unpickler, which can execute arbitrary code:
# only load graph files that were produced by this project.
graph_path = os.path.join(output_dir, "mvp_fashion_graph_data.pt")
data = torch.load(graph_path, weights_only=False)

# 📊 Inspect: overall summary, then the shapes of the main tensors.
print(data)
print(f"Node feature shape: {data.x.shape}")
print(f"Edge index shape: {data.edge_index.shape}")

# Per-node type labels, and how many nodes carry each label.
print("Node type tensor shape:", data.node_type.shape)
node_type_summary = torch.unique(data.node_type, return_counts=True)
print("Node types:", node_type_summary)


Data(x=[143, 512], edge_index=[2, 566], num_nodes=143, num_item_nodes=52, num_attr_nodes=91, node_type=[143])
Node feature shape: torch.Size([143, 512])
Edge index shape: torch.Size([2, 566])
Node type tensor shape: torch.Size([143])
Node types: (tensor([0, 1]), tensor([52, 91]))


In [29]:
# Positions of the nodes whose type label is 0, as a plain Python list.
# (The inspection output shows 52 nodes of type 0, matching num_item_nodes=52.)
item_mask = data.node_type == 0
item_node_indices = torch.nonzero(item_mask, as_tuple=True)[0].tolist()

In [31]:
# Load the per-node GCN outputs.
# map_location="cpu" remaps storages while deserializing, so a file saved from
# a GPU/MPS tensor still loads on a machine without that device; calling
# .cpu() afterwards would be too late because torch.load itself would fail.
gcn_embeddings = torch.load(
    os.path.join(output_dir, "node_logits.pt"),
    map_location="cpu",
).detach().cpu()

# Keep only the item-node rows and convert them to numpy in one step, so the
# name is never bound to a tensor first and an ndarray afterwards.
# NOTE(review): row i here is later paired with row i of the CLIP matrix —
# confirm both follow the same item ordering.
gcn_item_embeddings = gcn_embeddings[item_node_indices].numpy()
gcn_embeddings_norm = normalize(gcn_item_embeddings)

In [32]:
# Row position of the item used as the query.
query_idx = 5
query_clip = clip_embeddings_norm[query_idx]
query_gcn = gcn_embeddings_norm[query_idx]

# The same query_idx indexes both matrices, so they must have one row per item
# in the same order; at minimum the row counts have to agree.
assert len(clip_embeddings_norm) == len(gcn_embeddings_norm), (
    f"CLIP rows ({len(clip_embeddings_norm)}) != GCN rows ({len(gcn_embeddings_norm)})"
)

# Candidate ids are plain row positions. Bound to a name because a later cell
# passes `item_ids` to fusion_retrieve and would otherwise hit a NameError.
item_ids = list(range(len(clip_embeddings)))

# k = number of results; alpha = blend weight between the two embedding spaces
# (labelled "visual vs graph" further down — exact semantics live in
# retrieval.fusion_retrieve).
top_k = fusion_retrieve(
    query_clip, query_gcn,
    clip_embeddings_norm, gcn_embeddings_norm,
    item_ids=item_ids,
    k=5, alpha=0.7
)

print(top_k)

[(5, 1.000000011920929), (51, 0.9285026898439506), (35, 0.9218568474818171), (27, 0.918270451725304), (33, 0.9150771820937609)]


### Save Top-K Results

In [33]:
import pandas as pd

# Tabulate the (item index, score) pairs from the fusion retriever and write
# them to CSV without the DataFrame index.
fusion_csv_path = os.path.join(output_dir, "top_k_fusion_results.csv")
df_top_k = pd.DataFrame(top_k, columns=["item_index", "fusion_score"])
df_top_k.to_csv(fusion_csv_path, index=False)


In [34]:
# Show the top-k fusion table inline.
df_top_k

Unnamed: 0,item_index,fusion_score
0,5,1.0
1,51,0.928503
2,35,0.921857
3,27,0.91827
4,33,0.915077


In [52]:
import pickle

# Load the item-name -> index mapping.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only open
# pickles that were written by this project.
mapping_path = os.path.join(output_dir, "item_to_idx.pkl")
with open(mapping_path, "rb") as mapping_file:
    item_to_idx = pickle.load(mapping_file)

# Invert the mapping (index -> item name), then list the names by ascending index.
idx_to_item = {idx: item for item, idx in item_to_idx.items()}
sorted_items = [item for _, item in sorted(idx_to_item.items())]

# Persist the ordered names as a one-column CSV.
item_names_df = pd.DataFrame({"filename": sorted_items})
item_names_df.to_csv(os.path.join(output_dir,"item_names.csv"), index=False)


In [53]:
# Show the item names in ascending index order.
sorted_items

['zara_01.jpg',
 'zara_02.jpg',
 'zara_03.jpg',
 'zara_04.jpg',
 'zara_05.jpg',
 'zara_06.jpg',
 'zara_07.jpg',
 'zara_08.jpg',
 'zara_09.jpg',
 'zara_10.jpg',
 'zara_11.jpg',
 'zara_12.jpg',
 'zara_13.jpg',
 'zara_14.jpg',
 'zara_15.jpg',
 'zara_16.jpg',
 'zara_17.jpg',
 'zara_18.jpg',
 'zara_19.jpg',
 'zara_20.jpg',
 'zara_21.jpg',
 'zara_22.jpg',
 'zara_23.jpg',
 'zara_24.jpg',
 'zara_25.jpg',
 'zara_26.jpg',
 'zara_27.jpg',
 'zara_28.jpg',
 'zara_29.jpg',
 'zara_30.jpg',
 'zara_31.jpg',
 'zara_32.jpg',
 'zara_33.jpg',
 'zara_34.jpg',
 'zara_35.jpg',
 'zara_36.jpg',
 'zara_37.jpg',
 'zara_38.jpg',
 'zara_39.jpg',
 'zara_40.jpg',
 'zara_41.jpg',
 'zara_42.jpg',
 'zara_43.jpg',
 'zara_44.jpg',
 'zara_45.jpg',
 'zara_46.jpg',
 'zara_47.jpg',
 'zara_48.jpg',
 'zara_49.jpg',
 'zara_50.jpg',
 'zara_51.jpg',
 'zara_53.jpg']

In [56]:
# Round-trip check: reload the CSV written above and show its 'filename'
# column as a plain Python list.
item_names_path = os.path.join(output_dir, "item_names.csv")
items_df = pd.read_csv(item_names_path)
items_df['filename'].tolist()

['zara_01.jpg',
 'zara_02.jpg',
 'zara_03.jpg',
 'zara_04.jpg',
 'zara_05.jpg',
 'zara_06.jpg',
 'zara_07.jpg',
 'zara_08.jpg',
 'zara_09.jpg',
 'zara_10.jpg',
 'zara_11.jpg',
 'zara_12.jpg',
 'zara_13.jpg',
 'zara_14.jpg',
 'zara_15.jpg',
 'zara_16.jpg',
 'zara_17.jpg',
 'zara_18.jpg',
 'zara_19.jpg',
 'zara_20.jpg',
 'zara_21.jpg',
 'zara_22.jpg',
 'zara_23.jpg',
 'zara_24.jpg',
 'zara_25.jpg',
 'zara_26.jpg',
 'zara_27.jpg',
 'zara_28.jpg',
 'zara_29.jpg',
 'zara_30.jpg',
 'zara_31.jpg',
 'zara_32.jpg',
 'zara_33.jpg',
 'zara_34.jpg',
 'zara_35.jpg',
 'zara_36.jpg',
 'zara_37.jpg',
 'zara_38.jpg',
 'zara_39.jpg',
 'zara_40.jpg',
 'zara_41.jpg',
 'zara_42.jpg',
 'zara_43.jpg',
 'zara_44.jpg',
 'zara_45.jpg',
 'zara_46.jpg',
 'zara_47.jpg',
 'zara_48.jpg',
 'zara_49.jpg',
 'zara_50.jpg',
 'zara_51.jpg',
 'zara_53.jpg']

In [38]:
# Attach the item name for every retrieved index; an index missing from
# idx_to_item raises KeyError.
df_top_k["item_name"] = [idx_to_item[idx] for idx in df_top_k["item_index"].tolist()]


### Sample `streamlit` visualization code
```
st.image("images/" + item_name)
st.text(f"Score: {score}")
```

FAISS Retrieval 

- Reuse the normalized CLIP embeddings (`clip_embeddings_norm`) computed in step 2; they do not need to be normalized again.

In [41]:
# clip_embeddings_norm

from retrieval import faiss_retrieve

# Query with the same item as before, this time using the CLIP vectors only.
query_clip = clip_embeddings_norm[query_idx]
candidate_ids = list(range(len(clip_embeddings_norm)))
top_k_faiss = faiss_retrieve(query_clip, clip_embeddings_norm, candidate_ids, k=5)

# Tabulate the (item index, score) pairs and write them to CSV without the index.
faiss_csv_path = os.path.join(output_dir,"top_k_faiss_results.csv")
df_faiss = pd.DataFrame(top_k_faiss, columns=["item_index", "faiss_score"])
df_faiss.to_csv(faiss_csv_path, index=False)

In [45]:
import streamlit as st

# NOTE: Streamlit widgets need the Streamlit runtime. Executed directly in a
# notebook kernel this cell fails with "RuntimeError: Runtime hasn't been
# created!"; the code is meant to live in a script started with `streamlit run`.
query_idx = st.slider("Select an item index", 0, len(clip_embeddings)-1, 5)
alpha = st.slider("Blend weight (visual vs graph)", 0.0, 1.0, 0.7)

query_clip = clip_embeddings_norm[query_idx]
query_gcn  = gcn_embeddings_norm[query_idx]

# Candidate ids are row positions, the same convention as the non-interactive
# fusion call. Defined here because `item_ids` was previously referenced
# without ever being bound (NameError on a fresh kernel).
item_ids = list(range(len(clip_embeddings)))

top_k = fusion_retrieve(query_clip, query_gcn, clip_embeddings_norm, gcn_embeddings_norm, item_ids, k=5, alpha=alpha)

for item_id, score in top_k:
    # Resolve the picture under image_dir rather than output_dir: the names in
    # idx_to_item are image filenames (e.g. zara_01.jpg).
    # NOTE(review): confirm the image files are stored directly under image_dir.
    st.image(os.path.join(image_dir, idx_to_item[item_id]))
    st.caption(f"Fusion Score: {score:.4f}")




RuntimeError: Runtime hasn't been created!

Compare Fusion vs FAISS Results Side-by-Side

In [50]:
# Build one table per retriever from its (item index, score) pairs.
df_fusion = pd.DataFrame(top_k, columns=["item_index", "fusion_score"])
df_faiss = pd.DataFrame(top_k_faiss, columns=["item_index", "faiss_score"])

# Outer-join on the item index so an item returned by only one retriever is
# kept, then order rows by fusion score, highest first.
df_compare = (
    df_fusion
    .merge(df_faiss, on="item_index", how="outer")
    .sort_values(by="fusion_score", ascending=False)
)

# (Optional) Attach item names; an index missing from idx_to_item raises KeyError.
df_compare["item_name"] = [idx_to_item[idx] for idx in df_compare["item_index"].tolist()]

In [51]:
# Show the fusion vs. FAISS comparison table inline.
df_compare

Unnamed: 0,item_index,fusion_score,faiss_score,item_name
0,5,1.0,1.0,zara_06.jpg
4,51,0.928503,0.897866,zara_53.jpg
3,35,0.921857,0.888373,zara_36.jpg
1,27,0.91827,0.883251,zara_28.jpg
2,33,0.915077,0.878722,zara_34.jpg
