# HNSW Visualization with Federpy

This notebook visualizes the HNSW graph structure using `federpy`. It downloads the necessary embedding data using `gdown` (as seen in other project files) and displays nodes with associated images.

In [6]:
# Install necessary packages, including gdown for data download
!pip install federpy hnswlib h5py numpy pandas gdown

Collecting gdown
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting beautifulsoup4 (from gdown)
  Downloading beautifulsoup4-4.14.3-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>=1.6.1 (from beautifulsoup4->gdown)
  Using cached soupsieve-2.8-py3-none-any.whl.metadata (4.6 kB)
Collecting PySocks!=1.5.7,>=1.5.6 (from requests[socks]->gdown)
  Downloading PySocks-1.7.1-py3-none-any.whl.metadata (13 kB)
Downloading gdown-5.2.0-py3-none-any.whl (18 kB)
Downloading beautifulsoup4-4.14.3-py3-none-any.whl (107 kB)
Using cached soupsieve-2.8-py3-none-any.whl (36 kB)
Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)
Installing collected packages: soupsieve, PySocks, beautifulsoup4, gdown
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4/4[0m [gdown]32m3/4[0m [gdown]fulsoup4]
[1A[2KSuccessfully installed PySocks-1.7.1 beautifulsoup4-4.14.3 gdown-5.2.0 soupsieve-2.8


In [1]:
import hnswlib
import numpy as np
import h5py
import os
import gdown
from federpy.federpy import FederPy
import random

## 1. Load Data
We download `Image_Embedded.h5` from Google Drive using the ID found in `hnsw_search/hnsw.ipynb` (`12R_DHBMOVFVEaPrjBEQXOd7tb14Isb1D`). This ensures we have a valid HDF5 file.

In [None]:
DATA_FILE = 'backend/temp/Image_Embedded.h5'
os.makedirs(os.path.dirname(DATA_FILE), exist_ok=True)

if not os.path.exists(DATA_FILE) or os.path.getsize(DATA_FILE) < 1000:
    print(f"Downloading {DATA_FILE}...")
    # ID found in hnsw_search/hnsw.ipynb for Image_Embedded.h5
    file_id = '12R_DHBMOVFVEaPrjBEQXOd7tb14Isb1D'
    url = f'https://drive.google.com/uc?id={file_id}'
    gdown.download(url, DATA_FILE, quiet=False)

NUM_ELEMENTS = 1000000  # Visualize a subset
DIM = 512           

embeddings = None
image_urls = None

print(f"Loading {DATA_FILE}...")
try:
    with h5py.File(DATA_FILE, 'r') as f:
        if 'embeddings' not in f or 'urls' not in f:
                raise ValueError("File missing 'embeddings' or 'urls' keys")
        
        # Check if we have enough data
        NUM_ELEMENTS = f['embeddings'].shape[0]
        # if total_items < NUM_ELEMENTS:
        #     print(f"Warning: Requested {NUM_ELEMENTS} elements but file only has {total_items}. Using all available.")
        #     NUM_ELEMENTS = total_items
        
        embeddings = f['embeddings'][:NUM_ELEMENTS]
        urls_raw = f['urls'][:NUM_ELEMENTS]
        
        image_urls = []
        for url in urls_raw:
            if isinstance(url, bytes):
                image_urls.append(url.decode('utf-8'))
            else:
                image_urls.append(str(url))
        
        DIM = embeddings.shape[1]
        print(f"Successfully loaded {len(embeddings)} items with dimension {DIM}.")

except OSError as e:
    raise OSError(f"Failed to open file {DATA_FILE}. It might be corrupted. Error: {e}")

Loading backend/temp/Image_Embedded.h5...


NameError: name 'total_items' is not defined

## 2. Build HNSW Index

In [None]:
INDEX_FILE = 'temp_index.bin'

print("Building HNSW index...")
p = hnswlib.Index(space='l2', dim=DIM)
p.init_index(max_elements=NUM_ELEMENTS, ef_construction=200, M=16)
p.add_items(embeddings, np.arange(NUM_ELEMENTS))
p.set_ef(50)
p.save_index(INDEX_FILE)
print(f"Index saved to {INDEX_FILE}")

Building HNSW index...


## 3. Visualize with FederPy

In [6]:
# Initialize FederPy with view parameters for images
view_params = {
    "width": 800,
    "height": 600,
    "mediaType": "image",
    "mediaUrls": image_urls
}

feder = FederPy(INDEX_FILE, 'hnswlib', **view_params)

print("Generating Overview...")
feder.overview()

Generating Overview...


## 4. Visualize Search

In [7]:
query_vector = np.random.rand(DIM).astype(np.float32)
k = 4

print("Visualizing Search...")
# feder.searchRandTestVec()
feder.searchByVec(query_vector)

Visualizing Search...
