# Lucide Icon Loader - Vector Search Indexer

This notebook fetches Lucide icon metadata, generates semantic embeddings, and stores them in Redis for vector search.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/YOUR_USERNAME/IconLoader/blob/main/IconLoader.ipynb)

## 1. Install Dependencies

First, install all required packages:

In [None]:
!pip install -q sentence-transformers redisvl redis requests numpy

## 2. Configuration

Set your Redis URL and other configuration options:

In [None]:
import os
from getpass import getpass

# Get the Redis URL without storing it in the notebook.
# The REDIS_URL environment variable is used when it is set (so the notebook
# can run unattended); otherwise the URL is requested with getpass, which does
# not echo the input.
REDIS_URL = os.environ.get("REDIS_URL") or getpass('Enter your Redis URL (e.g., redis://default:password@host:6379): ')

# Strip surrounding whitespace so a stray space or newline from a paste does
# not become part of the URL, and stop here if nothing usable was provided.
REDIS_URL = REDIS_URL.strip()
if not REDIS_URL:
    raise ValueError("A Redis URL is required (e.g., redis://default:password@host:6379)")

# --- Redis index settings -------------------------------------------------
# KEY_PREFIX is the key prefix the index is created with; INDEX_NAME is the
# name of the search index itself.
KEY_PREFIX = "lucide:icon:"
INDEX_NAME = "lucide_icon_index"

# --- Embedding settings ---------------------------------------------------
# EMBEDDING_DIM is the vector length declared in the index schema and checked
# against every embedding produced by the model.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
EMBEDDING_DIM = 384

# --- Icon metadata source -------------------------------------------------
# Base URL under which each icon's "<slug>.json" metadata file is fetched.
LUCIDE_RAW_BASE = "https://raw.githubusercontent.com/lucide-icons/lucide/main/icons/"

# --- Sample queries -------------------------------------------------------
# Sentences used by the vector-search test section of this notebook.
TEST_SENTENCES = [
    "Found 5 places offering a relaxing drink for your tour.",
    "I found 9 parks to enjoy nature's beauty nearby.",
    "There are 3 locations of cultural interest, ready to inspire.",
    "Found 8 exciting sports and activity locations around.",
    "Found 17 delicious food spots awaiting your hungry stomach.",
    "Discovered 4 historical landmarks worth visiting.",
    "Located 6 shopping centers for your retail therapy.",
    "Found 12 entertainment venues for a fun night out.",
    "There are 7 hotels offering comfortable accommodation.",
    "Spotted 10 scenic viewpoints for amazing photos.",
]

print("✓ Configuration set")

## 3. Icon List

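Upload an `icons.txt` file (one icon slug per line; lines starting with `#` are ignored). If no `icons.txt` is uploaded, a predefined list of icons is used instead: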

In [None]:
# Option 1: Upload icons.txt file (the upload widget only exists in Google Colab)
import io


def parse_icon_list(text: str) -> list:
    """Return the icon slugs listed in ``text``, one slug per line.

    Every line is stripped of surrounding whitespace first; blank lines and
    lines whose first non-blank character is '#' are skipped. A slug that
    appears more than once is kept only at its first position.
    """
    stripped_lines = (line.strip() for line in text.splitlines())
    # dict.fromkeys removes duplicates while preserving the original order.
    return list(dict.fromkeys(s for s in stripped_lines if s and not s.startswith('#')))


try:
    from google.colab import files
except ImportError:
    # Outside Colab there is nothing to upload with; fall through to the
    # predefined list instead of failing the whole cell.
    print("google.colab is not available - skipping file upload")
    uploaded = {}
else:
    print("Upload your icons.txt file:")
    uploaded = files.upload()

if 'icons.txt' in uploaded:
    # 'utf-8-sig' also accepts plain UTF-8 and drops a leading byte-order
    # mark, which would otherwise stick to the first slug.
    icon_slugs = parse_icon_list(uploaded['icons.txt'].decode('utf-8-sig'))
    print(f"✓ Loaded {len(icon_slugs)} icons from file")
else:
    # Option 2: Define icons directly
    icon_slugs = [
        "beer", "wine", "coffee", "martini",
        "trees", "tree-palm", "tent",
        "building-2", "landmark", "church",
        "binoculars", "locate", "navigation",
        "ice-cream-cone", "salad", "popcorn"
    ]
    print(f"✓ Using {len(icon_slugs)} predefined icons")

print(f"Icons to process: {', '.join(icon_slugs[:10])}{'...' if len(icon_slugs) > 10 else ''}")

## 4. Helper Functions

Define all the helper functions:

In [None]:
import json
import requests
from typing import List

def slug_to_component_name(slug: str) -> str:
    """Turn a dash-separated icon slug into its component name.

    The slug is split on '-', each piece is capitalized (first character
    upper-cased, the rest lower-cased) and the pieces are concatenated,
    e.g. 'a-arrow-down' -> 'AArrowDown'.
    """
    return ''.join(map(str.capitalize, slug.split('-')))

def fetch_icon_metadata(slug: str) -> dict:
    """Download and decode the JSON metadata file of a single icon.

    The file '<slug>.json' is requested below ``LUCIDE_RAW_BASE`` with a
    10 second timeout. ``raise_for_status()`` is called on the response
    before its body is parsed, and the parsed JSON is returned.
    """
    response = requests.get(f"{LUCIDE_RAW_BASE}{slug}.json", timeout=10)
    response.raise_for_status()
    return response.json()

def build_description(name: str, slug: str, tags: List[str]) -> str:
    """Compose the text that is embedded for an icon.

    The result has the form '<name> - <slug>; <tag>, <tag>, ...'. When
    ``tags`` is empty the single word 'icon' is used in place of the tag list.
    """
    if not tags:
        return f"{name} - {slug}; icon"
    return f"{name} - {slug}; {', '.join(tags)}"

print("✓ Helper functions defined")

## 5. Connect to Redis & Create Index

Connect to Redis and set up the vector index:

In [None]:
import redis
from redisvl.index import SearchIndex

print("Connecting to Redis...")
redis_client = redis.from_url(REDIS_URL)

# Test connection: ping raises if the server cannot be reached, so a bad URL
# is reported here instead of in a later cell.
redis_client.ping()
print("✓ Connected to Redis")

# Clean existing lucide:* keys.
# scan_iter walks the keyspace incrementally with SCAN rather than issuing a
# single KEYS command, and the keys are deleted in bounded batches so that no
# single DEL carries an unbounded number of arguments.
print("\nCleaning existing lucide:* keys...")
DELETE_BATCH_SIZE = 500
deleted_count = 0
pending_keys = []
for key in redis_client.scan_iter(match="lucide:*", count=DELETE_BATCH_SIZE):
    pending_keys.append(key)
    if len(pending_keys) >= DELETE_BATCH_SIZE:
        deleted_count += redis_client.delete(*pending_keys)
        pending_keys = []
if pending_keys:
    deleted_count += redis_client.delete(*pending_keys)

if deleted_count:
    print(f"✓ Deleted {deleted_count} existing keys")
else:
    print("✓ No existing keys to delete")

# Create index
print("\nCreating vector index...")

# Schema of the vector field: EMBEDDING_DIM float32 components, flat
# algorithm, cosine distance.
embedding_field = {
    "name": "embedding",
    "type": "vector",
    "attrs": {
        "dims": EMBEDDING_DIM,
        "algorithm": "flat",
        "distance_metric": "cosine",
        "datatype": "float32",
    },
}

# Full index definition: hash storage under KEY_PREFIX with a tag field, a
# text field and the vector field declared above.
index_config = {
    "index": {
        "name": INDEX_NAME,
        "prefix": KEY_PREFIX,
        "storage_type": "hash",
    },
    "fields": [
        {"name": "name", "type": "tag"},
        {"name": "description", "type": "text"},
        embedding_field,
    ],
}

index = SearchIndex.from_dict(index_config)
index.set_client(redis_client)

# Only create the index when it is not there yet; an existing one is reused.
if index.exists():
    print(f"✓ Using existing index '{INDEX_NAME}'")
else:
    index.create()
    print(f"✓ Created index '{INDEX_NAME}'")

## 6. Load Model & Process Icons

Load the embedding model and process all icons:

In [None]:
import numpy as np
from sentence_transformers import SentenceTransformer

print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}...")
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
print("✓ Model loaded\n")

# docs collects one record per successfully processed icon; failed_slugs
# remembers the icons that raised so they can be reported after the loop.
docs = []
failed_slugs = []

for i, slug in enumerate(icon_slugs, 1):
    component_name = slug_to_component_name(slug)

    print(f"[{i}/{len(icon_slugs)}] Processing '{slug}' ({component_name})...", end=" ")

    try:
        # Fetch metadata
        meta = fetch_icon_metadata(slug)

        # Normalize tags to a list of strings. A lone string is wrapped
        # rather than passed to list(), which would split it into characters.
        tags = meta.get("tags") or []
        if isinstance(tags, str):
            tags = [tags]
        elif not isinstance(tags, list):
            tags = list(tags)
        tags = [str(tag) for tag in tags]

        # Build description
        description = build_description(component_name, slug, tags)

        # Generate embedding as a float32 array
        embedding = np.asarray(
            model.encode(description, show_progress_bar=False), dtype=np.float32
        )

        if len(embedding) != EMBEDDING_DIM:
            raise ValueError(f"Embedding dimension mismatch: expected {EMBEDDING_DIM}, got {len(embedding)}")

        # Create document; the vector is stored as raw float32 bytes
        docs.append({
            "name": slug,
            "description": description,
            "embedding": embedding.tobytes(),
        })

        print("✓")

    except Exception as e:
        # Best effort: a single failing icon is reported and skipped so the
        # remaining icons are still processed.
        failed_slugs.append(slug)
        print(f"✗ Error: {e}")

print(f"\n✓ Processed {len(docs)} icons successfully")
if failed_slugs:
    print(f"⚠ Failed to process {len(failed_slugs)} icons: {', '.join(failed_slugs)}")

## 7. Index Icons in Redis

Store all processed icons in the Redis vector index:

In [None]:
if docs:
    print(f"Indexing {len(docs)} icons into Redis...")
    # Derive each record's key from its "name" field (the icon slug) instead
    # of a generated id, so running this cell again overwrites the same
    # records rather than adding a second copy of every icon.
    index.load(docs, id_field="name")
    print("✓ Done indexing Lucide icons")
else:
    print("⚠ No icons to index")

## 8. Test Vector Search

Run test queries to verify the vector search is working:

In [None]:
from redisvl.query import VectorQuery

banner = "=" * 60

print(banner)
print("Testing vector search with sample sentences...")
print(banner + "\n")

for test_no, sentence in enumerate(TEST_SENTENCES, start=1):
    # Embed the sentence and pack it as float32 bytes for the query
    sentence_vector = model.encode(sentence, show_progress_bar=False).tolist()

    # Ask the index for the single nearest icon
    top_hits = index.query(
        VectorQuery(
            vector=np.array(sentence_vector, dtype=np.float32).tobytes(),
            vector_field_name="embedding",
            return_fields=["name", "description"],
            num_results=1,
        )
    )

    if not top_hits:
        print(f"[Test {test_no}] \"{sentence}\" => No results found")
        continue

    top_icon = top_hits[0].get("name", "N/A")
    print(f"[Test {test_no}] \"{sentence}\" => {top_icon}")

print("\n" + banner)
print("✓ Testing complete!")
print(banner)

## 9. Custom Search (Optional)

Try your own search queries:

In [None]:
# Enter your custom query (surrounding whitespace is ignored, so input made
# only of spaces counts as "no query")
custom_query = input("Enter your search query: ").strip()

# Number of nearest icons to request and to announce in the heading
top_k = 5

if custom_query:
    query_embedding = model.encode(custom_query, show_progress_bar=False).tolist()
    query_bytes = np.array(query_embedding, dtype=np.float32).tobytes()

    query = VectorQuery(
        vector=query_bytes,
        vector_field_name="embedding",
        return_fields=["name", "description"],
        num_results=top_k
    )

    results = index.query(query)

    print(f"\nTop {top_k} results for: \"{custom_query}\"")
    print("-" * 60)
    if not results:
        print("No results found")
    for i, result in enumerate(results, 1):
        name = result.get("name", "N/A")
        description = result.get("description", "N/A")
        # vector_distance is a distance, not a similarity score: with the
        # index's cosine metric a smaller value means a closer match.
        distance = result.get("vector_distance", "N/A")
        print(f"{i}. {name} (distance: {distance})")
        print(f"   {description}")
else:
    print("No query entered")