In [1]:
!pip install faiss-cpu numpy



In [2]:
!pip install openai==0.28

Collecting openai==0.28
  Using cached openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Using cached openai-0.28.0-py3-none-any.whl (76 kB)
Installing collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.77.0
    Uninstalling openai-1.77.0:
      Successfully uninstalled openai-1.77.0
Successfully installed openai-0.28.0


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-openai 0.3.16 requires openai<2.0.0,>=1.68.2, but you have openai 0.28.0 which is incompatible.


In [3]:
import faiss
import requests
import numpy as np
import os

In [4]:
import os
import numpy as np
import faiss
import requests
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get OpenRouter API key
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    print("⚠️ Warning: OPENROUTER_API_KEY not found in environment variables")
else:
    print(f"✅ OpenRouter API key loaded successfully")

✅ OpenRouter API key loaded successfully


In [5]:
# Helper: Get embedding using OpenRouter API
def get_embedding(text):
    url = "https://openrouter.ai/api/v1/embeddings"
    
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": "https://localhost",  # Required by OpenRouter
        "X-Title": "FAISS Demo"  # Optional, but good practice
    }
    
    data = {
        "model": "text-embedding-ada-002",  # Use the correct embedding model
        "input": text
    }
    
    response = requests.post(url, headers=headers, json=data)
    
    # Debug information
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(f"Response: {response.text}")
        raise Exception(f"API request failed with status code {response.status_code}: {response.text}")
        
    response_json = response.json()
    
    # Debug the response structure
    if 'data' not in response_json:
        print(f"Unexpected response format: {response_json}")
        # Use a fallback method if OpenRouter fails
        return get_fallback_embedding(text)
        
    embedding = response_json['data'][0]['embedding']
    return np.array(embedding, dtype=np.float32)

# Fallback method using a simple embedding technique
def get_fallback_embedding(text):
    print("Using fallback embedding method")
    # Create a simple hash-based embedding (not for production use)
    import hashlib
    # Create a fixed-size embedding of 1536 dimensions (same as OpenAI's)
    embedding = np.zeros(1536, dtype=np.float32)
    
    # Fill the embedding with hash-based values
    words = text.lower().split()
    for i, word in enumerate(words):
        hash_val = int(hashlib.md5(word.encode()).hexdigest(), 16)
        for j in range(min(10, len(word))):
            idx = (hash_val + j * i) % 1536
            embedding[idx] = (hash_val % 10000) / 10000.0
    
    # Normalize the embedding
    norm = np.linalg.norm(embedding)
    if norm > 0:
        embedding = embedding / norm
        
    return embedding

In [6]:
# Test the embedding function
try:
    test_text = "This is a test sentence."
    test_embedding = get_embedding(test_text)
    print(f"✅ Embedding test successful!")
    print(f"Embedding shape: {test_embedding.shape}")
    print(f"First 5 values: {test_embedding[:5]}")
except Exception as e:
    print(f"❌ Embedding test failed: {str(e)}")
    print("Falling back to hash-based embeddings for the demo")

# Example texts
texts = [
    "FAISS is a library for efficient similarity search.",
    "It is developed by Facebook AI Research.",
    "It supports cosine and L2 distance search.",
    "You can use FAISS with OpenRouter embeddings."
]

Error: 404
Response: {"error":{"message":"Not Found","code":404}}
❌ Embedding test failed: API request failed with status code 404: {"error":{"message":"Not Found","code":404}}
Falling back to hash-based embeddings for the demo


In [7]:
# Get embeddings with error handling
try:
    print("Generating embeddings for all texts...")
    embeddings = np.array([get_embedding(text) for text in texts])
    print(f"✅ Successfully generated {len(embeddings)} embeddings")
    print(f"Embedding dimensions: {embeddings.shape}")
except Exception as e:
    print(f"❌ Error generating embeddings: {str(e)}")
    print("Falling back to hash-based embeddings for all texts")
    embeddings = np.array([get_fallback_embedding(text) for text in texts])

Generating embeddings for all texts...
Error: 404
Response: {"error":{"message":"Not Found","code":404}}
❌ Error generating embeddings: API request failed with status code 404: {"error":{"message":"Not Found","code":404}}
Falling back to hash-based embeddings for all texts
Using fallback embedding method
Using fallback embedding method
Using fallback embedding method
Using fallback embedding method


In [8]:
embeddings

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [9]:
# Normalize if using cosine similarity
faiss.normalize_L2(embeddings)  # Optional: only if you're doing cosine similarity

In [10]:
# Create FAISS index
dimension = embeddings.shape[1]
print(dimension)

1536


In [11]:
index = faiss.IndexFlatIP(dimension)

In [12]:
index

<faiss.swigfaiss_avx2.IndexFlatIP; proxy of <Swig Object of type 'faiss::IndexFlatIP *' at 0x000001F87D8BAC90> >

In [13]:
# Add vectors to index
index.add(embeddings)

In [14]:
# Store original texts for lookup
text_id_map = {i: text for i, text in enumerate(texts)}

In [15]:
text_id_map

{0: 'FAISS is a library for efficient similarity search.',
 1: 'It is developed by Facebook AI Research.',
 2: 'It supports cosine and L2 distance search.',
 3: 'You can use FAISS with OpenRouter embeddings.'}

In [16]:
# Query example
query = "What is FAISS?"
query_vector = get_embedding(query)
faiss.normalize_L2(query_vector.reshape(1, -1))  # Normalize for cosine
# Search top 2 most similar
k = 2
distances, indices = index.search(query_vector.reshape(1, -1), k)


Error: 404
Response: {"error":{"message":"Not Found","code":404}}


Exception: API request failed with status code 404: {"error":{"message":"Not Found","code":404}}

In [None]:
# Display results
for i, idx in enumerate(indices[0]):
    print(f"Result {i+1}: {text_id_map[idx]} (Score: {distances[0][i]:.4f})")

Result 1: FAISS is a library for efficient similarity search. (Score: 0.8826)
Result 2: You can use FAISS with OpenAI embeddings. (Score: 0.8335)
