In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Set up HuggingFace API key (optional for local models)
huggingface_api_key = os.getenv("HUGGINGFACE_API_KEY")
if huggingface_api_key:
    os.environ["HUGGINGFACE_API_KEY"] = huggingface_api_key
    print("HuggingFace API key loaded successfully")
else:
    print("No HuggingFace API key found - using local models only")


HuggingFace API key loaded successfully


In [3]:
# Import required libraries
from langchain_huggingface import HuggingFaceEmbeddings
import numpy as np

# Create HuggingFace embeddings instance
# Using a lightweight model that works well locally
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},  # Use CPU to avoid GPU issues
    encode_kwargs={'normalize_embeddings': True}
)

print("HuggingFace embeddings model loaded successfully!")
print(f"Model: {embeddings.model_name}")
print(f"Model dimensions: {embeddings.client.get_sentence_embedding_dimension()}")


  from .autonotebook import tqdm as notebook_tqdm


HuggingFace embeddings model loaded successfully!
Model: sentence-transformers/all-MiniLM-L6-v2


AttributeError: 'HuggingFaceEmbeddings' object has no attribute 'client'

In [4]:
# Test the embeddings with sample text
sample_texts = [
    "This is a sample sentence for testing embeddings.",
    "Machine learning is fascinating and powerful.",
    "Natural language processing helps computers understand text."
]

print("Creating embeddings for sample texts...")
try:
    # Generate embeddings
    sample_embeddings = embeddings.embed_documents(sample_texts)
    
    print(f"Successfully created {len(sample_embeddings)} embeddings")
    print(f"Each embedding has {len(sample_embeddings[0])} dimensions")
    
    # Show similarity between first two texts
    similarity = np.dot(sample_embeddings[0], sample_embeddings[1])
    print(f"Similarity between first two texts: {similarity:.4f}")
    
except Exception as e:
    print(f"Error creating embeddings: {e}")
    print("This might be due to model download or initialization issues.")


Creating embeddings for sample texts...
Successfully created 3 embeddings
Each embedding has 384 dimensions
Similarity between first two texts: 0.1780


In [5]:
# Test single document embedding
single_text = "This is a single document for embedding."

print("Creating embedding for single document...")
try:
    single_embedding = embeddings.embed_query(single_text)
    print(f"Single embedding created successfully!")
    print(f"Embedding dimensions: {len(single_embedding)}")
    print(f"First 5 values: {single_embedding[:5]}")
    
except Exception as e:
    print(f"Error creating single embedding: {e}")


Creating embedding for single document...
Single embedding created successfully!
Embedding dimensions: 384
First 5 values: [-0.0375300757586956, 0.026673361659049988, 0.026375433430075645, 0.016167717054486275, 0.05736757442355156]


# HuggingFace Embeddings with LangChain

This notebook demonstrates how to use HuggingFace embeddings with LangChain.

## Key Features:
- Uses `sentence-transformers/all-MiniLM-L6-v2` model (lightweight and efficient)
- Works locally without requiring API keys
- Supports both document and query embeddings
- Includes error handling and testing examples

## Model Information:
- **Model**: sentence-transformers/all-MiniLM-L6-v2
- **Dimensions**: 384
- **Use Case**: General purpose text embeddings
- **Performance**: Fast inference, good quality for most tasks
