# LangChain with PostgreSQL pgvector

This notebook demonstrates how to use LangChain's `PGVector` vector store to build the retrieval step of a RAG application on PostgreSQL with the pgvector extension.

## 1. Setup and Imports

In [17]:
# Import required libraries
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import psycopg2
from langchain.docstore.document import Document
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import PGVector
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

print("✅ Libraries imported successfully!")

ModuleNotFoundError: No module named 'langchain_community'

## 2. Database Connection Setup

In [None]:
# Database configuration.
# Each setting can be overridden through an environment variable so real
# credentials never have to be written into the notebook; the fallbacks are
# the local demo values.
DB_CONFIG = {
    'host': os.environ.get('RAG_DB_HOST', 'localhost'),
    'port': os.environ.get('RAG_DB_PORT', '5432'),
    'database': os.environ.get('RAG_DB_NAME', 'rag_db'),
    'user': os.environ.get('RAG_DB_USER', 'rag_user'),
    'password': os.environ.get('RAG_DB_PASSWORD', 'rag_password')
}


def build_connection_string(config, mask_password=False):
    """Build the SQLAlchemy-style PostgreSQL URL for ``config``.

    Args:
        config: Mapping with 'host', 'port', 'database', 'user' and
            'password' keys.
        mask_password: When True the password is replaced by '***' so the
            result is safe to print.

    Returns:
        The ``postgresql+psycopg2://`` URL as a string.
    """
    # Escape the credentials: characters such as '@', ':' or '/' would
    # otherwise be read as URL delimiters and corrupt the connection string.
    user = quote_plus(config['user'])
    password = '***' if mask_password else quote_plus(config['password'])
    return (
        f"postgresql+psycopg2://{user}:{password}"
        f"@{config['host']}:{config['port']}/{config['database']}"
    )


# Create connection string for LangChain
CONNECTION_STRING = build_connection_string(DB_CONFIG)

print("✅ Database connection string configured")
# The masked URL is built directly instead of via str.replace(), which
# misbehaves when the password is empty or also occurs elsewhere in the URL.
print(f"Connection: {build_connection_string(DB_CONFIG, mask_password=True)}")

## 3. Initialize Embeddings

In [None]:
# Embedding backend selection.
#
# Option 1 (hosted): OpenAI embeddings — requires an API key.
# os.environ["OPENAI_API_KEY"] = "your-openai-api-key-here"
# embeddings = OpenAIEmbeddings()
#
# Option 2 (default): a local sentence-transformers model; free and needs
# no API key.
embeddings = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

print(
    "✅ Embeddings initialized",
    f"Model: {embeddings.model_name}",
    sep="\n",
)

## 4. Create PGVector Store

In [None]:
# Create the PGVector store used by every later cell.
# NOTE: pre_delete_collection=True removes an existing "langchain_docs"
# collection first, so re-running this cell starts from an empty collection.
try:
    vectorstore = PGVector(
        connection_string=CONNECTION_STRING,
        embedding_function=embeddings,
        collection_name="langchain_docs",
        pre_delete_collection=True  # Delete existing collection if it exists
    )
    print("✅ PGVector store created successfully!")
except Exception as e:
    print(f"❌ Error creating vector store: {e}")
    # Re-raise so "Run All" stops here with the real traceback. Swallowing
    # the error would leave `vectorstore` undefined and make every later
    # cell fail with an unrelated NameError.
    raise

## 5. Add Documents to Vector Store

In [None]:
# Sample corpus as (text, source, topic) rows; each row becomes one Document
# whose metadata carries the "source" and "topic" fields.
sample_rows = [
    (
        "PostgreSQL is a powerful, open source object-relational database system with over 35 years of active development.",
        "postgres_wiki",
        "database",
    ),
    (
        "pgvector is a PostgreSQL extension for vector similarity search. It supports L2 distance, inner product, and cosine distance.",
        "pgvector_docs",
        "vector_search",
    ),
    (
        "LangChain is a framework for developing applications powered by language models. It provides components for working with LLMs.",
        "langchain_docs",
        "framework",
    ),
    (
        "RAG (Retrieval-Augmented Generation) combines retrieval from a knowledge base with generative AI to provide more accurate responses.",
        "ai_concepts",
        "rag",
    ),
    (
        "Vector embeddings are numerical representations of text that capture semantic meaning. They enable similarity search and clustering.",
        "ml_concepts",
        "embeddings",
    ),
]

documents = [
    Document(page_content=text, metadata={"source": source, "topic": topic})
    for text, source, topic in sample_rows
]

# Store the documents in the vector store; a failure is reported, not raised.
try:
    vectorstore.add_documents(documents)
    added_count = len(documents)
    print(f"✅ Added {added_count} documents to vector store")
except Exception as e:
    print(f"❌ Error adding documents: {e}")

## 6. Perform Similarity Search

In [None]:
# Perform a plain similarity search (top 3 matches for the query).
query = "What is vector similarity search?"

try:
    results = vectorstore.similarity_search(query, k=3)

    print(f"🔍 Query: '{query}'")
    print(f"📊 Found {len(results)} similar documents:")
    print("-" * 60)

    # similarity_search() returns only the documents, with no score in their
    # metadata, so the former "Similarity Score" line always printed "N/A".
    # Use similarity_search_with_score() when scores are needed.
    for i, doc in enumerate(results, 1):
        print(f"{i}. Content: {doc.page_content}")
        print(f"   Source: {doc.metadata.get('source', 'Unknown')}")
        print(f"   Topic: {doc.metadata.get('topic', 'Unknown')}")
        print()

except Exception as e:
    print(f"❌ Error in similarity search: {e}")

## 7. Similarity Search with Scores

In [None]:
# Same kind of lookup as the plain search, but each hit is a (document, score) pair.
# NOTE(review): PGVector appears to report a distance here (lower = closer)
# rather than a similarity — confirm before interpreting the printed value.
query = "How does PostgreSQL work with AI?"

try:
    results_with_scores = vectorstore.similarity_search_with_score(query, k=3)

    banner = [
        f"🔍 Query: '{query}'",
        f"📊 Found {len(results_with_scores)} documents with scores:",
        "-" * 60,
    ]
    print("\n".join(banner))

    for rank, (doc, score) in enumerate(results_with_scores, start=1):
        source = doc.metadata.get('source', 'Unknown')
        topic = doc.metadata.get('topic', 'Unknown')
        print(
            f"{rank}. Similarity Score: {score:.4f}",
            f"   Content: {doc.page_content}",
            f"   Source: {source}",
            f"   Topic: {topic}",
            "",
            sep="\n",
        )

except Exception as e:
    print(f"❌ Error in similarity search with scores: {e}")

## 8. Filter Search by Metadata

In [None]:
# Similarity search restricted by a metadata filter.
query = "database systems"
topic_filter = {"topic": "database"}  # only documents tagged topic=database

try:
    results = vectorstore.similarity_search(query, k=5, filter=topic_filter)

    banner = [
        f"🔍 Query: '{query}' (filtered by topic: database)",
        f"📊 Found {len(results)} filtered documents:",
        "-" * 60,
    ]
    print("\n".join(banner))

    for rank, doc in enumerate(results, start=1):
        print(
            f"{rank}. Content: {doc.page_content}",
            f"   Metadata: {doc.metadata}",
            "",
            sep="\n",
        )

except Exception as e:
    print(f"❌ Error in filtered search: {e}")

## 9. Maximal Marginal Relevance (MMR) Search

In [None]:
# MMR search: request 3 results chosen from a larger candidate pool.
query = "machine learning and databases"
mmr_options = {
    "k": 3,
    "fetch_k": 10,  # Fetch more documents for diversity
}

try:
    results = vectorstore.max_marginal_relevance_search(query, **mmr_options)

    banner = [
        f"🔍 MMR Query: '{query}'",
        f"📊 Found {len(results)} diverse documents:",
        "-" * 60,
    ]
    print("\n".join(banner))

    for rank, doc in enumerate(results, start=1):
        source = doc.metadata.get('source', 'Unknown')
        topic = doc.metadata.get('topic', 'Unknown')
        print(
            f"{rank}. Content: {doc.page_content}",
            f"   Source: {source}",
            f"   Topic: {topic}",
            "",
            sep="\n",
        )

except Exception as e:
    print(f"❌ Error in MMR search: {e}")

## 10. View Vector Store Statistics

In [None]:
# Get collection statistics straight from the tables LangChain maintains
# (langchain_pg_collection / langchain_pg_embedding).
STATS_COLLECTION = "langchain_docs"

conn = None
try:
    # Connect to database to get statistics
    conn = psycopg2.connect(**DB_CONFIG)

    # The cursor context manager closes the cursor even if a query fails.
    with conn.cursor() as cursor:
        # Get document count (collection name passed as a bound parameter)
        cursor.execute(
            """
            SELECT COUNT(*)
            FROM langchain_pg_embedding
            WHERE collection_id = (
                SELECT uuid FROM langchain_pg_collection WHERE name = %s
            )
            """,
            (STATS_COLLECTION,),
        )
        doc_count = cursor.fetchone()[0]

        print("📊 Vector Store Statistics:")
        print(f"   Collection: {STATS_COLLECTION}")
        print(f"   Documents: {doc_count}")
        print(f"   Embedding Model: {embeddings.model_name}")

        # Ask pgvector for the dimension of one stored vector. The column is
        # named `embedding`, and len() on the fetched value would measure its
        # text representation rather than the number of dimensions.
        cursor.execute(
            """
            SELECT vector_dims(embedding)
            FROM langchain_pg_embedding
            WHERE collection_id = (
                SELECT uuid FROM langchain_pg_collection WHERE name = %s
            )
            LIMIT 1
            """,
            (STATS_COLLECTION,),
        )
        sample_row = cursor.fetchone()
        if sample_row:
            embedding_dim = sample_row[0]
            print(f"   Embedding Dimension: {embedding_dim}")

except Exception as e:
    print(f"❌ Error getting statistics: {e}")
finally:
    # Always release the connection, including when a query raised.
    if conn is not None:
        conn.close()

## 11. Cleanup

In [None]:
# Wrap-up cell: prints a completion message and a recap of the notebook.
# No resource is actually released here; the branch below only reports
# whether a `vectorstore` variable exists.
summary_lines = (
    "- Created PGVector store with LangChain",
    "- Added sample documents with embeddings",
    "- Performed similarity search operations",
    "- Demonstrated metadata filtering",
    "- Used MMR for diverse results",
)

try:
    if 'vectorstore' in locals():
        print("✅ Vector store operations completed")

    print("✅ Notebook completed successfully!")
    print("\n📝 Summary:")
    print("\n".join(summary_lines))

except Exception as e:
    print(f"❌ Error during cleanup: {e}")