In [9]:
!pip install -q "numpy<2.0" --force-reinstall

In [10]:
!pip install -q langchain==0.3.13 langchain-google-genai==2.0.5 langchain-community==0.3.13 faiss-cpu==1.8.0

In [34]:
# ============================================================
# STEP 2: Import Dependencies
# ============================================================
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.memory import (
    ConversationBufferMemory,
    ConversationSummaryMemory,
    ConversationBufferWindowMemory,
    VectorStoreRetrieverMemory
)
from langchain.chains import ConversationChain
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
import os

# ============================================================
# STEP 3: Set Up API Key and Initialize LLM
# ============================================================
# Set your Google API key directly (get it from: https://makersuite.google.com/app/apikey)
os.environ["GOOGLE_API_KEY"] = ""  # Replace with your actual key

# Initialize Gemini LLM
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.3)
print("✓ Gemini model initialized!")

✓ Gemini model initialized!


In [35]:
# ============================================================
# QUICK REVIEW: Buffer Memory
# Remembers everything in the session
# ============================================================
print("\n=== BUFFER MEMORY EXAMPLE ===")

buffer_memory = ConversationBufferMemory(return_messages=True)
conversation = ConversationChain(llm=llm, memory=buffer_memory, verbose=True)

print(conversation.run("Hi, I am allergic to peanuts."))
print(conversation.run("Can I eat peanut butter?"))
# AI remembers the allergy from previous turn!


=== BUFFER MEMORY EXAMPLE ===


[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
[]
Human: Hi, I am allergic to peanuts.
AI:[0m

[1m> Finished chain.[0m
Oh, that's really important to know! Thank you so much for telling me.

As an AI, I don't have a persistent memory of past conversations, so each time we chat, it's like a fresh start for me. This means I won't 'remember' your allergy in future, separate conversations unless you tell me again.

**However, for *this* conversation right now, I will absolutely keep your peanut allergy in mind!** If you ask me about recipes, food recommendations, ingredient lists, or anything where peanuts might be a concern, I will do my very best to:

*   **Flag 

In [37]:
# ============================================================
# QUICK REVIEW: Summary Memory
# Summarizes conversation over time
# ============================================================
print("\n=== SUMMARY MEMORY EXAMPLE ===")

summary_memory = ConversationSummaryMemory(llm=llm)
conversation = ConversationChain(llm=llm, memory=summary_memory, verbose=True)

print(conversation.run("I usually eat oatmeal for breakfast."))
print(conversation.run("Now I want to switch to eggs in the morning."))
print(conversation.run("What did I say about my breakfast habits?"))
# AI uses summarized version of conversation history


=== SUMMARY MEMORY EXAMPLE ===


[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: I usually eat oatmeal for breakfast.
AI:[0m


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 20, model: gemini-2.5-flash
Please retry in 27.466302627s. [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 20
}

ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 20, model: gemini-2.5-flash
Please retry in 25.094941892s. [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 20
}
, retry_delay {
  seconds: 25
}
]

In [33]:
# ============================================================
# MAIN FOCUS: Vector Store Retriever Memory with FAISS
# FAISS = Facebook AI Similarity Search (fast & efficient!)
# This is the BEST option for long-term memory in production!
# ============================================================
print("\n=== VECTOR STORE RETRIEVER MEMORY WITH FAISS ===")

# Step 1: Set up embedding model (converts text to vectors)
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Step 2: Create FAISS vector store (like a smart database)
# Start with an initial text to create the index
vectorstore = FAISS.from_texts(
    ["Initial memory store"],  # Need at least one text to initialize
    embedding_model
)

# Step 3: Create retriever memory (searches for relevant memories)
retriever_memory = VectorStoreRetrieverMemory(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2})  # Returns top 2 matches
)

print("✓ FAISS vector store created!")


=== VECTOR STORE RETRIEVER MEMORY WITH FAISS ===


GoogleGenerativeAIError: Error embedding content: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0, model: embedding-001
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0, model: embedding-001
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0, model: embedding-001
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0, model: embedding-001
Please retry in 4.563242211s. [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "embedding-001"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDayPerUserPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "embedding-001"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerMinutePerUserPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "embedding-001"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "embedding-001"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
, retry_delay {
  seconds: 4
}
]

In [20]:
# Example 1 : Storing and Retreiving a single memory!
retriever_memory.save_context(
    {"input": "Hi, I am allergic to peanuts."},
    {"output":"I'll remmeber your allery"}
)

print("Query - Can i eat peanut butter?")
result = retriever_memory.load_memory_variables({"input":"Can I eat peanut butter?"})
print(result)

NameError: name 'retriever_memory' is not defined

In [21]:
# Example 2 : Storing multiple memoories :

retriever_memory.save_context(
    {"input": "I drink coffee every morning"},
    {"output": "Got it, you're a morning coffee person"}
)


retriever_memory.save_context(
    {"input": "I switched to tea in the mornings"},
    {"output": "Noted, you now prefer tea over coffee"}
)

result = retriever_memory.load_memory_variables({"input": "What do I drink in the morning?"})
print(result)

NameError: name 'retriever_memory' is not defined

In [22]:
# ============================================================
# EXAMPLE 3: Semantic Search Power
# FAISS finds semantically similar content, not just keywords!
# ============================================================
print("\n--- Example 3: Semantic Search ---")

# Create a fresh FAISS store for this example
vectorstore_preferences = FAISS.from_texts(
    ["preference store"],
    embedding_model
)

retriever_memory_pref = VectorStoreRetrieverMemory(
    retriever=vectorstore_preferences.as_retriever(search_kwargs={"k": 2})
)

# Store various preferences
retriever_memory_pref.save_context(
    {"input": "I love Italian cuisine"},
    {"output": "Italian food is your favorite"}
)
retriever_memory_pref.save_context(
    {"input": "Pizza is my favorite food"},
    {"output": "Pizza tops your food list"}
)
retriever_memory_pref.save_context(
    {"input": "I enjoy hiking on weekends"},
    {"output": "Weekend hiking enthusiast"}
)
retriever_memory_pref.save_context(
    {"input": "I play guitar"},
    {"output": "You're a guitar player"}
)

# Query about food - finds related memories even without exact keywords!
print("\nQuery: What kind of food do I like?")
result = retriever_memory_pref.load_memory_variables({"input": "What kind of food do I like?"})
print("Retrieved:", result)


--- Example 3: Semantic Search ---


GoogleGenerativeAIError: Error embedding content: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0, model: embedding-001
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0, model: embedding-001
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0, model: embedding-001
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0, model: embedding-001
Please retry in 33.052549991s. [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDayPerUserPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "embedding-001"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerMinutePerUserPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "embedding-001"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "embedding-001"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "embedding-001"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
, retry_delay {
  seconds: 33
}
]

In [23]:
# ============================================================
# EXAMPLE 4: Saving and Loading FAISS Index (Persistence)
# Unlike Chroma, FAISS requires manual save/load
# ============================================================
print("\n--- Example 4: Saving FAISS Index to Disk ---")

# Save the FAISS index to a file
vectorstore_preferences.save_local("faiss_index")
print("✓ FAISS index saved to 'faiss_index' folder")

# Load it back later (in a new session)
loaded_vectorstore = FAISS.load_local(
    "faiss_index",
    embedding_model,
    allow_dangerous_deserialization=True  # Required for loading
)
print("✓ FAISS index loaded from disk")

# Test that loaded index still works
loaded_retriever = VectorStoreRetrieverMemory(
    retriever=loaded_vectorstore.as_retriever(search_kwargs={"k": 2})
)
result = loaded_retriever.load_memory_variables({"input": "What food do I like?"})
print("Retrieved from loaded index:", result)


--- Example 4: Saving FAISS Index to Disk ---


NameError: name 'vectorstore_preferences' is not defined

In [24]:
# EXAMPLE 5: Combining FAISS Memory with Conversation Chain
# Create a new FAISS store for chatbot
chatbot_vectorstore = FAISS.from_texts(
    ["chatbot memory"],
    embedding_model
)

chatbot_memory = VectorStoreRetrieverMemory(
    retriever=chatbot_vectorstore.as_retriever(search_kwargs={"k": 3})
)

# Store some user facts
chatbot_memory.save_context(
    {"input": "My name is Sarah"},
    {"output": "Nice to meet you, Sarah!"}
)
chatbot_memory.save_context(
    {"input": "I work as a data scientist"},
    {"output": "Data science is an exciting field!"}
)
chatbot_memory.save_context(
    {"input": "I live in San Francisco"},
    {"output": "San Francisco is a beautiful city!"}
)

template = """
You are a helpful assistant with long-term memory.

Relevant conversation history:
{history}

Current conversation:
Human: {input}
AI:
"""

GoogleGenerativeAIError: Error embedding content: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0, model: embedding-001
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0, model: embedding-001
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0, model: embedding-001
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0, model: embedding-001
Please retry in 53.623689808s. [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "embedding-001"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "embedding-001"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerMinutePerUserPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "embedding-001"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDayPerUserPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "embedding-001"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
, retry_delay {
  seconds: 53
}
]

In [25]:
prompt = PromptTemplate(input_variables = ["history", "input"],
                        template = template)

NameError: name 'template' is not defined

In [26]:
conv_with_memory = ConversationChain(llm=llm,
                                     prompt=prompt,
                                     verbose=True,
                                     memory =chatbot_memory)

NameError: name 'prompt' is not defined

In [27]:
print(conv_with_memory.run("Where do I work?"))

NameError: name 'conv_with_memory' is not defined

In [28]:
print(conv_with_memory.run("What's my name? Which city do I live in?"))

NameError: name 'conv_with_memory' is not defined