In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Movie Chatbot Tutorial 
This codelab provides an introduction to using Memorystore for Redis and LangChain. It walks through how to connect to and use Memorystore for Redis as a vector store, document loader, and chat history store. The codelab also provides a dataset of movie titles from Netflix that you can use to experiment with the tools.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googleapis/langchain-google-memorystore-redis-python/blob/main/samples/langchain_quick_start.ipynb)

# Download the Netflix Dataset

In [15]:
from google.cloud import storage

# Where the dataset lives in Cloud Storage, and where to save it locally
bucket_name = "cloud-samples-data"
source_blob_name = "langchain/netflix_titles_compute_embeddings.csv"
destination_file_name = "./netflix_titles_compute_embeddings.csv"

# Create the Google Cloud Storage client
gcs_client = storage.Client()

# Resolve the bucket and the blob (file) handles
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(source_blob_name)

# Copy the blob's contents into the local destination file
blob.download_to_filename(destination_file_name)

# Load the Data as LangChain Documents

In [None]:
import csv
from langchain_core.documents.base import Document

# Path to the CSV file
csv_file_path = "./netflix_titles_compute_embeddings.csv"

# Read the CSV once and build one Document per row.
# newline="" is what the csv module asks for so that line breaks embedded in
# quoted fields are parsed by the csv reader rather than rewritten by open().
with open(csv_file_path, mode="r", encoding="utf-8", newline="") as file:
    reader = csv.DictReader(file)

    # Every column except 'description' is carried as metadata.
    # `fieldnames` is None when the file is empty, so fall back to no fields
    # instead of failing while iterating over None.
    metadata_fields = [
        header for header in (reader.fieldnames or []) if header != "description"
    ]

    docs = [
        Document(
            # The page content is the entire row rendered as "column: value" pairs
            page_content=" | ".join(f"{key}: {value}" for key, value in row.items()),
            # The metadata is the same row without the 'description' column
            metadata={field: row[field] for field in metadata_fields},
        )
        for row in reader
    ]

# Import and Initialize an Embeddings Service

In [None]:
import os
from langchain_google_vertexai import VertexAIEmbeddings

# Placeholder value: set this to the Google Cloud project to use.
PROJECT_ID = "my_project_id"  # @param {type:"string"}

# Embeddings client used later by the vector store.
# PROJECT_ID is already a string, so it is passed directly rather than being
# wrapped in an f-string.
embeddings_service = VertexAIEmbeddings(
    model_name="textembedding-gecko@latest",
    project=PROJECT_ID,
)

# Set Up a Connection to a Memorystore for Redis Instance

In [18]:
import redis

# Address of the Redis endpoint written as "host:port"; the value below is the
# local address 127.0.0.1 on port 6379.
REDIS_HOST = "127.0.0.1:6379"  # @param {type:"string"}
# Turn the address into a redis:// URL and create the client from it.
client = redis.from_url(f"redis://{REDIS_HOST}")

# Initialize the Vector Index in the Memorystore for Redis

In [21]:
from langchain_google_memorystore_redis import (
    DistanceStrategy,
    HNSWConfig,
    RedisVectorStore,
)

# Describe the HNSW index: its name, cosine distance, and 768-dimensional
# vectors (presumably this must match the embedding model's output size —
# confirm against the model documentation).
index_config = HNSWConfig(
    name="netflix_complete:",
    distance_strategy=DistanceStrategy.COSINE,
    vector_size=768,
)

# Initialize that index through the Redis client
RedisVectorStore.init_index(index_config=index_config, client=client)

# Instantiate a Vector Store Object

In [22]:
# Vector store bound to the "netflix_complete:" index, using the Redis client
# for storage and the embeddings service for vectorization.
vector_store = RedisVectorStore(
    client=client,
    index_name="netflix_complete:",
    embeddings=embeddings_service,
)

# Add Documents to the Vector Store

In [23]:
# Add every Document built from the CSV to the vector store. The return value
# is kept in `ids` (presumably the keys assigned to the stored documents —
# confirm against the library docs).
ids = vector_store.add_documents(docs)

# Initialize Memorystore for Redis as `ChatMessageHistory`

In [9]:
from langchain_google_memorystore_redis import MemorystoreChatMessageHistory

# Chat message history that goes through the same Redis client, under a fixed
# session id.
chat_history = MemorystoreChatMessageHistory(client=client, session_id="my_session")

# Build a Movie Question-Answering Chatbot

In [24]:
from langchain_google_vertexai import VertexAIEmbeddings, VertexAI
from langchain_core.messages import AIMessage, HumanMessage
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryBufferMemory
from langchain_core.prompts import PromptTemplate
from langchain_google_memorystore_redis import MemorystoreChatMessageHistory

# Suppress all deprecation warnings: install an "ignore" filter for every
# warning whose category is DeprecationWarning. The filter is global to the
# running interpreter, so it also affects cells executed after this one.
import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)

# Prepare some prompt templates for the ConversationalRetrievalChain

# Answering prompt. It is filled with three variables: {chat_history},
# {context} (described in the template as the vector search result) and
# {question}. The instructions tell the model to answer from those inputs only
# and to say that no answer was found when they do not contain one.
prompt = PromptTemplate(
    template="""Use all the information from the context and the conversation history to answer new question. If you see the answer in previous conversation history or the context. \
Answer it with clarifying the source information. If you don't see it in the context or the chat history, just say you \
didn't find the answer in the given data. Don't make things up.

Previous conversation history from the questioner. "Human" was the user who's asking the new question. "Assistant" was you as the assistant:
```{chat_history}
```

Vector search result of the new question:
```{context}
```

New Question:
```{question}```

Answer:""",
    input_variables=["context", "question", "chat_history"],
)
# Question-condensing prompt that only asks for the question to be repeated.
# NOTE(review): the name suggests the intent is to pass the user's question
# through unchanged instead of rewriting it from the chat history — confirm.
condense_question_prompt_passthrough = PromptTemplate(
    template="""Repeat the following question:
{question}
""",
    input_variables=["question"],
)

# Initialize the retriever and the LLM for the chain

# Retriever over the vector store using the "mmr" search type, configured with
# k=5 and lambda_mult=0.8.
retriever = vector_store.as_retriever(
    search_type="mmr", search_kwargs={"k": 5, "lambda_mult": 0.8}
)

# PROJECT_ID is already a string, so it is passed directly rather than being
# wrapped in an f-string.
llm = VertexAI(model_name="gemini-pro", project=PROJECT_ID)

# Clear the stored chat history before building the memory
chat_history.clear()

# Conversation memory for the chain: it keeps messages in `chat_history`, uses
# `llm` as its model, exposes the history under the "chat_history" key and
# reads the chain's "answer" output.
memory = ConversationSummaryBufferMemory(
    chat_memory=chat_history,
    llm=llm,
    memory_key="chat_history",
    output_key="answer",
    return_messages=True,
)

# Assemble the ConversationalRetrievalChain from the LLM, retriever, memory and
# the two prompt templates defined above.
rag_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    condense_question_prompt=condense_question_prompt_passthrough,
    combine_docs_chain_kwargs={"prompt": prompt},
    verbose=False,
)

# Ask Your Chatbot Movie Questions!

In [27]:
# Ask some questions. They are sent in order through the same chain, so the
# second one is phrased as a follow-up to the first.
questions = [
    "What movie was Brad Pitt in?",
    "How about Johnny Depp?",
    "Are there movies about animals?",
]

for q in questions:
    # Use .invoke(): calling the chain object directly (rag_chain({...})) is
    # deprecated in LangChain.
    # NOTE(review): the chain already has `memory` attached under the
    # "chat_history" key, so the explicit "chat_history" entry is probably
    # redundant — it is kept here to leave the inputs unchanged.
    ans = rag_chain.invoke({"question": q, "chat_history": chat_history})["answer"]
    print(f"Question: {q}\nAnswer: {ans}\n")