In [31]:
from langchain.memory import ConversationBufferMemory
from langchain.agents import ConversationalChatAgent, AgentExecutor
import os
from langchain.chat_models import ChatOpenAI
from langchain_community.llms.sagemaker_endpoint import LLMContentHandler
from dotenv import load_dotenv
from typing import Dict
import boto3

from langchain_community.llms import SagemakerEndpoint
import json

# Chatbot with LLM and RAG

A book and movie assistant chatbot using Python, LangChain, embedding-based retrieval strategies, and OpenAI's GPT-3.5

# Step 0: Deploy LLM  endpoint

In [5]:
#  Retrieve the OpenAI API key and temperature from environment variables
from dotenv import load_dotenv
load_dotenv("env.txt")
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
OPENAI_TEMPERATURE = float(os.getenv('OPENAI_TEMPERATURE'))
HUGGING_FACE_TOKEN = os.getenv('HUGGING_FACE_TOKEN')

In [3]:
#hub['HUGGING_FACE_HUB_TOKEN']

In [16]:
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

try:
	role = sagemaker.get_execution_role()
except ValueError:
	iam = boto3.client('iam')
	role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Hub Model configuration. https://huggingface.co/models
hub = {
	'HF_MODEL_ID':'meta-llama/Meta-Llama-3-8B-Instruct',
	'SM_NUM_GPUS': json.dumps(1),
	'HUGGING_FACE_HUB_TOKEN': HUGGING_FACE_TOKEN
}

#assert hub['HUGGING_FACE_HUB_TOKEN'] != '<REPLACE WITH YOUR TOKEN>', "You have to provide a token."

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
	image_uri=get_huggingface_llm_image_uri("huggingface",version="2.0.2"),
	env=hub,
	role=role, 
)

# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
	initial_instance_count=1,
	instance_type="ml.g5.2xlarge",
	container_startup_health_check_timeout=300,
  )
  
# send request
predictor.predict({
	"inputs": "My name is Clara and I am",
})

------------!

[{'generated_text': "My name is Clara and I am a wedding photographer based in the heart of Europe, having grown up in England and studied in France, I've developed a love for capturing the beauty in a mix of cultures and languages.\nMy approach to photography is simple: I believe in telling a story through images. I capture the raw emotions, the laughter, the tears and the unforgettable moments of your special day. From the romantic countryside settings to the urban cityscapes, I find beauty in every corner and bring it to life with my lens.\n"}]

In [94]:
test_prompt = f"""
<|begine_of_text|><|start_header_id|>system<|end_header_id|>

You are an intent classifier who identify intent of a question from three topics: movies, books and others. Only return the intent<|eot_id|>
<|start_header_id|>user<|end_header_id|>

recommend me a book?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

response = predictor.predict({"inputs": test_prompt})
# response = predictor.predict({
# 	"inputs": "You are an intent classifier. There are three topics: movies, books and others. Now identify the intent of this question into one of the topics, 'recommend me a movie'. Please return only one word",
# })
print(response[0]["generated_text"])


<|begine_of_text|><|start_header_id|>system<|end_header_id|>

You are an intent classifier who identify intent of a question from three topics: movies, books and others. Only return the intent<|eot_id|>
<|start_header_id|>user<|end_header_id|>

recommend me a book?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"book"


# Step 1: Preparation of the Corpus
- The book corpus is based on the [CMU Book Summary Corpus](https://www.kaggle.com/datasets/ymaricar/cmu-book-summary-dataset).
- The movie corpus is based on the [CMU Movie Summary Corpus](https://www.cs.cmu.edu/~ark/personas/).
- For simplicity, I've already processed and saved the corpus snippets into **data/BookSummaries/book.json** and **data/MovieSummaries/movie.json**. If you prefer, you can download the original corpus and parse it yourself.

# Step 2 Vector Space
- Embedding-Based Retrieval: LangChain supports various retrieval strategies. If LangChain's built-in retrieval mechanisms do not mean your embedding-based retrieval requirement, you can integrate LlamaIndex or a similar tool that can create and query embeddings of your corpus.

In [66]:
# %load vector_space.py
# vector_space.py
import json
from pathlib import Path
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from tqdm import tqdm
from langchain_community.llms import SagemakerEndpoint

class EmbeddingManager:
    def __init__(self, model_name):
        self.embeddings = HuggingFaceEmbeddings(model_name=model_name)
    
    def get_embeddings(self):
        return self.embeddings

class VectorSpaceManager:
    def __init__(self, embedding_manager):
        self.embedding_manager = embedding_manager
        self.embeddings = self.embedding_manager.get_embeddings()

    def create_vector_space(self, documents):
        vector_store = FAISS.from_documents(self._split_docs(documents[:2]), self.embeddings)

        with tqdm(total=len(documents), desc="Creating vector space") as pbar:
            batch_size = 100
            for i in range(2, len(documents), batch_size):
                batch_documents = documents[i:i+batch_size]
                tempt_vector_store = FAISS.from_documents(self._split_docs(batch_documents), self.embeddings)
                vector_store.merge_from(tempt_vector_store)
                pbar.update(len(batch_documents))
        #vector_store = FAISS.from_documents(self._split_docs(documents), self.embeddings)

        return vector_store

    def save_vector_space(self, vector_store, save_path):
        print(f"Saving vector space to {save_path}...")
        vector_store.save_local(save_path)
        print(f"Finished!")

    def load_vector_space(self, save_path):
        print(f"Lodaing vector space from {save_path}")
        return FAISS.load_local(save_path, self.embeddings, allow_dangerous_deserialization=True)
    def _split_docs(self, docs: list):
        # split documents into chunks
        text_splitter = RecursiveCharacterTextSplitter(
        separators=[" ", "\n", ","],
        chunk_size=1000,
        chunk_overlap=0,
        )
        split_docs = text_splitter.split_documents(docs)
        return split_docs


class DataLoader:
    def __init__(self, json_file_path):
        self.json_file_path = json_file_path

    def load_data(self):
        data = json.loads(Path(self.json_file_path).read_text())
        return data

    def create_documents(self, length=None):
        data = self.load_data()
        if length is None:
            length = len(data)
        
        documents = [
            Document(
                page_content=self.get_page_content(item),
                metadata=item
            )
            for item in data[:length]
        ]
        return documents

    def get_page_content(self, item):
        raise NotImplementedError("Subclasses must implement get_page_content method")
    

class BookDataLoader(DataLoader):
    def get_page_content(self, item):
        return f"{item['title']} {item['author']} {item['publication_date']} {item['description']} {' '.join(item['genres'])}"
    

class MovieDataLoader(DataLoader):
    def get_page_content(self, item):
        # {movie_id, title, release_date, supported_languages, movie_countries, movie_genres_list, movie_actor_list, summary}
        # Use all the fields to create the page content
        return f"{item['title']} {item['release_date']} {item['summary']} {' '.join(item['movie_genres_list'])} {' '.join(item['movie_actor_list'])}"

def process_data(json_file_path, model_name, save_path, data_loader_class, length=None):
    # Initialize the embedding manager with the chosen model
    embedding_manager = EmbeddingManager(model_name)

    # Initialize the vector space manager with the embedding manager
    vector_space_manager = VectorSpaceManager(embedding_manager)

    # Load data and create documents
    data_loader = data_loader_class(json_file_path)
    documents = data_loader.create_documents(length=length)

    # Create and save the vector space
    vector_store = vector_space_manager.create_vector_space(documents)
    vector_space_manager.save_vector_space(vector_store, save_path)

    # Load the vector space and perform a search
    vector_store = vector_space_manager.load_vector_space(save_path)
    query = "The Hobbit"
    search_results = vector_store.search(query, k=2, search_type="similarity")
    print(search_results)

# Example usage
if __name__ == "__main__":
    # Book
    json_file_path = 'data/BookSummaries/book.json'  # Replace with the actual JSON file path
    #model_name = "sentence-transformers/all-MiniLM-L6-v2"
    #model_name = "mixedbread-ai/mxbai-embed-large-v1"
    #model_name = "intfloat/e5-base-v2"
    model_name = "BAAI/bge-small-en-v1.5"
    save_path = 'data/book_vector_store'  # Replace with the actual save path
    process_data(json_file_path, model_name, save_path, BookDataLoader, 100)

    # Movie
    json_file_path = 'data/MovieSummaries/movie.json'  # Replace with the actual JSON file path
    #model_name = "intfloat/e5-base-v2"
    model_name = "BAAI/bge-small-en-v1.5"
    #model_name = "sentence-transformers/all-MiniLM-L6-v2"
    #model_name = "mixedbread-ai/mxbai-embed-large-v1"
    save_path = 'data/movie_vector_store'  # Replace with the actual save path
    process_data(json_file_path, model_name, save_path, MovieDataLoader, 100)


Creating vector space:  98%|█████████▊| 98/100 [01:11<00:01,  1.36it/s]


Saving vector space to data/book_vector_store...
Finished!
Lodaing vector space from data/book_vector_store
[Document(page_content='The Hobbit J. R. R. Tolkien 1937  Gandalf tricks Bilbo into hosting a party for Thorin and his band of dwarves, who sing of reclaiming the Lonely Mountain and its vast treasure from the dragon Smaug. When the music ends, Gandalf unveils a map showing a secret door into the Mountain and proposes that the dumbfounded Bilbo serve as the expedition\'s "burglar". The dwarves ridicule the idea, but Bilbo, indignant, joins despite himself. The group travel into the wild, where Gandalf saves the company from trolls and leads them to Rivendell, where Elrond reveals more secrets from the map. Passing over the Misty Mountains, they are caught by goblins and driven deep underground. Although Gandalf rescues them, Bilbo gets separated from the others as they flee the goblins. Lost in the goblin tunnels, he stumbles across a mysterious ring and then encounters Gollum, w

Creating vector space:  98%|█████████▊| 98/100 [00:11<00:00,  8.90it/s]

Saving vector space to data/movie_vector_store...
Finished!
Lodaing vector space from data/movie_vector_store
[Document(page_content="This convinces Xi that he has reached the edge of the world, and he throws the bottle off the cliff (this scene was filmed at God's Window in Eastern Transvaal, South Africa . Xi then returns to his band and a warm welcome from his family. Adventure Action/Adventure Indie World cinema Cult Adventure Comedy Comedy Slapstick Nic De Jager Michael Thys Fanyana H. Sidumo Brian O'Shaughnessy Vera Blacker Joe Seakatsie Ken Gampu Jamie Uys Sandra Prinsloo Paddy O'Byrne Louw Verwey Marius Weyers N!xau", metadata={'movie_id': '261237', 'title': 'The Gods Must Be Crazy', 'release_date': '1980', 'supported_languages': ['Afrikaans Language', 'English Language'], 'movie_countries': ['South Africa'], 'movie_genres_list': ['Adventure', 'Action/Adventure', 'Indie', 'World cinema', 'Cult', 'Adventure Comedy', 'Comedy', 'Slapstick'], 'movie_actor_list': ['Nic De Jager', 'M




# Step 3: Chatbot Logic and User Interaction
- User Query Handling: Design the chatbot to accept user input, which could range from specific questions about books to general requests for recommendations.
- Query to Embedding: Convert the user query into an embedding and use the retrieval strategy to find the most relevant book summaries.
- Interaction with GPT-3.5: Send the user query and retrieved book summaries to GPT-3.5 to generate a coherent and contextually appropriate response.
- Response Generation: Combine the LLM's output with the retrieved information to generate a final response to the user.

In [115]:
class TopicClassifier:
    def __init__(self, llm):
        """
        Initializes a TopicClassifier object.

        Parameters:
        llm (LanguageModel): The language model used for classification.

        Returns:
        None
        """
        self.llm = llm
        self.topics = ["movies", "books", "others"]


    def classify(self, query):
        """
        Classifies a given query into one of the predefined topics.

        Parameters:
        query (str): The query to be classified.

        Returns:
        str: The classified topic.
        """
        # prompt = f"""
        # You are a content intent identifier. There are three intent for you: '{','.join(self.topics)}'. 
        # Now identify the intent of below question: {query}. Only return intent in one word.
        # """
        prompt = f"""
        <|begine_of_text|><|start_header_id|>system<|end_header_id|>

        You are an intent classifier who identify intent of a question from three topics: '{','.join(self.topics)}'. Only return the intent<|eot_id|>
        <|start_header_id|>user<|end_header_id|>

        {query}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

        """
        # prompt = f"Classify the following question into one of these topics: '{','.join(self.topics)}': '{query}'"
        response = self.llm.predict(text=prompt, max_tokens=10)
        topic = response.split('<|end_header_id|>')[-1].strip().lower()
        # topic = response.strip().lower()
        return topic

In [116]:
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS  
from langchain.agents import Tool
from langchain import PromptTemplate

class ToolManager:
    def __init__(self, llm, movies_vector_path, books_vector_path, embeddings):
        self.llm = llm
        self.movies_vector_path = movies_vector_path
        self.books_vector_path = books_vector_path
        self.embeddings = embeddings
        self.tools = self._initialize_tools()

    def _initialize_tools(self):
        # Load FAISS vector stores for movies and books
        movies_vector_store = FAISS.load_local(self.movies_vector_path, self.embeddings, allow_dangerous_deserialization=True)
        books_vector_store = FAISS.load_local(self.books_vector_path, self.embeddings, allow_dangerous_deserialization=True)

        # Define prompt template
        prompt_template = """If the context is not relevant, 
        please answer the question by using your own knowledge about the topic
        
        {context}
        
        Question: {question}
        """
        PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

        # Initialize RetrievalQA tools with the FAISS vector stores
        movies_qa = RetrievalQA.from_chain_type(llm=self.llm, chain_type="stuff", retriever=movies_vector_store.as_retriever(search_kwargs={"k": 3}), chain_type_kwargs={"prompt": PROMPT})
        books_qa = RetrievalQA.from_chain_type(llm=self.llm, chain_type="stuff", retriever=books_vector_store.as_retriever(search_kwargs={"k": 3}), chain_type_kwargs={"prompt": PROMPT})

        # Return a dictionary of tools for movies and books
        return {
            "movies": Tool(name="MoviesTool", func=movies_qa.run, description="Retrieve movie information."),
            "books": Tool(name="BooksTool", func=books_qa.run, description="Retrieve book information.")
        }

    def get_tool(self, topic):
        return self.tools.get(topic)


In [117]:
from langchain.memory import ConversationBufferMemory
from langchain.agents import ConversationalChatAgent, AgentExecutor

class ChatAgent:
    def __init__(self, llm, tool_manager):
        self.llm = llm
        self.tool_manager = tool_manager
        self.memory = ConversationBufferMemory(memory_key="chat_history",input_key="input", return_messages=True)
        self.agent = ConversationalChatAgent.from_llm_and_tools(llm=self.llm, tools=list(self.tool_manager.tools.values()), system_message="You are a smart assistant whose main goal is to recommend amazing books and movies to users. Provide helpful, **short** and concise recommendations with a touch of fun!")
        self.chat_agent = AgentExecutor.from_agent_and_tools(agent=self.agent, tools=list(self.tool_manager.tools.values()), verbose=True, memory=self.memory, 
                                                             handle_parsing_errors = True)

    def get_response(self, query, topic_classifier):
        """
        Get the response from the chat agent based on the given query and topic classifier.

        Args:
            query (str): The user's query.
            topic_classifier (TopicClassifier): The topic classifier used to classify the query.

        Returns:
            dict: A dictionary containing the response generated by the chat agent.
                  The response is stored under the key "answer".
        """
        topic = topic_classifier.classify(query)
        tool_name = None if topic == "other" else topic.capitalize() + "Tool"

        try:
            response = self.chat_agent.run(input=query, tool_name=tool_name) if tool_name else self.llm.generate(prompt=query)
        except ValueError as e:
            response = str(e)

        return {"answer": response}

In [118]:
class ContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:
        #input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs})
        request = {'inputs': prompt,
                    "parameters": { "do_sample": True,
                                "top_p": 0.9,
                                "temperature": 0.85,
                                "max_new_tokens": 1024,
                                "stop": ["<|endoftext|>", "</s>"],
                                "early_stopping": True}}

        input_str = json.dumps(request)
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        response_json = json.loads(output.read().decode("utf-8"))
        #return response_json[0]["generated_text"]
    
        if 'generated_text' in response_json[0]:
            return response_json[0]['generated_text']
        #response_json[0]['generated_text']
        else:
            # Return an empty string if the 'generated_text' key does not exist
            return response_json


content_handler = ContentHandler()

In [119]:
#Initialize the ChatOpenAI model
# llm = ChatOpenAI(
#     openai_api_key=OPENAI_API_KEY,
#     model='gpt-3.5-turbo',
#     temperature=OPENAI_TEMPERATURE,
# )

llm=SagemakerEndpoint(
        endpoint_name="huggingface-pytorch-tgi-inference-2024-05-17-22-28-38-164",
        #credentials_profile_name="credentials-profile-name",
        region_name="us-east-1",
        model_kwargs={"temperature": 1e-10},
        content_handler=content_handler,
)




In [125]:
# from langchain import LLMChain
# prompt_template = """If the context is not relevant, 
#         please answer the question by using your own knowledge about the topic
        
        
#         Question: {question}
#         """
# PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# llm_chain = LLMChain(
#      llm=llm,
#      prompt=PROMPT
#  )


# llm_chain.run({"Movie?"})
print(topic_classifier.classify("recommend me something I can read on kindle"))

"books"


In [120]:
from vector_space import EmbeddingManager

# Initialize components
book_vector_store_path = "data/book_vector_store"
movie_vector_store_path = "data/movie_vector_store"
#embeddings_model_name = "sentence-transformers/all-MiniLM-L6-v2"
#embeddings_model_name = "intfloat/e5-base-v2"
embeddings_model_name = "BAAI/bge-small-en-v1.5"
embeddings = EmbeddingManager(embeddings_model_name).get_embeddings()
tool_manager = ToolManager(llm, movie_vector_store_path, book_vector_store_path,embeddings)
topic_classifier = TopicClassifier(llm)
chat_agent = ChatAgent(llm, tool_manager)



# Step 4: Testing and Iteration
- Prototype Testing: Test the chatbot with a range of queries to ensure it retrieves relevant information and that LLM generates appropriate responses.
- Iterative Improvement: Based on testing result, refine the retrieval strategy, prompt design, and response processing to improve the chatbot's accuracy and user experience.

In [48]:
print("Chatbot is ready to talk! Type 'quit' to exit.")
    
while True:
    user_input = input("You: ")
    if user_input.lower() == 'quit':
        break

    response = chat_agent.get_response(user_input, topic_classifier)
    print(f"You: {user_input}")
    print(f"Chatbot: {response['answer']}")

Chatbot is ready to talk! Type 'quit' to exit.


You:  movie




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mCould not parse LLM output: System: You are a smart assistant whose main goal is to recommend amazing books and movies to users. Provide helpful, **short** and concise recommendations with a touch of fun!
Human: TOOLS
------
Assistant can ask the user to use tools to look up information that may be helpful in answering the users original question. The tools the human can use are:

> MoviesTool: Retrieve movie information.
> BooksTool: Retrieve book information.

RESPONSE FORMAT INSTRUCTIONS
----------------------------

When responding to me, please output a response in one of two formats:

**Option 1:**
Use this if you want the human to use a tool.
Markdown code snippet formatted in the following schema:

```json
{
    "action": string, \\ The action to take. Must be one of MoviesTool, BooksTool
    "action_input": string \\ The input to the action
}
```

**Option #2:**
Use this if you want to respond directly to the human. 

KeyboardInterrupt: Interrupted by user

In [34]:
 ChatAgent(llm).get_response('hello')

TypeError: ChatAgent.__init__() missing 1 required positional argument: 'tool_manager'