<a href="https://colab.research.google.com/github/ben854719/Star-Wars-The-Last-Jedi/blob/main/Build_Your_Chatbot_Gemini.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import Langchaine to google colab.
!pip install --quiet langchain
!pip install --quiet langchain-google-genai
!pip install --quiet chromadb
!pip install langchain-community

In [None]:
# Expose the Gemini API key as the GOOGLE_API_KEY environment variable.
import os
import getpass

# Prompt only when the variable is missing or empty, so re-running this cell
# (or supplying the key through the environment beforehand) does not ask again
# or overwrite an existing value. getpass reads the key without echoing it.
if not os.environ.get('GOOGLE_API_KEY'):
    os.environ['GOOGLE_API_KEY'] = getpass.getpass('GeminiAPI_key')

In [None]:
# import LangChain libraries
from langchain import PromptTemplate
from langchain import hub
from langchain.docstore.document import Document
from langchain.document_loaders import WebBaseLoader
from langchain.schema import StrOutputParser
from langchain.schema.prompt_template import format_document
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores import Chroma


In [None]:
# Download the Wikipedia article on Star Wars: The Last Jedi and load it
# as a list of documents.
article_url = "https://en.wikipedia.org/wiki/Star_Wars:_The_Last_Jedi"
loader = WebBaseLoader(article_url)
docs = loader.load()

In [None]:
# Take the raw page text from the first loaded document.
text_content = docs[0].page_content

# Keep only the part of the text that precedes each trailing-section marker.
# A marker that does not occur leaves the text unchanged.
# NOTE(review): these markers are written in wiki-markup form; confirm they
# really occur in the text returned by the loader, otherwise nothing is trimmed.
final_text = text_content
for section_marker in ("==References==", "==External Links==", "==See also==", "==Notes=="):
    final_text = final_text.split(section_marker, 1)[0]

# Show a short preview plus the total size instead of printing the whole
# article, which would flood the cell output.
print(final_text[:500])
print(f"... ({len(final_text)} characters in total)")

# Wrap the trimmed text in LangChain's Document format, tagged with its source.
docs = [Document(page_content=final_text, metadata={"source":"Star Wars: The Last Jedi"})]





In [None]:
# Create the Gemini embedding client (this is the embedding model, not the
# chat model) that is later handed to Chroma.
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding_model_name = "models/text-embedding-004"
gemini_embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model_name)

In [None]:
# Build a Chroma store from the documents using the Gemini embeddings and
# persist it under ./chroma_db.
chroma_dir = "./chroma_db"
VectorStore = Chroma.from_documents(docs, embedding=gemini_embeddings, persist_directory=chroma_dir)

In [None]:
from re import search  # NOTE(review): `search` is not referenced in the visible cells
# Reopen the Chroma store persisted under ./chroma_db with the same embeddings.
VectorStore_disk = Chroma(
    persist_directory="./chroma_db",
    embedding_function=gemini_embeddings
)

# Retriever configured with k=1, i.e. one document requested per query.
retriever = VectorStore_disk.as_retriever(search_kwargs={"k": 1})

# Sanity check: print how many documents a query returns. `invoke` is the
# Runnable entry point and replaces the deprecated get_relevant_documents().
# Only the count is printed, so the query text itself is arbitrary here.
print(len(retriever.invoke("MMLU")))

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model used to generate the answers.
# The sampling keyword is spelled `temperature`; the previous spelling
# (`temparature`) was not the model's temperature parameter, so the intended
# value of 0.1 was never passed under its real name.
# NOTE(review): confirm that "gemini-pro" is still an available model id.
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.1, top_p=0.8, top_k=40)

In [None]:
from langchain_core.prompts import PromptTemplate

# Instruction text sent to Gemini. The {question} and {context} placeholders
# are filled in when the prompt is formatted.
llm_prompt_template = """ You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum. Keep the answer as concise as possible.\n
Question: {question} \nContext: {context} \nAnswer:"""

# Construct the template with keyword arguments, declaring both placeholders
# as input variables.
llm_prompt = PromptTemplate(
    input_variables=["question", "context"],
    template=llm_prompt_template,
)

print(llm_prompt)

In [None]:
# Merge the retrieved documents into one readable context string.
def format_docs(docs):
    """Join the ``page_content`` of every item in ``docs`` with blank lines.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the page contents separated by two newlines;
        an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

# Inputs for the prompt: "context" is the retriever output passed through
# format_docs, and "question" is the caller's input forwarded unchanged.
chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Full pipeline: gather inputs -> fill the prompt -> call the model -> parse
# the model output with StrOutputParser.
rag_chain = chain_inputs | llm_prompt | llm | StrOutputParser()


In [None]:
# Query the chain about Star Wars: The Last Jedi, starting with the plot.
question = "What is the movie about?"
rag_chain.invoke(question)

In [None]:
# Ask about the main character.
question = "What is the main character in the movie?"
rag_chain.invoke(question)

In [None]:
# Ask about the filming locations.
question = "Where was the movie shot?"
rag_chain.invoke(question)

In [None]:
# Ask about the producer and the director.
question = "Who is the producer and the director of the movie?"
rag_chain.invoke(question)

In [None]:
# Ask about the movie's rating.
question = "What was the rating of the movie?"
rag_chain.invoke(question)

In [None]:
# Ask about special effects.
question = "Did the movie have any special effects?"
rag_chain.invoke(question)

In [None]:
# Ask for the names of the characters.
question = "What are the names of the characters of the movie?"
rag_chain.invoke(question)