# Conversational Chatbots

## Installing essential libraries

In [None]:
!pip install -qU langchain langchain-openai langchain-google-genai

## Storing API keys

- Get OpenAI API key: https://platform.openai.com/account/api-keys
- Get Google AI API key: https://ai.google.dev/

In [None]:
import os
from getpass import getpass

# Keep a key that is already present in the environment; otherwise prompt for it
# at run time so the secret is never written into the notebook itself.
os.environ['OPENAI_API_KEY'] = os.environ.get('OPENAI_API_KEY') or getpass("Enter your OpenAI API key: ")

In [None]:
# Keep a key that is already present in the environment; otherwise prompt for it
# at run time so the secret is never written into the notebook itself.
os.environ['GOOGLE_API_KEY'] = os.environ.get('GOOGLE_API_KEY') or getpass("Enter your Google AI API key: ")

## Using models from OpenAI and Google

In [None]:
# --- OpenAI chat model (GPT-3.5) ---
from langchain_openai import ChatOpenAI

# Temperature is set explicitly to 0.0 for the OpenAI model only.
gpt3_model = ChatOpenAI(
    model="gpt-3.5-turbo-0125",
    temperature=0.0,
)

# --- Google chat model (Gemini Pro) ---
from langchain_google_genai import ChatGoogleGenerativeAI

# No temperature is passed here, so the library default applies.
gemini_model = ChatGoogleGenerativeAI(model="gemini-pro")

In [None]:
# Opening question, sent to the OpenAI model as a single stand-alone string.
gpt3_model.invoke(
    "Who is the first black president of USA?"
)

AIMessage(content='Barack Obama is the first black president of the United States.', response_metadata={'finish_reason': 'stop', 'logprobs': None})

In [None]:
# Follow-up question sent as a separate call; only this string is passed to the model.
gpt3_model.invoke(
    "When was he born?"
)

AIMessage(content="I'm sorry, could you please provide me with the name of the person you are referring to so I can give you an accurate answer?", response_metadata={'finish_reason': 'stop', 'logprobs': None})

In [None]:
# This phrasing is rejected by Gemini's safety filter (the recorded output is a
# BlockedPromptException). Catch and print the error so the demonstration stays
# visible while "Run All" can still continue to the following cells.
try:
    print(gemini_model.invoke("Who is the first black president of USA?"))
except Exception as exc:  # broad on purpose: the SDK's exception class is not imported in this notebook
    print(f"{type(exc).__name__}: {exc}")

BlockedPromptException: block_reason: SAFETY
safety_ratings {
  category: HARM_CATEGORY_SEXUALLY_EXPLICIT
  probability: NEGLIGIBLE
}
safety_ratings {
  category: HARM_CATEGORY_HATE_SPEECH
  probability: HIGH
}
safety_ratings {
  category: HARM_CATEGORY_HARASSMENT
  probability: NEGLIGIBLE
}
safety_ratings {
  category: HARM_CATEGORY_DANGEROUS_CONTENT
  probability: NEGLIGIBLE
}


In [None]:
# Same question as the blocked prompt, rephrased with "African-American".
gemini_model.invoke(
    "Who is the first African-American president of USA?"
)

AIMessage(content='Barack Obama', response_metadata={'finish_reason': 'STOP', 'safety_ratings': [{'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HATE_SPEECH', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HARASSMENT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'probability': 'NEGLIGIBLE', 'blocked': False}]})

In [None]:
# Follow-up question sent as a separate call; only this string is passed to the model.
gemini_model.invoke(
    "When was he born?"
)

AIMessage(content="The context does not provide information about the person's birth date, so I cannot answer this question from the provided context.", response_metadata={'finish_reason': 'STOP', 'safety_ratings': [{'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HATE_SPEECH', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HARASSMENT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'probability': 'NEGLIGIBLE', 'blocked': False}]})

In [None]:
# A self-contained factual question for the Gemini model.
gemini_model.invoke(
    "When did World War I start?"
)

## Adding memory to the chain

In [None]:
# ChatOpenAI is imported from the dedicated langchain-openai package, matching the
# import used when the models were created; the legacy langchain.chat_models path
# is deprecated and would shadow that class under the same name.
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory, ConversationSummaryMemory

In [None]:
# `k` (number of most recent exchanges to keep) is an option of
# ConversationBufferWindowMemory; ConversationBufferMemory has no such field,
# so the windowed class is used to make the limit of 30 actually apply.
from langchain.memory import ConversationBufferWindowMemory

memory = ConversationBufferWindowMemory(k=30)
# memory = ConversationSummaryMemory()

# Chain that sends each new input to the OpenAI model together with the stored history.
conversation = ConversationChain(
    llm=gpt3_model,
    memory=memory,
)


In [None]:
# Opening question, this time routed through the memory-backed chain.
conversation.predict(
    input="Who is the first black president of USA?",
)

'The first black president of the United States was Barack Obama. He served as the 44th president from 2009 to 2017. He was born on August 4, 1961, in Honolulu, Hawaii. Obama is a member of the Democratic Party and was the first African American to hold the office of President of the United States.'

In [None]:
# Follow-up that relies on the chain's memory to resolve "he".
conversation.predict(
    input="When was he born?",
)

'Barack Obama was born on August 4, 1961.'

In [None]:
# Second follow-up on the same chain, again phrased with a pronoun only.
conversation.predict(
    input="When did his tenure end?",
)

## Document Loaders

Combining language models with your own text data is a powerful way to differentiate them. The first step in doing this is to load the data into “documents” - a fancy way of saying some pieces of text.

https://python.langchain.com/docs/modules/data_connection/document_loaders/

Installing required loaders

In [None]:
!pip install pypdf

In [None]:
# Document loader classes; only PyPDFLoader is used in this cell.
from langchain.document_loaders import (
    NotionDirectoryLoader,
    PyPDFLoader,
    TextLoader,
)

# Location of the PDF to ingest.
pdf_path = "/content/financial_crisis.pdf"

# Load the PDF and split it into a list of documents.
loader = PyPDFLoader(pdf_path)
pages = loader.load_and_split()

# Preview the text of the first document.
print(pages[0].page_content)

## Embeddings and Indexes

Embeddings and indexes are essential components in the LangChain framework for working with large language models (LLMs). They help in structuring documents and representing information in a numerical format to facilitate interaction with LLMs.

In [None]:
!pip install -qU tiktoken faiss-cpu

In [None]:
# Import from the langchain-openai package that this notebook already installs and
# uses for ChatOpenAI; the langchain.embeddings.openai path is deprecated.
from langchain_openai import OpenAIEmbeddings

# Embedding client created with default settings.
embeddings = OpenAIEmbeddings()

In [None]:
from langchain.vectorstores import FAISS

# Build a FAISS vector index from the loaded PDF documents and the embedding client.
faiss_index = FAISS.from_documents(
    pages,
    embeddings,
)

In [None]:
import re

# --- Search settings ---
query = "What is the document about?"
num_results = 2  # how many matching documents to return
# NOTE(review): "hybrid" does not appear among the search types documented for
# LangChain's VectorStore.search ("similarity", "mmr", ...) -- confirm before using it.
search_type = "similarity"

# --- Run the query against the index ---
results = faiss_index.search(
    query,
    search_type,
    k=num_results,
)

# --- Show each hit ---
for result in results:
    # Drop every single space that sits directly between two word characters.
    output_text = re.sub(
        r'(?<=\w) (?=\w)',
        '',
        result.page_content,
    )
    print(output_text)