In [None]:
!pip install llama-index #install llama-index because its used for Retrieval Augmented Generation(RAG)
!pip install openai
!pip install langchain-community #install langchain-community because its used for CSVLoader to
!pip install langchain-openai

Collecting langchain-openai
  Downloading langchain_openai-0.3.21-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain-core<1.0.0,>=0.3.64 (from langchain-openai)
  Downloading langchain_core-0.3.64-py3-none-any.whl.metadata (5.8 kB)
Collecting langsmith<0.4,>=0.3.45 (from langchain-core<1.0.0,>=0.3.64->langchain-openai)
  Downloading langsmith-0.3.45-py3-none-any.whl.metadata (15 kB)
Downloading langchain_openai-0.3.21-py3-none-any.whl (65 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.2/65.2 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_core-0.3.64-py3-none-any.whl (438 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m438.1/438.1 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langsmith-0.3.45-py3-none-any.whl (363 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.0/363.0 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langsmith, langchain-core

In [None]:
import os
import random
from getpass import getpass #Used to prompt for the OpenAI API key without echoing it or hard-coding it in the notebook

import openai
import pandas as pd
from langchain.memory import ChatMessageHistory #Used to store conversations in a memory list
from langchain_community.document_loaders.csv_loader import CSVLoader #used to convert each row in the CSV file to a document where 1 document is 1 row in the CSV file
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder #ChatPromptTemplate is used to create prompt templates for chat models, while MessagesPlaceholder is a prompt template that assumes a variable is already in the list of messages
from langchain_core.runnables.history import RunnableWithMessageHistory #RunnableWithMessageHistory is used to read and update the chat message history that contains the entire conversation
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.evaluation import (
    DatasetGenerator,
    FaithfulnessEvaluator, #Faithfulness evaluator is used to find out if a response was hallucinated by measuring if a response from a query engine matches any source nodes
    RelevancyEvaluator
)
from llama_index.llms.openai import OpenAI
# Google Colab only: `drive` exposes the user's Google Drive to the notebook.
# The Titanic training CSV used below is read from that Drive.
from google.colab import drive #import drive so we can use the titanic train data set in this notebook
# Mount the Drive at /content/drive; the data paths used later are written
# relative to /content (presumably the working directory on Colab - confirm).
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Folder (relative to the current working directory) that holds the Titanic
# training CSV on the mounted Drive. The same folder is handed to
# SimpleDirectoryReader later, so it is defined once here.
data_path = "drive/MyDrive/RAG_practice_AI_Dev_Data_Folder"

# Build the CSV path from data_path instead of repeating the folder literal,
# so changing data_path moves both the pandas load and the llama-index load.
data = pd.read_csv(os.path.join(data_path, "train.csv"))

# Preview the first rows as a sanity check of the load.
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [None]:
# Read everything under data_path into llama-index documents. The original
# author's intent: llama-index documents carry an id_ attribute, which is
# needed to run DatasetGenerator on them.
_reader = SimpleDirectoryReader(data_path)
documents = _reader.load_data()

# Debug inspection: attributes of the first document, attributes of the
# container returned by load_data(), and the first document itself.
for _label, _value in (
    ("Attributes of documents[0] is:", dir(documents[0])),
    ("The attributes of documents is:", dir(documents)),
    ("Value of documents[0] is:", documents[0]),
):
    print(_label, _value)

Attributes of documents[0] is: ['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__class_vars__', '__copy__', '__deepcopy__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__fields__', '__fields_set__', '__format__', '__ge__', '__get_pydantic_core_schema__', '__get_pydantic_json_schema__', '__getattr__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__pretty__', '__private_attributes__', '__pydantic_complete__', '__pydantic_computed_fields__', '__pydantic_core_schema__', '__pydantic_custom_init__', '__pydantic_decorators__', '__pydantic_extra__', '__pydantic_fields__', '__pydantic_fields_set__', '__pydantic_generic_metadata__', '__pydantic_init_subclass__', '__pydantic_parent_namespace__', '__pydantic_post_init__', '__pydantic_private__', '__pydantic_root_model__', '__pydantic_serializer__', '__pydantic_setattr_handlers__', '__pydant

In [None]:
# Number of questions to keep if the generated set is later sub-sampled
# (currently unused; see the disabled sampling step at the end of the cell).
num_eval_questions = 25

# Restrict question generation to at most the first 20 loaded documents.
eval_documents = documents[:20]

# Build a question generator over those documents and ask it for questions.
# NOTE(review): generation presumably calls an OpenAI model, but OPENAI_API_KEY
# is only set in a later cell - confirm this works on a fresh Run All.
data_generator = DatasetGenerator.from_documents(eval_documents)
eval_questions = data_generator.generate_questions_from_nodes()

# Show the generated questions.
eval_questions
# Disabled: random sub-sample of the questions.
# k_eval_questions = random.sample(eval_questions, num_eval_questions)

In [None]:
# In-memory registry of conversations: maps a session id to the
# ChatMessageHistory object that accumulates that session's messages.
store = {}


def get_chat_history(session_id: str):
    """Return the chat history for ``session_id``, creating it on first use.

    The same history object is returned on every call with the same id, so
    messages appended to it persist for the lifetime of ``store``.
    """
    if session_id in store:
        return store[session_id]

    # First time this session is seen: start it with an empty history.
    history = ChatMessageHistory()
    store[session_id] = history
    return history

In [None]:
# Chat prompt template with three parts, in order:
#   1. a fixed system message,
#   2. the prior conversation, injected through the "history" placeholder,
#   3. the new human message, filled in from the "input" variable.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI assistant."),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{input}"),
    ]
)

# Render the template once with a hand-written two-turn history to check
# that the placeholder and the input variable are substituted as expected.
prompt_val = prompt.invoke(
    {
        # Fills MessagesPlaceholder(variable_name="history").
        "history": [
            ("human", "what's 5 + 2"),
            ("ai", "5 + 2 is 7"),
        ],
        # Fills the "{input}" slot of the final human message.
        "input": "now multiply that by 4",
    }
)

# Display the rendered ChatPromptValue.
prompt_val

ChatPromptValue(messages=[SystemMessage(content='You are a helpful AI assistant.', additional_kwargs={}, response_metadata={}), HumanMessage(content="what's 5 + 2", additional_kwargs={}, response_metadata={}), AIMessage(content='5 + 2 is 7', additional_kwargs={}, response_metadata={}), HumanMessage(content='now multiply that by 4', additional_kwargs={}, response_metadata={})])

In [None]:
# Make the OpenAI API key available through the OPENAI_API_KEY environment
# variable, which the OpenAI-backed clients created in this notebook read.
# The key is never written into the notebook: an already-exported key is
# reused, otherwise the user is prompted for it without echoing the input.
# (Previously an empty string was assigned here, which overwrote any key that
# was already set in the environment.)
api_key = os.environ.get("OPENAI_API_KEY") or getpass("Enter your OpenAI API key: ")
os.environ["OPENAI_API_KEY"] = api_key

# LangChain chat model used for the conversational chain.
#llm = OpenAI(model="gpt-4-turbo-preview")
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

In [None]:
# `llm` is a LangChain ChatOpenAI model, which has no `.complete()` method
# (calling it raises AttributeError). Chat models are called with `.invoke()`,
# which returns a message object whose text is in `.content`.
response = llm.invoke("What gender is Mr Owens Harris?")
print(response.content)

AttributeError: 'ChatOpenAI' object has no attribute 'complete'

In [None]:
# `chain` was never defined in any earlier cell, so on a fresh kernel this cell
# raised NameError. Build it here by piping the chat prompt into the chat model;
# the prompt's "input" and "history" variables match the keys configured below.
# NOTE(review): confirm that prompt | llm is the intended chain.
chain = prompt | llm

# Wrap the chain so that, for each call, the session's stored messages are
# loaded via get_chat_history and supplied under "history", the new user text
# is taken from "input", and the exchange is recorded back into that history.
chain_with_history = RunnableWithMessageHistory(
    chain,
    get_chat_history,
    input_messages_key="input",
    history_messages_key="history",
)