https://python.langchain.com/docs/use_cases/question_answering/chat_history


In [29]:
# Requires:
# !pip install langchain==0.0.350
# !pip install docarray tiktoken
# !pip install openai
# !pip install bs4
# !pip install chromadb
# !pip install langchainhub
# !pip install faiss-cpu

# import libraries

In [4]:
import os
import yaml
import openai
# import getpass

# import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain_community.chat_models import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
# from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.prompts import ChatPromptTemplate

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS

# set OpenAI key

In [6]:
# Load credentials from ../api.yaml so the key is never hardcoded in the notebook.
with open('../api.yaml', mode='r') as api_file:
    api = yaml.safe_load(api_file)

# Publish the key in both places: the openai module attribute first, then the
# OPENAI_API_KEY environment variable. The LangChain OpenAI wrappers below are
# constructed without an explicit key, so they presumably read it from here.
openai.api_key = os.environ['OPENAI_API_KEY'] = api['openai']

# read the transcript of the lecture

In [8]:
# Read the whole lecture transcript into memory as one string.
# The encoding is passed explicitly: without it, open() falls back to the
# platform locale encoding, which can mis-decode or fail on Japanese text
# (e.g. on Windows). Assumes the transcript is saved as UTF-8 — confirm.
with open('../text_files/softbank_world_2023_whisper_large.txt', 'r', encoding='utf-8') as f:
    text_all = f.read()

# split the text into chunks

In [9]:
# Cut the transcript into chunks of roughly 300 characters, with a 20-character
# overlap between neighbouring chunks.
# NOTE(review): CharacterTextSplitter only cuts at its separator (documented
# default is a blank line); if the transcript contains no blank lines the
# chunks may end up larger than chunk_size — confirm against the input file.
text_splitter = CharacterTextSplitter(chunk_overlap=20, chunk_size=300)

# List of chunk strings that will be embedded and indexed.
splits_texts = text_splitter.split_text(text_all)

# create vector store

In [10]:
# Embed every chunk with OpenAI embeddings and index the vectors in FAISS.
vectorstore = FAISS.from_texts(splits_texts, OpenAIEmbeddings())

# Retriever view over the index (default search settings), used as the
# "context" source of the RAG chain.
retriever = vectorstore.as_retriever()

# create prompt

In [11]:
# Prompt that asks for a Japanese answer grounded only in the supplied context.
# {context} and {question} are the two placeholders the chain must fill.
template = (
    "Answer the question in Japanese,  based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# set llm model

In [12]:
# Chat model used to generate the answer; temperature=0 keeps sampling
# randomness at its minimum.
# Alternative model: ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-4-1106-preview",
)

# create the first chain step: run retrieval and question passthrough in parallel

In [13]:
# Fan the single input (the question string) out into the two prompt fields:
# the retriever receives it and produces 'context', while RunnablePassthrough
# forwards it as 'question'.
setup_and_retrieval = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
)

# set rag_chain

In [14]:
# Full RAG pipeline: build the prompt inputs, render the prompt, call the
# chat model, then reduce the model output to a plain string.
rag_chain = (
    setup_and_retrieval
    | prompt
    | llm
    | StrOutputParser()
)

# check input schema

In [15]:
# Display the chain's input schema; the recorded output reports type 'string',
# i.e. the chain is invoked with the bare question text.
rag_chain.input_schema.schema()

{'title': 'RunnableParallel<context,question>Input', 'type': 'string'}

In [16]:
# Question (in Japanese): "When does the speaker say AGI is likely to be
# realized? Please answer in Japanese."
rag_chain.invoke("講演者はAGIがいつ頃実現しそうだと言っていますか?日本語で答えてください。")

'講演者はAGIが10年以内に実現されるであろうと言っています。'

In [17]:
# Question (in Japanese): "What does the speaker say people who have never
# used ChatGPT should do?"
rag_chain.invoke('講演者はチャットGPTを使ったことがない人に何をした方がいいと言っていますか？')

'講演者は、チャットGPTを使ったことがない人に対して、「人生を悔い改めた方がいい」と言っています。また、「自分自身の人生を自分は一体どういう考えで今まで生きてきたんだと」と自問自答するように促しています。'