## Langchain Chain - OpenAI

1. OPENAI
查看是否有額度
- https://platform.openai.com/account/billing/overview
- https://platform.openai.com/usage

2. LANGCHAIN 手冊
https://python.langchain.com/docs/modules/chains/

3. 學習LANGCHAIN -> CHAIN

## 初始環境設定

In [None]:
import os
from pathlib import Path

# Home directory of the current user, kept as a plain string.
HOME = str(Path.home())

# Per-user binary directory; presumably added so command-line tools installed
# under ~/.local/bin can be found by later shell commands.
Add_Binarry_Path = os.path.join(HOME, '.local', 'bin')

# Append the directory only when it is missing, so re-running this cell does
# not keep growing PATH. os.pathsep is used instead of a hard-coded ':'.
_current_path = os.environ.get('PATH', '')
if Add_Binarry_Path not in _current_path.split(os.pathsep):
    if _current_path:
        os.environ['PATH'] = _current_path + os.pathsep + Add_Binarry_Path
    else:
        os.environ['PATH'] = Add_Binarry_Path

# Working directory of the notebook process (no shell round-trip needed).
current_foldr = os.getcwd()
current_foldr

## 確認CUDA版本, 以及是否能使用GPU
若無gpu 請點選右側->已連線->變更執行階段類型->T4 Gpu

In [None]:
# Run nvidia-smi in the shell (IPython `!` escape) to show the GPU/driver report.
!nvidia-smi
import torch
# Last expression of the cell: displays whether PyTorch can use CUDA.
torch.cuda.is_available()

## 安裝套件

In [None]:
## For colab
# Install the third-party packages this notebook uses; -q keeps pip's output quiet.
!pip install chromadb cohere gdown kaleido langchain openai pyngrok pypdf python-dotenv sentence-transformers tiktoken -q

### OPENAI API KEY

In [None]:
# OpenAI API key, method 1: write the key to a .env file and load it with python-dotenv.
# Replace the placeholder with a real key. Note that `>` overwrites any existing .env file.

!echo "OPENAI_API_KEY=sk-xxxxxxxxxxxxx" > .env
from dotenv import load_dotenv
load_dotenv() # loads env variables

In [None]:
# OpenAI API key, method 2: set the environment variable directly from Python.
# Replace the placeholder with a real key (and avoid committing it).

import os

os.environ.update({"OPENAI_API_KEY": "sk-xxxxxxxxxxxxx"})

In [None]:
# OpenAI API key, method 3: prompt for the key interactively so it is not stored in the notebook.

import os
from typing import TextIO  # NOTE(review): not used anywhere in this cell
from getpass import getpass
# The value typed at the getpass() prompt becomes the API key for this process.
os.environ["OPENAI_API_KEY"] = getpass()

### 1. LLMChain (llm + prompt)
LLMChain is the most basic chain in LangChain.

In [None]:
# Imports for the basic chain demo (ChatOpenAI is also used by later cells).
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

# Completion model used by the chain.
llm = OpenAI(temperature=0.7)

# Single-variable prompt: {input} is filled in when the chain runs.
prompt = PromptTemplate(
    template="what are the 5 most {input} cities in the world?",
    input_variables=["input"],
)

# Wire the prompt and the model together.
chain = LLMChain(prompt=prompt, llm=llm)

# Run the chain with "populated" substituted for {input} and show the answer.
result = chain.run("populated")
print(result)

### ^^ 練習, 更換 prompt

In [None]:
# Exercise: swap `prompt` in the chain above for prompt01 and then prompt02.

# One-variable prompt.
prompt01 = PromptTemplate(
    template="Give me a tweet idea on {topic}?",
    input_variables=["topic"],
)

# Two-variable prompt.
prompt02 = PromptTemplate(
    template="Give me a tweet idea on {topic1} and {topic2}?",
    input_variables=["topic1", "topic2"],
)

## 2. Sequential Chain  (llm + prompt)
A sequential chain works by combining two or more chains.

In [None]:
# Imports for the sequential chain demo.
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

# Model shared by both steps.
llm = OpenAI(temperature=0.7)

# Step 1: topic -> outline. The result is stored under the "outline" key.
template = """Write a blog outline given a topic.

Topic: {topic}"""
prompt = PromptTemplate(template=template, input_variables=["topic"])
outline_chain = LLMChain(prompt=prompt, llm=llm, output_key="outline")
# Preview the rendered prompt for a sample topic.
print(prompt.format(topic="寫出台南旅遊規劃大綱"))


# Step 2: outline -> article. The result is stored under the "article" key.
template = """Write a blog article based on the below outline.

Outline:
{outline}"""
prompt = PromptTemplate(template=template, input_variables=["outline"])
article_chain = LLMChain(prompt=prompt, llm=llm, output_key="article")
# Preview the rendered prompt for a sample outline.
print(prompt.format(outline="從旅遊規劃大綱, 寫出台南旅遊遊記"))

# Chain the two steps: step 1's "outline" output feeds step 2's {outline} input.
overall_chain = SequentialChain(
    verbose=True,
    input_variables=["topic"],
    output_variables=["outline", "article"],
    chains=[outline_chain, article_chain],
)

# Run the whole pipeline for one topic.
result = overall_chain({"topic": "台南旅遊規劃"})

# Show the input topic followed by both generated outputs, one per print line.
print(result["topic"], result["outline"], result["article"], sep="\n")

## 3. Retrieval QA chain  (model + prompt + documents + vectordb)

The Retrieval QA chain is considered one of the most important chains: it helps with question answering (QA) over your own document data.

In [None]:
# Create the target folder for the sample PDF (-p: no error if it already exists).
!mkdir -p data/pdf/
# Download the sample file with gdown into data/pdf/ (the argument is presumably a Google Drive file id).
!gdown 1AldhEWVCtcE50XARgSnXR0azZ965nNmT -O data/pdf/

In [None]:
# Load library
import os  # used below for os.path.basename; imported here so the section runs on its own

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Parse the document: load the PDF and split it into overlapping chunks.
pdf_file = './data/pdf/e2729e76-29a0-4be5-9eef-67809b05d6b9.pdf'
loader = PyPDFLoader(pdf_file)
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

# Vector database: embed the chunks and index them in Chroma.
embeddings = OpenAIEmbeddings()
vectortdb = Chroma.from_documents(texts, embeddings)
# Alternative: build the index with a persist_directory.
#DB_PATH = 'vectorstore/db_chroma'
#vectortdb = Chroma.from_documents(documents=texts, embedding=embeddings, persist_directory=DB_PATH)

# Alternative: load a previously persisted index.
#embeddings = OpenAIEmbeddings()
#DB_PATH = 'vectorstore/db_chroma'
#vectortdb = Chroma(persist_directory=DB_PATH, embedding_function=embeddings)

# Sanity check: fetch the 3 chunks most similar to a sample question.
query = "請說明櫃公司如何進行資產管理?"
source_documents = vectortdb.similarity_search(query, k=3)

# Iterate over the documents directly instead of re-indexing the list.
for doc in source_documents:
    page_content = doc.page_content
    page = doc.metadata["page"]
    source = doc.metadata["source"]
    file = os.path.basename(source)
    # page + 1: the loader's page index appears to be zero-based
    # (confirm against the PyPDFLoader docs).
    print(f"Source: {file}, Page {page + 1}")
    print(page_content)
    print("\n\n")

In [None]:
# RetrievalQA chain: answer a question from the chunks retrieved out of the vector DB.
# Imported here as well so this section does not depend on the section 1 cell having run.
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

#llm=OpenAI(temperature=0.7)
# NOTE(review): this is a dated model snapshot name; confirm it is still served by the API.
llm = ChatOpenAI(model="gpt-3.5-turbo-16k-0613", temperature=0, streaming=True)
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectortdb.as_retriever(),
    return_source_documents=True,  # needed for the 'source_documents' key printed below
    verbose=True
)

# Search
query = "請說明櫃公司如何進行資產管理?"
llm_response = chain(query)
print(llm_response['query'])
print(llm_response['result'])
print(llm_response['source_documents'])

## 4. RetrievalQAWithSourcesChain  (model + prompt + documents + vectordb)
RetrievalQAWithSourcesChain works like the Retrieval QA chain for QA over your document data, and additionally returns the sources that the answer refers to.

In [None]:
# Load library
import os  # used below for os.path.basename; imported here so the section runs on its own

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Parse the document: load the PDF and split it into overlapping chunks.
pdf_file = './data/pdf/e2729e76-29a0-4be5-9eef-67809b05d6b9.pdf'
loader = PyPDFLoader(pdf_file)
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

# Vector database: embed the chunks and index them in Chroma.
# NOTE(review): if the section 3 setup already ran in this session, this may add
# the same chunks to the default collection a second time — confirm.
embeddings = OpenAIEmbeddings()
vectortdb = Chroma.from_documents(texts, embeddings)
# Alternative: build the index with a persist_directory.
#DB_PATH = 'vectorstore/db_chroma'
#vectortdb = Chroma.from_documents(documents=texts, embedding=embeddings, persist_directory=DB_PATH)

# Alternative: load a previously persisted index.
#embeddings = OpenAIEmbeddings()
#DB_PATH = 'vectorstore/db_chroma'
#vectortdb = Chroma(persist_directory=DB_PATH, embedding_function=embeddings)

# Sanity check: fetch the 3 chunks most similar to a sample question.
query = "請說明櫃公司如何進行資產管理?"
source_documents = vectortdb.similarity_search(query, k=3)

# Iterate over the documents directly instead of re-indexing the list.
for doc in source_documents:
    page_content = doc.page_content
    page = doc.metadata["page"]
    source = doc.metadata["source"]
    file = os.path.basename(source)
    # page + 1: the loader's page index appears to be zero-based
    # (confirm against the PyPDFLoader docs).
    print(f"Source: {file}, Page {page + 1}")
    print(page_content)
    print("\n\n")

In [None]:
# RetrievalQAWithSourcesChain search with a custom prompt.
# ChatOpenAI and PromptTemplate are imported here so the cell does not rely on
# imports made in earlier sections.
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

# {summaries} receives the retrieved document text and {question} the user query.
template = '''
Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
If you don't know the answer, just say that you don't know. Don't try to make up an answer.

{summaries}

Respond in the persona of 財務專家

Question: {question}
Answer:
'''

# The variable names must match the template placeholders exactly; the previous
# "summaries " (trailing space) did not match {summaries}.
prompt = PromptTemplate(input_variables=["summaries", "question"], template=template)

# Initialise RetrievalQA Chain
chain = RetrievalQAWithSourcesChain.from_chain_type(
    #llm=OpenAI(temperature=0.7),
    # NOTE(review): this is a dated model snapshot name; confirm it is still served by the API.
    llm=ChatOpenAI(model="gpt-3.5-turbo-16k-0613", temperature=0, streaming=True),
    chain_type="stuff",
    retriever=vectortdb.as_retriever(),
    return_source_documents=True,  # needed for the 'source_documents' key printed below
    chain_type_kwargs={"prompt": prompt},
    verbose=True
)

# Search
query = "請說明櫃公司如何進行資產管理?"
llm_response = chain(query)
print(llm_response['question'])
print(llm_response['answer'])
print(llm_response['sources'])
print(llm_response['source_documents'])


In [None]:
# RetrievalQAWithSourcesChain: list the file name and page of every source document
# returned with the answer.
import os

source_documents = llm_response['source_documents']

for doc in source_documents:
    page = doc.metadata["page"]
    source = doc.metadata["source"]
    file = os.path.basename(source)
    # page + 1 keeps the page number consistent with the vector-DB test cells,
    # which print the page index plus one.
    print(f"SOURCE: {file}, PAGE: {page + 1}")

## 5. Create Memory Chain

In [None]:
from langchain import ConversationChain, OpenAI, PromptTemplate, LLMChain
from langchain.memory import ConversationBufferWindowMemory

In [None]:
# Prompt for the memory chain: {history} is where the conversation so far is
# placed, and {human_input} is the new user message.
template = """Assistant is a large language model trained by OpenAI.

{history}
Human: {human_input}
Assistant:"""

prompt = PromptTemplate(
    template=template,
    input_variables=["history", "human_input"],
)

# Preview the rendered prompt with placeholder values.
print(prompt.format(history="my_history", human_input="my_human_input"))

In [None]:
# Build the memory-backed chain and run the first turn.
# The window memory is created with k=2; it reuses the `llm` and `prompt`
# defined in earlier cells.
chain = LLMChain(
    prompt=prompt,
    llm=llm,
    memory=ConversationBufferWindowMemory(k=2),
)

# First question of the conversation.
output = chain.run(human_input="請依序列出聯邦學習的重點")

# Show the model's reply.
print(output)

In [None]:
# Turn 2: reuse the existing `chain` (no new chain is created); the question refers back to the previous answer.
output = chain.run(human_input="請將以上的重點做一個結論")

# Display the model's response
print(output)

In [None]:
# Turn 3: reuse the existing `chain` again; the question builds on the earlier turns.
output = chain.run(human_input="請將以上的總結, 規劃未來執行的方向")

# Display the model's response
print(output)