In [None]:
# Initial environment setup: make user-local and site-specific binaries
# reachable from shell commands launched by this notebook.
import os
from pathlib import Path

HOME = str(Path.home())
Add_Binarry_Path = HOME + '/.local/bin:/usr/ubuntu_bin'

# Append only the entries that are not already on PATH, so re-running this
# cell does not keep growing PATH with duplicates. A missing PATH variable is
# treated as empty instead of raising KeyError.
_current_path = os.environ.get('PATH', '')
_present_entries = _current_path.split(':')
_missing_entries = [
    entry for entry in Add_Binarry_Path.split(':') if entry not in _present_entries
]
if _missing_entries:
    _prefix = [_current_path] if _current_path else []
    os.environ['PATH'] = ':'.join(_prefix + _missing_entries)

In [None]:
## 安裝套件
!pip install sentence-transformers -q
!pip install pypdf pyngrok  openai gdown chainlit langchain chromadb pdfplumber tiktoken  cohere -q

In [None]:
# Load the PDF and cut it into overlapping chunks
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

pdf_file = './data/1Q23-EPR-with-Tables-FINAL.pdf'

# Read the PDF into a list of documents.
loader = PyPDFLoader(pdf_file)
documents = loader.load()

# Split the loaded documents with chunk_size=500 and chunk_overlap=100.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=500)
docs = text_splitter.split_documents(documents)

# Overwrite each chunk's "source" metadata with a positional id
# ("source_0", "source_1", ...).
for chunk_index in range(len(docs)):
    docs[chunk_index].metadata["source"] = f"source_{chunk_index}"

In [None]:
# Embedding model
import os

from langchain.embeddings import HuggingFaceEmbeddings

# Model name/path handed to HuggingFaceEmbeddings. The default is the original
# cluster path; set the EMBEDDINGS_MODEL_PATH environment variable to use a
# different location without editing the notebook.
Embeddings_ID = os.environ.get(
    "EMBEDDINGS_MODEL_PATH",
    "/work/u00cjz00/slurm_jobs/github/models/embedding/all-MiniLM-L6-v2",
)
embeddings = HuggingFaceEmbeddings(model_name=Embeddings_ID)

In [None]:
# Vector store
from langchain.vectorstores import Chroma

# Defined for the optional persistence setting below; it is not passed to
# Chroma while that setting stays commented out.
persist_directory = "./chromadb/"

chroma_settings = {
    "documents": docs,          # chunks to embed and store
    "embedding": embeddings,    # model used to turn the chunks into vectors
    # "persist_directory": persist_directory,  # where Chroma would store its data
    "collection_name": "mydb",  # name of the collection holding these embeddings
}
search_engine = Chroma.from_documents(**chroma_settings)

In [None]:
%%bash
# 3. Download model: fetch TheBloke/Llama-2-7B-Chat-GPTQ into a local directory.
# NOTE(review): HF_HUB_ENABLE_HF_TRANSFER=1 presumably requires the hf_transfer
# package, which the install cell does not list - confirm it is available.
MODEL_DIR=Llama-7B-Chat-GPTQ
mkdir -p "$MODEL_DIR"
HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download TheBloke/Llama-2-7B-Chat-GPTQ \
    --local-dir "$MODEL_DIR" \
    --local-dir-use-symlinks False

In [None]:
# LLM: load the local model and expose it to LangChain
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms.huggingface_pipeline import HuggingFacePipeline

MODEL_ID = "./Llama-7B-Chat-GPTQ"

# Load the weights onto the GPU, then the matching fast tokenizer.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="cuda")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

# Generation settings forwarded to the text-generation pipeline.
generation_settings = {
    "max_new_tokens": 512,
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 40,
    "repetition_penalty": 1.1,
}
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_settings,
)

# NOTE(review): model_kwargs asks for temperature 0 while the pipeline above
# samples at temperature 0.7 - confirm which setting is meant to take effect.
llm = HuggingFacePipeline(model_kwargs={"temperature": 0}, pipeline=pipe)

In [None]:
# RAG chain: question answering over the vector store, with sources
from langchain.chains import RetrievalQAWithSourcesChain
# NOTE(review): these prompt objects are imported but not used in this cell -
# confirm whether they were meant to be passed to the chain.
from c01_prompts import EXAMPLE_PROMPT, PROMPT, WELCOME_MESSAGE

# NOTE(review): max_tokens_limit is handed to as_retriever() here; it looks like
# a chain-level option - confirm it has the intended effect at this call site.
doc_retriever = search_engine.as_retriever(max_tokens_limit=4097)

# "stuff" chain type; the retrieved source documents are returned with the answer.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
    return_source_documents=True,
)

In [None]:
## Run a QA query and display the raw response
query = (
    "What's the results for the reporter quarter? "
    "Please describe in the following order using bullet points - "
    "revenue, gross margin, opex, op margin, net income, and EPS. "
    "INclude both gaap and non-gaap numbers. "
    "Please also include quarter over quarter changes."
)
llm_response = chain(query)
# Last expression of the cell: shown as the cell output.
llm_response

In [None]:
## Run a QA query and print the response field by field
query = (
    "What's the results for the reporter quarter? "
    "Please describe in the following order using bullet points - "
    "revenue, gross margin, opex, op margin, net income, and EPS. "
    "INclude both gaap and non-gaap numbers. "
    "Please also include quarter over quarter changes."
)
llm_response = chain(query)

# Text fields are stripped and each is followed by a blank-line separator;
# the source documents are printed last, as-is.
for field_name in ('question', 'answer', 'sources'):
    print(llm_response[field_name].strip())
    print('\n\n\n')
print(llm_response['source_documents'])

## DEMO

In [None]:
# Launch the chainlit app (c01_app.py, with -w) as a background process.
# A foreground `!chainlit run ...` never returns, which would keep the cells
# below (ngrok setup and tunnel) from running while the app is serving.
import subprocess

# The child process keeps its own handle on the log file, so the notebook's
# handle can be closed as soon as the process has been started.
with open("chainlit.log", "ab") as chainlit_log:
    chainlit_process = subprocess.Popen(
        ["chainlit", "run", "c01_app.py", "-w"],
        stdout=chainlit_log,
        stderr=subprocess.STDOUT,
    )
print("chainlit started, pid:", chainlit_process.pid, "(output in chainlit.log)")

In [None]:
!ngrok config add-authtoken 1wbrOL2j8XoIkeqLu26tSNIOb6D_4cq3RGaNcGTt7v6G4nQX5

In [None]:
from pyngrok import ngrok

# Open an ngrok tunnel to local port 8000 and print its public URL.
LOCAL_PORT = 8000
ngrok_tunnel = ngrok.connect(LOCAL_PORT)
public_url = ngrok_tunnel.public_url
print('Public URL:', public_url)

In [None]:
# Stop ngrok through pyngrok; relies on `ngrok` imported in the tunnel cell above.
ngrok.kill()

In [None]:
!ps -ef |grep chainlit | awk '{print $2}' | xargs kill -9
!ps -ef |grep ngrok | awk '{print $2}' | xargs kill -9