In [1]:
!pip install langchain
!pip install sentence-transformers
!pip install pypdf
!pip install chromadb
!pip install -q -U bitsandbytes
!pip install ctransformers[cuda]
!pip install safetensors
!pip install accelerate

Collecting langchain
  Downloading langchain-0.0.350-py3-none-any.whl (809 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m809.1/809.1 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.3-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.2 (from langchain)
  Downloading langchain_community-0.0.3-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core<0.2,>=0.1 (from langchain)
  Downloading langchain_core-0.1.0-py3-none-any.whl (189 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.1/189.1 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langsmith<0.1.0,>=0.0.63 (from langchain)
  Downloading langsmith-0.

In [2]:
import torch
from transformers import BitsAndBytesConfig
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

In [3]:
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

In [4]:
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
model4bit = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=quantization_config)
tokenizer = AutoTokenizer.from_pretrained(model_id)

config.json:   0%|          | 0.00/545 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [5]:
pipeline = pipeline(
    "text-generation",
    model=model4bit,
    tokenizer=tokenizer,
    use_cache=True,
    device_map="auto",
    max_length=1500,
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id= tokenizer.eos_token_id
)

In [6]:
from langchain import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=pipeline)

In [7]:
from langchain import PromptTemplate, LLMChain

template = """[INST] You are a helpful, respectful and honest assistant. Answer exactly in few words from the context
Answer the question below from the context below:
{context}
{question} [/INST]
"""
question_p = """What is the date for the announcement"""
context_p = """ On August 10, it was announced that its subsidiary, JSW Neo Energy, has agreed to acquire a portfolio encompassing 1753 megawatts of renewable energy generation capacity from Mytrah Energy India Pvt Ltd for Rs 10,530 crore."""

prompt = PromptTemplate(template=template, input_variables=["question", "context"])
llm_chain = LLMChain(prompt=prompt, llm=llm)
response = llm_chain.run({"question": question_p, "context": context_p})

In [8]:
response

'\nAugust 10'

In [9]:
import chromadb
from chromadb.config import Settings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma

In [12]:
from langchain.document_loaders import PyPDFLoader
pdf_loader = PyPDFLoader('/content/drive/MyDrive/ResearchProjects/LLMChat/Rsys_India_EmployeeHandbook_01_Apr_2019_Ver3.1 1.pdf')
pages = pdf_loader.load()

In [13]:
print(len(pages))

33


In [14]:
print(pages[0].metadata)

{'source': '/content/drive/MyDrive/ResearchProjects/LLMChat/Rsys_India_EmployeeHandbook_01_Apr_2019_Ver3.1 1.pdf', 'page': 0}


In [15]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
splits = text_splitter.split_documents(pages)

In [16]:
st_model_id = "sentence-transformers/all-mpnet-base-v2"

In [17]:
model_kwargs = {"device": "cuda"}

In [18]:
embedding_model = HuggingFaceEmbeddings(model_name=st_model_id, model_kwargs=model_kwargs)

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [19]:
chroma_db = Chroma.from_documents(documents=splits, embedding=embedding_model)

In [20]:
retriever = chroma_db.as_retriever()

In [21]:
retriever_qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, verbose=True)

In [22]:
text_query = "how does the performance of an employee analysed in Radisys"
retriever_qa.run(text_query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


" The performance of an employee at Radisys is analyzed on a monthly basis by the manager to review the employee's progress vis-à-vis the set goals, provide feedback, and identify developmental needs. The formal review process includes a mid-term review called 'Positive Touch Base' followed by an Annual Performance Review, which is the basis for annual salary revision. The company recognizes excellent performance and achievement through rewards that are creative, flexible, and meaningful. Radisys is an Equal Opportunity Employer and makes employment decisions on the basis of merit, prohibiting any form of harassment and ensuring equal opportunity in all aspects of employment."

In [24]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [28]:
!pip install gradio==3.48

Collecting gradio==3.48
  Downloading gradio-3.48.0-py3-none-any.whl (20.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.3/20.3 MB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
Collecting gradio-client==0.6.1 (from gradio==3.48)
  Downloading gradio_client-0.6.1-py3-none-any.whl (299 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m299.2/299.2 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: gradio-client, gradio
  Attempting uninstall: gradio-client
    Found existing installation: gradio_client 0.7.2
    Uninstalling gradio_client-0.7.2:
      Successfully uninstalled gradio_client-0.7.2
  Attempting uninstall: gradio
    Found existing installation: gradio 4.9.0
    Uninstalling gradio-4.9.0:
      Successfully uninstalled gradio-4.9.0
Successfully installed gradio-3.48.0 gradio-client-0.6.1


In [26]:
def llm_agent(prompt:str) -> str:
    return retriever_qa.run(prompt)

In [29]:
import gradio as gr
iface = gr.Interface(fn=llm_agent, inputs=gr.inputs.Textbox(lines=5, placeholder="Type your query here..."),
                     outputs=gr.outputs.Textbox(), title="Radisys HR Tool",
                     description="Get answers to all your queries realted to your company.")

iface.launch()

  iface = gr.Interface(fn=llm_agent, inputs=gr.inputs.Textbox(lines=5, placeholder="Type your query here..."),
  iface = gr.Interface(fn=llm_agent, inputs=gr.inputs.Textbox(lines=5, placeholder="Type your query here..."),
  iface = gr.Interface(fn=llm_agent, inputs=gr.inputs.Textbox(lines=5, placeholder="Type your query here..."),
  outputs=gr.outputs.Textbox(), title="Radisys HR Tool",


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://5cd3611a1c66f7b39f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


