In [None]:
# Installing the packages needed for building the code.

!python.exe -m pip install --upgrade pip
%pip install -q -f -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
%pip install -q -f -U transformers
%pip install -q -f -U langchain
%pip install -q -f -U accelerate
%pip install -q -f -U sentencepiece
%pip install -q -f -U tiktoken
%pip install -q -f -U sentence_transformers
%pip install -q -f -U pandas
%pip install -q -f -U tabulate

# This code creates the environment for LangChain to use your local LLM as a chat model

In [None]:
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM
from langchain.chat_models.base import BaseChatModel
from langchain.schema import BaseMessage, AIMessage, HumanMessage, SystemMessage, ChatResult, ChatGeneration
from typing import Optional, List
import pandas as pd

In [None]:
class LocalChatModel(BaseChatModel):
    """LangChain chat-model wrapper around a locally loaded Llama model.

    Fields (passed as keyword arguments when constructing the model):
        tokenizer: encodes the prompt and decodes the generated tokens.
        model: causal language model whose ``generate`` method produces the reply.
        device: device string the prompt tensor is moved to before generation.
        other_kwargs: extra keyword arguments forwarded to ``model.generate``.

    NOTE(review): the prompt uses plain ``SYSTEM:``/``USER:``/``ASSISTANT:``
    prefixes. Chat-tuned checkpoints usually expect their own prompt template —
    confirm whether the loaded model needs a different format.
    """
    tokenizer: LlamaTokenizer
    model: LlamaForCausalLM
    device: str
    other_kwargs: dict

    def get_prompt(self, messages: List[BaseMessage])->str:
        """Flatten a chat history into a single text prompt.

        Args:
            messages: conversation so far, oldest message first.

        Returns:
            One line per message, each prefixed with its role, followed by a
            final ``"ASSISTANT: "`` line that cues the model to answer.
        """
        prompt = []
        for message in messages:
            if isinstance(message, SystemMessage):
                prepend = "SYSTEM: "
            elif isinstance(message, HumanMessage):
                prepend = "USER: "
            elif isinstance(message, AIMessage):
                prepend = "ASSISTANT: "
            else:
                # Any other message type: use its explicit role when it carries
                # one, otherwise treat it as user input. Without this branch the
                # prefix would be undefined (or stale from the previous message).
                role = getattr(message, "role", None) or "user"
                prepend = f"{str(role).upper()}: "
            prompt.append(prepend + message.content)
        prompt.append("ASSISTANT: ")
        return "\n".join(prompt)

    def _generate(self, messages: List[BaseMessage], stop: Optional[List[str]]=None, run_manager=None, **kwargs)->ChatResult: # type: ignore
        """Generate the assistant reply for ``messages``.

        Args:
            messages: conversation so far, oldest message first.
            stop: optional stop sequences; the reply is cut just before the
                earliest occurrence of any of them.
            run_manager: accepted so callers that pass a callback manager keep
                working; it is not used.
            **kwargs: accepted for caller compatibility and ignored; generation
                options come from ``other_kwargs``.

        Returns:
            A ``ChatResult`` holding a single ``AIMessage`` with the stripped reply.
        """
        prompt = self.get_prompt(messages)
        inputs = self.tokenizer(prompt, return_tensors='pt') # type: ignore
        input_ids = inputs.input_ids.to(self.device)

        outputs = self.model.generate(input_ids, **self.other_kwargs) # type: ignore
        # The output contains the prompt tokens followed by the new tokens;
        # slice the prompt off so only the newly generated text is decoded.
        generated_text = self.tokenizer.batch_decode(outputs[:, input_ids.shape[1]:], skip_special_tokens=True)[0] # type: ignore

        # Truncating once per stop sequence leaves the text cut at the earliest match.
        for stop_sequence in stop or []:
            if not stop_sequence:
                continue  # an empty stop string would wipe the whole reply
            cut = generated_text.find(stop_sequence)
            if cut != -1:
                generated_text = generated_text[:cut]

        ai_message = AIMessage(content=generated_text.strip())
        return ChatResult(generations=[ChatGeneration(message=ai_message)])

    async def _agenerate(self, messages: List[BaseMessage], stop: Optional[List[str]]=None, run_manager=None, **kwargs)->ChatResult: # type: ignore
        """Async variant of ``_generate``.

        Delegates to the synchronous implementation, so it blocks the event
        loop while the model is generating.
        """
        return self._generate(messages, stop=stop)

    @property
    def _llm_type(self)->str:
        """Short identifier for this kind of chat model."""
        return "local-llama-chat"


In [None]:
model_path = "./models/Llama-2-7b-chat-hf" # You will need to download and place the model files in the working directory / models / Llama-2-7b-chat-hf folder

In [None]:
tokenizer = LlamaTokenizer.from_pretrained(model_path)

In [None]:
model = LlamaForCausalLM.from_pretrained(
    model_path,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    device_map='auto',
    local_files_only=True
)

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device) # type: ignore

In [None]:
# Sample code to check if the LLM is working

chat = LocalChatModel(tokenizer=tokenizer, model=model, device=device, other_kwargs=dict(max_new_tokens=512))
response = chat([
    SystemMessage(content="You are a helpful assistant that specializes in Python code. Once answer is given, do not add another USER query."),
    HumanMessage(content="Program that prints hello world.")
]).content

In [None]:
print(response)

# From here begins the process of loading the CSV into Chroma DB
## For this section, I found the below link very helpful
### https://towardsai.net/p/machine-learning/query-your-dataframes-with-powerful-large-language-models-using-langchain

In [None]:
df = pd.read_csv('employee_reviews.csv')

In [None]:
df.head()

In [None]:
df['Merged Column'] = "Feedback for the employee in department of " + df['Department'] + ". It's strengths are " + df['Strengths'] + ". These are the weaknesses, " + df['Weaknesses'] + '. Apart from this, here are some training needs ' + df['Training Needs']

In [None]:
print(df['Merged Column'][3])

In [None]:
from langchain.document_loaders import DataFrameLoader

In [None]:
from langchain.vectorstores import Chroma

In [None]:
df_loader = DataFrameLoader(df, page_content_column='Merged Column')

In [None]:
df_document = df_loader.load()
display(df_document)

In [None]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings


In [None]:
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=10)
texts = text_splitter.split_documents(df_document)

In [None]:
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
chromadb_index = Chroma.from_documents(
 texts, embedding_function, persist_directory='./input'
)

# From here we start querying the data

In [None]:
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline

In [None]:
retriever = chromadb_index.as_retriever()

In [None]:
document_qa = RetrievalQA.from_chain_type(
 llm=chat, chain_type="stuff", retriever=retriever
)

In [None]:
response = document_qa.run("What training needs are needed for people in Admin?")

In [None]:
print(response)

# As you can see, I start getting results, but the quality still needs improvement, so I am trying a QA chain instead

In [None]:
# create the chain to answer questions
qa_chain = RetrievalQA.from_chain_type(llm=chat,
                                  chain_type="stuff",
                                  retriever=retriever,
                                  return_source_documents=True,
                                  verbose=True)

In [None]:
query = "What are the training needs for operations?"
llm_response = qa_chain(query)


In [None]:
print(llm_response['result'])

# This code is still not perfect but I feel it's in the right direction.
# Happy to get inputs from my peers here if there's a better way to approach this.