In [1]:
!pip -q install langchain tiktoken chromadb pypdf transformers InstructorEmbedding sentence-transformers
!pip -q install accelerate bitsandbytes==0.39.0 sentencepiece Xformers
!pip -q install -U git+https://github.com/huggingface/peft.git
!pip -q install -U einops==0.6.1
!pip -q install jq
!pip -q install llama-cpp-python

In [2]:
!pip show langchain

Name: langchain
Version: 0.0.247
Summary: Building applications with LLMs through composability
Home-page: https://www.github.com/hwchase17/langchain
Author: 
Author-email: 
License: MIT
Location: /opt/conda/lib/python3.10/site-packages
Requires: aiohttp, async-timeout, dataclasses-json, langsmith, numexpr, numpy, openapi-schema-pydantic, pydantic, PyYAML, requests, SQLAlchemy, tenacity
Required-by: 


## QA Retrieval No Open AI - WizardLM




# LangChain multi-doc retriever with ChromaDB

***New Points***
- Multiple Files - PDFs
- ChromaDB
- Local LLM
- Instuctor Embeddings


In [3]:
!wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q2_K.bin

--2023-07-29 05:10:59--  https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q2_K.bin
Resolving huggingface.co (huggingface.co)... 65.8.11.59, 65.8.11.106, 65.8.11.102, ...
Connecting to huggingface.co (huggingface.co)|65.8.11.59|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.huggingface.co/repos/30/e3/30e3aca7233f7337633262ff6d59dd98559ecd8982e7419b39752c8d0daae1ca/45833e0b59c8fe80676c664f556031fc411da8856e0716ac7b8ed201b7221c08?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27llama-2-7b-chat.ggmlv3.q2_K.bin%3B+filename%3D%22llama-2-7b-chat.ggmlv3.q2_K.bin%22%3B&response-content-type=application%2Foctet-stream&Expires=1690866660&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5MDg2NjY2MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8zMC9lMy8zMGUzYWNhNzIzM2Y3MzM3NjMzMjYyZmY2ZDU5ZGQ5ODU1OWVjZDg5ODJlNzQxOWIzOTc1MmM4ZDBkYWFlM

## Setting up LangChain


In [4]:
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.llms import LlamaCpp

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader

## Load multiple and process documents

In [6]:
from langchain.document_loaders import JSONLoader

import json
from pathlib import Path
from pprint import pprint

file_path = 'pjpi.jsonl'

def metadata_func(record: dict, metadata: dict) -> dict:
    metadata["DOI"] = record.get("DOI")
    metadata["Citation Title"] = record.get("Citation Title")
    return metadata

loader = JSONLoader(
    file_path=file_path,
    jq_schema='.',
    content_key="Abstract",
    metadata_func=metadata_func,
    json_lines=True,
)

data = loader.load()

pprint(data[:5])

# documents = loader.load()

[Document(page_content="Work engagement has a contribution to the success of an organization. Governmental institutions are often criticized for showing less satisfying performance and less effective services due to the misconducts of the civil servants such as coming late to work, leaving at working hours, and being less enthusiastic at work. This study aimed to examine civil servants' working conditions using three variables, namely leader-member exchange, psychological well-being, and work engagement. The measuring tools used referred to the Leader-Member Exchange Multidimensional (Liden &amp; Maslyn, 1998), Psychological Well-being Scale (Ryff, 1989) and Utrecht Work Engagement Scale (Schaufeli &amp; Bakker, 2004). There were 80 civil servants involved in this study as the subjects. They were selected using simple random sampling technique. Statistical techniques used in this study Structural Equation Modeling-Partial Least Square (SEM-PLS) with Warp PLS 5.0 Software. The result in

In [7]:
len(data)

77

In [8]:
#splitting the text into
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
texts = text_splitter.split_documents(data)

## HF Instructor Embeddings

In [9]:
from langchain.embeddings import LlamaCppEmbeddings

llama_embeddings = LlamaCppEmbeddings(model_path="llama-2-7b-chat.ggmlv3.q2_K.bin")

llama.cpp: loading model from llama-2-7b-chat.ggmlv3.q2_K.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 10 (mostly Q2_K)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 3035.66 MB (+  512.00 MB per state)
llama_new_context_with_model: kv self size  =  512.00 MB
AVX = 1 | AVX

## create the DB

In [10]:
# Embed and store the texts
# Supplying a persist_directory will store the embeddings on disk
persist_directory = 'db'

## Here is the nmew embeddings being used
embedding = llama_embeddings

vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
    persist_directory=persist_directory,
)


llama_print_timings:        load time =  1159.31 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  5328.75 ms /    32 tokens (  166.52 ms per token,     6.01 tokens per second)
llama_print_timings:        eval time =   143.53 ms /     1 runs   (  143.53 ms per token,     6.97 tokens per second)
llama_print_timings:       total time =  5490.98 ms

llama_print_timings:        load time =  1159.31 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time = 10137.64 ms /    44 tokens (  230.40 ms per token,     4.34 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time = 10164.92 ms

llama_print_timings:        load time =  1159.31 ms
llama_print_timings:   

## Make a retriever

In [11]:
retriever = vectordb.as_retriever(search_kwargs={"k": 5})

## Make a chain

In [13]:
from langchain.prompts import PromptTemplate

llm = LlamaCpp(
    model_path="llama-2-7b-chat.ggmlv3.q2_K.bin",
    input={"temperature": 0.75, "max_length": 2000, "top_p": 1},
    verbose=True,
)

# create the chain to answer questions
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    # chain_type_kwargs={"prompt": PROMPT}
    )

llama.cpp: loading model from llama-2-7b-chat.ggmlv3.q2_K.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 10 (mostly Q2_K)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 3035.66 MB (+  256.00 MB per state)
llama_new_context_with_model: kv self size  =  256.00 MB
AVX = 1 | AVX

In [14]:
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:


In [18]:
## Cite sources

import textwrap

def wrap_text_preserve_newlines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)

    return wrapped_text

def process_llm_response(llm_response):
    print(wrap_text_preserve_newlines(llm_response['result']))
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['Citation Title'])
        print(source.metadata['DOI'])

In [19]:
# full example
query = "Does work engagement has a contribution to the success of an organization?"
llm_response = qa_chain(query)
process_llm_response(llm_response)


llama_print_timings:        load time =  1159.31 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  1884.92 ms /    15 tokens (  125.66 ms per token,     7.96 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  1889.72 ms
Llama.generate: prefix-match hit


 Yes, I agree that work engagement is a predictor of successful outcomes.


Sources:
Forgiveness as a mediator on the effect of self-compassion on the ego depletion
10.21580/pjpp.v4i2.3814
Resilience in health workers: The role of social support  and calling
10.21580/pjpp.v7i1.9287
The effects of positive spillover and work-family conflict on female academics’ psychological well-being
10.21580/pjpp.v4i2.3522
Effect of safety climate on safety behavior in employees: The mediation of safety motivation
10.21580/pjpp.v4i2.3346
Pay it forward: Can perceived behavioral control to pass on scholarship aid be predicted by various narcissism?
10.21580/pjpp.v7i2.11768



llama_print_timings:        load time =  1144.49 ms
llama_print_timings:      sample time =    20.00 ms /    18 runs   (    1.11 ms per token,   899.78 tokens per second)
llama_print_timings: prompt eval time = 31055.22 ms /   244 tokens (  127.28 ms per token,     7.86 tokens per second)
llama_print_timings:        eval time =  2991.50 ms /    17 runs   (  175.97 ms per token,     5.68 tokens per second)
llama_print_timings:       total time = 34192.40 ms


In [None]:
qa_chain.retriever.search_type , qa_chain.retriever.vectorstore

In [None]:
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)