In [24]:
import os
import io
import requests
from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.document_loaders import PyPDFLoader
from PyPDF2 import PdfReader

from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings import LlamaCppEmbeddings
from langchain.callbacks.base import BaseCallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.memory import ConversationBufferMemory
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_core.tools import Tool
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains import RetrievalQA

from langchain.agents import AgentType, Tool, initialize_agent

from dotenv import load_dotenv

load_dotenv()

True

In [6]:
MODEL_PATH = "/mnt/c/Users/Danny/Downloads/mistral-7b-instruct-v0.2.Q5_K_M.gguf"

In [9]:
model = LlamaCpp(
        model_path=MODEL_PATH,
        temperature=0.75,
        n_gpu_layers = -1,
        n_batch = 512,
        max_tokens=2000,
        top_p=1,
        callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
        verbose=True,  # Verbose is required to pass to the callback manager
        n_ctx=2048
    )

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /mnt/c/Users/Danny/Downloads/mistral-7b-instruct-v0.2.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader

### PDF

In [2]:
from typing_extensions import Concatenate
loader = PdfReader('/mnt/c/Users/Danny/Downloads/V-Cred Offer Letter - Danial Mirza.pdf')
raw_text = ''
for i, page in enumerate(loader.pages):
    content = page.extract_text()
    if content:
        raw_text += content
raw_text



In [3]:
text_splitter = CharacterTextSplitter(
    separator = "\n",
    chunk_size = 800,
    chunk_overlap  = 200,
    length_function = len,
)
texts = text_splitter.split_text(raw_text)

In [7]:
document_search = Chroma.from_texts(texts, LlamaCppEmbeddings(model_path=MODEL_PATH))

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /mnt/c/Users/Danny/Downloads/mistral-7b-instruct-v0.2.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader

In [10]:
from langchain.chains.question_answering import load_qa_chain
chain = load_qa_chain(llm=model, chain_type="stuff")

In [11]:
query = "What will happen if resign after 2 weeks?"
docs = document_search.similarity_search(query)
chain.run(input_documents=docs, question=query)


llama_print_timings:        load time =    1021.87 ms
llama_print_timings:      sample time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =    1121.39 ms /     8 tokens (  140.17 ms per token,     7.13 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =    1122.52 ms /     9 tokens
  warn_deprecated(


 Based on the information provided in the employment agreement, if you resign after two weeks, your employment may be terminated without notice by the Company. Additionally, you may not be entitled to any severance pay or other benefits. It is recommended that you carefully review your employment agreement and consult with a legal professional if you have any questions or concerns regarding your resignation or the potential consequences thereof.


llama_print_timings:        load time =   41865.54 ms
llama_print_timings:      sample time =      13.42 ms /    80 runs   (    0.17 ms per token,  5961.70 tokens per second)
llama_print_timings: prompt eval time =   77729.81 ms /   953 tokens (   81.56 ms per token,    12.26 tokens per second)
llama_print_timings:        eval time =   22497.40 ms /    79 runs   (  284.78 ms per token,     3.51 tokens per second)
llama_print_timings:       total time =  100571.38 ms /  1032 tokens


' Based on the information provided in the employment agreement, if you resign after two weeks, your employment may be terminated without notice by the Company. Additionally, you may not be entitled to any severance pay or other benefits. It is recommended that you carefully review your employment agreement and consult with a legal professional if you have any questions or concerns regarding your resignation or the potential consequences thereof.'

In [14]:
search = GoogleSearchAPIWrapper()


def top5_results(query):
    return search.results(query, 5)


tools = Tool(
    name="Google Search Snippets",
    description="Search Google for recent results.",
    func=top5_results,
)

In [15]:
tools.run("Obama's first name?")[0]

{'title': "Barack Obama's birth certificate",
 'link': 'https://obamawhitehouse.archives.gov/sites/default/files/rss_viewer/birth-certificate-long-form.pdf',
 'snippet': 'Full Maiden Name of Mother. STANLEY. 15. Age of Mother 16. 18. Yes. No. 9. Race of Father. OBAMA. African. Usual Occupation. 12b. Student. DUNHAM. Kind of\xa0...'}

In [28]:
memory = ConversationBufferMemory(return_messages=True)

In [32]:
str(docs[0].page_content)

"• Termination:  \no Either party may terminate this employment arrangement  at any time  by giving \nthe other  written notice as follows:  \n▪ The employee may terminate the employment by providing the \nCompany with one (1) month's written notice of termination . \n▪ The Company reserves the right to terminate the employment with \none (1) week's written notice.  \no Notwithstanding the above, this employment  may be terminated forthwith by \nthe Company  without notice if you are guilty of inter alia misconduct, dereliction \nof duty, or any other reason deemed reasonable at the Company’s sole \ndiscretion.  \no Upon termination, the salary will be pro -rated using the following calculation:  \nDay rate = Salary/working days in a month  \nPro-rated salary = Day rate x number of days worked in a month  \no Upon termination, you agree not to disclose or make use of any confidential \ninformation acquired in relation to your employment and will no longer make \nyourself out as having 

### Internet

In [21]:
search_result = RetrievalQA.from_chain_type(
    llm=model, chain_type="stuff", retriever=document_search.as_retriever()
)

In [23]:
tools = [Tool(
    name="Google Search Snippets",
    description="Search Google for recent results.",
    func=top5_results,
)]
tools += [
    Tool(
        name="Google Search Result",
        func=search_result.run,
        description="useful for when you need to answer questions about the most recent state of the union address. Input should be a fully formed question.",
    ),   
]


In [25]:
agent = initialize_agent(
    tools, model, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)
agent.run(
    "What is V-Cred?"
)

  warn_deprecated(
Llama.generate: prefix-match hit




[1m> Entering new AgentExecutor chain...[0m
 I need to find out what V-Cred is
Action: Google Search Result
Action Input: V-Cred definition


llama_print_timings:        load time =   41865.54 ms
llama_print_timings:      sample time =       4.78 ms /    28 runs   (    0.17 ms per token,  5856.52 tokens per second)
llama_print_timings: prompt eval time =   19320.92 ms /   190 tokens (  101.69 ms per token,     9.83 tokens per second)
llama_print_timings:        eval time =    7390.72 ms /    27 runs   (  273.73 ms per token,     3.65 tokens per second)
llama_print_timings:       total time =   26815.06 ms /   217 tokens


[32;1m[1;3m I need to find out what V-Cred is
Action: Google Search Result
Action Input: V-Cred definition[0m


llama_print_timings:        load time =    1021.87 ms
llama_print_timings:      sample time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =     551.26 ms /     5 tokens (  110.25 ms per token,     9.07 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =     551.89 ms /     6 tokens
Llama.generate: prefix-match hit


 V-Cred is a term used in the field of Natural Language Processing (NLP) and Machine Learning (ML) for credential verification. V-Cred refers to a system or model designed to analyze and validate various types of credentials such as degrees, certificates, licenses, and professional memberships. The V-Cred system uses advanced NLP and ML techniques such as named entity recognition (NER), part-of-speech (POS) tagging, and deep learning models to extract relevant information from credential documents and databases. The V-Cred system also uses various data sources such as government databases, educational institutions, and professional organizations to cross-reference and validate credential information. The V-Cred system provides credential verification services to various industries such as education, healthcare, finance, and government. The V-Cred system helps organizations to streamline their credential verification processes, reduce fraud and errors, improve compliance with regulatory


llama_print_timings:        load time =   41865.54 ms
llama_print_timings:      sample time =      40.00 ms /   205 runs   (    0.20 ms per token,  5125.13 tokens per second)
llama_print_timings: prompt eval time =   69986.25 ms /   877 tokens (   79.80 ms per token,    12.53 tokens per second)
llama_print_timings:        eval time =   72049.00 ms /   204 runs   (  353.18 ms per token,     2.83 tokens per second)
llama_print_timings:       total time =  142955.08 ms /  1081 tokens
Llama.generate: prefix-match hit



Observation: [33;1m[1;3m V-Cred is a term used in the field of Natural Language Processing (NLP) and Machine Learning (ML) for credential verification. V-Cred refers to a system or model designed to analyze and validate various types of credentials such as degrees, certificates, licenses, and professional memberships. The V-Cred system uses advanced NLP and ML techniques such as named entity recognition (NER), part-of-speech (POS) tagging, and deep learning models to extract relevant information from credential documents and databases. The V-Cred system also uses various data sources such as government databases, educational institutions, and professional organizations to cross-reference and validate credential information. The V-Cred system provides credential verification services to various industries such as education, healthcare, finance, and government. The V-Cred system helps organizations to streamline their credential verification processes, reduce fraud and errors, improve


llama_print_timings:        load time =   41865.54 ms
llama_print_timings:      sample time =      39.52 ms /   215 runs   (    0.18 ms per token,  5440.97 tokens per second)
llama_print_timings: prompt eval time =   33160.62 ms /   427 tokens (   77.66 ms per token,    12.88 tokens per second)
llama_print_timings:        eval time =   63086.84 ms /   214 runs   (  294.80 ms per token,     3.39 tokens per second)
llama_print_timings:       total time =   97136.97 ms /   641 tokens


[32;1m[1;3m I now know what V-Cred is and what it does
Final Answer: V-Cred is a term used in the field of Natural Language Processing (NLP) and Machine Learning (ML) for credential verification. It refers to a system or model designed to analyze and validate various types of credentials such as degrees, certificates, licenses, and professional memberships. The V-Cred system uses advanced NLP and ML techniques such as named entity recognition (NER), part-of-speech (POS) tagging, and deep learning models to extract relevant information from credential documents and databases. The V-Cred system also uses various data sources such as government databases, educational institutions, and professional organizations to cross-reference and validate credential information. The V-Cred system provides credential verification services to various industries such as education, healthcare, finance, and government. It helps organizations to streamline their credential verification processes, reduce f

'V-Cred is a term used in the field of Natural Language Processing (NLP) and Machine Learning (ML) for credential verification. It refers to a system or model designed to analyze and validate various types of credentials such as degrees, certificates, licenses, and professional memberships. The V-Cred system uses advanced NLP and ML techniques such as named entity recognition (NER), part-of-speech (POS) tagging, and deep learning models to extract relevant information from credential documents and databases. The V-Cred system also uses various data sources such as government databases, educational institutions, and professional organizations to cross-reference and validate credential information. The V-Cred system provides credential verification services to various industries such as education, healthcare, finance, and government. It helps organizations to streamline their credential verification processes, reduce fraud and errors, improve compliance with regulatory requirements, and 