In [31]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into os.environ (existing values are kept).
load_dotenv()

# Fail fast when the key is missing. The previous `str(os.getenv(...))` form would
# export the literal string "None" and defer the failure to the first API call.
# load_dotenv() has already placed the key in os.environ, so no re-assignment is needed.
if not os.getenv("LANGCHAIN_API_KEY"):
    raise RuntimeError(
        "LANGCHAIN_API_KEY is not set. Define it in your environment or in a .env file."
    )

# Default LangSmith API endpoint; change it only if you use a different LangSmith instance.
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
project_name = "2.pdf_chat_router_issue_assistant"  # Update with your project name

In [33]:
# Hand-written (question, reference answer) pairs used to build the evaluation dataset.
# Each tuple is laid out as: question first, expected answer second.
examples = [
    (
        "What is the name of the company that provides the 'Sparsh' policies?",
        "The name of the company that provides the 'Sparsh' policies is Infosys Limited.",
    ),
    (
        "What types of agreements must be signed upon employment with Infosys Limited?",
        "Upon employment with Infosys Limited, one must sign certain mandatory agreements, including but not limited to the Confidentiality, Intellectual Property Rights, the Code of Business Conduct and Ethics.",
    ),
    (
        "What type of information is considered confidential at Infosys Limited?",
        "At Infosys Limited, confidential information includes but is not limited to current and future business information of the Company, its clients, suppliers, or employees.",
    ),
    (
        "What is the role of the individual in this executive employment contract?",
        "The role of the individual in this executive employment contract is Chief Operating Officer of Infosys Limited.",
    ),
    (
        "Where is the work location of the individual in this executive employment contract?",
        "The work location of the individual in this executive employment contract is Bangalore, India.",
    ),
    (
        "What is the definition of 'affiliate' in this executive employment contract?",
        "In this executive employment contract, 'affiliate' means any entity that controls, is controlled by, or is under common control with the Company. For purposes of this Agreement, 'control' means possessing, directly or indirectly, the power to direct or cause the direction of the management, policies, or operations of an entity.",
    ),
    (
        "What is the non-solicitation clause in this executive employment contract?",
        "The non-solicitation clause in this executive employment contract states that upon termination of the individual's relationship with the Company, they shall not solicit, induce, recruit, or encourage any Company employee to leave the Company, or take away such employees, or attempt to do so for themselves or for any other person or entity.",
    ),
    (
        "What is the definition of 'Competitor' in this executive employment contract?",
        "In this executive employment contract, 'Competitor' includes but is not limited to the following entities and their wholly owned subsidiaries: Tata Consultancy Services Limited, Accenture Limited, International Business Machines Corporation, Cognizant Technology Solutions Corporation, Wipro Limited, Tech Mahindra Limited, Capgemini.",
    ),
    (
        "What is the annual increase policy at Infosys Limited?",
        "Annual increments to components of an employee's compensation at Infosys Limited are determined on an annual basis by the Board or the Committee at its sole discretion, taking into account the Company’s prior years’ audited financial performance and independent compensation benchmarks.",
    ),
    (
        "What is the National Pension Scheme offered by Infosys Limited?",
        "Infosys Limited offers all its India based employees the option to contribute towards the National Pension Scheme, which is an optional retirement benefit introduced by the Government of India for all its citizens. It enables accumulation of retirement corpus during active employment with add-on tax breaks.",
    ),
    (
        "What is the consequence of a breach of confidentiality at Infosys Limited?",
        "A breach of confidentiality at Infosys Limited may result in legal action against the employee, including injunctive relief and monetary damages.",
    ),
    (
        "What is the policy regarding taxes at Infosys Limited?",
        "All payments at Infosys Limited are subject to applicable taxes and statutory withholding.",
    ),
    (
        "What is the policy regarding the termination of employment at Infosys Limited?",
        "The policy regarding the termination of employment at Infosys Limited is outlined in section 12 of the executive employment contract, which includes provisions for resignation, termination for cause, and termination without cause.",
    ),
]

In [34]:
from langsmith import Client

# LangSmith client built with no explicit arguments.
# NOTE(review): presumably it picks up the LANGCHAIN_API_KEY / LANGCHAIN_ENDPOINT
# values exported in the first cell — confirm against the langsmith Client docs.
client = Client()

In [35]:
import uuid

# A random UUID suffix gives every run of this cell its own dataset name.
dataset_name = f"Retrieval QA Questions {uuid.uuid4()}"
dataset = client.create_dataset(dataset_name=dataset_name)

# Upload one example per (question, answer) pair from `examples`.
for question, answer in examples:
    client.create_example(
        inputs={"question": question},
        outputs={"answer": answer},
        dataset_id=dataset.id,
    )

In [36]:
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from langchain import PromptTemplate, LLMChain
# `langchain.llms` is a deprecated re-export; the notebook already depends on
# langchain_community, so import the wrapper from there.
from langchain_community.llms import HuggingFacePipeline

# Local checkpoint directory for the seq2seq model.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
model = "C:/Users/arasu/Workspace/Projects/GenAI/models/MBZUAILaMini-Flan-T5-248M/"

# The pipeline below samples (do_sample=True), so seed torch's RNG to make a fresh
# Restart & Run All repeatable. Concurrent evaluation calls may still interleave
# generations in a different order between runs.
torch.manual_seed(42)

# NOTE(review): `truncation` is normally an encode-time option; confirm that passing
# it at load time has the intended effect.
tokenizer = AutoTokenizer.from_pretrained(model, truncation=True)
base_model = AutoModelForSeq2SeqLM.from_pretrained(model)

# Text-to-text generation pipeline with low-temperature nucleus sampling.
pipe = pipeline(
    "text2text-generation",
    model=base_model,
    tokenizer=tokenizer,
    max_length=256,
    do_sample=True,
    temperature=0.3,
    top_p=0.95,
)

# Wrap the Hugging Face pipeline so it can be used as a LangChain LLM.
llm = HuggingFacePipeline(pipeline=pipe)

In [37]:
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
# Embedding model loaded from a local directory.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="C:/Users/arasu/Workspace/Projects/GenAI/embeddings/hkunlp_instructor-large",
)


load INSTRUCTOR_Transformer
max_seq_length  512


In [38]:
from pathlib import Path

# `langchain.vectorstores` is a deprecated re-export; the notebook already depends on
# langchain_community, so import Chroma from there.
from langchain_community.vectorstores import Chroma

# Directory of the previously persisted Chroma index, relative to the working directory.
db_path = "vector_db"

# Opening a non-existent directory would silently create an empty store, and the
# retriever would then return no context. Stop here with a clear message instead.
if not Path(db_path).is_dir():
    raise FileNotFoundError(
        f"Chroma persist directory '{db_path}' not found relative to {Path.cwd()}; "
        "build the vector store first or fix db_path."
    )

vector_db = Chroma(persist_directory=db_path, embedding_function=instructor_embeddings)

In [39]:
# Expose the vector store as a retriever; k=3 is forwarded as a search keyword argument.
retriever = vector_db.as_retriever(search_kwargs=dict(k=3))

In [41]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

from datetime import datetime
# Prompt layout: a fixed system instruction, the retrieved context as a second system
# message, then the user's question. Only {context} and {question} are template
# variables; the earlier `.partial(time=...)` bound a value no message referenced,
# so it has been dropped.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are friendly customer care assistant"
         " trying to help user on the context provided."),
        ("system", "{context}"),
        ("human", "{question}"),
    ]
)

# Render the prompt, run it through the local LLM, and parse the result to a plain string.
response_generator = prompt | llm | StrOutputParser()

In [42]:
# The full chain looks like the following
from operator import itemgetter

chain = (
    # This mapping turns the incoming {"question": ...} dict into the two keys the
    # response generator's prompt expects.
    {
        # question -> retriever -> page contents of the returned docs joined by newlines
        "context": (
            itemgetter("question")
            | retriever
            | (lambda docs: "\n".join(doc.page_content for doc in docs))
        ),
        # the question itself is passed through unchanged
        "question": itemgetter("question"),
    }
    | response_generator
)

In [43]:
# Smoke test: stream the chain's answer for one question and print chunks as they arrive.
for chunk in chain.stream(
    {"question": "What is the role of the individual in this executive employment contract?"}
):
    print(chunk, end="", flush=True)

The individual's role in this executive employment contract is Chief Operating Officer of Infosys Limited.

In [44]:
from langchain.smith import RunEvalConfig

# Evaluation settings: run the built-in "qa" evaluator, with the same local `llm`
# configured as the evaluation model.
eval_config = RunEvalConfig(
    evaluators=["qa"],
    eval_llm=llm,
)

In [45]:
_ = await client.arun_on_dataset(
    dataset_name=dataset_name,
    llm_or_chain_factory=lambda: chain,
    evaluation=eval_config,
)

View the evaluation results for project 'damp-attitude-63' at:
https://smith.langchain.com/o/9e5b74d3-3844-5299-8661-dcadae2c46dc/datasets/5fbc064c-48bb-402a-8d14-115a8691edfa/compare?selectedSessions=1bfcab8c-70f0-4088-9156-750e2264e454

View all tests for Dataset Retrieval QA Questions 9dde8242-490b-461f-8618-37ad5bc42001 at:
https://smith.langchain.com/o/9e5b74d3-3844-5299-8661-dcadae2c46dc/datasets/5fbc064c-48bb-402a-8d14-115a8691edfa
[------------------------------------------------->] 13/13