In [1]:
# If you do not have a virtual env, uncomment the line below to install the requirements
# ! pip install -r requirements.txt


In [2]:
import os
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import ChatPromptTemplate

## 1. SETUP APIS

In [3]:
from langchain_openai.chat_models import AzureChatOpenAI
from langchain_openai.embeddings import AzureOpenAIEmbeddings
from ragas import evaluate



  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Pages the context-restricted Tavily search tool is limited to.
# NOTE(review): these are full URLs handed to `include_domains`; confirm against
# the Tavily docs whether bare domain names are expected instead.
sources = [
    "https://en.wikipedia.org/wiki/EPAM_Systems",
    "https://www.epam.com/about",
    "https://www.linkedin.com/company/epam-systems/",
]

# Read the service settings from a local .env file / the process environment.
load_dotenv()
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
AZURE_OPENAI_EMB_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT_NAME")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Chat model shared by every chain in this notebook (temperature 0, up to 2 retries).
# The endpoint is passed explicitly, the same way it is for the embeddings client
# below, instead of relying on an implicit environment lookup.
llm = AzureChatOpenAI(
    openai_api_version=AZURE_OPENAI_API_VERSION,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    azure_deployment=AZURE_OPENAI_CHAT_DEPLOYMENT_NAME,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# Embedding model; passed to the Ragas evaluation and test-set generation cells.
azure_embeddings = AzureOpenAIEmbeddings(
    openai_api_version=AZURE_OPENAI_API_VERSION,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    azure_deployment=AZURE_OPENAI_EMB_DEPLOYMENT_NAME,
    model=AZURE_OPENAI_EMB_DEPLOYMENT_NAME,
)

# Two search tools returning the same number of hits: one unrestricted,
# one limited to the `sources` listed above.
documents_to_return = 3
tavily_search_no_contex = TavilySearchResults(max_results=documents_to_return)
tavily_search_contex = TavilySearchResults(max_results=documents_to_return, include_domains=sources)

In [28]:
question = "who win the current USA presidential elections in 2024?"

In [29]:
llm.invoke(question)

AIMessage(content="I'm sorry, but as an AI language model, I don't have access to real-time information or the ability to predict future events. Therefore, I cannot provide you with the winner of the 2024 US presidential elections.", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 46, 'prompt_tokens': 19, 'total_tokens': 65, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-35-turbo-16k', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-898f5aab-e1ab-4795-a568-ae013d489f64-0', usage_metadata={'input_tokens': 19, 'output_tokens': 46, 'total_tokens': 65, 'input_token_details': {}, 'output_token_details': {}})

In [30]:
search_docs_nc = tavily_search_no_contex.invoke(question)
search_docs_nc

[{'url': 'https://www.wsj.com/election/2024/general/president',
  'content': 'Donald Trump has won the 2024 Presidential Election. Donald Trump has won the 2024 Presidential Election. ... Latest calls. Awaiting first race calls... Electoral map. View as U.S. map.'},
 {'url': 'https://www.bbc.com/news/election/2024/us/results',
  'content': 'US Presidential Election Results 2024 - BBC News Close menu BBC News Kamala Harris of the Democrat party has 0 electoral college votes. Donald Trump of the Republican party has 0 electoral college votes. Kamala Harris of the Democrat party has 158,810 votes (38.4%) Donald Trump of the Republican party has 249,225 votes (60.2%) US presidential election results 2024 US election 2024 Voting in some states is particularly hard to predict, with polls showing they could be won by the Republicans or the Democratic party. The battleground states that could decide the 2024 presidential election are: Voters in 11 states will also elect a governor. US election

In [32]:
from typing import List
def parse_tavily_input(tavily_input:list, output_srt=True) -> str|List[str]:
    results_str = '\n\n'
    results_list = []
    for n, sample in enumerate(tavily_input):
        url = sample['url']
        content = sample['content']
        results_str += f'url {n}: {url} \nContext {n}: {content}\n\n'
        results_list.append(content)
    return results_str if output_srt else results_list

context_str = parse_tavily_input(search_docs_nc)
print(context_str)



url 0: https://www.wsj.com/election/2024/general/president 
Context 0: Donald Trump has won the 2024 Presidential Election. Donald Trump has won the 2024 Presidential Election. ... Latest calls. Awaiting first race calls... Electoral map. View as U.S. map.

url 1: https://www.bbc.com/news/election/2024/us/results 
Context 1: US Presidential Election Results 2024 - BBC News Close menu BBC News Kamala Harris of the Democrat party has 0 electoral college votes. Donald Trump of the Republican party has 0 electoral college votes. Kamala Harris of the Democrat party has 158,810 votes (38.4%) Donald Trump of the Republican party has 249,225 votes (60.2%) US presidential election results 2024 US election 2024 Voting in some states is particularly hard to predict, with polls showing they could be won by the Republicans or the Democratic party. The battleground states that could decide the 2024 presidential election are: Voters in 11 states will also elect a governor. US election polls: Who is

In [33]:
type(context_str)

str

In [35]:
context_list = parse_tavily_input(search_docs_nc, output_srt=False)
print(context_list)
len(context_list)

['Donald Trump has won the 2024 Presidential Election. Donald Trump has won the 2024 Presidential Election. ... Latest calls. Awaiting first race calls... Electoral map. View as U.S. map.', 'US Presidential Election Results 2024 - BBC News Close menu BBC News Kamala Harris of the Democrat party has 0 electoral college votes. Donald Trump of the Republican party has 0 electoral college votes. Kamala Harris of the Democrat party has 158,810 votes (38.4%) Donald Trump of the Republican party has 249,225 votes (60.2%) US presidential election results 2024 US election 2024 Voting in some states is particularly hard to predict, with polls showing they could be won by the Republicans or the Democratic party. The battleground states that could decide the 2024 presidential election are: Voters in 11 states will also elect a governor. US election polls: Who is ahead - Harris or Trump? US election 2024 About the BBC', 'Trump wins 2024 U.S. presidential election - Los Angeles Times Trump elected 4

3

## 2. SETUP RAG CHAINS

In [10]:
# Build the chain in a more elegant way using tools: https://python.langchain.com/docs/integrations/tools/tavily_search/

In [11]:
# Grounded prompt: the retrieved material is injected into the system message
# through the {context} placeholder; the user question goes in {input}.
prompt_with_rag = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant that answer question to users following context:  {context} ."),
    ("human", "{input}"),
])

# Baseline prompt: same assistant role, no retrieved context.
prompt_without_rag = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant that answer question to users."),
    ("human", "{input}"),
])

# Pipe each prompt into the shared chat model.
chain_with_rag = prompt_with_rag | llm
chain_without_rag = prompt_without_rag | llm


### LLM without RAG

In [36]:
answer_without_rag = chain_without_rag.invoke(
       {
        "input": question
    }
)

print(answer_without_rag)

content="As an AI, I don't have real-time information or the ability to predict future events. Therefore, I cannot tell you who will win the USA presidential elections in 2024. The outcome of the elections will depend on various factors, including the candidates, their campaigns, and the preferences of the voters. It's best to follow reliable news sources and stay informed about the candidates and their platforms as the elections approach." additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 84, 'prompt_tokens': 34, 'total_tokens': 118, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-35-turbo-16k', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}} id='run-53ccdf03-0bfa-4d5f-aec0-bece99f8de5f-0' usage_metadata={'input_tokens': 34, 'output_tokens': 84, 'total_tokens': 118, 'input_token_details': {}, 'output_token_details': {}}


### LLM with RAG (internet grounded)

In [37]:
# Answer the same question, this time grounded on the web search hits.
# The context is the readable text block produced by parse_tavily_input
# (`context_str`) rather than the raw list of result dicts, so the prompt
# receives plain "url N / Context N" text instead of a Python repr.
answer_with_rag = chain_with_rag.invoke(
    {
        "input": question,
        "context": context_str,
    }
)

print(answer_with_rag)

content='Donald Trump has won the 2024 Presidential Election.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 453, 'total_tokens': 464, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-35-turbo-16k', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}} id='run-e54917cf-89c1-4c2a-83ed-f0c5183af3d9-0' usage_metadata={'input_tokens': 453, 'output_tokens': 11, 'total_tokens': 464, 'input_token_details': {}, 'output_token_details': {}}


## 3. EVALUATION (sample data)

In [38]:
### Test set preparation
# NOTE(review): some questions contain typos ("Derivativ", "adquired", "aboyt").
# They are kept verbatim here; fix them deliberately if the evaluation is re-run.
questions = ["How many total employees (EPAMers) EPAM company has?", 
             "When was the company First Derivativ adquired by EPAM?", 
             "What is  TelescopeAI® in EPAM", 
             "Explain what is aboyt InfoNgen software product", 
             "what is EPAM"]

# Reference answers, aligned by position with `questions`.
ground_truths = [ "65,149 EPAmers",
                 "October 2024",
                 "an artificial intelligence-based platform for IT operations and workforce management",
                 "text analytics and sentiment analysis enterprise software product that uses artificial intelligence.",
                 "American company that specializes in software engineering services, digital platform engineering, and digital product design" ]

# One list of retrieved passages per question (domain-restricted search).
# Comprehensions are used so that no loop variable leaks into the notebook
# namespace: the global `question` used by the earlier demo cells stays intact.
contexts = [
    parse_tavily_input(tavily_search_contex.invoke(eval_question), output_srt=False)
    for eval_question in questions
]

# RAG answer for each question, generated from its own retrieved passages.
answers = [
    chain_with_rag.invoke({"input": eval_question, "context": eval_context}).content
    for eval_question, eval_context in zip(questions, contexts)
]


In [40]:
contexts[0]

["As of\nMarch 31,\n2023\nAs of\nDecember 31,\n2022\nAssets\nCurrent assets\nCash and cash equivalents\n$\xa0\xa0\xa0 1,749,422\n$\xa0\xa0\xa0 1,681,344\nTrade receivables and contract assets, net of allowance of 14,184\nand 15,310, respectively\n934,236\n932,626\nShort-term investments\n60,373\n60,336\nPrepaid and other current assets\n86,758\n85,319\nTotal current assets\n2,830,789\n2,759,625\nProperty and equipment, net\n267,067\n273,348\nOperating lease right-of-use assets, net\n146,815\n148,780\nIntangible assets, net\n73,113\n77,652\nGoodwill\n533,730\n529,072\nDeferred tax assets\n167,654\n172,797\nOther noncurrent assets\n52,177\n47,877\nTotal assets\n$\xa0\xa0\xa0 4,071,345\n$\xa0\xa0\xa0 4,009,151\nLiabilities\nCurrent liabilities\nAccounts payable\n$\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0 28,857\n$\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0 30,852\nAccrued compensation and benefits expenses\n444,728\n475,871\nAccrued expenses and other current liabilities\n130,902\n154,339\nIncome taxes payab

In [39]:
answers

['The total number of employees (EPAMers) at EPAM Systems, Inc. is not provided in the given information.',
 'EPAM Systems announced the successful completion of its acquisition of First Derivative on the date the information was released, but the specific date of the acquisition was not provided in the given context.',
 "TelescopeAI® is a platform developed by EPAM, a leading global provider of digital platform engineering and software development services. It is designed to provide actionable insights and analytics to help businesses make informed decisions and increase productivity. TelescopeAI® combines EPAM's integrated business, experience, and technology consulting services with advanced data analytics capabilities to deliver solutions for various verticals and business processes. It offers features such as specialized safety and compliance solutions for industries like oil and gas, workforce management for healthcare, and clinical trials support for life sciences. Overall, Tele

In [16]:
print(len(answers))
print(len(questions))
print(len(contexts))
print(len(ground_truths))

5
5
5
5


In [17]:
#contexts = [" ".join(c) for c in contexts]
contexts

[["US-based software engineering services provider EPAM Systems has announced the expansion of its IT operations in India by opening two new offices in Bengaluru and Chennai. ... will accommodate over 2,100 employees. Designed to reflect the 'home of the modern engineer,' these spaces offer collaborative environments and access to cutting-edge",
  "An interview with Arkadiy Dobkin, founder, CEO, president and chair of EPAM Systems. The onset of war in Eastern Europe in February 2022 was a particularly complex challenge for EPAM Systems. Of the U.S.-based digital transformation services and product engineering company's more than 55,000 employees, 60 percent lived in either Ukraine, Belarus or Russia — including almost 15,000 in Ukraine.",
  'NEWTOWN, Pa., Feb. 18, 2021 /PRNewswire/ -- EPAM Systems, Inc. (NYSE: EPAM), a leading global provider of digital platform engineering and software development services, today announced results for its fourth quarter and full year ended December 31

In [18]:
# https://docs.ragas.io/en/v0.1.21/howtos/customisations/azure-openai.html
# https://docs.ragas.io/en/latest/concepts/components/eval_dataset/#creating-an-evaluation-dataset-from-singleturnsamples

In [45]:
contexts

[["As of\nMarch 31,\n2023\nAs of\nDecember 31,\n2022\nAssets\nCurrent assets\nCash and cash equivalents\n$\xa0\xa0\xa0 1,749,422\n$\xa0\xa0\xa0 1,681,344\nTrade receivables and contract assets, net of allowance of 14,184\nand 15,310, respectively\n934,236\n932,626\nShort-term investments\n60,373\n60,336\nPrepaid and other current assets\n86,758\n85,319\nTotal current assets\n2,830,789\n2,759,625\nProperty and equipment, net\n267,067\n273,348\nOperating lease right-of-use assets, net\n146,815\n148,780\nIntangible assets, net\n73,113\n77,652\nGoodwill\n533,730\n529,072\nDeferred tax assets\n167,654\n172,797\nOther noncurrent assets\n52,177\n47,877\nTotal assets\n$\xa0\xa0\xa0 4,071,345\n$\xa0\xa0\xa0 4,009,151\nLiabilities\nCurrent liabilities\nAccounts payable\n$\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0 28,857\n$\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0 30,852\nAccrued compensation and benefits expenses\n444,728\n475,871\nAccrued expenses and other current liabilities\n130,902\n154,339\nIncome taxes paya

### Metrics calculation

In [46]:
from ragas import evaluate
from datasets import Dataset
from ragas.metrics import (
    faithfulness,
    answer_relevancy, # Requires emb
    context_recall,
    context_precision,
)

# Column-wise evaluation data; every list is aligned by position.
# NOTE(review): the keys mix "question"/"answer" with
# "retrieved_contexts"/"reference"; the result frame displayed further down
# reports them as user_input/response — confirm the expected schema for the
# installed Ragas version.
data = dict(
    question=questions,
    answer=answers,
    retrieved_contexts=contexts,
    reference=ground_truths,
)
dataset = Dataset.from_dict(data)

# Score retrieval quality (context precision/recall) and generation quality
# (faithfulness, answer relevancy) with the Azure chat model and embeddings.
result = evaluate(
    dataset=dataset,
    metrics=[context_precision, context_recall, faithfulness, answer_relevancy],
    llm=llm,
    embeddings=azure_embeddings,
)


Evaluating: 100%|██████████| 20/20 [00:13<00:00,  1.45it/s]


In [20]:
df = result.to_pandas()
df

Unnamed: 0,user_input,retrieved_contexts,response,reference,context_precision,context_recall,faithfulness,answer_relevancy
0,How many total employees (EPAMers) EPAM compan...,[US-based software engineering services provid...,"EPAM Systems has more than 55,000 employees.","65,149 EPAmers",0.0,0.0,1.0,0.967047
1,When was the company First Derivativ adquired ...,"[NEWTOWN, PA, USA, and NEWRY, N. IRELAND, Octo...",First Derivative was acquired by EPAM Systems ...,October 2024,1.0,1.0,0.5,0.968741
2,What is TelescopeAI® in EPAM,"[NEWTOWN, PA - January 23, 2020 - EPAM Systems...",TelescopeAI® is a platform developed by EPAM S...,an artificial intelligence-based platform for ...,1.0,0.0,0.285714,0.863862
3,Explain what is aboyt InfoNgen software product,[EPAM is a Leader according to the 2023 Gartne...,InfoNgen is one of the in-house brands offered...,text analytics and sentiment analysis enterpri...,0.0,0.0,0.0625,0.90955
4,what is EPAM,[EPAM Systems is a global software engineering...,"EPAM Systems, Inc. is a global software engine...",American company that specializes in software ...,1.0,1.0,1.0,0.843075


### Metrics summary and conclusions

In [21]:
metrics = ['context_precision',	'context_recall', 'faithfulness', 'answer_relevancy']
df[metrics].describe()

Unnamed: 0,context_precision,context_recall,faithfulness,answer_relevancy
count,5.0,5.0,5.0,5.0
mean,0.6,0.4,0.569643,0.910455
std,0.547723,0.547723,0.422219,0.057688
min,0.0,0.0,0.0625,0.843075
25%,0.0,0.0,0.285714,0.863862
50%,1.0,0.0,0.5,0.90955
75%,1.0,1.0,1.0,0.967047
max,1.0,1.0,1.0,0.968741


## 4.  TEST DATA GENERATION (AUGMENTED DATA)

In [47]:
# https://docs.ragas.io/en/stable/getstarted/rag_testset_generation/#choose-your-llm
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
#from langchain_openai import ChatOpenAI
#from langchain_openai import OpenAIEmbeddings


generator_llm = LangchainLLMWrapper(llm)
generator_embeddings = LangchainEmbeddingsWrapper(azure_embeddings)

In [48]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://en.wikipedia.org/wiki/EPAM_Systems")
docs = loader.load()

In [52]:
docs

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/EPAM_Systems', 'title': 'EPAM Systems - Wikipedia', 'language': 'en'}, page_content='\n\n\n\nEPAM Systems - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact us\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload file\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDonate\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\nDonate Create account Log in\n\n\n\n\n\n\t\tPages for logged out editors learn more\n\n\n\nContributionsTalk\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n(Top)\n\n\n\n\n\n

In [53]:
from ragas.testset import TestsetGenerator

generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
dataset = generator.generate_with_langchain_docs(docs, testset_size=10)

Generating personas: 100%|██████████| 1/1 [00:00<00:00,  1.28it/s]                                         
Generating Scenarios: 100%|██████████| 1/1 [00:04<00:00,  4.25s/it]
Generating Samples: 100%|██████████| 12/12 [00:02<00:00,  4.87it/s]


In [54]:
dataset.to_pandas()

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,What is the MACH Alliance and what is EPAM's r...,[Awards 5 References 6 External links Toggle t...,EPAM Systems is a founding member of the MACH ...,single_hop_specifc_query_synthesizer
1,What is EPAM Systems and what services does it...,[Awards 5 References 6 External links Toggle t...,"EPAM Systems, Inc. is an American company that...",single_hop_specifc_query_synthesizer
2,What is the location of EPAM Systems?,[Awards 5 References 6 External links Toggle t...,"EPAM Systems is headquartered in Newtown, Penn...",single_hop_specifc_query_synthesizer
3,What is the role of digital consulting in EPAM...,"[China March 2018 Digital Strategy, Experience...",EPAM offers digital consulting as part of its ...,single_hop_specifc_query_synthesizer
4,What is the role of EPAM Systems in Ukraine?,"[China March 2018 Digital Strategy, Experience...",EPAM Systems has started exiting operations in...,single_hop_specifc_query_synthesizer
5,What is the significance of digital strategy i...,"[China March 2018 Digital Strategy, Experience...",Digital strategy plays a crucial role in the t...,single_hop_specifc_query_synthesizer
6,What is the information available about Wikipe...,[EPAM Systems - Wikipedia Jump to content Main...,EPAM Systems - Wikipedia is a platform that pr...,single_hop_specifc_query_synthesizer
7,How can one contribute to EPAM Systems?,[EPAM Systems - Wikipedia Jump to content Main...,"To contribute to EPAM Systems, one can help by...",single_hop_specifc_query_synthesizer
8,What is the purpose of the Main page on the EP...,[EPAM Systems - Wikipedia Jump to content Main...,The Main page on the EPAM Systems website serv...,single_hop_specifc_query_synthesizer
9,What is the significance of Berlin in the prov...,[the original on 2019-11-28. Retrieved 2019-11...,There is no information about Berlin in the pr...,single_hop_specifc_query_synthesizer


## 5. EVALUATION (Augmented test sample)

## 6. BUILD LANGCHAIN AGENT WITH TOOLS

In [55]:
import datetime
from langchain_core.runnables import RunnableConfig, chain

# "%m/%d/%y" is the portable spelling of "%D": the latter is a platform-specific
# strftime directive that is not available on every operating system.
today = datetime.datetime.today().strftime("%m/%d/%y")


prompt = ChatPromptTemplate(
    [
        ("system", f"You are a helpful assistant. The date today is {today}."),
        ("human", "{user_input}"),
        ("placeholder", "{messages}"),
    ]
)

# No tool_choice is specified here, so the model itself decides whether to call
# the search tool (passing tool_choice would force the call).
llm_with_tools = llm.bind_tools([tavily_search_contex])

llm_chain = prompt | llm_with_tools

@chain
def tool_chain(user_input: str, config: RunnableConfig):
    """Answer ``user_input``, letting the model consult the Tavily search tool.

    Args:
        user_input: The user's question.
        config: Runnable configuration forwarded to every nested invocation.

    Returns:
        The model's message: the first reply when it requested no tool call,
        otherwise the reply produced after the tool results were fed back.
    """
    input_ = {"user_input": user_input}
    ai_msg = llm_chain.invoke(input_, config=config)
    # Without tool calls the first reply is already the answer; invoking the
    # model again on its own message would only cost an extra request.
    if not ai_msg.tool_calls:
        return ai_msg
    # Run every requested search, then let the model answer with the results.
    tool_msgs = tavily_search_contex.batch(ai_msg.tool_calls, config=config)
    return llm_chain.invoke({**input_, "messages": [ai_msg, *tool_msgs]}, config=config)


question_test = "What happened in EPAM timeline in the year 2006?"
tool_chain.invoke(question_test)

AIMessage(content="In 2006, EPAM had some significant events in its timeline. Here are a few highlights:\n\n1. VDI joined the EPAM family in 2006. VDI is a software engineering services company.\n\nUnfortunately, I couldn't find more specific details about EPAM's activities in 2006. For a more comprehensive timeline of EPAM's history, you can visit their official website [here](https://www.epam.com/about/who-we-are/history).\n\nPlease note that this information is based on the available search results and may not include all events from EPAM's timeline in 2006.", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 125, 'prompt_tokens': 663, 'total_tokens': 788, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-35-turbo-16k', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-59cd72bd-0f3a-4784-822c-ff084fde6332-0', usage_metadata={'input_tokens': 663, 

## 7. CHALLENGE: Build automation test using augmented data + RAG agent