In [1]:
import os

# Take the OpenAI API key from the OPENAI_API_KEY environment variable so the
# real secret is never stored in the notebook. When the variable is unset or
# empty, fall back to the original placeholder string.
OPENAI_KEY = os.environ.get("OPENAI_API_KEY") or "OPENAI_KEY"

In [2]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains.qa_generation.base import QAGenerationChain
from langchain.chains import LLMChain, ConversationalRetrievalChain, RetrievalQAWithSourcesChain
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.retrievers import MultiQueryRetriever
from langchain.memory import ConversationSummaryMemory
from langchain.chat_models import ChatOpenAI
from langchain.agents import AgentType, OpenAIFunctionsAgent, ZeroShotAgent, initialize_agent
from langchain.utilities import TextRequestsWrapper, DuckDuckGoSearchAPIWrapper
from langchain import WikipediaAPIWrapper
from langchain.tools import WikipediaQueryRun, RequestsPostTool
from langchain.agents.tools import Tool
from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent, create_retriever_tool
from langchain.document_transformers import DoctranPropertyExtractor
from langchain.document_transformers import LongContextReorder
from langchain.document_loaders import PyPDFLoader, WebBaseLoader
from langchain.text_splitter import LatexTextSplitter, RecursiveCharacterTextSplitter, TokenTextSplitter, NLTKTextSplitter, CharacterTextSplitter
from bs4 import BeautifulSoup

In [3]:
# DuckDuckGo search wrapper created with default settings; it is handed to
# DuckDuckGoSearchRun in the tool-based search section to build `ddg_tool`.
ddg_api = DuckDuckGoSearchAPIWrapper()

In [4]:
# Load the CV PDF from the local data directory and display how many
# documents the loader returned.
pdf_loader = PyPDFLoader(
    file_path='data/Сергей Братчиков, CV.pdf',
)
pdf_docs = pdf_loader.load()
len(pdf_docs)

2

In [5]:
# Fetch a single news article over HTTP and display how many documents the
# loader returned.
web_loader = WebBaseLoader(
    web_path="http://ria.ru/20230809/yaponiya-1888975642.html",
)
web_docs = web_loader.load()
len(web_docs)

1

In [6]:
# Wikipedia wrapper created with default settings; it is handed to
# WikipediaQueryRun in the tool-based search section to build `wiki_tool`.
wiki_api = WikipediaAPIWrapper()

In [7]:
# Chat model shared by the multi-query retriever, the conversational chain,
# the agent memory and both agents further down.
llm = ChatOpenAI(
    model_name='gpt-3.5-turbo-16k',
    openai_api_key=OPENAI_KEY,
)

In [8]:
# Embedding model used when the chunks are indexed in the vector store.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_KEY)

# Splitter configured with chunk_size=500 and chunk_overlap=50.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)

In [9]:
# Split the web documents followed by the PDF documents into chunks, then
# display the total number of chunks.
all_splits = text_splitter.split_documents([*web_docs, *pdf_docs])
len(all_splits)

47

In [12]:
# Embed every chunk and index it in a Chroma store rooted at data/chroma.
# NOTE(review): re-running this cell against an already populated directory
# presumably adds the same chunks again — confirm before relying on scores.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embeddings,
    persist_directory='data/chroma',
)
# The explicit persist call is intentionally left disabled:
# vectorstore.persist()

In [13]:
# Peek at the first chunk to inspect its text and metadata.
all_splits[0]

Document(page_content='Япония выразила протест против решения России о налоговом соглашении - РИА Новости, 09.08.2023', metadata={'source': 'http://ria.ru/20230809/yaponiya-1888975642.html', 'title': 'Япония выразила протест против решения России о налоговом соглашении - РИА Новости, 09.08.2023', 'description': 'Правительство Японии сожалеет о решении России приостановить налоговые соглашения с рядом стран и направило протест по дипломатическим каналам, потребовав... РИА Новости, 09.08.2023', 'language': 'ru'})

In [15]:
# Sanity-check the index: fetch the 3 closest chunks for the query together
# with the score the vector store reports for each.
vectorstore.similarity_search_with_score("Россия", k=3)

[(Document(page_content='смогут зачесть в России суммы уплаченного за рубежом подоходного налога.Как поясняется в указе, решение было принято, "исходя из необходимости принятия безотлагательных мер в связи с совершением рядом иностранных государств недружественных действий в отношении Российской Федерации, ее граждан и юридических лиц".https://ria.ru/20230808/kiberataka-1888772616.htmlhttps://ria.ru/20230807/forum-1888695449.htmlроссияяпонияРИА Новости 154.796internet-group@rian.ru7 495 645-6601ФГУП МИА «Россия', metadata={'description': 'Правительство Японии сожалеет о решении России приостановить налоговые соглашения с рядом стран и направило протест по дипломатическим каналам, потребовав... РИА Новости, 09.08.2023', 'language': 'ru', 'source': 'http://ria.ru/20230809/yaponiya-1888975642.html', 'title': 'Япония выразила протест против решения России о налоговом соглашении - РИА Новости, 09.08.2023'}),
  0.32794255382702875),
 (Document(page_content='Версия 2023.1 Beta© 2023 МИА «Росс

## Retrievers

In [194]:
# Wrap a k=3 vector-store retriever in MultiQueryRetriever, giving it the
# shared chat model.
retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
    llm=llm,
)

In [195]:
# Try the multi-query retriever on a CV-style question ("Places of work")
# and display the documents it returns.
retriever.get_relevant_documents("Места работы")

[Document(page_content='Я принимаю условия соглашения и даю своё согласие на обработку персональных данных в соответствии \n                            с Политикой конфиденциальности\n                             Федерального Государственного Унитарного Предприятия «Международное информационное агентство «Россия сегодня», \n                            расположенного по адресу: Россия, 119021, г. Москва, Зубовский бульвар, д. 4.', metadata={'description': 'Правительство Японии сожалеет о решении России приостановить налоговые соглашения с рядом стран и направило протест по дипломатическим каналам, потребовав... РИА Новости, 09.08.2023', 'language': 'ru', 'source': 'http://ria.ru/20230809/yaponiya-1888975642.html', 'title': 'Япония выразила протест против решения России о налоговом соглашении - РИА Новости, 09.08.2023'}),
 Document(page_content='Почта\n                        \n                            Пароль\n                        Войти\n                        Восстановить пароль\

## ConversationalRetrievalChain

In [209]:
from langchain.memory import ConversationBufferMemory

# Conversation memory exposed under the "chat_history" key and returned as
# message objects.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

In [210]:
# Conversational retrieval chain built from the shared chat model, the
# multi-query retriever and the buffer memory; the last line displays it.
conv_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)
conv_chain

ConversationalRetrievalChain(memory=ConversationBufferMemory(chat_memory=ChatMessageHistory(messages=[]), output_key=None, input_key=None, return_messages=True, human_prefix='Human', ai_prefix='AI', memory_key='chat_history'), callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, combine_docs_chain=StuffDocumentsChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, input_key='input_documents', output_key='output_text', llm_chain=LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, prompt=ChatPromptTemplate(input_variables=['context', 'question'], output_parser=None, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], output_parser=None, partial_variables={}, template="Use the following pieces of context to answer the users question. \nIf you don't know the answer, just say that you don't know, don't try to mak

In [406]:
# Ask the chain about the person from the CV and display only the 'answer'
# field of the result.
conv_chain({"question": "Что ты знаешь о Сергее Братчикове?"})['answer']

KeyboardInterrupt: 

## Tool-based search

In [43]:
def _save_note(note: str) -> str:
    """Print the note and return a confirmation string for the agent.

    The value a tool function returns is what the agent receives as its
    observation. ``print`` returns ``None``, so returning its result (directly
    or wrapped in a dict) gives the agent nothing useful; a plain confirmation
    string is returned instead.
    """
    print(note)
    return f"Note saved: {note}"


# Tool the agent can call to record a note or reminder for the user.
custom_tool = Tool(
    name='save_note',
    func=_save_note,
    description='Useful for when you need to save note or reminder for user request',
)

In [44]:
# Retriever tool backed by a similarity search over the vector store (k=5).
retr_tool = create_retriever_tool(
    retriever=vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 5}),
    name="search_in_docs",
    description=(
        "Useful for when you need to answer question you don't know. And need to find additional information for answer.\n"
        "Searches and returns documents regarding to query."
    ),
)
wiki_tool = WikipediaQueryRun(api_wrapper=wiki_api)
ddg_tool = DuckDuckGoSearchRun(api_wrapper=ddg_api)

# Active tool set: document retriever plus DuckDuckGo search. The Wikipedia
# variant is kept below as a disabled alternative.
# tools = [retr_tool, wiki_tool]
tools = [retr_tool, ddg_tool]
tools

[Tool(name='search_in_docs', description="Useful for when you need to answer question you don't know. And need to find additional information for answer.\nSearches and returns documents regarding to query.", args_schema=None, return_direct=False, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, handle_tool_error=False, func=<bound method BaseRetriever.get_relevant_documents of VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], metadata=None, vectorstore=<langchain.vectorstores.chroma.Chroma object at 0x7f39bf150640>, search_type='similarity', search_kwargs={'k': 5})>, coroutine=None),
 DuckDuckGoSearchRun(name='duckduckgo_search', description='A wrapper around DuckDuckGo Search. Useful for when you need to answer questions about current events. Input should be a search query.', args_schema=None, return_direct=False, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, handle_tool_error=False, api_wrapper=DuckDuckGoSearchA

In [58]:
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory

# Agent memory stored under "chat_history" with max_token_limit=3000; it is
# given the shared chat model.
agent_memory = AgentTokenBufferMemory(
    memory_key="chat_history",
    llm=llm,
    max_token_limit=3000,
)
# Start from an empty history each time this cell runs.
agent_memory.clear()

In [59]:
# Prompt prefix asking for answers in Russian. It is defined here but not
# passed to the agent, because the agent_kwargs line below is commented out.
PREFIX = "Answer the following questions as best you can. Use russian language. You have access to the following tools:"

# Chat zero-shot ReAct agent over the active tool set. Intermediate steps are
# returned alongside the final output.
mrkl = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    # agent_kwargs={'prefix': PREFIX},
    memory=agent_memory,
    verbose=True,
    return_intermediate_steps=True,
)

In [60]:
# Run the ReAct agent on a question about the current year and its main
# events, then display the final answer next to the intermediate steps.
mrkl_result = mrkl(
    {"input": "Какой сейчас год и какие его основные события?"}
)
(mrkl_result['output'], mrkl_result['intermediate_steps'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To answer the first question, I can simply provide the current year. For the second question, I can use the "duckduckgo_search" tool to search for the major events of the current year.

Action:
```
{
  "action": "duckduckgo_search",
  "action_input": "major events of the current year"
}
```[0m
Observation: [33;1m[1;3mHistory The settlers brought the lottery to America. It's had a long, uneven history Godofredo A. Vásquez/AP Law Special counsel Jack Smith got a secret search warrant for Trump's Twitter account... NPR news, audio, and podcasts. Coverage of breaking stories, national and world news, politics, business, science, technology, and extended coverage of major national and world events. Global Russia China Climate Change January Jan. 1: Wildfires continue to burn through Boulder, Colorado, leading to the evacuation of over 30,000 people and the destruction of homes across Boulder County. Jan. 3: The U.S. re

('The current year is 2022. Some major events include the wildfires in Boulder, Colorado, and the high number of COVID-19 cases with the omicron variant.',
 [(AgentAction(tool='duckduckgo_search', tool_input='major events of the current year', log='Thought: To answer the first question, I can simply provide the current year. For the second question, I can use the "duckduckgo_search" tool to search for the major events of the current year.\n\nAction:\n```\n{\n  "action": "duckduckgo_search",\n  "action_input": "major events of the current year"\n}\n```'),
   "History The settlers brought the lottery to America. It's had a long, uneven history Godofredo A. Vásquez/AP Law Special counsel Jack Smith got a secret search warrant for Trump's Twitter account... NPR news, audio, and podcasts. Coverage of breaking stories, national and world news, politics, business, science, technology, and extended coverage of major national and world events. Global Russia China Climate Change January Jan. 1: 

In [266]:
# Conversational retrieval agent over the active tool set, with verbose
# tracing of each step.
agent_executor = create_conversational_retrieval_agent(
    llm=llm,
    tools=tools,
    verbose=True,
)

In [267]:
# Ask the conversational agent a question that combines public knowledge
# with the CV content, then display the full result dict.
# NOTE(review): the saved output shows a `Wikipedia` tool call, while the
# active `tools` list holds only retr_tool and ddg_tool — presumably the
# output comes from an earlier run with the other tool list; re-run to confirm.
result = agent_executor({"input": "Владимир Путин и как он связан с Сергеем Братчиковым"})
result



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Wikipedia` with `{'query': 'Владимир Путин'}`


[0m[36;1m[1;3mPage: Vladimir Putin
Summary: Vladimir Vladimirovich Putin (born 7 October 1952) is a Russian politician and former intelligence officer serving as the current president of Russia. Putin has served continuously as president or prime minister since 1999: as prime minister from 1999 to 2000 and from 2008 to 2012, and as president from 2000 to 2008 and since 2012.Putin worked as a KGB foreign intelligence officer for 16 years, rising to the rank of lieutenant colonel before resigning in 1991 to begin a political career in Saint Petersburg. He moved to Moscow in 1996 to join the administration of President Boris Yeltsin. He briefly served as director of the Federal Security Service (FSB) and secretary of the Security Council of Russia, before being appointed prime minister in August 1999. After the resignation of Yeltsin, Putin became acting president an

{'input': 'Владимир Путин и как он связан с Сергеем Братчиковым',
 'chat_history': [HumanMessage(content='Владимир Путин и как он связан с Сергеем Братчиковым', additional_kwargs={}, example=False),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'Wikipedia', 'arguments': '{\n  "query": "Владимир Путин"\n}'}}, example=False),
  FunctionMessage(content="Page: Vladimir Putin\nSummary: Vladimir Vladimirovich Putin (born 7 October 1952) is a Russian politician and former intelligence officer serving as the current president of Russia. Putin has served continuously as president or prime minister since 1999: as prime minister from 1999 to 2000 and from 2008 to 2012, and as president from 2000 to 2008 and since 2012.Putin worked as a KGB foreign intelligence officer for 16 years, rising to the rank of lieutenant colonel before resigning in 1991 to begin a political career in Saint Petersburg. He moved to Moscow in 1996 to join the administration of President Boris Yeltsin