In [1]:
from dotenv import load_dotenv
import os
load_dotenv()
from langchain_core.tools import tool, StructuredTool
# Read both provider keys from the process environment, then report
# whether each one is present (any non-empty value counts as "found").
openai_api_key = os.getenv('OPENAI_API_KEY')
nvidia_api_key = os.getenv('NVIDIA_API_KEY')
print("OpenAI API Key found" if openai_api_key else "OpenAI API Key not found.")
print("NVIDIA API Key found" if nvidia_api_key else "NVIDIA API Key not found.")


OpenAI API Key found
NVIDIA API Key found


In [2]:
from langchain_core.messages import HumanMessage
from simple_workflows import *
from simple_tools import *
from workflows_as_tools import *
# Chat model instance reused by the workflow and tool cells below.
llm1 = ChatGroq(
    model="llama3-70b-8192",
    temperature=0,
)

  from tqdm.autonotebook import tqdm, trange


In [10]:
### This is a multi-agent workflow. Its purpose is to retrieve a collection of papers from arXiv.
### The input is a 'list' (in the sense of everyday speech) with each element being the name of, or keywords around, a paper (check the bib file).
### Under the hood it searches for the most relevant paper and downloads it into the pdf folder.
### In the end you get a report of the papers that were retrieved.
### This needs an OpenAI API key to work. There are ways around it, but you need to use a chat model that supports tools.
### You can try your own bibliography here, e.g. bib={1. life of brian, 2. death rebirth 3. time illusion wondering face}

# The request text is kept verbatim (including its spelling) because it is what the receptionist model receives.
bibliography = "1 mitochondira are really small? , 2 is life really short?"

# Named `retrieval_input` rather than `input` so the builtin input() stays usable for the rest of the session.
retrieval_input = {
    "receptionist_retriever_history": [
        HumanMessage(content="Please fetch me the following papers:" + bibliography)
    ],
    "last_action_outcome": [HumanMessage(content="")],
    "metadata": HumanMessage(content=" "),
    "article_keywords": HumanMessage(content=" "),
    "title_of_retrieved_paper": HumanMessage(content=" "),
    "should_I_clean": False,
}

### Here you can set different agents to staff the workflow. The default is arxiv_retriever_workflow(retrieval_model=ChatOpenAI(model="gpt-3.5-turbo",temperature=0),
### cleaner_model=ChatNVIDIA(model="meta/llama3-70b-instruct"), receptionist_model=ChatNVIDIA(model="meta/llama3-70b-instruct"))
### The retrieval agent needs tools, and ChatNVIDIA is still in development.
# NOTE(review): the class/keyword names in the note above differ from the call below
# (ArxivRetrievalWorkflow / retriever_model) — confirm the defaults against simple_workflows.

# Builder and compiled app get separate names; `retrieve_app` is only ever the compiled, invokable app.
retrieval_workflow = ArxivRetrievalWorkflow(
    retriever_model=llm1,
    cleaner_model=llm1,
    receptionist_model=llm1,
)
retrieve_app = retrieval_workflow.create_workflow().compile()

# The second positional argument is the run config; it sets a recursion limit of 100 for this invocation.
state = retrieve_app.invoke(retrieval_input, {"recursion_limit": 100})
print(state["receptionist_retriever_history"][-1].content)


Receptionist: The following has been forwarded to the arxiv_retriever:  mitochondria small
Retriever: I am going to call  get_id_from_url
Tool_executor: I am going to executeget_id_from_urlwith{'url': 'https://export.arxiv.org/api/query?search_query=mitochondria+small&max_results=5'}
Scraper: I got the following paperThe most relevant arXiv paper to the query "mitochondria small" is "MiShape: 3D Shape Modelling of Mitochondria in Microscopy" and its id-url is "http://arxiv.org/abs/2303.01546v1".
Retriever: I am going to call  download_pdf
Tool_executor: I am going to executedownload_pdfwith{'id': '2303.01546v1', 'title': 'MiShape_3D_Shape_Modelling'}
Retriever:I am reporting back to the arxiv_receptionist withThe paper with the title 'MiShape: 3D Shape Modelling of Mitochondria in Microscopy' has successfully been downloaded.
Reporting to receptionist
Receptionist: The following has been forwarded to the arxiv_retriever:  life short
Retriever: I am going to call  get_id_from_url
Tool_e

In [5]:
### The arXiv retrieval again, this time wrapped in the form of a tool. Just for testing purposes.
# NOTE(review): the saved output of this cell ends with "KeyError: 'report'" — confirm where the
# 'report' key is expected (not visible from this notebook).
arxiv_retrieval_backend = ArxivRetrievalToolClass(
    streaming=False,
    retriever_model=llm1,
    cleaner_model=llm1,
    receptionist_model=llm1,
)
ArxivRetrievalTool = StructuredTool(
    name="RetrieveBibTool",
    func=arxiv_retrieval_backend.retrieve_bib,
    args_schema=ArxivRetrieverInput,
    description=arxiv_retrieval_backend.description,
)
ArxivRetrievalTool.invoke("1.Creatine for gains, 2. Is time relative or relativety had its time")
             


Receptionist: The following has been forwarded to the arxiv_retriever:  Here is the first query:

creatine gains
Retriever: I am going to call  get_id_from_url
Tool_executor: I am going to executeget_id_from_urlwith{'url': 'https://export.arxiv.org/api/query?search_query=creatine+gains&max_results=5'}
Scraper: I got the following paperThe most relevant arXiv paper to the query "creatine gains" is "Inverse Z-spectrum analysis for MT- and spillover-corrected and T1-compensated steady-state pulsed CEST-MRI - application to pH-weighted MRI of acute stroke" and its id-url is "http://arxiv.org/abs/1302.6605v2".
Retriever: I am going to call  download_pdf
Tool_executor: I am going to executedownload_pdfwith{'id': '1302.6605v2', 'title': 'inverse_z_spectrum_analysis'}
Retriever:I am reporting back to the arxiv_receptionist withThe paper with the title 'Inverse Z-spectrum analysis for MT- and spillover-corrected and T1-compensated steady-state pulsed CEST-MRI - application to pH-weighted MRI of

Error in LangChainTracer.on_tool_error callback: TracerException("Found chain run at ID 9ebbb611-2696-4d02-a71b-b4b7f347d704, but expected {'tool'} run.")


ReceptionistHere is the report:

The paper with the title 'Inverse Z-spectrum analysis for MT- and spillover-corrected and T1-compensated steady-state pulsed CEST-MRI - application to pH-weighted MRI of acute stroke' has been downloaded but it doesn't seem to fit the query 'creatine for gains'.

The paper with the title 'Three levels of understanding physical relativity: Galileo's relativity, Up-to-date Galileo's relativity and Einstein's relativity: A historical survey' has been downloaded and it seems to fit the query 'Is time relative or relativety had its time'.

We are done.


KeyError: 'report'

In [None]:
### Takes the name of a PDF located in the folder files\pdfs as input and creates two markdown files:
### one with mupdf and one with nougat.
pdf_to_convert = "Li"
pdf_to_markdown.invoke(pdf_to_convert)

In [None]:
### Build the OCR-enhancer workflow and invoke it with "Li" as the main text and "mu_Li" as the supporting text.

# Named `ocr_enhancer_input` rather than `input` so the builtin input() stays usable for the rest of the session.
ocr_enhancer_input = {
    "main_text_filename": HumanMessage(content="Li"),
    "supporting_text_filename": HumanMessage(content="mu_Li"),
}

# Builder and compiled app get separate names; `ocr_enhancer_app` is only ever the compiled, invokable app.
ocr_enhancer_workflow = OcrEnchancerWorkflow(enhancer_model=llm1)
ocr_enhancer_app = ocr_enhancer_workflow.create_workflow().compile()

state = ocr_enhancer_app.invoke(ocr_enhancer_input)
print(state)

In [None]:
### The idea of this chain/tool is to remove the proofs from a paper so that it is easier to make a summary out of it.
### The tool uses two chains under the hood. The first stamps the pages of the text that continue a proof from the previous page;
### the idea was to help the LLM a bit to recognize proofs. The second LLM does the removal.
### Of all the modules I created, this was the most unsuccessful one. Even with strong LLMs like GPT-4o and Opus, I had only partial results.
### I welcome anyone who can improve the prompt for this tool.
proof_removal_target = "Li"
proof_remover.invoke(proof_removal_target)

In [5]:
### This workflow takes a text found in the folder files/markdowns and creates a set of keywords and a summary for it.
### It is preferable to use a file that doesn't contain proofs because it produces a better summary.

# Named `keyword_and_summary_input` rather than `input` so the builtin input() stays usable for the rest of the session.
keyword_and_summary_input = {
    "main_text_filename": HumanMessage(content="Li"),
    "report": HumanMessage(content=""),
}

# Builder and compiled app get separate names; `keyword_and_summary_app` is only ever the compiled, invokable app.
keyword_and_summary_workflow = KeywordAndSummaryWorkflow(keyword_and_summary_maker_model=llm1)
keyword_and_summary_app = keyword_and_summary_workflow.create_workflow().compile()

state = keyword_and_summary_app.invoke(keyword_and_summary_input)

PermissionError: [Errno 13] Permission denied: 'C:\\Users\\artno\\Bibliography\\files\\markdowns\\Li.mmd'

In [None]:
### Build the translation workflow and invoke it for the "Li" text with target language "en".

# Named `translation_input` rather than `input` so the builtin input() stays usable for the rest of the session.
translation_input = {
    # Backslash is escaped explicitly: same runtime value as before, without the invalid "\L" escape sequence.
    "keywords_and_summary_filename": HumanMessage(content="markdowns\\Li_keyword_and_summary"),
    "target_language": HumanMessage(content="en"),
    "main_text_filename": HumanMessage(content="Li"),
    # An empty message instance (as in the keyword-and-summary cell), not the HumanMessage class itself.
    "report": HumanMessage(content=""),
}

# Builder and compiled app get separate names; `translation_app` is only ever the compiled, invokable app.
translation_workflow = TranslationWorkflow(translator_model=llm1)
translation_app = translation_workflow.create_workflow().compile()

state = translation_app.invoke(translation_input)
print(state)

In [3]:
### Wrap the translator's translate_file method as a StructuredTool and invoke it directly.

# The helper instance has its own name; `TranslationTool` is only ever the StructuredTool.
translation_backend = TranslationToolClass(streaming=True, streamcon=None, translator_model=llm1)
TranslationTool = StructuredTool(
    name="TranslationTool",
    func=translation_backend.translate_file,
    args_schema=TranslatorInput,
    description=translation_backend.description,
)

tools = [TranslationTool, pdf_to_markdown]
tool_executor = ToolExecutor(tools)

# One definition of the arguments, shared by the ToolInvocation and the direct call.
# Named `translation_tool_input` rather than `input` so the builtin input() is not shadowed.
translation_tool_input = {
    "keywords_and_summary_filename": "",
    "target_language": "en",
    "main_text_filename": "Li",
}
Invocation = ToolInvocation(tool="TranslationTool", tool_input=translation_tool_input)
TranslationTool.invoke(input=translation_tool_input)

File not found: The keyword_and_summary file does not exist. Assuming keyword_and_summary is blank.
Translation of Li in progress


 56%|█████▌    | 19/34 [02:51<04:08, 16.54s/it]