In [1]:
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import MagenticOneGroupChat
from autogen_agentchat.ui import Console
from autogen_core.tools import FunctionTool
from autogen_ext.models.openai import OpenAIChatCompletionClient

In [180]:
import ast
import os
from pathlib import Path
from typing import Optional

import arxiv
import requests
from dotenv import load_dotenv
from IPython.display import display, Markdown
from markitdown import MarkItDown
from openai import OpenAI
from semanticscholar import SemanticScholar

In [39]:
# Load variables from the local .env file, then read the OpenAI key
# (None when the variable is not defined).
load_dotenv(dotenv_path="./.env")
openai_api_key = os.getenv("OPENAI_API_KEY")

## Plan for Literature review 

1. Determine larger area of research and focused area of research. Focused area of research is the area of particular interest of the query. Larger area of research is the more general area which includes focus area as a part.
2. Extract systematic literature reviews of larger area of research. Out of article body extract paragraphs and references which characterize state of the art in focused area of research.
3. Summarise state of the art for focus area of research.
4. Identify the strongest research groups. To do this, search the references for the focused area of research for frequently appearing author names, paying special attention to the first and last authors of each article. Combine authors who frequently co-author into research groups. 
5. Extract the latest publications from the strongest research groups and, based on these publications, describe the scientific approach of every research group. 
6. Compare the research groups' approaches, looking for similarities and differences. 


In [69]:
def s2_search(query: str, max_results: int = 2) -> list:  # type: ignore[type-arg]
    """
    Search Semantic Scholar by keywords and return the results including abstracts.

    Papers with missing optional metadata (publication date, open-access PDF)
    are kept; the corresponding fields are set to None instead of the paper
    being dropped.

    :param query: keywords to search for
    :param max_results: value passed as ``limit`` to the Semantic Scholar search
    :return: list of dicts with keys "title", "authors", "published"
        ("YYYY-MM-DD" or None), "abstract" and "pdf_url" (URL or None)
    """
    from semanticscholar import SemanticScholar

    sch = SemanticScholar()
    search = sch.search_paper(query=query, limit=max_results)

    results = []
    for paper in search.items:
        # Both fields may be None for a given paper; handle that explicitly
        # rather than relying on an exception to skip the record.
        publication_date = getattr(paper, "publicationDate", None)
        open_access_pdf = getattr(paper, "openAccessPdf", None) or {}

        results.append(
            {
                "title": paper.title,
                "authors": [author.name for author in (paper.authors or [])],
                "published": (
                    publication_date.strftime("%Y-%m-%d") if publication_date else None
                ),
                "abstract": paper.abstract,
                "pdf_url": open_access_pdf.get("url"),
            }
        )

    return results

In [41]:
def download_pdf_file(url: str) -> Optional[str]:
    """
    Download PDF from given URL to the current working directory.

    :param url: The url of the PDF file to be downloaded
    :return: path of the downloaded file, or None if the download failed
    """
    from urllib.parse import urlparse

    # Derive the file name from the URL path only, so a query string or
    # fragment never ends up in the local file name.
    pdf_file_name = os.path.basename(urlparse(url).path) or "download"
    filepath = os.path.join(os.getcwd(), pdf_file_name)
    if not filepath.endswith(".pdf"):
        filepath = filepath + ".pdf"

    started_writing = False
    try:
        # The context manager releases the connection; the timeout keeps a
        # stalled server from hanging the caller indefinitely.
        with requests.get(url, stream=True, timeout=60) as response:
            if response.status_code != 200:
                print(f'Could not download {pdf_file_name},')
                print(f'HTTP response status code: {response.status_code}')
                return None

            with open(filepath, 'wb') as pdf_object:
                started_writing = True
                # Write the body in chunks instead of loading it all in memory.
                for chunk in response.iter_content(chunk_size=65536):
                    pdf_object.write(chunk)
    except (requests.RequestException, OSError) as error:
        print(f'Could not download {pdf_file_name},')
        print(f'error: {error}')
        # Do not leave a truncated file behind.
        if started_writing and os.path.exists(filepath):
            os.remove(filepath)
        return None

    print(f'{pdf_file_name} was successfully saved!')
    return filepath

In [42]:
def convert_pdf_to_markdown(path: str) -> str:
    """
    Convert provided pdf file to markdown text.

    The markdown is also written next to the source file, under the same
    name with a ``.md`` suffix.

    :param path: Local path to pdf file
    :return: the converted markdown text
    """
    md = MarkItDown(
        # llm_client=client,
        # llm_model="gpt-4o",
        # llm_prompt="Extract text from pdf file with OCR and return well-formatted Markdown. Exclude footers, page numbers, watermarks.",
    )
    result = md.convert(path)
    markdown_text = result.markdown

    outpath = Path(path).with_suffix(".md")
    # Explicit UTF-8 so the write does not depend on the platform default
    # encoding, which can fail on non-ASCII characters.
    with open(outpath, 'w', encoding="utf-8") as md_file:
        md_file.write(markdown_text)
    return markdown_text

In [188]:
#def arxiv_search(query: str, max_results: int = 2, min_year: int = 2020, max_year: int = 2025) -> list:  # type: ignore[type-arg]
def arxiv_search(query: str, max_results: int = 2) -> list:  # type: ignore[type-arg]
    """
    Search Arxiv for papers and return the results including abstracts and full text.

    :param query: query to search in arxiv
    :param max_results: maximum number of articles returned
    :return: list of dicts with keys "title", "authors", "published"
        ("YYYY-MM-DD"), "abstract" and "full_text" (markdown text, or None
        when the PDF could not be downloaded or converted)
    """

    client = arxiv.Client(
        page_size=max_results,
        delay_seconds=5,
        num_retries=3
    )
    search = arxiv.Search(query=query, max_results=max_results, sort_by=arxiv.SortCriterion.Relevance)
    converter = MarkItDown()

    results = []
    for paper in client.results(search):
        # Reset per paper so the cleanup below never sees an unbound name or
        # a path left over from the previous iteration.
        pdf_file = None
        md_result = None

        try:
            # extract full text
            if paper.pdf_url is not None:
                pdf_file = download_pdf_file(paper.pdf_url)
                if pdf_file is not None:
                    md_result = converter.convert(pdf_file).markdown
        except Exception as error:
            # One unreadable PDF should not discard the whole search; keep
            # the abstract and report the problem.
            print(f"Could not extract full text of '{paper.title}': {error}")
            md_result = None
        finally:
            if pdf_file is not None and os.path.exists(pdf_file):
                os.remove(pdf_file)

        results.append(
            {
                "title": paper.title,
                "authors": [author.name for author in paper.authors],
                "published": paper.published.strftime("%Y-%m-%d"),
                "abstract": paper.summary,
                "full_text": md_result
            }
        )

    return results

In [197]:
# client = arxiv.Client(
#     page_size=5,
#     delay_seconds=5,
#     num_retries=3
# )

# search = arxiv.Search(query="recent advances in deep learning optimizers with an accent on novel optimization algorythms", max_results=5, sort_by=arxiv.SortCriterion.Relevance)

# search_iter = client.results(search)

In [None]:
# p1 = next(search_iter)

In [30]:
# Try the downloader on a single arXiv paper; the cell shows the returned path.
download_pdf_file(url="https://arxiv.org/pdf/2012.06469v1")

2012.06469v1 was successfully saved!


'/home/jovyan/work/LitReview/2012.06469v1.pdf'

In [45]:
# download_pdf_file saves into the current working directory, so build the
# path from there instead of hard-coding one machine's absolute path.
res = convert_pdf_to_markdown(os.path.join(os.getcwd(), "2012.06469v1.pdf"))

In [54]:
# Ad-hoc arXiv query: relevance-sorted search for up to five papers.
client = arxiv.Client(page_size=5, delay_seconds=5, num_retries=3)
search = arxiv.Search(
    query="recent advances in deep learning optimizers",
    max_results=5,
    sort_by=arxiv.SortCriterion.Relevance,
)
res = client.results(search)

In [71]:
# Wrap the two search functions as tools that can be handed to agents.
s2_search_tool = FunctionTool(
    func=s2_search,
    description="Search Semantic scholar for papers by keywords, returns found papers including abstracts",
)
arxiv_search_tool = FunctionTool(
    func=arxiv_search,
    description="Search Arxiv for papers related to a given topic, including abstracts",
)


In [67]:
# Call the search function directly (outside any agent) to inspect its output.
arxiv_search(query="recent advances in deep learning optimizers", max_results=2)

2105.04026v2 was successfully saved!
2306.11113v2 was successfully saved!


[{'title': 'The Modern Mathematics of Deep Learning',
  'authors': ['Julius Berner',
   'Philipp Grohs',
   'Gitta Kutyniok',
   'Philipp Petersen'],
  'published': '2021-05-09',
  'abstract': 'We describe the new field of mathematical analysis of deep learning. This field emerged around a list of research questions that were not answered within the classical framework of learning theory. These questions concern: the outstanding generalization power of overparametrized neural networks, the role of depth in deep architectures, the apparent absence of the curse of dimensionality, the surprisingly successful optimization performance despite the non-convexity of the problem, understanding what features are learned, why deep architectures perform exceptionally well in physical problems, and which fine aspects of an architecture affect the behavior of a learning task in which way. We present an overview of modern approaches that yield partial answers to these questions. For selected approach

In [90]:
# Chat-completion client shared by all agents defined in this cell.
model_client = OpenAIChatCompletionClient(
    model="o3-mini",
    api_key = openai_api_key
)

# Agent with access to the Semantic Scholar search tool.
s2_search_agent = AssistantAgent(
    name="Semantic_Scholar_Search_Agent",
    tools=[s2_search_tool],
    model_client=model_client,
    description="An agent that can search Semantic scholar paper database using keywords related to given topic",
    system_message="You are a helpful AI assistant. Solve tasks using your tools.",
)

# Agent with access to the arXiv search tool.
arxiv_search_agent = AssistantAgent(
    name="Arxiv_Search_Agent",
    tools=[arxiv_search_tool],
    model_client=model_client,
    description="An agent that can search Arxiv for papers related to a given topic, including abstracts and full text",
    system_message="You are a helpful AI assistant. Solve tasks using your tools. Specifically, you can take into consideration the user's request and craft a search query that is most likely to return relevant academic papers.",
)

# Tool-less agent that summarizes a single paper.
summarizer_agent = AssistantAgent(
    name="Summarizer_Agent",
    model_client=model_client,
    description="An agent that can summarize one scientific paper in a time. The paper should be provided as title, list of authors, abstaract and full text. Summarization will be build in context of general task query",
    system_message="You are a helpful AI assistant. Summarize content of scientific paper provided in no more that 2000 words. Build a summarization in context of goal of literature search",
)

# Tool-less agent that writes the final review; its prompt asks it to end
# with the word 'TERMINATE'.
report_agent = AssistantAgent(
    name="Report_Agent",
    model_client=model_client,
    description="Generate a report based on a given topic",
    system_message="You are a helpful assistant. Your task is to synthesize data extracted into a high quality literature review including CORRECT references. You MUST write a final report that is formatted as a literature review with CORRECT references.  Your response should end with the word 'TERMINATE'",
)

In [91]:
# Termination condition keyed on the text "TERMINATE", and the group chat
# made of the search, summarizer and report agents.
termination = TextMentionTermination("TERMINATE")
team = MagenticOneGroupChat(
    participants=[
        arxiv_search_agent,
        summarizer_agent,
        report_agent,
    ],
    model_client=model_client,
    termination_condition=termination,
)

In [None]:
await Console(
    team.run_stream(
        task="Make a search of 3 recent publications and write a literature review on recent advances in deep learning optimizers with an accent on novel optimization algorythms",
    )
);

#await model_client.close()

## No team application

In [169]:
# Stand-alone agents for the step-by-step pipeline that does not use a team.

# Searches arXiv through the search tool.
arxiv_search_agent = AssistantAgent(
    name="Arxiv_Search_Agent",
    description="An agent that can search Arxiv for papers related to a given topic, including abstracts and full text",
    system_message="You are a helpful AI assistant. Given the user input define the search topic for scientific articles in arxiv, and search arxive for the topic. return list of found articles and topic",
    model_client=model_client,
    tools=[arxiv_search_tool],
)

# Summarizes one paper per call.
summarizer_agent = AssistantAgent(
    name="Summarizer_Agent",
    description="An agent that can summarize one scientific paper in a time. The paper is provided as title, list of authors, abstaract and full text. Summarization is to be builtin context of provided user query",
    system_message=(
        "You are a helpful AI assistant. Summarize content of scientific paper provided in no more that 2000 words.  "
        "The paper is provided as title, list of authors, abstaract and full text. Summarization is to be built in context of provided user query"
    ),
    model_client=model_client,
)

# Writes the final literature review.
report_agent = AssistantAgent(
    name="Report_Agent",
    description="Generate a report based on a given topic",
    system_message="You are a helpful assistant. Your task is to synthesize data extracted into a high quality literature review containing no more than 5000 words including CORRECT references. You MUST write a final report that is formatted as a literature review with CORRECT references.",
    model_client=model_client,
)

In [181]:
# User request that drives the search, summarization and report steps.
task = (
    "Make a search of 10 recent publications and write a literature review "
    "on recent advances in deep learning optimizers "
    "with an accent on novel optimization algorythms"
)

In [182]:
# Run the search agent on the task and keep its result.
# NOTE(review): this assignment rebinds the name `arxiv_search`, which an
# earlier cell defines as the search function; after this cell runs, the
# function is no longer reachable under that name. Consider a distinct name
# for the result, renaming every later use in the same change.
arxiv_search = await arxiv_search_agent.run(task = task)

2008.05730v1 was successfully saved!
2105.04026v2 was successfully saved!
2306.11113v2 was successfully saved!
2504.20096v1 was successfully saved!
2301.00942v1 was successfully saved!
2007.15745v3 was successfully saved!
Could not download 2302.09566v2,
HTTP response status code: 404
1903.03040v2 was successfully saved!
2103.07585v1 was successfully saved!
1910.08476v2 was successfully saved!


In [183]:
# Parse the content of the agent's last message as a Python literal.
# NOTE(review): presumably that content is the repr of the list returned by
# the search tool — confirm; ast.literal_eval raises if the text is not a
# valid Python literal.
paper_list = ast.literal_eval(arxiv_search.messages[-1].content)

In [184]:
#paper_list[0]["topic"] = task
for paper in paper_list:
    paper["topic"] = task
    paper_summary = await summarizer_agent.run(
        task = f"Write a summary of provided article : {paper_list[0]}",
    )
    paper["summary"] = paper_summary.messages[-1].content
    await summarizer_agent.on_reset(None)

In [185]:
# Remove the bulky fields before handing the papers to the report agent.
# pop() with a default keeps the cell re-runnable: a second run finds the
# keys already gone and does nothing instead of raising KeyError.
for paper in paper_list:
    paper.pop("full_text", None)
    paper.pop("topic", None)

In [186]:
lit_review_output = await report_agent.run(task = f"Synthesize literature revew based ion provided topic \"{task}\" and gathered data {paper_list}")

In [187]:
# Show the text of the report agent's last message.
review_text = lit_review_output.messages[-1].content
print(review_text)

Below is a literature review that synthesizes recent advances in deep learning optimizers, with a special emphasis on novel optimization algorithms. In what follows, we review advances spanning adaptive gradient methods, surrogate‐based and active learning strategies, second‐order techniques, and connections with reinforcement learning. This survey builds on and integrates insights from ten recent publications from 2019 to 2025.

──────────────────────────────
1. Introduction

Deep learning has transformed many domains by enabling the solution of highly complex problems. At the same time, the success of deep learning critically depends on optimization algorithms that are both efficient and robust. Early methods—such as stochastic gradient descent (SGD)—have been augmented with adaptive schemes (e.g., Adam) and momentum techniques to tackle issues of high variance and slow convergence. More recently, novel approaches have emerged that not only refine per-iteration update rules but also 

In [168]:
# Show the stored summary of the second paper in the list.
second_paper = paper_list[1]
print(second_paper["summary"])

Below is a literature‐style summary of the article “Iterative Surrogate Model Optimization (ISMO): An active learning algorithm for PDE constrained optimization with deep neural networks” written in the context of recent advances in deep learning optimizers and novel optimization algorithms. This summary is meant both to capture the contributions of ISMO and to situate its ideas alongside other recent publications in the field.

──────────────────────────────
1. Background and Motivation

Traditional optimization methods for problems constrained by partial differential equations (PDEs) face a major computational bottleneck: each function evaluation (and its derivatives) requires solving a PDE numerically, a task that is often extremely expensive. To overcome this challenge, surrogate models—especially deep neural networks (DNNs)—have been used to approximate the mapping y → L(y) from design parameters y (which might be high-dimensional) to observables L(y) (which are outputs of PDE sol

# Literature Review on Recent Advances in Machine Learning Optimizers

The field of optimization in machine learning has seen significant advancements in recent years. These developments include novel algorithms, techniques for integrating machine learning with traditional optimization methods, and applications across various domains. This literature review synthesizes the key findings from recent studies highlighting innovative approaches to optimization problems in machine learning.

## 1. Novel Optimization Techniques

One notable advancement is the integration of Optimal Transport (OT) theory into machine learning optimization methodologies. OT provides a probabilistic framework for comparing probability distributions, which has proven useful for data-driven tasks such as generative modeling and transfer learning. Montesuma et al. (2023) discussed significant contributions in this area, emphasizing recent innovations in computational OT methods and their implications for various aspects of machine learning, including supervised and unsupervised learning paradigms (Montesuma, E. F., Mboula, F. N., & Souloumiac, A. (2023). *Recent Advances in Optimal Transport for Machine Learning*. arXiv:2306.16156).

Further, recent studies have harnessed machine learning to enhance conventional optimization techniques, specifically in areas like model predictive control (MPC). E et al. (2022) highlighted how machine learning can empower control solvers, addressing challenges inherent in tackling complex optimal control problems (E, W., Han, J., & Long, J. (2022). *Empowering Optimal Control with Machine Learning: A Perspective from Model Predictive Control*. arXiv:2205.07990).

## 2. Constrained Optimization

The challenge of integrating machine learning with combinatorial optimization is another area garnering attention. Kotary et al. (2021) provided a comprehensive survey on leveraging machine learning to solve constrained optimization problems. They emphasized the potential for hybrid models that can generate fast, approximate solutions through the combination of machine learning architectures and combinatorial optimizers (Kotary, J., Fioretto, F., Van Hentenryck, P., & Wilder, B. (2021). *End-to-End Constrained Optimization Learning: A Survey*. arXiv:2103.16378).

## 3. Process Optimization

The application of machine learning in process optimization, especially within chemical engineering, has also seen notable advances. Mitrai and Daoutidis (2024) reviewed strategies for automating the selection and tuning of optimization algorithms based on learned behaviors from numerical solvers. They discussed the importance of representing decision-making problems effectively for machine learning tasks (Mitrai, I., & Daoutidis, P. (2024). *Accelerating Process Control and Optimization via Machine Learning: A Review*. arXiv:2412.18529).

## 4. The Interplay Between Machine Learning and Mathematical Programming

The integration of machine learning and mathematical programming has opened up new avenues for optimization under uncertainty. Ning and You (2019) explored how data-driven optimization can synergistically link machine learning with traditional optimization frameworks, facilitating better decision-making under uncertain conditions. Their review covered various data-driven approaches, identifying critical research opportunities in this emerging inter-disciplinary field (Ning, C., & You, F. (2019). *Optimization under Uncertainty in the Era of Big Data and Deep Learning: When Machine Learning Meets Mathematical Programming*. arXiv:1904.01934).

## 5. Hybrid Models and Future Directions

The emergent trend toward hybrid models that combine classical optimization techniques with machine learning is becoming increasingly prominent. These models have demonstrated their capability in tackling intricate systems where conventional methods fall short. As research continues to evolve, scholars advocate for more adaptive algorithms that can learn from their performance dynamically, which has significant implications for real-time optimization scenarios.

## Conclusion

In conclusion, recent advancements in machine learning optimizers reveal a clear trajectory toward the integration of traditional optimization strategies with machine learning innovations. This synthesis of techniques not only enhances the efficiency of optimization processes but also broadens their applicability across diverse fields. Future research is expected to delve deeper into hybrid models, the interactions between learning and optimization, and uncertainty handling mechanisms in complex environments.

### References

E, W., Han, J., & Long, J. (2022). Empowering Optimal Control with Machine Learning: A Perspective from Model Predictive Control. *arXiv:2205.07990*.  

Mitrai, I., & Daoutidis, P. (2024). Accelerating Process Control and Optimization via Machine Learning: A Review. *arXiv:2412.18529*.  

Montesuma, E. F., Mboula, F. N., & Souloumiac, A. (2023). Recent Advances in Optimal Transport for Machine Learning. *arXiv:2306.16156*.  

Ning, C., & You, F. (2019). Optimization under Uncertainty in the Era of Big Data and Deep Learning: When Machine Learning Meets Mathematical Programming. *arXiv:1904.01934*.

Kotary, J., Fioretto, F., Van Hentenryck, P., & Wilder, B. (2021). End-to-End Constrained Optimization Learning: A Survey. *arXiv:2103.16378*.


In [78]:
import requests

# Fetch the model catalogue from the AIML API.
response = requests.get(
    "https://api.aimlapi.com/models",
    headers={"Accept":"*/*"},
    timeout=30,  # do not hang the kernel on a stalled connection
)
# Fail on an HTTP error instead of trying to parse an error body as data.
response.raise_for_status()

data = response.json()

In [79]:
import json
import pandas as pd
# Decode the response body and build a frame from its "data" entry.
models_payload = json.loads(response.content)
df_models = pd.DataFrame(models_payload["data"])

In [80]:
# Expand the nested "info" column into flat columns and drop the original.
# The guard makes the cell safe to re-run: once "info" has been expanded
# and dropped, a second run leaves the frame untouched instead of raising.
if "info" in df_models.columns:
    df_models = pd.concat(
        [df_models, pd.json_normalize(df_models["info"])],
        axis=1,
    ).drop("info", axis="columns")

In [82]:
# Preview the first 40 rows of the model catalogue.
df_models.head(40)

Unnamed: 0,id,type,features,endpoints,name,developer,description,contextLength,maxTokens,url,docs_url
0,openai/gpt-4o,chat-completion,"[openai/chat-completion, openai/response-api, ...","[/v1/chat/completions, /v1/responses]",GPT 4o,Open AI,"GPT-4o integrates text, vision, and audio for ...",128000.0,16384.0,https://aimlapi.com/models/chat-gpt-4-omni,https://docs.aimlapi.com/api-references/text-m...
1,gpt-4o-2024-08-06,chat-completion,"[openai/chat-completion, openai/response-api, ...","[/v1/chat/completions, /v1/responses]",GPT 4o 2024-08-06,Open AI,Multimodal AI model by OpenAI enhancing human-...,128000.0,16384.0,https://aimlapi.com/models/gpt-4o-2024-08-06-api,https://docs.aimlapi.com/api-references/text-m...
2,gpt-4o-2024-05-13,chat-completion,"[openai/chat-completion, openai/response-api, ...","[/v1/chat/completions, /v1/responses]",GPT 4o 2024-05-13,Open AI,GPT-4o-2024-05-13 is the initial release versi...,128000.0,4096.0,https://aimlapi.com/models/gpt-4o-2024-05-13-api,https://docs.aimlapi.com/api-references/text-m...
3,gpt-4o-mini,chat-completion,"[openai/chat-completion, openai/response-api, ...","[/v1/chat/completions, /v1/responses]",GPT 4o mini,Open AI,"GPT-4o Mini: Cost-efficient, advanced model fo...",128000.0,16384.0,https://aimlapi.com/models/chat-gpt-4o-mini,https://docs.aimlapi.com/api-references/text-m...
4,gpt-4o-mini-2024-07-18,chat-completion,"[openai/chat-completion, openai/response-api, ...","[/v1/chat/completions, /v1/responses]",GPT 4o mini 2024-07-18,Open AI,"GPT-4o Mini: Cost-efficient, advanced model fo...",128000.0,16384.0,https://aimlapi.com/models/chat-gpt-4o-mini,https://docs.aimlapi.com/api-references/text-m...
5,chatgpt-4o-latest,chat-completion,"[openai/chat-completion, openai/response-api, ...","[/v1/chat/completions, /v1/responses]",ChatGPT 4o latest,Open AI,Multimodal AI model by OpenAI enhancing human-...,128000.0,16384.0,https://aimlapi.com/models/gpt-4o-2024-08-06-api,https://docs.aimlapi.com/api-references/text-m...
6,gpt-4-turbo,chat-completion,"[openai/chat-completion, openai/response-api, ...","[/v1/chat/completions, /v1/responses]",GPT 4 turbo,Open AI,High-speed AI model for instant language proce...,128000.0,4096.0,https://aimlapi.com/models/chat-gpt-4-turbo,https://docs.aimlapi.com/api-references/text-m...
7,gpt-4-turbo-2024-04-09,chat-completion,"[openai/chat-completion, openai/response-api, ...","[/v1/chat/completions, /v1/responses]",GPT 4 turbo 2024-04-09,Open AI,,128000.0,4096.0,,https://docs.aimlapi.com/api-references/text-m...
8,gpt-4,chat-completion,"[openai/chat-completion, openai/response-api, ...","[/v1/chat/completions, /v1/responses]",GPT 4,Open AI,Revolutionary AI model for unparalleled natura...,8000.0,8192.0,https://aimlapi.com/models/chat-gpt-4,https://docs.aimlapi.com/api-references/text-m...
9,gpt-4-0125-preview,chat-completion,"[openai/chat-completion, openai/response-api, ...","[/v1/chat/completions, /v1/responses]",GPT 4 0125 preview,Open AI,,8000.0,4096.0,,https://docs.aimlapi.com/api-references/text-m...


In [78]:
# Description of the model at row position 220.
df_models["description"].iloc[220]

'Aura is a real-time TTS model with human-like voices for conversational AI applications.'