In [104]:
%pip install Jinja2 markdown

Collecting markdown
  Downloading Markdown-3.7-py3-none-any.whl.metadata (7.0 kB)
Downloading Markdown-3.7-py3-none-any.whl (106 kB)
Installing collected packages: markdown
Successfully installed markdown-3.7
Note: you may need to restart the kernel to use updated packages.


In [173]:
import os
import re
from functools import wraps
from inspect import signature
from itertools import chain

import arxiv
import markdown
import pandas as pd
from bs4 import BeautifulSoup
from huggingface_hub import HfApi
from IPython.display import Markdown
from openai import OpenAI
from tavily import TavilyClient

In [174]:
# Read the API keys from the environment so secrets never have to be written
# into the notebook. When a variable is unset the value falls back to the
# original empty-string placeholder.
TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY", "")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

In [175]:
# Shared client instances; the tool functions defined in later cells call
# these module-level objects directly.
hf_api = HfApi()
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
arxiv_client = arxiv.Client()
llm_client = OpenAI(api_key = OPENAI_API_KEY)

In [176]:
def log_tool_request(request):
    """Print an outgoing tool request, prefixed with the '< REQ' marker."""
    line = "< REQ {}".format(request)
    print(line)

def log_tool_response(response):
    """Print a tool's response, prefixed with the '> RSP' marker."""
    line = "> RSP {}".format(response)
    print(line)

In [177]:
def log_tool_invocation(description=None):
    """Build a decorator that logs each call to a tool function.

    Before the wrapped function runs, its bound arguments (with defaults
    applied) are passed to ``log_tool_request``; afterwards its return value
    is passed to ``log_tool_response``.

    Args:
        description: Text placed at the start of the request log line. When
            omitted, the wrapped function's name is used instead.

    Returns:
        A decorator. The wrapper it produces returns whatever the wrapped
        function returns and lets its exceptions propagate.
    """
    def decorator(func):
        # Inspect the signature once at decoration time, not on every call.
        func_sig = signature(func)
        # Avoid logging the literal text "None" when no description was given.
        label = description if description is not None else func.__name__

        @wraps(func)  # keep __name__, __doc__ and the signature of the tool
        def wrapper(*args, **kwargs):
            # Map positional and keyword arguments onto parameter names.
            bound_args = func_sig.bind(*args, **kwargs)
            bound_args.apply_defaults()
            args_str = ", ".join(f"{key}={value}" for key, value in bound_args.arguments.items())

            log_tool_request(f"{label} {args_str}")

            result = func(*args, **kwargs)
            log_tool_response(result)
            return result
        return wrapper
    return decorator

In [178]:
@log_tool_invocation("Retrieving top large models from HuggingFace Hub")
def get_top_models(n=10):
    """Return the IDs of the first `n` Hub models when sorted by trending score.

    Args:
        n: Maximum number of models to request from the Hub.

    Returns:
        list: The `modelId` of each listed model, in the order the Hub returns them.
    """
    trending = hf_api.list_models(sort="trending_score", limit=n)
    model_ids = []
    for entry in trending:
        model_ids.append(entry.modelId)
    return model_ids

In [179]:
def format_datetime_readable(dt):
    """Format a datetime as e.g. '25 December 2024 at 12:52:23 UTC'.

    Args:
        dt: A ``datetime`` instance, timezone-aware or naive.

    Returns:
        str: Day, full month name, year, time and timezone name. For a naive
        datetime the timezone name is empty and is simply left out.
    """
    # %Z expands to "" for naive datetimes; rstrip() removes the space that
    # would otherwise dangle at the end of the string.
    return dt.strftime('%d %B %Y at %H:%M:%S %Z').rstrip()

In [180]:
def read_file(file_path):
    """Return the full text content of the file at `file_path`.

    The file is decoded as UTF-8 explicitly. Without this, decoding depends
    on the platform's locale default, which can fail on files containing
    emoji or other non-ASCII text.

    Args:
        file_path: Path of the file to read.

    Returns:
        str: The entire file content.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

In [181]:
@log_tool_invocation("Retrieving model information from HuggingFace Hub")
def get_model_info_from_hugging_face(model_id):
    """Fetch Hub statistics and the README text for one model.

    Args:
        model_id: Hub repository ID, e.g. 'deepseek-ai/DeepSeek-V3'.

    Returns:
        dict: Keys 'model_id', 'created_at' (formatted string), 'downloads',
        'likes', 'trending_score' and 'description' (raw README.md text).
    """
    hub_metadata = hf_api.model_info(
        repo_id=model_id,
        expand=["createdAt", "downloads", "likes", "trendingScore"],
    )

    # The README (model card) is downloaded to a local path and then read.
    readme_path = hf_api.hf_hub_download(model_id, 'README.md')
    readme_text = read_file(readme_path)

    return {
        "model_id": model_id,
        "created_at": format_datetime_readable(hub_metadata.created_at),
        "downloads": hub_metadata.downloads,
        "likes": hub_metadata.likes,
        "trending_score": hub_metadata.trending_score,
        "description": readme_text,
    }

In [182]:
@log_tool_invocation("Retrieving model information on the web using Tavily")
def get_model_info_on_the_web(model_id):
    """Run a Tavily web search for `model_id` and return its 'results' entry."""
    search_response = tavily_client.search(model_id)
    hits = search_response['results']
    return hits

In [183]:
@log_tool_invocation("Retrieving model information on arxiv documents using Tavily")
def get_model_info_on_arxiv(model_id):
    """Run a Tavily search for `model_id` restricted to arxiv.org.

    Returns:
        The 'results' entry of the search response (at most 10 results are requested).
    """
    search_options = {"include_domains": ["arxiv.org"], "max_results": 10}
    search_response = tavily_client.search(model_id, **search_options)
    return search_response['results']

In [184]:
def get_arxiv_ids_from_search_results(search_results):
    """Collect the distinct arXiv paper IDs referenced by search-result URLs.

    Args:
        search_results: Iterable of dicts; each is read for its 'url' entry.
            Entries without a URL are skipped.

    Returns:
        set[str]: New-style arXiv IDs without a version suffix
        (e.g. '2412.19437'), taken from abs/pdf/html URLs on arxiv.org.
    """
    # New-style IDs are YYMM.NNNN or YYMM.NNNNN. Matching that exact shape
    # (rather than any run of digits and dots) stops the dot of a trailing
    # ".pdf" from leaking into the captured ID. The lookahead rejects longer
    # digit runs that are not valid IDs.
    arxiv_url_pattern = re.compile(r'arxiv\.org/(?:abs|pdf|html)/(\d{4}\.\d{4,5})(?!\d)')

    arxiv_ids = set()
    for result in search_results:
        url = result.get('url') or ''
        match = arxiv_url_pattern.search(url)
        if match:
            arxiv_ids.add(match.group(1))
    return arxiv_ids

In [185]:
@log_tool_invocation("Retrieving paper information using Arxiv API")
def get_arxiv_links_by_id(arxiv_id):
    """Look up one arXiv paper by ID and return a Markdown link to it.

    Args:
        arxiv_id: The arXiv identifier to look up.

    Returns:
        str | None: '[<title>](<paper>)' for the first result, where <paper>
        is the result object rendered via str(); None when the lookup yields
        no results.
    """
    lookup = arxiv.Search(id_list=[arxiv_id])
    # Only the first result is used: a single-ID lookup is assumed to yield one paper.
    first_paper = next(iter(arxiv_client.results(lookup)), None)
    if first_paper is None:
        return None
    return f"[{first_paper.title}]({first_paper})"

In [186]:
@log_tool_invocation("Calling OpenAI gpt-4o-mini model")
def call_llm(system_prompt, message):
    """Send a system prompt and a user message to gpt-4o-mini.

    Args:
        system_prompt: Text sent with the 'system' role.
        message: Text sent with the 'user' role.

    Returns:
        The content of the first choice's message in the completion.
    """
    chat_messages = [
        {"role": "system", "content": [{"type": "text", "text": system_prompt}]},
        {"role": "user", "content": [{"type": "text", "text": message}]},
    ]
    response = llm_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [194]:
def remove_markdown_formatting(markdown_content, max_chars=50_000):
    """Convert Markdown to plain text and cap its length.

    The Markdown is rendered to HTML, then the HTML tags are stripped with
    BeautifulSoup, leaving text fragments joined by newlines.

    Args:
        markdown_content: Markdown source text.
        max_chars: Maximum number of characters to keep. The default of
            50,000 keeps LLM calls from failing on oversized input. Pass
            None to disable trimming.

    Returns:
        str: The plain text, truncated to at most `max_chars` characters.
    """
    html_content = markdown.markdown(markdown_content)

    # Use BeautifulSoup to remove HTML tags and get plain text
    soup = BeautifulSoup(html_content, 'html.parser')
    text = soup.get_text(separator='\n', strip=True)
    # Slicing with None as the bound returns the whole string.
    return text[:max_chars]

In [188]:
# Earlier prompt drafts, kept commented out for reference only.
# SYSTEM_PROMPT = """You are a helpful assistant that performs short and concise summarization of large machine learning models.
# Below you will see the contents of the README FILE (MODEL CARD), as well as the WEB SEARCH RESULTS for this model.
# Use both of these sources to construct the summary, but prioritize web search results.
# Use bullets to summarize the model's competitive characteristics, and pay special attention to those characteristics which best differentiate this model from any others."""

# SYSTEM_PROMPT = """Summarize using bullets."""

# Active system prompt: get_model_competitive_overview passes it to call_llm.
SYSTEM_PROMPT = """Given the information below, summarize the large machine learning model competitive characteristics (how it differentiates from any other model) using no more than 10 single-level bullets. Only output these bullets, not any extra text.

Example of the required output:
* Characteristic 1
* Characteristic 2
* Characteristic 3
..."""

def get_model_competitive_overview(model_description, model_web_info):
    """Ask the LLM for a bulleted competitive summary of one model.

    Args:
        model_description: Markdown text of the model card; it is converted to
            plain text before being sent.
        model_web_info: Web search results, interpolated into the prompt as-is.

    Returns:
        Whatever call_llm returns for SYSTEM_PROMPT and the assembled message.
    """
    readme_text = remove_markdown_formatting(model_description)
    user_message = f"# WEB SEARCH RESULTS\n{model_web_info}\n\n# README FILE (MODEL CARD)\n{readme_text}"
    return call_llm(SYSTEM_PROMPT, user_message)

In [189]:
def get_web_links_from_search_results(search_results):
    """Turn search results into a set of Markdown links.

    Args:
        search_results: Iterable of dicts with 'title' and 'url' entries.

    Returns:
        set[str]: One '[title](url)' string per distinct title/url pair.
    """
    links = set()
    for hit in search_results:
        links.add(f"[{hit['title']}]({hit['url']})")
    return links

In [161]:
# Scratch check: render the search hits bound to `web_info` as Markdown links.
get_web_links_from_search_results(web_info)

{'[DeepSeek v3 - Advanced AI & LLM Model Online](https://deepseekv3.org/)',
 '[DeepSeek-V3 Technical Report - arXiv.org](https://arxiv.org/pdf/2412.19437)',
 '[DeepSeek-V3, ultra-large open-source AI, outperforms ... - VentureBeat](https://venturebeat.com/ai/deepseek-v3-ultra-large-open-source-ai-outperforms-llama-and-qwen-on-launch/)',
 '[Introducing DeepSeek-V3 | DeepSeek API Docs](https://api-docs.deepseek.com/news/news1226)',
 '[deepseek-ai/DeepSeek-V3 at main - Hugging Face](https://huggingface.co/deepseek-ai/DeepSeek-V3/tree/main)'}

In [134]:
# Scratch check: display whatever is currently bound to `web_info`.
web_info

{'query': 'deepseek-ai/DeepSeek-V3',
 'follow_up_questions': None,
 'answer': None,
 'images': [],
 'results': [{'title': 'DeepSeek-V3 is Now The Best Open Source AI Model - Analytics India Magazine',
   'url': 'https://analyticsindiamag.com/ai-news-updates/deepseek-v3-is-the-best-open-source-ai-model/',
   'content': 'DeepSeek-V3 is Now The Best Open Source AI Model In AI News DeepSeek-V3 is Now The Best Open Source AI Model DeepSeek, a Chinese AI research lab backed by High-Flyer Capital Management has released DeepSeek-V3, the latest version of their frontier model. DeepSeek AI also released the benchmark scores, and it outperformed Meta’s flagship Llama 3.1 405B parameter model, among many other closed-source models. DeepSeek-V3 is Now The Best Open Source AI Model Rising 2025 | DE&I in Tech & AI AI Startups Conference. AI Forum for India ADaSci Corporate training program on Generative AI provides a unique opportunity to empower, retain and advance your talent AIM Research produces

In [125]:
# Summarize DeepSeek-V3 from its README plus the search hits in `web_info`.
# NOTE: `web_info` is assigned in a later cell of this notebook
# (`web_info = get_model_info_on_the_web(...)`), so that cell must run first.
output = get_model_competitive_overview(get_model_info_from_hugging_face("deepseek-ai/DeepSeek-V3")['description'], web_info)

In [130]:
# get_model_competitive_overview returns call_llm's result, which is already
# the message content rather than the completion object, so print it directly.
print(output)

* 671 billion total parameters with a Mixture-of-Experts architecture.
* 37 billion activated parameters for efficient resource utilization.
* Introduces an auxiliary-loss-free load balancing strategy.
* Utilizes Multi-Token Prediction for enhanced performance and inference acceleration.
* Trained on a diverse dataset of 14.8 trillion tokens for comprehensive understanding.
* Achieves performance superior to other open-source models and rivals closed-source counterparts.
* Requires only 2.788 million GPU hours for full training, highlighting efficiency.
* Offers stable training with no incidents of loss spikes or rollbacks.
* Supports multiple inference frameworks, including SGLang, LMDeploy, and TensorRT-LLM.
* Capable of handling context windows up to 128k tokens for versatile applications.


In [170]:
# Fetch the trending model IDs and list them one per line.
top_models = get_top_models()
print(*top_models, sep="\n")

< REQ Retrieving top large models from HuggingFace Hub n=10
> RSP ['deepseek-ai/DeepSeek-V3', 'PowerInfer/SmallThinker-3B-Preview', 'deepseek-ai/DeepSeek-V3-Base', 'black-forest-labs/FLUX.1-dev', 'hexgrad/Kokoro-82M', 'meta-llama/Llama-3.3-70B-Instruct', 'StephanST/WALDO30', 'nomic-ai/modernbert-embed-base', 'cognitivecomputations/Dolphin3.0-Llama3.1-8B', 'stabilityai/stable-diffusion-3.5-large']
deepseek-ai/DeepSeek-V3
PowerInfer/SmallThinker-3B-Preview
deepseek-ai/DeepSeek-V3-Base
black-forest-labs/FLUX.1-dev
hexgrad/Kokoro-82M
meta-llama/Llama-3.3-70B-Instruct
StephanST/WALDO30
nomic-ai/modernbert-embed-base
cognitivecomputations/Dolphin3.0-Llama3.1-8B
stabilityai/stable-diffusion-3.5-large


In [13]:
# Scratch check: fetch and display the Hub info dict for DeepSeek-V3.
get_model_info_from_hugging_face("deepseek-ai/DeepSeek-V3")

{'model_id': 'deepseek-ai/DeepSeek-V3',
 'created_at': '25 December 2024 at 12:52:23 UTC',
 'downloads': 71747,
 'likes': 1321,
 'trending_score': 599,
 'description': '<!-- markdownlint-disable first-line-h1 -->\n<!-- markdownlint-disable html -->\n<!-- markdownlint-disable no-duplicate-header -->\n\n<div align="center">\n  <img src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" width="60%" alt="DeepSeek-V3" />\n</div>\n<hr>\n<div align="center" style="line-height: 1;">\n  <a href="https://www.deepseek.com/" target="_blank" style="margin: 2px;">\n    <img alt="Homepage" src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/badge.svg?raw=true" style="display: inline-block; vertical-align: middle;"/>\n  </a>\n  <a href="https://chat.deepseek.com/" target="_blank" style="margin: 2px;">\n    <img alt="Chat" src="https://img.shields.io/badge/🤖%20Chat-DeepSeek%20V3-536af5?color=536af5&logoColor=white" style="display: inline-block; vertical-align

In [137]:
# Web search hits for DeepSeek-V3; other scratch cells in this notebook read `web_info`.
web_info = get_model_info_on_the_web("deepseek-ai/DeepSeek-V3")

In [138]:
# Display the search hits returned by get_model_info_on_the_web.
web_info

[{'title': 'DeepSeek-V3, ultra-large open-source AI, outperforms ... - VentureBeat',
  'url': 'https://venturebeat.com/ai/deepseek-v3-ultra-large-open-source-ai-outperforms-llama-and-qwen-on-launch/',
  'content': 'DeepSeek-V3, ultra-large open-source AI, outperforms Llama and Qwen on launch | VentureBeat DeepSeek-V3, ultra-large open-source AI, outperforms Llama and Qwen on launch Chinese AI startup DeepSeek, known for challenging leading AI vendors with its innovative open-source technologies, today released a new ultra-large model: DeepSeek-V3. According to benchmarks shared by DeepSeek, the offering is already topping the charts, outperforming leading open-source models, including Meta’s Llama 3.1-405B, and closely matching the performance of closed models from Anthropic and OpenAI. Despite the economical training, DeepSeek-V3 has emerged as the strongest open-source model in the market. The company ran multiple benchmarks to compare the performance of the AI and noted that it conv

In [164]:
# Scratch check of the arXiv pipeline for one model:
# search arxiv.org, extract the paper IDs, then resolve each ID to a Markdown link.
arxiv_search_results = get_model_info_on_arxiv("deepseek-ai/DeepSeek-V3")
arxiv_ids = get_arxiv_ids_from_search_results(arxiv_search_results)
print([get_arxiv_links_by_id(arxiv_id) for arxiv_id in arxiv_ids])

['[DeepSeek-VL: Towards Real-World Vision-Language Understanding](http://arxiv.org/abs/2403.05525v2)', '[DeepSeek-V3 Technical Report](http://arxiv.org/abs/2412.19437v1)', '[DeepSeek LLM: Scaling Open-Source Language Models with Longtermism](http://arxiv.org/abs/2401.02954v1)']


TODO:
* Add logs to all the tool invocations
* Write code to combine all calls
* Call LLM to summarize information for each model

In [190]:
def large_models_market_analysis_agent():
    """Gather Hub, web, arXiv and LLM-summary information for the top models.

    For every model returned by get_top_models, this collects the Hub info
    dict and adds web search hits, Markdown links for those hits, Markdown
    links for related arXiv papers and an LLM-written competitive overview.

    Returns:
        dict: Maps each model ID to its info dict. On top of the keys from
        get_model_info_from_hugging_face, each dict has 'web_info',
        'web_links', 'papers' and 'competitive_overview'.
    """
    models_info = {}
    for model_id in get_top_models():
        details = get_model_info_from_hugging_face(model_id)

        details['web_info'] = get_model_info_on_the_web(model_id)
        details['web_links'] = get_web_links_from_search_results(details['web_info'])

        arxiv_search_results = get_model_info_on_arxiv(model_id)
        arxiv_ids = get_arxiv_ids_from_search_results(arxiv_search_results)
        # Sets iterate in arbitrary order; sorting keeps the paper list stable between runs.
        paper_links = (get_arxiv_links_by_id(arxiv_id) for arxiv_id in sorted(arxiv_ids))
        # get_arxiv_links_by_id returns None when the lookup finds nothing;
        # drop those so the report never renders a "* None" bullet.
        details['papers'] = [link for link in paper_links if link is not None]

        details['competitive_overview'] = get_model_competitive_overview(details['description'], details['web_info'])

        models_info[model_id] = details

    return models_info

In [191]:
def get_comparison_table(models_info):
    """Build a DataFrame with one row of headline metrics per model.

    Args:
        models_info: Mapping of model ID to that model's detail dict.

    Returns:
        pandas.DataFrame: A "Model Name" column followed by "Created At",
        "Total Downloads", "Total Likes" and "Trending Score". Each metric
        is read with details.get(key), so an absent key contributes None.
    """
    # Detail key -> column heading, in display order.
    column_labels = {
        "created_at": "Created At",
        "downloads": "Total Downloads",
        "likes": "Total Likes",
        "trending_score": "Trending Score",
    }

    rows = []
    for model_name, details in models_info.items():
        row = {"Model Name": model_name}
        for key, label in column_labels.items():
            row[label] = details.get(key)
        rows.append(row)

    return pd.DataFrame(rows)

In [192]:
def get_models_overview(models_info):
    """Render a Markdown report with one section per model.

    Each section has a heading with the model ID, the model's competitive
    overview, a bullet list of web links and a bullet list of papers.

    Args:
        models_info: Mapping of model ID to a dict with 'competitive_overview',
            'web_links' and 'papers' entries.

    Returns:
        str: The concatenated Markdown; an empty string for an empty mapping.
    """
    sections = []
    for model_id, details in models_info.items():
        overview = details['competitive_overview']
        web_bullets = "".join(f"* {web_link}\n" for web_link in details['web_links'])
        paper_bullets = "".join(f"* {paper}\n" for paper in details['papers'])
        sections.append(
            f"\n# {model_id}\n\n"
            + overview
            + "\n\nMentioned in the following web pages:\n"
            + web_bullets
            + "\nMentioned in the following papers:\n"
            + paper_bullets
        )
    return "".join(sections)

In [None]:
# Run the full pipeline; every tool call inside it logs its request and response.
models_info = large_models_market_analysis_agent()

< REQ Retrieving top large models from HuggingFace Hub n=10
> RSP ['deepseek-ai/DeepSeek-V3', 'PowerInfer/SmallThinker-3B-Preview', 'deepseek-ai/DeepSeek-V3-Base', 'black-forest-labs/FLUX.1-dev', 'hexgrad/Kokoro-82M', 'meta-llama/Llama-3.3-70B-Instruct', 'StephanST/WALDO30', 'nomic-ai/modernbert-embed-base', 'cognitivecomputations/Dolphin3.0-Llama3.1-8B', 'stabilityai/stable-diffusion-3.5-large']
< REQ Retrieving model information from HuggingFace Hub model_id=deepseek-ai/DeepSeek-V3
> RSP {'model_id': 'deepseek-ai/DeepSeek-V3', 'created_at': '25 December 2024 at 12:52:23 UTC', 'downloads': 74084, 'likes': 1412, 'trending_score': 612, 'description': '<!-- markdownlint-disable first-line-h1 -->\n<!-- markdownlint-disable html -->\n<!-- markdownlint-disable no-duplicate-header -->\n\n<div align="center">\n  <img src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" width="60%" alt="DeepSeek-V3" />\n</div>\n<hr>\n<div align="center" style="line-height: 1;">\

In [153]:
# Show the metrics table via the pandas Styler, with the row index hidden.
get_comparison_table(models_info).style.hide(axis='index')

Model Name,Created At,Total Downloads,Total Likes,Trending Score
deepseek-ai/DeepSeek-V3,25 December 2024 at 12:52:23 UTC,74084,1411,611
PowerInfer/SmallThinker-3B-Preview,12 December 2024 at 11:56:09 UTC,6996,288,217
deepseek-ai/DeepSeek-V3-Base,25 December 2024 at 12:52:06 UTC,8663,1180,188
black-forest-labs/FLUX.1-dev,31 July 2024 at 21:13:44 UTC,1166389,7799,181
hexgrad/Kokoro-82M,26 December 2024 at 00:20:08 UTC,1479,269,168
meta-llama/Llama-3.3-70B-Instruct,26 November 2024 at 16:08:47 UTC,416929,1516,127
StephanST/WALDO30,02 October 2024 at 14:20:40 UTC,0,163,102
nomic-ai/modernbert-embed-base,29 December 2024 at 23:51:30 UTC,4837,135,89
cognitivecomputations/Dolphin3.0-Llama3.1-8B,29 December 2024 at 18:37:00 UTC,242,82,82
stabilityai/stable-diffusion-3.5-large,22 October 2024 at 07:29:57 UTC,127483,1807,66


In [160]:
# Render the per-model overview report as formatted Markdown.
display(Markdown(get_models_overview(models_info)))


# deepseek-ai/DeepSeek-V3

TBD

Mentioned in the following web pages:
* [DeepSeek v3 - Advanced AI & LLM Model Online](https://deepseekv3.org/)
* [DeepSeek-V3 Technical Report - arXiv.org](https://arxiv.org/pdf/2412.19437)
* [DeepSeek V3: Advanced AI Language Model with 671B Parameters](https://www.deepseekv3.com/en)
* [Introducing DeepSeek-V3 | DeepSeek API Docs](https://api-docs.deepseek.com/news/news1226)
* [DeepSeek Online - Try DeepSeek V3 Free | No Registration Required](https://www.deepseekv3.net/)

Mentioned in the following papers:
* ['[DeepSeek-VL: Towards Real-World Vision-Language Understanding](http://arxiv.org/abs/2403.05525v2)']
* ['[DeepSeek-V3 Technical Report](http://arxiv.org/abs/2412.19437v1)']
* ['[DeepSeek LLM: Scaling Open-Source Language Models with Longtermism](http://arxiv.org/abs/2401.02954v1)']

# PowerInfer/SmallThinker-3B-Preview

TBD

Mentioned in the following web pages:
* [SmallThinker 3B: A Small Thinking Model Revolutionizing AI Efficiency](https://pub.towardsai.net/smallthinker-3b-a-small-thinking-model-revolutionizing-ai-efficiency-f528cf7d6906)
* [smallthinker:3b](https://registry.ollama.com/library/smallthinker:3b)
* [Testing SmallThinker 3B Preview by PowerInfer - YouTube](https://www.youtube.com/watch?v=OVNnXQp_wNU)
* [SmallThinker-由Qwen 2.5 3B而来的，全新小型推理模型](https://www.ilinkandlink.com/2025/01/02/smallthinker/)
* [SmallThinker 3B Preview By PowerInfer: Benchmarks, Features and ...](https://llm.extractum.io/model/PowerInfer/SmallThinker-3B-Preview,6YvWQRdkYbb2o0HqvU7LTJ)

Mentioned in the following papers:
* ['[PowerInfer-2: Fast Large Language Model Inference on a Smartphone](http://arxiv.org/abs/2406.06282v3)']
* ['[PowerInfer: Fast Large Language Model Serving with a Consumer-grade GPU](http://arxiv.org/abs/2312.12456v2)']

# deepseek-ai/DeepSeek-V3-Base

TBD

Mentioned in the following web pages:
* [DeepSeek-V3, ultra-large open-source AI, outperforms ... - VentureBeat](https://venturebeat.com/ai/deepseek-v3-ultra-large-open-source-ai-outperforms-llama-and-qwen-on-launch/)
* [DeepSeek-V3 Technical Report - arXiv.org](https://arxiv.org/pdf/2412.19437)
* [Paper page - DeepSeek-V3 Technical Report - Hugging Face](https://huggingface.co/papers/2412.19437)
* [README_WEIGHTS.md · deepseek-ai/DeepSeek-V3-Base at main - Hugging Face](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/README_WEIGHTS.md)
* [deepseek-ai/DeepSeek-V3 - GitHub](https://github.com/deepseek-ai/DeepSeek-V3)

Mentioned in the following papers:
* ['[DeepSeek-VL2: Mixture-of-Experts Vision-Language Models for Advanced Multimodal Understanding](http://arxiv.org/abs/2412.10302v1)']
* ['[DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](http://arxiv.org/abs/2402.03300v3)']
* ['[DeepSeek-V3 Technical Report](http://arxiv.org/abs/2412.19437v1)']
* ['[DeepSeek LLM: Scaling Open-Source Language Models with Longtermism](http://arxiv.org/abs/2401.02954v1)']

# black-forest-labs/FLUX.1-dev

TBD

Mentioned in the following web pages:
* [black-forest-labs/FLUX.1-dev at main - Hugging Face](https://huggingface.co/black-forest-labs/FLUX.1-dev/tree/main)
* [FLUX 1.1 - BlackForestLabs](https://blackforestlabs.org/flux-1-1/)
* [README.md · black-forest-labs/FLUX.1-dev at main - Hugging Face](https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/README.md)
* [FLUX.1 Dev | FLUX Dev AI Image Generator by Black Forest Labs](https://flux1ai.com/dev)
* [black-forest-labs/FLUX.1-dev - Hugging Face](https://huggingface.co/black-forest-labs/FLUX.1-dev)

Mentioned in the following papers:
* ['[SVDQuant: Absorbing Outliers by Low-Rank Components for 4-Bit Diffusion Models](http://arxiv.org/abs/2411.05007v2)']
* ['[Schedule On the Fly: Diffusion Time Prediction for Faster and Better Image Generation](http://arxiv.org/abs/2412.01243v1)']
* ['[OminiControl: Minimal and Universal Control for Diffusion Transformer](http://arxiv.org/abs/2411.15098v3)']
* ['[I-Max: Maximize the Resolution Potential of Pre-trained Rectified Flow Transformers with Projected Flow](http://arxiv.org/abs/2410.07536v2)']
* ['[Training-free Regional Prompting for Diffusion Transformers](http://arxiv.org/abs/2411.02395v1)']
* ['[Diffusion Beats Autoregressive: An Evaluation of Compositional Generation in Text-to-Image Models](http://arxiv.org/abs/2410.22775v1)']

# hexgrad/Kokoro-82M

TBD

Mentioned in the following web pages:
* [Hugging Face - The AI community building the future.](https://hf.wing.moe/)
* [GitHub - remsky/Kokoro-FastAPI: Dockerized FastAPI wrapper for Kokoro ...](https://github.com/remsky/Kokoro-FastAPI)
* [hexgrad/Kokoro-82M · [TODO] FP16 Inference - Hugging Face](https://huggingface.co/hexgrad/Kokoro-82M/discussions/4)
* [Kokoro 82M Installation - Best TTS Model to Run on Google Colab](https://www.youtube.com/watch?v=up-ZG35uuvQ)
* [Models - Hugging Face](https://hf.wing.moe/models)

Mentioned in the following papers:
* ['[StyleTTS: A Style-Based Generative Model for Natural and Diverse Text-to-Speech Synthesis](http://arxiv.org/abs/2205.15439v2)']
* ['[A Survey of Resource-efficient LLM and Multimodal Foundation Models](http://arxiv.org/abs/2401.08092v2)']
* ['[HEMM: Holistic Evaluation of Multimodal Foundation Models](http://arxiv.org/abs/2407.03418v1)']
* ['[Grad-TTS: A Diffusion Probabilistic Model for Text-to-Speech](http://arxiv.org/abs/2105.06337v2)']
* ['[PGTask: Introducing the Task of Profile Generation from Dialogues](http://arxiv.org/abs/2304.06634v2)']
* ['[Enhancing Inflation Nowcasting with LLM: Sentiment Analysis on News](http://arxiv.org/abs/2410.20198v1)']
* ['[StyleTTS 2: Towards Human-Level Text-to-Speech through Style Diffusion and Adversarial Training with Large Speech Language Models](http://arxiv.org/abs/2306.07691v2)']
* ['[Extending Whisper with prompt tuning to target-speaker ASR](http://arxiv.org/abs/2312.08079v2)']

# meta-llama/Llama-3.3-70B-Instruct

TBD

Mentioned in the following web pages:
* [meta / llama-3.3-70b-instruct - docs.api.nvidia.com](https://docs.api.nvidia.com/nim/reference/meta-llama-3_3-70b-instruct)
* [Llama 3.3 | Model Cards and Prompt formats](https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_3/)
* [Meta's new Llama 3.3 70B Instruct model now available on watsonx.ai - IBM](https://www.ibm.com/new/announcements/meta-s-new-llama-3-3-70b-instruct-model-now-available-on-watsonx-ai)
* [Llama 3.3 70B Instruct - API, Providers, Stats | OpenRouter](https://openrouter.ai/meta-llama/llama-3.3-70b-instruct)
* [unsloth/Llama-3.3-70B-Instruct - Hugging Face](https://huggingface.co/unsloth/Llama-3.3-70B-Instruct)

Mentioned in the following papers:
* ['[MGH Radiology Llama: A Llama 3 70B Model for Radiology](http://arxiv.org/abs/2408.11848v2)']
* ['[Domain Adaptation of Llama3-70B-Instruct through Continual Pre-Training and Model Merging: A Comprehensive Evaluation](http://arxiv.org/abs/2406.14971v1)']
* ['[The Llama 3 Herd of Models](http://arxiv.org/abs/2407.21783v3)']
* ['[Non-instructional Fine-tuning: Enabling Instruction-Following Capabilities in Pre-trained Language Models without Instruction-Following Data](http://arxiv.org/abs/2409.00096v1)']
* ['[LiveBench: A Challenging, Contamination-Free LLM Benchmark](http://arxiv.org/abs/2406.19314v1)']
* ['[Confidential Computing on NVIDIA Hopper GPUs: A Performance Benchmark Study](http://arxiv.org/abs/2409.03992v4)']

# StephanST/WALDO30

TBD

Mentioned in the following web pages:
* [Stephan Sturges on LinkedIn: WALDO 3.0 is coming along nicely! This ...](https://www.linkedin.com/posts/stephanst_waldo-30-is-coming-along-nicely-this-release-activity-7194234634816081922-SeGA)
* [WALDO/Readme.md at master · stephansturges/WALDO · GitHub](https://github.com/stephansturges/WALDO/blob/master/Readme.md)
* [WALDO30 | AI Model Details](https://www.aimodels.fyi/models/huggingFace/waldo30-stephanst)
* [Waldo30 - use with Halio - General - Hailo Community](https://community.hailo.ai/t/waldo30-use-with-halio/4832)
* [stephansturges/WALDO - GitHub](https://github.com/stephansturges/WALDO)

Mentioned in the following papers:
* ['[Dark Matter (H)eats Young Planets](http://arxiv.org/abs/2309.02495v3)']
* ['[Lagrangian Neural Networks](http://arxiv.org/abs/2003.04630v2)']
* ['[Kernel Methods for Interferometric Imaging](http://arxiv.org/abs/2412.01908v1)']
* ['[OPT: Open Pre-trained Transformer Language Models](http://arxiv.org/abs/2205.01068v4)']
* ['[A Broad-line, Low-luminosity Active Galactic Nucleus at ${z=7.3}$ Anchoring a Large Galaxy Overdensity](http://arxiv.org/abs/2411.11534v1)']
* ['[Applications of machine learning in gravitational wave research with current interferometric detectors](http://arxiv.org/abs/2412.15046v1)']
* ['[MSA-3D: Metallicity Gradients in Galaxies at $z\\sim1$ with JWST/NIRSpec Slit-stepping Spectroscopy](http://arxiv.org/abs/2409.01616v3)']
* ['[Panning for gold with the Neil Gehrels Swift Observatory: an optimal strategy for finding the counterparts to gravitational wave events](http://arxiv.org/abs/2411.05072v2)']
* ['[JWST Imaging of Edge-on Protoplanetary Disks. IV. Mid-infrared Dust Scattering in the HH 30 disk](http://arxiv.org/abs/2412.07523v1)']

# nomic-ai/modernbert-embed-base

TBD

Mentioned in the following web pages:
* [[Hands-on] RAG on Docs Using ModernBERT - by Avi Chawla](https://blog.dailydoseofds.com/p/hands-on-rag-on-docs-using-modernbert)
* [Nomic AI Launches ModernBERT-Embed-Base, Trained on... | DeepNewz](https://deepnewz.com/ai-modeling/nomic-ai-launches-modernbert-embed-base-trained-on-235-million-documents-256-f4acadfc)
* [Install ModernBERT Embed Locally - Great New RAG Model](https://www.youtube.com/watch?v=HcVav0IqZlk)
* [ModernBERT: A new improved BERT for text embeddings](https://medium.com/data-science-in-your-pocket/modernbert-a-new-improved-bert-for-text-embeddings-538239202527)
* [ModernBERT — A modernized BERT for NLP tasks | UnfoldAI](https://unfoldai.com/modernbert/)

Mentioned in the following papers:
* ['[CoRNStack: High-Quality Contrastive Data for Better Code Ranking](http://arxiv.org/abs/2412.01007v2)']
* ['[Arctic-Embed: Scalable, Efficient, and Accurate Text Embedding Models](http://arxiv.org/abs/2405.05374v1)']
* ['[Smarter, Better, Faster, Longer: A Modern Bidirectional Encoder for Fast, Memory Efficient, and Long Context Finetuning and Inference](http://arxiv.org/abs/2412.13663v2)']
* ['[Nomic Embed: Training a Reproducible Long Context Text Embedder](http://arxiv.org/abs/2402.01613v1)']

# cognitivecomputations/Dolphin3.0-Llama3.1-8B

TBD

Mentioned in the following web pages:
* [Dolphin3.0 Llama3.1 8B by cognitivecomputations](https://llm.extractum.io/model/cognitivecomputations/Dolphin3.0-Llama3.1-8B,7rToucpFJjB0d8WeBFJ57p)
* [dolphin-llama3](https://ollama.com/library/dolphin-llama3)
* [nchapman/dolphin3.0-llama3:8b](https://ollama.com/nchapman/dolphin3.0-llama3:8b)
* [Cognitivecomputations/Dolphin3.0-Llama3.1-8B uncensored - Hacker News](https://news.ycombinator.com/item?id=42607271)
* [Dolphin 3.0 Released (Llama 3.1 + 3.2 + Qwen 2.5): A Local-First ...](https://www.marktechpost.com/2025/01/05/dolphin-3-0-released-llama-3-1-3-2-qwen-2-5-a-local-first-steerable-ai-model-that-puts-you-in-control-of-your-ai-stack-and-alignment/)

Mentioned in the following papers:
* ['[Hermes 3 Technical Report](http://arxiv.org/abs/2408.11857v1)']
* ['[Llama Scope: Extracting Millions of Features from Llama-3.1-8B with Sparse Autoencoders](http://arxiv.org/abs/2410.20526v1)']
* ["[Extending Llama-3's Context Ten-Fold Overnight](http://arxiv.org/abs/2404.19553v1)"]
* ['[The Llama 3 Herd of Models](http://arxiv.org/abs/2407.21783v3)']
* ['[Llama 3 Meets MoE: Efficient Upcycling](http://arxiv.org/abs/2412.09952v1)']
* ['[Applying Refusal-Vector Ablation to Llama 3.1 70B Agents](http://arxiv.org/abs/2410.10871v1)']
* ['[Instruction-Tuning Llama-3-8B Excels in City-Scale Mobility Prediction](http://arxiv.org/abs/2410.23692v1)']
* ['[The Uniqueness of LLaMA3-70B Series with Per-Channel Quantization](http://arxiv.org/abs/2408.15301v2)']
* ['[EXAONE 3.0 7.8B Instruction Tuned Language Model](http://arxiv.org/abs/2408.03541v3)']

# stabilityai/stable-diffusion-3.5-large

TBD

Mentioned in the following web pages:
* [stabilityai/stable-diffusion-3.5-large · Hugging Face](https://huggingface.co/stabilityai/stable-diffusion-3.5-large)
* [ControlNets for Stable Diffusion 3.5 Large - Stability AI](https://stability.ai/news/sd3-5-large-controlnets)
* [Stable Diffusion 3.5 Large - a Hugging Face Space by stabilityai](https://huggingface.co/spaces/stabilityai/stable-diffusion-3.5-large)
* [GitHub - Stability-AI/sd3.5](https://github.com/Stability-AI/sd3.5)
* [Introducing Stable Diffusion 3.5 - Stability AI](https://stability.ai/news/introducing-stable-diffusion-3-5)

Mentioned in the following papers:
* ['[Detecting AutoEncoder is Enough to Catch LDM Generated Images](http://arxiv.org/abs/2411.06441v1)']
* ['[Schedule On the Fly: Diffusion Time Prediction for Faster and Better Image Generation](http://arxiv.org/abs/2412.01243v1)']
* ['[IP-Adapter: Text Compatible Image Prompt Adapter for Text-to-Image Diffusion Models](http://arxiv.org/abs/2308.06721v1)']
* ['[Unveiling Redundancy in Diffusion Transformers (DiTs): A Systematic Study](http://arxiv.org/abs/2411.13588v1)']
* ['[Stable Diffusion is a Natural Cross-Modal Decoder for Layered AI-generated Image Compression](http://arxiv.org/abs/2412.12982v1)']
* ['[DiffusionPipe: Training Large Diffusion Models with Efficient Pipelines](http://arxiv.org/abs/2405.01248v1)']
* ['[DiffusionDB: A Large-scale Prompt Gallery Dataset for Text-to-Image Generative Models](http://arxiv.org/abs/2210.14896v4)']
* ['[Generated Faces in the Wild: Quantitative Comparison of Stable Diffusion, Midjourney and DALL-E 2](http://arxiv.org/abs/2210.00586v2)']
* ['[Context-Aware Full Body Anonymization using Text-to-Image Diffusion Models](http://arxiv.org/abs/2410.08551v2)']
* ['[Learning from Mistakes: Iterative Prompt Relabeling for Text-to-Image Diffusion Model Training](http://arxiv.org/abs/2312.16204v3)']
