In [1]:
# !pip install googlesearch-python
# !pip install -U crawl4ai

In [2]:
import crawl4ai
import os
import asyncio
import nest_asyncio
nest_asyncio.apply()
from datetime import datetime

from dotenv import load_dotenv
from googlesearch import search
from openai import OpenAI
from IPython.display import Markdown, display, update_display

from playwright.async_api import async_playwright
from crawl4ai.content_filter_strategy import PruningContentFilter
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
from crawl4ai import AsyncWebCrawler, CacheMode, BrowserConfig, CrawlerRunConfig, CacheMode

home_dir = os.path.expanduser("~")

In [3]:
topic = 'list LLM API provider and reviews'
num_results = 10

# local_model = "qwen3-14b-mlx"
# local_api_key = 'None'
# local_base_url = 'http://127.0.0.1:1234/v1'

clean_up_model = "gemini-2.0-flash-lite"
clean_up_api_key = os.getenv('GOOGLE_API_KEY')
clean_up_base_url = 'https://generativelanguage.googleapis.com/v1beta/openai/'

report_model = "gemini-2.0-flash"
report_api_key = os.getenv('GOOGLE_API_KEY')
report_base_url = 'https://generativelanguage.googleapis.com/v1beta/openai/'

data_path = f'Reports extracted'

In [4]:
def get_list_of_url_from_google_search(topic, num_results):
    list_of_urls = []
    
    google_url = search(topic, num_results=num_results+1,unique=True, region='sg', 
                          sleep_interval=5, advanced=False, safe=None)
    for url in google_url:
        list_of_urls.append(url)
        
    return list_of_urls[1:]

async def scrape_site_clean_makedown(url):
    print(url)
    browser_config = BrowserConfig(browser_type="chromium",  # Type of browser to simulate
                                    headless=True,  # Whether to run in headless mode (no GUI)
                                    verbose=True,  # Enable verbose logging)
                                  )
    
    content_filter = PruningContentFilter()
    markdown_generator = DefaultMarkdownGenerator(content_filter=content_filter)
    config = CrawlerRunConfig(markdown_generator=markdown_generator)
    
    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            scrape_data = await crawler.arun(url, config=config)
    
        return scrape_data.markdown.fit_markdown
    except:
        return ""

def call_llm(system_prompt, user_prompt, stream,
            model, base_url, api_key, temperature=0.0):

    messages = [{"role": "system", "content": system_prompt,
                "role": "user","content": user_prompt}]
    
    openai = OpenAI(base_url=base_url, 
                          api_key=api_key)
    if stream == True:

        stream_response = openai.chat.completions.create(model = model,
                                            messages = messages,
                                            stream=stream,
                                            temperature=temperature)
        
        response = ""
        display_handle = display(Markdown(""), display_id=True)
        
        for chunk in stream_response:
                response += chunk.choices[0].delta.content or ''
                response = response.replace("```","").replace("markdown", "")
                update_display(Markdown(response), 
                               display_id=display_handle.display_id)
            
    else:
        response = openai.chat.completions.create(model = model,
                                            messages = messages, 
                                            temperature=temperature)

        response = response.choices[0].message.content
        
    response_without_thinking = response.split('/think>')[-1]
    
    return response_without_thinking

def get_report(topic, num_results, folder):
    list_of_url = get_list_of_url_from_google_search(topic, num_results)

    scrape_data_list = []
    
    for url in list_of_url:
        result = asyncio.run(scrape_site_clean_makedown(url=url))
        result += f"URL : {url}"
        if len(result) >= 100:
            scrape_data_list.append(result)
    clean_data_with_llm_list = []

    for data in scrape_data_list:
        prompt_clean_up_temp = prompt_clean_up + data
        clean_data = call_llm(system_clean_up, prompt_clean_up_temp, stream = False,
                             model=clean_up_model, base_url=clean_up_base_url, api_key=clean_up_api_key)
        clean_data_with_llm_list.append(clean_data)

    extracted_text = ''
    for counter, data in enumerate(clean_data_with_llm_list):
        extracted_text += f'\nSource: {counter+1}\n'
        extracted_text += data
    
    prompt_report_temp = prompt_report + extracted_text
    report = call_llm(system_report, prompt_report_temp, stream = False,
                             model=report_model, base_url=report_base_url, api_key=report_api_key)
    
    save_response_to_txt(report, folder)

    return report

def save_response_to_txt(report, folder):
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    counter = 0
    file_name = ''
    
    while (len(file_name) > 200) | (file_name == ''):
        file_name = report.replace('```markdown', '')\
          .replace('```', '').splitlines()[counter]\
        .replace('#', '')\
        .replace(':', '')\
        .replace('*', '').replace("''", '')
        
        counter+= 1
    
    while file_name[0]==" ":
        file_name = file_name[1:]
        
    file_name = file_name.replace('_', '')\
                            .replace('#', '')\
                            .replace(':', '')\
                            .replace('*', '')\
                            .replace('/','')\
                            .replace('|','')
                        
        
    f = open(f'{folder}/{file_name}_{now}_.txt', 'w')
    f.write(report.replace('```markdown', '').replace('```', ''))
    f.close()
    
    print(f'Saved: {folder}_{file_name}.txt \n\n')

In [5]:
system_clean_up = """You are a helpful Assistance, 
                    that clean up  from a extracted text .
                    It is very important to all include URL link.
                    
                    Respond in markdown. 
        
                    It is very important to only use the extracted text 
                    and to cross check only with the extracted text 
                    and if you don't know,
                    don't try to make up any details.

                    It is very important not to summarize and provide all details.
                    It is very important to include all URL links.
                """

prompt_clean_up = f"""Below is an extracted text from a website. 
                        help clean up from a extracted text.
                        
                        that is based on the topic: {topic}
                        
                       extracted text:
                   
            """

system_report= """You are a helpful Assistance that looks extracted text
                Based only on the extracted text analyze extracted text and write report. 
                
                Respond in markdown. 

                It is very important have list the all the URL links used at the end of the report.
                It is very important not to say 
                "based solely on the provided extracted text" or "Based on the extracted text"
                It is very important to be as detailed as possible.
                It is very important to only based it only on extracted text,
                If you don't know, don't try to make up any details

                i
                """

prompt_report = f"""Below is an extracted text, 
                    
                    write me analyze extracted text and write report in details
                    based on the topic: {topic}, 
                    
                    It is very important have list the all the URL links used at the end of the report.
                    extracted text:
                    
            """

In [6]:
report = get_report(topic, 
                    num_results, 
                    folder=data_path)

https://www.keywordsai.co/blog/top-10-llm-api-providers


https://github.com/cheahjs/free-llm-api-resources


https://medium.com/@mplsmntowers/free-api-providers-9f25b64bf194


https://www.helicone.ai/blog/llm-api-providers


https://apidog.com/blog/free-open-source-llm-apis/


https://zapier.com/blog/best-llm/


https://research.aimultiple.com/llm-pricing/


https://www.reddit.com/r/LLMDevs/comments/1dnb3ob/llm_apis_price_comparison_by_model/


https://datasciencedojo.com/blog/10-top-llm-companies/


https://konfigthis.com/blog/llmops-apis/


Saved: Reports extracted_LLM API Providers and Reviews A Comprehensive Report.txt 




In [7]:
Markdown(report)

## LLM API Providers and Reviews: A Comprehensive Report

This report analyzes the provided text to identify and review various Large Language Model (LLM) API providers. It consolidates information from multiple sources to offer a comprehensive overview of the landscape, including pricing, models offered, key features, and potential use cases.

**1. Introduction**

The increasing demand for AI-powered applications has led to a proliferation of LLM API providers. Choosing the right provider is crucial for developers and businesses looking to leverage the power of LLMs. This report aims to simplify the selection process by providing a detailed comparison of leading platforms.

**2. Top LLM API Providers**

Based on the extracted text, the following providers are identified as key players in the LLM API space:

**2.1 Paid Providers**

*   **Fireworks AI:**
    *   **What:** Generative inference platform focused on speed, scalability, and production readiness. Uses the FireAttention engine.
    *   **Why:** Low latency, stable hosting, and an active community.
    *   **Models:** Hosts hundreds of open-source models (DeepSeek v3, Llama, Qwen, Stable Diffusion). Supports Multi-LoRA fine-tuning.
    *   **Pricing:** Based on model size and complexity ($0.10 - $3.00 per million tokens).
    *   **Best for:** Speed and scalability in multi-modal AI tasks.
*   **Together AI:**
    *   **What:** High-performance inference platform with automated optimizations for open-source LLMs.
    *   **Why:** Streamlines development by handling infrastructure tasks (caching, load balancing).
    *   **Models:** Supports hundreds of open-source LLMs.
    *   **Pricing:** Pay-as-you-go.
    *   **Best for:** Large-scale model deployment with low latency and strong privacy.
*   **OpenRouter:**
    *   **What:** Unified interface providing access to a wide range of AI models (open-source and commercial) through a single API.
    *   **Why:** Flexibility to switch between LLMs.
    *   **Models:** Access to models from OpenAI, Anthropic, Fireworks, Together AI, and more.
    *   **Pricing:** No extra usage fees beyond the model provider's charges, plus a 5% fee on deposits.
    *   **Best for:** Routing traffic across multiple LLMs.
*   **Groq:**
    *   **What:** High-speed inference platform built on LPU (Logical Processing Unit) technology.
    *   **Why:** Fastest possible performance.
    *   **Models:** Llama, Mistral.
    *   **Pricing:** Token-based, geared towards enterprise use.
    *   **Best for:** High-performance inferencing with hardware optimization.
*   **Hugging Face:**
    *   **What:** Open-source platform for building, training, and deploying machine learning models.
    *   **Why:** Large model hub and support for various programming languages and cloud platforms.
    *   **Models:** Massive catalog of open-source models.
    *   **Pricing:** Pay-by-the-hour model for hosting on AWS or GCP.
    *   **Best for:** Getting started with Natural Language Processing (NLP) projects.
*   **Replicate:**
    *   **What:** Cloud-based service for running and managing ML models.
    *   **Why:** Quick deployment and fine-tuning without complex setup.
    *   **Models:** Large collection of open-source models.
    *   **Pricing:** Pay-as-you-go based on runtime.
    *   **Best for:** Rapid prototyping and experimenting with open-source or custom models.
*   **Perplexity AI:**
    *   **What:** Known for intelligent search and Q&A. Offers `pplx-api` for real-time data access.
    *   **Why:** Direct internet access for up-to-the-minute information.
    *   **Models:** Several Llama-based models with extended context lengths.
    *   **Pricing:** $5 per 1,000 requests, plus per-token costs ($0.20 - $5 per million tokens).
    *   **Best for:** AI-driven search and knowledge applications.
*   **Hyperbolic:**
    *   **What:** Provides AI inference services and affordable GPU compute.
    *   **Why:** Flexibility in choosing GPU power at lower costs.
    *   **Pricing:** Charges by GPU usage.
    *   **Best for:** Cost-effective GPU rental and API access.
*   **Databricks:**
    *   **What:** Unified analytics platform with its own LLM, DBRx.
    *   **Why:** Seamless integration with existing data pipelines and support for ML/AI projects.
    *   **Models:** Access to DBRx.
    *   **Pricing:** Varies based on workload, storage, and compute.
*   **Mistral:**
    *   **What:** French company specializing in open-source LLMs. Offers flexible deployment options.
    *   **Why:** Handles complex reasoning tasks, easy to deploy, and cost-effective.
    *   **Models:** Mistral Large 24.11, Pixtral Large, Mistral Small 24.09, Codestral, Ministral 8B & 3B, Mistral Embed, Mistral Moderation 24.11
    *   **Pricing:** Varies by model (input/output tokens).
    *   **Best for:** Specialized Models.
*   **DeepInfra:**
    *   **What:** Cloud-based hosting of large-scale AI models.
    *   **Why:** Easy to use, cloud-centric approach.
    *   **Pricing:** Usage-based, billed by token or at execution time.
    *   **Best for:** Cloud-based hosting of large-scale AI models.
*   **Anyscale:**
    *   **What:** Platform for scaling compute-intensive AI workloads.
    *   **Why:** Governance, admin, and billing controls, security and privacy features.
    *   **Pricing:** Usage-based, enterprise plans available.
    *   **Best for:** End-to-end AI development and deployment and applications requiring high scalability.
*   **Novita AI:**
    *   **What:** Cloud infrastructure platform providing Model APIs and dedicated GPU resources.
    *   **Why:** Up to 50% lower costs on model inference, globally distributed GPU network.
    *   **Pricing:** Usage-based, billed by token or by execution time.
    *   **Best for:** Low-cost, reliable AI model deployment with both serverless and dedicated GPU options.
*   **Cohere:**
    *   **What:** Provides access to Cohere's LLMs.
    *   **Limits:** 20 requests/minute, 1,000 requests/month
    *   **Models:** Models share a common quota. Command-A, Command-R7B, Command-R+, Command-R, Aya Expanse 8B, Aya Expanse 32B, Aya Vision 8B, Aya Vision 32B
*   **Amazon:**
    *   **Models:** Amazon Nova Micro, Amazon Nova Lite, and Amazon Nova Pro
    *   **Access:** API

**2.2 Free Providers**

*   **OpenRouter:**
    *   **Limits:** 20 requests/minute, 50 requests/day, 1000 requests/day with $10 lifetime top-up.
    *   **Models:** A wide variety of open-source models.
*   **Google AI Studio:**
    *   **Models:** Gemini series, Gemma series.
    *   **Note:** Data is used for training when used outside of the UK/CH/EEA/EU.
*   **NVIDIA NIM:**
    *   **Requirements:** Phone number verification required. Models tend to be context window limited.
    *   **Limits:** 40 requests/minute
    *   **Models:** Various open models.
*   **Mistral (La Plateforme):**
    *   **Requirements:** Free tier (Experiment plan) requires opting into data training, and phone number verification.
    *   **Limits (per-model):** 1 request/second, 500,000 tokens/minute, 1,000,000,000 tokens/month
    *   **Models:** Open and Proprietary Mistral models.
*   **Mistral (Codestral):**
    *   **Status:** Currently free to use. Monthly subscription based.
    *   **Requirements:** Phone number verification.
    *   **Limits:** 30 requests/minute, 2,000 requests/day
    *   **Models:** Codestral
*   **HuggingFace Inference Providers:**
    *   **Note:** HuggingFace Serverless Inference limited to models smaller than 10GB. Some popular models are supported even if they exceed 10GB.
    *   **Limits:** $0.10/month in credits
    *   **Models:** Various open models across supported providers
*   **Cerebras:**
    *   **Limits:** Free tier restricted to 8K context.
    *   **Models:** Llama series, Gemma series, DeepSeek series, Groq series, Mistral series, Qwen series, Whisper series.
*   **Together (Free):**
    *   **Limits:** Up to 60 requests/minute
    *   **Models:** Llama series, DeepSeek series.
*   **GitHub Models:**
    *   **Limits:** Dependent on Copilot subscription tier (Free/Pro/Pro+/Business/Enterprise)
    *   **Models:** A wide variety of open-source and proprietary models.
*   **RunPod:**
    *   **Description:** Distributed, decentralized crypto-based compute. Data is sent to individual hosts.
    *   **Models:** A wide variety of open-source models.
*   **Cloudflare Workers AI:**
    *   **Limits:** 10,000 neurons/day
    *   **Models:** A wide variety of open-source models.
*   **Google Cloud Vertex AI:**
    *   **Requirements:** Very stringent payment verification for Google Cloud.
    *   **Models:** Gemini series, Llama series.

**2.3 Providers with Trial Credits**

*   **Together:** $1 credit when you add a payment method.
*   **Fireworks:** $1 credit.
*   **Baseten:** $5 credit when you add a payment method.
*   **Nebius AI:** $30 credit.
*   **Novita AI:** $0.5 for 1 year, $10 for 3 months for LLMs with referral code + GitHub account connection.
*   **Scale AI:** $10 for 3 months.
*   **Solar AI:** $10 for 3 months.
*   **NLP Cloud:** $15 credit, requires phone number verification.
*   **Alibaba Cloud (International) Model Studio:** 1 million tokens/model.
*   **Inference.net:** $1 credit, $25 on responding to email survey.
*   **Replicate:** $1 credit.
*   **nCompass:** $5 credit.
*   **Hyperbolic:** $1 credit.
*   **SambaNova Cloud:** $5 for 3 months.
*   **Scaleway Generative APIs:** 1,000,000 free tokens.

**3. Key Considerations for Choosing an LLM API Provider**

*   **Project Requirements:** Define your project's specific needs, including speed, cost, model variety, real-time data access, and security requirements.
*   **Model Variety:** Consider the range of models offered by each provider and whether they align with your use case.
*   **Pricing Structure:** Understand the pricing models (pay-as-you-go, subscription-based, token-based) and potential hidden costs (deposit fees, data storage).
*   **Performance:** Evaluate the performance of different providers based on metrics like latency, throughput, and accuracy.
*   **Ease of Use:** Assess the ease of integration and the availability of documentation and support.
*   **Scalability:** Ensure the provider can handle your application's scaling needs.
*   **Data Privacy and Security:** Consider the provider's data privacy policies and security measures, especially if you are dealing with sensitive data.

**4. LLM Ops APIs**

LLM Ops APIs are essential for businesses looking to integrate and manage LLMs effectively. They streamline development, provide monitoring and analytics, and offer tools for various aspects of the LLM lifecycle.

*   **Monster API:** Single, cost-efficient API abstraction over multiple Generative AI models.
*   **Pulze AI:** Single API that routes between different LLM providers for optimization.
*   **Context.ai:** Focuses on performance monitoring.
*   **Carbon:** Primarily focused on ingesting external data into LLM ecosystems.
*   **Humanloop API:** Offers a variety of LLM Ops tools, including evaluation and monitoring, fine-tuning, and optimization.
*   **Langfuse API:** Offers tools for managing the entire lifecycle of LLM applications: prompt management, tracing, evaluation, and metrics.
*   **Graphlit:** Primary offering is a data ingestion tool.
*   **DataRobot:** Comprehensive platform for building and operating LLM applications.
*   **OpenPipe:** YC startup focused on training, deployment, fine-tuning, and evaluation of LLM models.
*   **Autoblocks:** Platform focused on LLM evaluation: monitoring, testing, debugging, analytics, and post-deployment tools.

**5. Model Comparison and Recommendations**

*   **Top Models:** GPT 4o, Gemini 1.5 Pro, Claude 3.5 Sonnet.
*   **Cost-Effective Alternatives:** Llama 3, DeepSeekV2.
*   **General Chat/Instruction Following:** Llama 3.x Instruct, Mistral 7B Instruct, Mixtral 8x7B, Gemma 2/3 Instruct, Qwen 2.5 Instruct
*   **Coding:** DeepSeek Coder, Qwen2.5 Coder, Llama 4 Scout/Maverick, Codestral
*   **Multimodal (Text + Image):** Llama 3.2 Vision Instruct, Qwen 2.5 VL Instruct series, Phi-3.5 Vision, Aya Vision
*   **Long Context Processing:** Phi-3 128k variants
*   **High Inference Speed:** Groq
*   **Maximum Power (via Free Tiers/Previews):** Llama 3.3 70B, Llama 3.1 405B (trials), Qwen 2.5 72B
*   **Efficiency/Resource Constraints:** Smaller models (Llama 3.2, Phi-3 Mini, Gemma 3), quantized models (AWQ/FP8)

**6. Conclusion**

The LLM API landscape is dynamic and diverse, offering a wide range of options for developers and businesses. By carefully considering project requirements, evaluating provider features, and understanding pricing structures, users can make informed decisions and leverage the power of LLMs to build innovative and impactful applications.

**7. URL Links**

*   Together AI:
    *   Models: <https://www.together.ai/models>
    *   Pricing: <https://www.together.ai/pricing>
    *   Website: <https://together.ai>
*   Groq:
    *   Pricing: <https://groq.com/pricing/>
*   Hugging Face:
    *   Pricing: <https://huggingface.co/pricing#endpoints>
    *   Inference Providers: <https://huggingface.co/docs/inference-providers/en/index>
*   Replicate:
    *   Pricing: <https://replicate.com/pricing>
    *   Website: <https://replicate.com/>
*   Hyperbolic:
    *   Pricing: <https://hyperbolic.xyz/pricing>
    *   Website: <https://app.hyperbolic.xyz/>
*   Mistral (La Plateforme): <https://console.mistral.ai/>
*   Mistral (Codestral): <https://codestral.mistral.ai/>
*   Cerebras: <https://cloud.cerebras.ai/>
*   Cohere: <https://cohere.com/>
*   GitHub Models: <https://github.com/marketplace/models>
*   RunPod: <https://www.runpod.io/>
*   Cloudflare Workers AI: <https://developers.cloudflare.com/workers-ai>
*   Google Cloud Vertex AI: <https://console.cloud.google.com/vertex-ai/model-garden>
*   Fireworks: <https://fireworks.ai/>
*   Baseten: <https://www.baseten.co/>, <https://www.baseten.co/library/>
*   Nebius AI: <https://studio.nebius.ai/models>
*   Novita AI: <https://novita.ai/referral?invited_code=E5R0CA&ref=ytblmjc&utm_source=affiliate>
*   Scale AI: <https://scale.com/>
*   Solar AI: <https://solar.ai/>
*   NLP Cloud: <https://nlpcloud.com/home>
*   Alibaba Cloud (International) Model Studio: <https://bailian.console.alibabacloud.com/>
*   Inference.net: <https://inference.net>
*   nCompass: <https://ncompass.tech>
*   SambaNova Cloud: <https://cloud.sambanova.ai/>
*   Scaleway Generative APIs: <https://console.scaleway.com/generative-api/models>
*   LLM APIs: Price Comparison by Model - r/LLMDevs: <https://medium.com/@Experto_AI/llm-apis-price-comparison-by-model-66d1c7bd259d?sk=99f3ad1216aa77ab00aa17a154cf1efb>
