In [3]:
import pandas as pd
import os
from dotenv import load_dotenv

texts_df = pd.read_csv('sample_openai_nlp_raw_inputs.csv')

translation_prompt = "You are a translator. Translate this text from Chinese to English. Do not add anything else."

In [11]:
url_id = texts_df['id'][:100].tolist()
url_id_df = pd.DataFrame(url_id, columns=["url_id"])
url_id_df.to_csv("url_ids.csv", index=False)

### Standard async functions for most LLMs

In [6]:
import asyncio
import time
import random

#manually making async function as bilbystats function doesnt track tokens

async def translate_price_async(
    n,
    texts_df,                # Pass your dataframe here!
    call_llm_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    input_price,             #input price per 1m token
    output_price             #output price per 1m token
):
    start_time = time.time()
    input_token_count = []
    output_token_count = []
    total_token_count = []

    # Pick n unique random indices
    row_numbers = texts_df.index[:n].tolist()
    chinese_texts = [texts_df.loc[idx, "body"] for idx in row_numbers]

    # Launch all translation tasks at once
    tasks = [call_llm_async(text) for text in chinese_texts]
    responses = await asyncio.gather(*tasks, return_exceptions=True)

    for i, response in zip(row_numbers, responses):
        try:
            if isinstance(response, Exception):
                raise response
            translation = extract_translation(response)
            input_tokens = extract_input_tokens(response)
            output_tokens = extract_output_tokens(response)
            total_tokens = input_tokens + output_tokens

            input_token_count.append(input_tokens)
            output_token_count.append(output_tokens)
            total_token_count.append(total_tokens)

            print(f"\nRow: {i}")
            print(f"Input tokens: {input_tokens}, Output tokens: {output_tokens}, Total: {total_tokens}")
            print(translation)
            
        except Exception as e:
            print(f"Skipping row {i}: {e}")
            

    if total_token_count:
        end_time = time.time()
        runtime = (end_time - start_time)
        
        av_in_token_per_doc = sum(input_token_count) / len(input_token_count)
        av_out_token_per_doc = sum(output_token_count) / len(output_token_count)
        av_token_per_doc = sum(total_token_count) / len(total_token_count)

        av_doc_price = (10e-6)*(input_price*av_in_token_per_doc + output_price*av_out_token_per_doc)
        
        print(f"Mean input Tokens per document: {av_in_token_per_doc:.2f} tokens")
        print(f"Mean output Tokens per document: {av_out_token_per_doc:.2f} tokens")
        print(f"Mean tokens per document: {av_token_per_doc:.2f} tokens")
        print(f"Runtime: {runtime:.1f} seconds")
        print(f"Average price per document: ${av_doc_price:.6f}") #6 d.p. just to be more accurate
    else:
        print("No valid results to calculate mean tokens.")


### Async function with batch calling (for APIs with limit rates)

In [18]:
async def translate_price_async_batch(
    n,
    texts_df,
    call_llm_async,
    extract_translation,
    extract_input_tokens,
    extract_output_tokens,
    input_price,
    output_price,
    batch_size=10,  
    batch_delay=60  # Seconds between batches
):
    start_time = time.time()
    input_token_count = []
    output_token_count = []
    total_token_count = []

    row_numbers = texts_df.index[:n].tolist()
    # Validate and clean input texts
    chinese_texts = [
        texts_df.loc[idx, "body"] if pd.notna(texts_df.loc[idx, "body"]) and isinstance(texts_df.loc[idx, "body"], str) else ""
        for idx in row_numbers
    ]
    full_prompts = [f"{translation_prompt}\n{text}" if text else "" for text in chinese_texts]

    # Process in batches to respect rate limits
    responses = []
    for i in range(0, len(full_prompts), batch_size):
        batch_prompts = full_prompts[i:i + batch_size]
        batch_indices = row_numbers[i:i + batch_size]

    # One LLM call per prompt
        tasks = [call_llm_async(prompt) for prompt in batch_prompts]
        batch_responses = await asyncio.gather(*tasks, return_exceptions=True)
        responses.extend(batch_responses)  # <--- collect all batch responses

    # Wait before next batch, if needed
        if i + batch_size < len(full_prompts):
            await asyncio.sleep(batch_delay)

    for i, response, full_prompt in zip(row_numbers, responses, full_prompts):
        try:
            if isinstance(response, Exception):
                raise response
            translation = extract_translation(response)
            input_tokens = extract_input_tokens(response)
            output_tokens = extract_output_tokens(response)

            if output_tokens == 0:
                raise ValueError("No output tokens generated, possible incomplete translation")

            total_tokens = input_tokens + output_tokens

            input_token_count.append(input_tokens)
            output_token_count.append(output_tokens)
            total_token_count.append(total_tokens)

            print(f"\nRow: {i}")
            print(f"Input tokens: {input_tokens}, Output tokens: {output_tokens}, Total: {total_tokens}")
            print(translation)

        except Exception as e:
            print(f"Skipping row {i}: {e}")

    if total_token_count:
        end_time = time.time()
        runtime = (end_time - start_time)
        
        av_in_token_per_doc = sum(input_token_count) / len(input_token_count)
        av_out_token_per_doc = sum(output_token_count) / len(output_token_count)
        av_token_per_doc = sum(total_token_count) / len(total_token_count)

        av_doc_price = (10e-6) * (input_price * av_in_token_per_doc + output_price * av_out_token_per_doc)
        
        print(f"\nMean input Tokens per document: {av_in_token_per_doc:.2f} tokens")
        print(f"Mean output Tokens per document: {av_out_token_per_doc:.2f} tokens")
        print(f"Mean tokens per document: {av_token_per_doc:.2f} tokens")
        print(f"Runtime: {runtime:.1f} seconds")
        print(f"Average price per document: ${av_doc_price:.6f}")
    else:
        print("No valid results to calculate mean tokens.")

### 100 Runs Chat GPT-4.1-mini

In [12]:
import openai
from openai import AsyncOpenAI

load_dotenv()
client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

async def call_openai_async(prompt, model="gpt-4.1-mini"):
    response = await client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a translator. Translate from Chinese to English."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=2500
    )
    return response

def extract_translation(response):
    return response.choices[0].message.content

def extract_input_tokens(response):
    return response.usage.prompt_tokens

def extract_output_tokens(response):
    return response.usage.completion_tokens

n = 100

await (translate_price_async(
    n,
    texts_df,                # Pass your dataframe here!
    call_openai_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    0.4,
    1.6    
))


Row: 0
Input tokens: 370, Output tokens: 383, Total: 753
Recently, a reporter learned from the Jilin Provincial Department of Transportation that while consolidating the achievements of clearing COVID-19 cases in the community, Jilin is orderly restoring production and living order. As of now, all seven ongoing expressway construction projects this year have fully resumed work and production.

Jilin Transportation has formulated and issued the "Work Plan for Resuming Construction and Production of Provincial Highway Projects" and the "Epidemic Prevention and Control Manual and Flowchart." A special leadership task force has been established to provide comprehensive, full-process, and full-chain guidance for the resumption of highway projects. Daily coordination is maintained regarding epidemic prevention and control and the resumption of construction and production. Combined with the provincial department’s task force and leadership accountability mechanisms, problems are promptly add

### 100 Runs Chat GPT-4.1

In [13]:
from openai import AsyncOpenAI

load_dotenv()
client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

async def call_openai_async(prompt, model="gpt-4.1"):
    response = await client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a translator. Translate from Chinese to English."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=2500
    )
    return response

def extract_translation(response):
    return response.choices[0].message.content

def extract_input_tokens(response):
    return response.usage.prompt_tokens

def extract_output_tokens(response):
    return response.usage.completion_tokens

n = 100

await (translate_price_async(
    n,
    texts_df,                # Pass your dataframe here!
    call_openai_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    2,
    8    
))


Row: 0
Input tokens: 370, Output tokens: 379, Total: 749
Recently, the Jilin Provincial Department of Transportation announced that while consolidating the achievement of zero COVID-19 cases in the general population, Jilin has been orderly restoring normal production and daily life. As of now, all seven ongoing expressway construction projects in the province have fully resumed work and production this year.

To guide the resumption of highway projects, Jilin’s transportation authorities have formulated and issued the “Provincial Highway Construction Project Work Resumption Plan” and the “COVID-19 Prevention and Control Handbook and Workflow Chart.” A special leading group has also been established to provide comprehensive, full-process, all-chain guidance for project resumption. They conduct daily scheduling of epidemic prevention and control along with work resumption progress, utilizing the provincial work resumption team and a leading-official accountability mechanism to solve pr

### Claude Sonnet 4 100 Runs

In [14]:
from anthropic import AsyncClient
load_dotenv()
client = AsyncClient(api_key=os.getenv("ANTHROPIC_API_KEY"))

async def call_claude_async(prompt, model="claude-sonnet-4-20250514", max_tokens=2500):
    response_claude = await client.messages.create(
        model=model,
        max_tokens=max_tokens,
        system=translation_prompt,
        messages=[{"role": "user", "content": prompt}]
    )
    return response_claude


def extract_translation(response):
    return response.content[0].text

def extract_input_tokens(response):
    return response.usage.input_tokens

def extract_output_tokens(response):
    return response.usage.output_tokens

n = 100

await (translate_price_async(
    n,
    texts_df,                # Pass your dataframe here!
    call_claude_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    3,
    15    
))


Row: 0
Input tokens: 531, Output tokens: 404, Total: 935
Reporters recently learned from the Jilin Provincial Department of Transportation that while consolidating the results of clearing community transmission, Jilin is orderly restoring production and daily life. As of now, all 7 continuing highway construction projects this year have fully resumed work and production.

Jilin's transportation department has formulated and issued the "Provincial Highway Construction Project Work and Production Resumption Plan" and "Epidemic Prevention and Control Work Manual and Flow Chart," while simultaneously establishing a working leadership team to provide comprehensive guidance for highway project resumption throughout the entire process, in all aspects, and across the entire chain. They maintain daily monitoring of epidemic prevention and control and work resumption situations, and combined with the provincial department's work resumption team and department-level leadership responsibility mec

### 100 Runs Mistral Small

In [None]:
import mistralai
from mistralai import Mistral
from tenacity import retry, stop_after_attempt, wait_exponential

import os
import asyncio

load_dotenv()

mistral_api_key = os.getenv("MISTRAL_API_KEY")

client = Mistral(api_key=mistral_api_key)


async def call_mistral_async(prompt, model="mistral-small-latest", max_tokens=2500):
    response = await client.chat.complete_async(
        model=model,
        messages=[
            {"role": "user", "content": prompt},
            #{"role": "system", "content": translation_prompt}
            ],
        max_tokens=max_tokens
    )

    return response


def extract_translation(response):
    return response.choices[0].message.content

def extract_input_tokens(response):
    return response.usage.prompt_tokens

def extract_output_tokens(response):
    return response.usage.completion_tokens


n = 100

#have to use batch due to the 500,000 tokens limit rate
await (translate_price_async_batch(
    n,
    texts_df,                # Pass your dataframe here!
    call_mistral_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    0.05,
    0.1,
    batch_size=34,          
    batch_delay=60           # processing it 34 batches per minute
))



Row: 0
Input tokens: 494, Output tokens: 364, Total: 858
Journalists recently learned from the Jilin Provincial Department of Transportation that while consolidating the results of achieving zero community transmission, Jilin is restoring production and living order in an orderly manner. As of now, all seven highway projects under construction this year have resumed work. The Jilin Transportation Department has formulated and issued the *Work Plan for Resuming Work and Production of Highway Construction Projects Across the Province* and the *Handbook and Process Diagram for Epidemic Prevention and Control Work*, while simultaneously establishing a working leadership team to guide the resumption of work and production of highway projects throughout the entire process, in all aspects, and across the entire chain. The department insists on daily coordination of epidemic prevention and control and work resumption, combining the provincial-level work resumption team and the leadership resp

### 100 Runs Google Gemini 2.5 Flash

NOTE: VPN is required for this one

In [26]:
import pandas as pd
import os
from dotenv import load_dotenv
import asyncio
import time
import random
from cohere import AsyncClient
from google.generativeai import GenerativeModel
import google.generativeai as genai
from tenacity import retry, stop_after_attempt, wait_exponential

load_dotenv()

# Gemini client
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
gemini_client = GenerativeModel("gemini-2.5-flash")

# Gemini API functions
#@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=60))
async def call_gemini_async(prompt, model="gemini-2.5-flash"):
    full_prompt = f"{translation_prompt}\n{prompt}"
    response = await gemini_client.generate_content_async(
        contents=full_prompt,
        generation_config={"max_output_tokens": 2500}
    )
    if not hasattr(response, 'text') or not response.text:
        raise ValueError("Empty or invalid response from Gemini")
    return response

def extract_gemini_translation(response):
    return response.text

def extract_gemini_input_tokens(response):
    return response.usage_metadata.prompt_token_count

def extract_gemini_output_tokens(response):
    return response.usage_metadata.candidates_token_count

n = 100
input_price = 0.30  
output_price = 2.50  

await (translate_price_async_batch(
    n=n,
    texts_df=texts_df,
    call_llm_async=call_gemini_async,
    extract_translation=extract_gemini_translation,
    extract_input_tokens=extract_gemini_input_tokens,
    extract_output_tokens=extract_gemini_output_tokens,
    input_price=input_price,
    output_price=output_price,
    batch_size=14,  # 14 rows per batch (1 call per row = 14 calls < 15 QPM)
    batch_delay=60  # a batch of 14 runs every 60 secs, lower than the 15 QPM.
))



Row: 0
Input tokens: 363, Output tokens: 372, Total: 735
Reporters recently learned from the Jilin Provincial Department of Transportation that Jilin is consolidating the achievements of clearing community infections while orderly restoring production and daily life. As of now, all 7 ongoing highway projects for this year have fully resumed work and production. Jilin Transportation has formulated and issued the "Work Plan for Resumption of Work and Production of Provincial Highway Construction Projects" and the "Epidemic Prevention and Control Work Manual and Flowchart," and simultaneously established a special working leadership team to guide the resumption of work and production for highway projects throughout the entire process, in all aspects, and across the entire chain; it insists on daily scheduling of epidemic prevention and control and work resumption situations, and combines with the provincial department's special work resumption team and department-level leader responsibil

### Cohere specific function

In [30]:
import asyncio
import time
import random
from tenacity import retry, stop_after_attempt, wait_exponential

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60))
async def translate_price_async_cohere(
    n,
    texts_df,                # Pass your dataframe here!
    call_llm_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    input_price,             #input price per 1m token
    output_price,             #input price per 1m token
    batch_size,
    batch_delay
):
    start_time = time.time()
    input_token_count = []
    output_token_count = []
    total_token_count = []
    model = "c4ai-aya-expanse-32b"

    # Pick n unique random indices
    row_numbers = texts_df.index[:n].tolist()
    # Validate and clean input texts
    chinese_texts = [
        texts_df.loc[idx, "body"] if pd.notna(texts_df.loc[idx, "body"]) and isinstance(texts_df.loc[idx, "body"], str) else ""
        for idx in row_numbers
    ]
    full_prompts = [f"{translation_prompt}\n{text}" if text else "" for text in chinese_texts]

    # Process in batches to respect rate limits
    responses = []
    for i in range(0, len(full_prompts), batch_size):
        batch_prompts = full_prompts[i:i + batch_size]
        batch_indices = row_numbers[i:i + batch_size]

    # One LLM call per prompt
        tasks = [call_llm_async(prompt) for prompt in batch_prompts]
        batch_responses = await asyncio.gather(*tasks, return_exceptions=True)
        responses.extend(batch_responses)  # <--- collect all batch responses

    # Wait before next batch, if needed
        if i + batch_size < len(full_prompts):
            await asyncio.sleep(batch_delay)

    for i, response, full_prompt in zip(row_numbers, responses, full_prompts):
        try:
            if isinstance(response, Exception):
                raise response
            translation = extract_translation(response)
            input_tokens = await extract_input_tokens(full_prompt, model, client)
            output_tokens = await extract_output_tokens(translation, model, client)
            total_tokens = input_tokens + output_tokens

            input_token_count.append(input_tokens)
            output_token_count.append(output_tokens)
            total_token_count.append(total_tokens)

            print(f"\nRow: {i}")
            print(f"Input tokens: {input_tokens}, Output tokens: {output_tokens}, Total: {total_tokens}")
            print(translation)

        except Exception as e:
            print(f"Skipping row {i}: {e}")

    if total_token_count:
        end_time = time.time()
        runtime = (end_time - start_time)
        
        av_in_token_per_doc = sum(input_token_count) / len(input_token_count)
        av_out_token_per_doc = sum(output_token_count) / len(output_token_count)
        av_token_per_doc = sum(total_token_count) / len(total_token_count)

        av_doc_price = (10e-6)*(input_price*av_in_token_per_doc + output_price*av_out_token_per_doc)
        
        print(f"Mean input Tokens per document: {av_in_token_per_doc:.2f} tokens")
        print(f"Mean output Tokens per document: {av_out_token_per_doc:.2f} tokens")
        print(f"Mean tokens per document: {av_token_per_doc:.2f} tokens")
        print(f"Runtime: {runtime:.1f} seconds")
        print(f"Average price per document: ${av_doc_price:.6f}")
    else:
        print("No valid results to calculate mean tokens.")

Cohere was acting up, so adding a simple retries function with tenacity was required, as well as an 'exponential wait' function to reduce API call limits from being reached. Note that async functions are required for input and output tokens as they are not 'built in' to the API call like the usage function with ChatGPT, Claude, Mistral etc. Instead they require a seperate call, so overall 3 calls per text requires (1 translation, 1 input tokens, 1 output tokens).

### 100 Runs Cohere Aya Expanse

In [32]:
#%pip install cohere
import asyncio
import os
import cohere
from cohere import AsyncClient
from tenacity import retry, stop_after_attempt, wait_exponential

load_dotenv()
cohere_api_key = os.getenv("COHERE_API_KEY")

client = AsyncClient(cohere_api_key)

async def call_cohere_async(prompt, model="c4ai-aya-expanse-32b"):
    response = await client.chat(
        model=model,
        message=prompt,
        preamble=translation_prompt,
        temperature=0.0,
        max_tokens=5000
    )
    
    return response

def extract_translation(response):
    return response.text

async def extract_input_tokens(input_text, model, client): #async def as it requires an API call to get the token count, not inbedded into the API call like with Claude, Mistral and ChatGPT.
    response = await client.tokenize(text=input_text, model=model)
    return len(response.tokens)

async def extract_output_tokens(output_text, model, client):
    response = await client.tokenize(text=output_text, model=model)
    return len(response.tokens)

n = 100

await (translate_price_async_cohere(
    n,
    texts_df,                # Pass your dataframe here!
    call_cohere_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    0.5,
    1.5,
    batch_size=34,
    batch_delay=60 
))


Row: 0
Input tokens: 344, Output tokens: 425, Total: 769
Journalists recently learned from the Jilin Provincial Department of Transport that Jilin, while consolidating the results of clearing the social surface, is restoring production and life order in an orderly manner. So far, all seven continuing highway projects this year have resumed construction and production.

Jilin Transport has formulated and issued the "Work Plan for the Resumption of Construction and Production of Highway Construction Projects in the Province" and the "Epidemic Prevention and Control Handbook and Flow Chart", and established a dedicated leadership team for the work, providing all-process, all-aspect, and all-chain guidance on the resumption of highway projects; it insists on daily coordination of epidemic prevention and control and resumption of work and production situations, combining the provincial department's resumption of work and production special team and the hall-level leadership guarantee mecha

### Character count for APIs (DeepL, Microsoft Azure, Google Cloud Translate)

In [None]:
chinese_texts_char = []

for i in range(0,len(texts_df)):
    text_string = texts_df.loc[i, "body"]
    chinese_texts_char.append(len(text_string))

av_char_per_doc = sum(chinese_texts_char) / len(chinese_texts_char)

print(av_char_per_doc)

1127.67
