In [None]:
import pandas as pd
import os
from dotenv import load_dotenv

texts_df = pd.read_csv('sample_openai_nlp_raw_inputs.csv')

translation_prompt = "You are a translator. Translate this text from Chinese to English. Do not add anything else."

In [None]:
import asyncio
import time
import random

#manually making async function as bilbystats function doesnt track tokens

async def translate_price_async(
    n,
    texts_df,                # Pass your dataframe here!
    call_llm_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    input_price,             #input price per 1m token
    output_price             #output price per 1m token
):
    start_time = time.time()
    input_token_count = []
    output_token_count = []
    total_token_count = []

    # Pick n unique random indices
    row_numbers = random.sample(texts_df.index.tolist(), n)
    chinese_texts = [texts_df.loc[idx, "body"] for idx in row_numbers]

    # Launch all translation tasks at once
    tasks = [call_llm_async(text) for text in chinese_texts]
    responses = await asyncio.gather(*tasks, return_exceptions=True)

    for i, response in zip(row_numbers, responses):
        try:
            if isinstance(response, Exception):
                raise response
            translation = extract_translation(response)
            input_tokens = extract_input_tokens(response)
            output_tokens = extract_output_tokens(response)
            total_tokens = input_tokens + output_tokens

            input_token_count.append(input_tokens)
            output_token_count.append(output_tokens)
            total_token_count.append(total_tokens)

            print(f"\nRow: {i}")
            print(f"Input tokens: {input_tokens}, Output tokens: {output_tokens}, Total: {total_tokens}")
            print(translation)

        except Exception as e:
            print(f"Skipping row {i}: {e}")

    if total_token_count:
        end_time = time.time()
        runtime = (end_time - start_time)
        
        av_in_token_per_doc = sum(input_token_count) / len(input_token_count)
        av_out_token_per_doc = sum(output_token_count) / len(output_token_count)
        av_token_per_doc = sum(total_token_count) / len(total_token_count)

        av_doc_price = (10e-6)*(input_price*av_in_token_per_doc + output_price*av_out_token_per_doc)
        
        print(f"Mean input Tokens per document: {av_in_token_per_doc:.2f} tokens")
        print(f"Mean output Tokens per document: {av_out_token_per_doc:.2f} tokens")
        print(f"Mean tokens per document: {av_token_per_doc:.2f} tokens")
        print(f"Runtime: {runtime:.1f} seconds")
        print(f"Average price per document: ${av_doc_price:.6f}") #6 d.p. just to be more accurate
    else:
        print("No valid results to calculate mean tokens.")


### 100 Runs Chat GPT-4.1-mini

In [None]:
import openai
from openai import AsyncOpenAI

load_dotenv()
client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

async def call_openai_async(prompt, model="gpt-4.1-mini"):
    response = await client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a translator. Translate from Chinese to English."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=2500
    )
    return response

def extract_translation(response):
    return response.choices[0].message.content

def extract_input_tokens(response):
    return response.usage.prompt_tokens

def extract_output_tokens(response):
    return response.usage.completion_tokens

n = 100

await (translate_price_async(
    n,
    texts_df,                # Pass your dataframe here!
    call_openai_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    0.4,
    1.6    
))

NameError: name 'load_dotenv' is not defined

### 100 Runs Chat GPT-4.1

In [None]:
from openai import AsyncOpenAI

load_dotenv()
client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

async def call_openai_async(prompt, model="gpt-4.1"):
    response = await client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a translator. Translate from Chinese to English."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=2500
    )
    return response

def extract_translation(response):
    return response.choices[0].message.content

def extract_input_tokens(response):
    return response.usage.prompt_tokens

def extract_output_tokens(response):
    return response.usage.completion_tokens

n = 100

await (translate_price_async(
    n,
    texts_df,                # Pass your dataframe here!
    call_openai_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    2,
    8    
))


Row: 250
Input tokens: 812, Output tokens: 715, Total: 1527
He Yong

With the box office success of "Ne Zha: Birth of the Demon Child," a number of alleged copyright infringement incidents have surfaced. Several lawyers, in interviews with the media, have stated that producing or selling dolls, stationery, and other items featuring Ne Zha’s image without permission, as well as using movie elements in store decorations and advertisements without authorization, all constitute infringement.

In today’s era of high traffic, riding the wave of hot trends has become a common marketing tactic. Businesses leverage the influence of trending topics and popular film and television works to attract consumer attention and thereby increase their brand awareness and product sales. Right now, "Ne Zha: Birth of the Demon Child" is a genuine megahit, becoming a major IP. Some cultural and tourism departments, businesses, and online celebrities are capitalizing on the "Ne Zha" craze, using Ne Zha’s imag

### Claude Sonnet 4 100 Runs

In [11]:
from anthropic import AsyncClient
load_dotenv()
client = AsyncClient(api_key=os.getenv("ANTHROPIC_API_KEY"))

async def call_claude_async(prompt, model="claude-sonnet-4-20250514", max_tokens=2500):
    response_claude = await client.messages.create(
        model=model,
        max_tokens=max_tokens,
        system=translation_prompt,
        messages=[{"role": "user", "content": prompt}]
    )
    return response_claude


def extract_translation(response):
    return response.content[0].text

def extract_input_tokens(response):
    return response.usage.input_tokens

def extract_output_tokens(response):
    return response.usage.output_tokens

n = 100

await (translate_price_async(
    n,
    texts_df,                # Pass your dataframe here!
    call_claude_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    3,
    15    
))


Row: 671
Input tokens: 460, Output tokens: 383, Total: 843
Beijing, June 9 (Reporter Wang Hao) - Reporters learned from the Ministry of Water Resources: Since the comprehensive implementation of the river chief and lake chief system, localities have established 1.2 million township and village-level river chiefs, lake chiefs, river patrollers, and river protectors, giving rise to a large number of civilian river and lake chiefs and river-lake management and protection volunteers such as "women river chiefs," "entrepreneur river chiefs," and "young river guardians."

Grassroots river and lake chiefs, social volunteers, and others have become important forces in safeguarding rivers and lakes, playing a crucial role in promoting the transformation of the river chief and lake chief system from "nominal" to "substantive" and strengthening river and lake management and protection.

To further enhance society's sense of responsibility and participation in river and lake protection, the "Sear

### 100 Runs Mistral Small

In [None]:
import mistralai
from mistralai import Mistral

import os
import asyncio

load_dotenv()

mistral_api_key = os.getenv("MISTRAL_API_KEY")

client = Mistral(api_key=mistral_api_key)

async def call_mistral_async(prompt, model="mistral-small-latest", max_tokens=2500):
    response = await client.chat.complete_async(
        model=model,
        messages=[
            {"role": "user", "content": prompt},
            {"role": "system", "content": translation_prompt}
            ],
        max_tokens=max_tokens
    )
    return response

def extract_translation(response):
    return response.text

def extract_input_tokens(response):
    return response.usage.prompt_tokens

def extract_output_tokens(response):
    return response.usage.completion_tokens


n = 100

await (translate_price_async(
    n,
    texts_df,                # Pass your dataframe here!
    call_mistral_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    0.05,
    0.1    
))



Row: 29
Input tokens: 170, Output tokens: 133, Total: 303
Good evening, my friend! Thanks for reading. If you pass by, please give a thumbs up and follow me so I won’t get lost! On the 21st, the China Coal Index hit a new low every day. It opened lower in the morning and kept declining step by step without looking back. There’s no momentum or capital support—even the big players are struggling. On the market, 2 stocks rose, 2 remained flat, and the rest all fell. As a netizen put it, it’s like we’ve signed a sellout agreement. For the video version and more great articles, check out 《...》.

Row: 922
Input tokens: 764, Output tokens: 695, Total: 1459
**Hubei Daily News** (Reporters Liu Sheng, Correspondent Su Qiong) In April 2023, the Ministry of Natural Resources reported 67 major typical illegal and regulatory issues found during the 2022 land protection inspection. On August 28, the Wuhan Office of the State Natural Resources Supervision publicly announced the rectification results 

### Gemini specific function

In [None]:
# Defining a new async function which is slightly more complex (to include batch calling due to Gemini's 15QPM limit)

async def translate_price_async(
    n,
    texts_df,
    call_llm_async,
    extract_translation,
    extract_input_tokens,
    extract_output_tokens,
    input_price,
    output_price,
    translation_prompt,
    model,
    client=None,  
    batch_size=10,  
    batch_delay=60  # Seconds between batches
):
    start_time = time.time()
    input_token_count = []
    output_token_count = []
    total_token_count = []

    row_numbers = random.sample(texts_df.index.tolist(), min(n, len(texts_df)))
    # Validate and clean input texts
    chinese_texts = [
        texts_df.loc[idx, "body"] if pd.notna(texts_df.loc[idx, "body"]) and isinstance(texts_df.loc[idx, "body"], str) else ""
        for idx in row_numbers
    ]
    full_prompts = [f"{translation_prompt}\n{text}" if text else "" for text in chinese_texts]

    # Process in batches to respect rate limits
    responses = []
    for i in range(0, len(chinese_texts), batch_size):
        batch_texts = chinese_texts[i:i + batch_size]
        batch_prompts = full_prompts[i:i + batch_size]
        batch_indices = row_numbers[i:i + batch_size]

        # Skip empty texts in batch
        tasks = [
            call_llm_async(text, translation_prompt, model)
            for text in batch_texts if text
        ]
        if not tasks:
            responses.extend([Exception("Empty input text")] * len(batch_texts))
            continue

        batch_responses = await asyncio.gather(*tasks, return_exceptions=True)
        # Pad responses if some texts were skipped
        padded_responses = []
        task_idx = 0
        for text in batch_texts:
            if text:
                padded_responses.append(batch_responses[task_idx])
                task_idx += 1
            else:
                padded_responses.append(Exception("Empty input text"))
        responses.extend(padded_responses)

        # Wait before next batch
        if i + batch_size < len(chinese_texts):
            await asyncio.sleep(batch_delay)

    for i, response, full_prompt in zip(row_numbers, responses, full_prompts):
        try:
            if isinstance(response, Exception):
                raise response
            translation = extract_translation(response)
            input_tokens = extract_input_tokens(response)
            output_tokens = extract_output_tokens(response)

            if output_tokens == 0:
                raise ValueError("No output tokens generated, possible incomplete translation")

            total_tokens = input_tokens + output_tokens

            input_token_count.append(input_tokens)
            output_token_count.append(output_tokens)
            total_token_count.append(total_tokens)

            print(f"\nRow: {i}")
            print(f"Input tokens: {input_tokens}, Output tokens: {output_tokens}, Total: {total_tokens}")
            print(translation)

        except Exception as e:
            print(f"Skipping row {i}: {e}")

    if total_token_count:
        end_time = time.time()
        runtime = (end_time - start_time)
        
        av_in_token_per_doc = sum(input_token_count) / len(input_token_count)
        av_out_token_per_doc = sum(output_token_count) / len(output_token_count)
        av_token_per_doc = sum(total_token_count) / len(total_token_count)

        av_doc_price = (10e-6) * (input_price * av_in_token_per_doc + output_price * av_out_token_per_doc)
        
        print(f"\nMean input Tokens per document: {av_in_token_per_doc:.2f} tokens")
        print(f"Mean output Tokens per document: {av_out_token_per_doc:.2f} tokens")
        print(f"Mean tokens per document: {av_token_per_doc:.2f} tokens")
        print(f"Runtime: {runtime:.1f} seconds")
        print(f"Average price per document: ${av_doc_price:.6f}")
    else:
        print("No valid results to calculate mean tokens.")


### 100 Runs Google Gemini 2.5 Flash

In [None]:
import pandas as pd
import os
from dotenv import load_dotenv
import asyncio
import time
import random
from cohere import AsyncClient
from google.generativeai import GenerativeModel
import google.generativeai as genai
from tenacity import retry, stop_after_attempt, wait_exponential

load_dotenv()

# Gemini client
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
gemini_client = GenerativeModel("gemini-2.5-flash")

# Gemini API functions
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=60))
async def call_gemini_async(prompt, translation_prompt, model="gemini-2.5-flash"):
    full_prompt = f"{translation_prompt}\n{prompt}"
    response = await gemini_client.generate_content_async(
        contents=full_prompt,
        generation_config={"max_output_tokens": 2500}
    )
    if not hasattr(response, 'text') or not response.text:
        raise ValueError("Empty or invalid response from Gemini")
    return response

def extract_gemini_translation(response):
    return response.text

def extract_gemini_input_tokens(response):
    return response.usage_metadata.prompt_token_count

def extract_gemini_output_tokens(response):
    return response.usage_metadata.candidates_token_count

n = 100
input_price = 0.30  
output_price = 2.50  

await (translate_price_async(
    n=n,
    texts_df=texts_df,
    call_llm_async=call_gemini_async,
    extract_translation=extract_gemini_translation,
    extract_input_tokens=extract_gemini_input_tokens,
    extract_output_tokens=extract_gemini_output_tokens,
    input_price=input_price,
    output_price=output_price,
    translation_prompt=translation_prompt,
    model="gemini-2.5-flash",
    batch_size=14,  # 14 rows per batch (1 call per row = 14 calls < 15 QPM)
    batch_delay=60  # a batch of 14 runs every 60 secs, lower than the 15 QPM.
))



Row: 420
Input tokens: 1053, Output tokens: 1, Total: 1054
On

Row: 133
Input tokens: 637, Output tokens: 6, Total: 643
79 years ago today,

Row: 240
Input tokens: 196, Output tokens: 212, Total: 408
Recently, Dr. Liu Haiyan, a member of "Dr. Yao Zhijian's Team Workstation" at Nanjing Brain Hospital, and Teacher Chen Xiujuan, a member of "Dr. Ke Xiaoyan's Team Workstation," conducted a series of professional activities at Ganzhou Third People's Hospital, including expert consultations, ward rounds, discussions of difficult cases, individual case guidance, and knowledge training. This is one of the regular monthly expert professional activities conducted by the hospital after it signed agreements with Nanjing Brain Hospital to establish two major doctoral workstations.

In recent years, the hospital has continuously introduced high-quality resources, strengthened cooperation with nationally renowned psychiatric hospitals through medical alliances, gathered more excellent experts, and e

Some ValueErrors, causing rows to be skipped. The rows that were loaded in seem to be reasonable with token counts in and out. Since I am just finding averages, missing a few rows is fine.

### Cohere specific function

In [None]:
import asyncio
import time
import random
from tenacity import retry, stop_after_attempt, wait_exponential

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60))
async def translate_price_async_cohere(
    n,
    texts_df,                # Pass your dataframe here!
    call_llm_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    input_price,             #input price per 1m token
    output_price,             #input price per 1m token
):
    start_time = time.time()
    input_token_count = []
    output_token_count = []
    total_token_count = []
    model = "c4ai-aya-expanse-32b"

    # Pick n unique random indices
    row_numbers = random.sample(texts_df.index.tolist(), n)
    chinese_texts = [texts_df.loc[idx, "body"] for idx in row_numbers]
    full_prompts = [f"{translation_prompt}\n{text}" for text in chinese_texts] #defines the full prompt for token counting (required for cohere)

    # Launch all translation tasks at once
    tasks = [call_llm_async(text) for text in chinese_texts]
    responses = await asyncio.gather(*tasks, return_exceptions=True)

    for i, response, full_prompt in zip(row_numbers, responses, full_prompts):
        try:
            if isinstance(response, Exception):
                raise response
            translation = extract_translation(response)
            input_tokens = await extract_input_tokens(full_prompt, model, client)
            output_tokens = await extract_output_tokens(translation, model, client)
            total_tokens = input_tokens + output_tokens

            input_token_count.append(input_tokens)
            output_token_count.append(output_tokens)
            total_token_count.append(total_tokens)

            print(f"\nRow: {i}")
            print(f"Input tokens: {input_tokens}, Output tokens: {output_tokens}, Total: {total_tokens}")
            print(translation)

        except Exception as e:
            print(f"Skipping row {i}: {e}")

    if total_token_count:
        end_time = time.time()
        runtime = (end_time - start_time)
        
        av_in_token_per_doc = sum(input_token_count) / len(input_token_count)
        av_out_token_per_doc = sum(output_token_count) / len(output_token_count)
        av_token_per_doc = sum(total_token_count) / len(total_token_count)

        av_doc_price = (10e-6)*(input_price*av_in_token_per_doc + output_price*av_out_token_per_doc)
        
        print(f"Mean input Tokens per document: {av_in_token_per_doc:.2f} tokens")
        print(f"Mean output Tokens per document: {av_out_token_per_doc:.2f} tokens")
        print(f"Mean tokens per document: {av_token_per_doc:.2f} tokens")
        print(f"Runtime: {runtime:.1f} seconds")
        print(f"Average price per document: ${av_doc_price:.6f}")
    else:
        print("No valid results to calculate mean tokens.")

Cohere was acting up, so adding a simple retries function with tenacity was required, as well as an 'exponential wait' function to reduce API call limits from being reached. Note that async functions are required for input and output tokens as they are not 'built in' to the API call like the usage function with ChatGPT, Claude, Mistral etc. Instead they require a seperate call, so overall 3 calls per text requires (1 translation, 1 input tokens, 1 output tokens).

### 100 Runs Cohere Aya Expanse

In [40]:
#%pip install cohere
import asyncio
import os
import cohere
from cohere import AsyncClient
from tenacity import retry, stop_after_attempt, wait_exponential

load_dotenv()
cohere_api_key = os.getenv("COHERE_API_KEY")

client = AsyncClient(cohere_api_key)

async def call_cohere_async(prompt, model="c4ai-aya-expanse-32b"):
    response = await client.chat(
        model=model,
        message=prompt,
        preamble=translation_prompt,
        temperature=0.0,
        max_tokens=5000
    )
    
    return response

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60))
def extract_translation(response):
    return response.text

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60))
async def extract_input_tokens(input_text, model, client): #async def as it requires an API call to get the token count, not inbedded into the API call like with Claude, Mistral and ChatGPT.
    response = await client.tokenize(text=input_text, model=model)
    return len(response.tokens)

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60))
async def extract_output_tokens(output_text, model, client):
    response = await client.tokenize(text=output_text, model=model)
    return len(response.tokens)

n = 100

await (translate_price_async_cohere(
    n,
    texts_df,                # Pass your dataframe here!
    call_cohere_async,          # async function to call the model
    extract_translation,     # function: response -> translation
    extract_input_tokens,    # function: response -> int
    extract_output_tokens,   # function: response -> int
    0.5,
    1.5, 
))


Row: 869
Input tokens: 369, Output tokens: 437, Total: 806
State Council Premier Li Qiang recently signed a State Council order to promulgate the "State Council Provisions on Regulating the Services Provided by Intermediary Institutions for Companies' Public Issuance of Stocks" (hereinafter referred to as the "Provisions"), to be implemented from February 15, 2025. The aim of the Provisions is to regulate the conduct of intermediary institutions in providing services for the public issuance of stocks by companies, improve the quality of listed companies, protect the legitimate rights and interests of investors, and promote the healthy and stable development of the capital market. The Provisions consist of 19 articles, with the following key content:

First, it clarifies the professional standards for intermediary institutions. Intermediary institutions are required to adhere to the principles of honesty, diligence, and objectivity, and are prohibited from engaging in illegal or irregu

### Character count for APIs (DeepL, Microsoft Azure, Google Cloud Translate)

In [None]:
chinese_texts_char = []

for i in range(0,len(texts_df)):
    text_string = texts_df.loc[i, "body"]
    chinese_texts_char.append(len(text_string))

av_char_per_doc = sum(chinese_texts_char) / len(chinese_texts_char)

print(av_char_per_doc)

1127.67
