This notebook documents measurements for the L4 category, Analytic Operations.

Specifically, it tests translation ability in three languages: Spanish, Mandarin, and Hindi.

In [2]:
import json
import os
import time
import datetime
import statistics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dotenv import load_dotenv
from datasets import load_dataset
from openai import OpenAI

# Populate the process environment via python-dotenv before the keys are read.
load_dotenv()

# Two clients built from the same OpenAI SDK class: one against the default
# endpoint, one pointed at the DeepSeek base URL.
client_oai = OpenAI(api_key=os.environ.get('openai_apikey'))
client_dps = OpenAI(
    api_key=os.environ.get('deepseek_apikey'),
    base_url="https://api.deepseek.com",
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Reload previously saved results. This notebook writes the file with
# `DataFrame.to_csv` using its default settings, which stores the row index as
# the first (unnamed) column. Read that column back as the index so it does not
# surface as a spurious 'Unnamed: 0' data column that grows on every
# save/load round trip.
analytic_en_es_oai = pd.read_csv('results/analytic_en_es_oai.csv', index_col=0)

In [4]:
# Fetch the English/Spanish OPUS Books pairs, then draw a fixed 500-row sample.
ds = load_dataset("Helsinki-NLP/opus_books", "en-es")
analytic_en_es = (
    ds["train"]
    .to_pandas()
    .sample(n=500, random_state=88)  # fixed seed keeps the sample repeatable
    .reset_index()                   # old row labels are kept as an 'index' column
)

In [6]:
# translate to English
def prompt_translate(svc, text):
    """Ask the selected chat service to translate ``text`` into English.

    Args:
        svc: Service key: 'oai' (uses ``client_oai`` with "gpt-4o-mini") or
            'dps' (uses ``client_dps`` with "deepseek-chat").
        text: The text to be translated.

    Returns:
        The message content of the first completion choice.

    Raises:
        ValueError: If ``svc`` is neither 'oai' nor 'dps'.
    """
    # Reject unknown services up front, before any client is touched.
    if svc != 'oai' and svc != 'dps':
        raise ValueError('Invalid service selected')

    use_openai = svc == 'oai'
    client = client_oai if use_openai else client_dps
    model = "gpt-4o-mini" if use_openai else "deepseek-chat"

    # NOTE: the second line of the prompt keeps its leading spaces; they are
    # part of the text sent to the model.
    prompt = f'''Translate the following into English, providing only the translation and no additional text:
    {text}'''

    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful AI assistant"},
            {"role": "user", "content": prompt},
        ],
    )
    return completion.choices[0].message.content

In [7]:
# compare translations
def prompt_translate_compare(svc, text1, text2):
    """Ask the selected chat service to rate how similar two texts are.

    The prompt requests a score from 1 (least similar) to 10 (most similar)
    and nothing else; the reply is returned as-is, without parsing.

    Args:
        svc: Service key: 'oai' (uses ``client_oai`` with "gpt-4o-mini") or
            'dps' (uses ``client_dps`` with "deepseek-chat").
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        The message content of the first completion choice.

    Raises:
        ValueError: If ``svc`` is neither 'oai' nor 'dps'.
    """
    # Reject unknown services up front, before any client is touched.
    if svc != 'oai' and svc != 'dps':
        raise ValueError('Invalid service selected')

    use_openai = svc == 'oai'
    client = client_oai if use_openai else client_dps
    model = "gpt-4o-mini" if use_openai else "deepseek-chat"

    # NOTE: continuation lines keep their leading spaces; they are part of the
    # text sent to the model.
    prompt = f'''Compare the following 2 pieces of text, and score them from 1 to 10 on how similar they are,
    with 1 being least similar and 10 being most similar.
    Provide only the numeric score, and no additional information.
    Text 1: {text1}
    Text 2: {text2}'''

    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful AI assistant"},
            {"role": "user", "content": prompt},
        ],
    )
    return completion.choices[0].message.content

In [None]:
# generating translations
# Take an independent copy: plain assignment would only alias `analytic_en_es`,
# so adding and resetting the result columns below would also mutate the
# sampled source frame (and any other name bound to it).
analytic_en_es_dps = analytic_en_es.copy()
analytic_en_es_dps[['translated','expected']] = ['','']
tm = time.time()
deltas = []
length = len(analytic_en_es_dps)
for index, row in analytic_en_es_dps[:length].iterrows():
    # Each `translation` entry is indexed by language code: 'en' is stored as
    # the reference text, 'es' is sent to the model for translation.
    analytic_en_es_dps.loc[index,['expected']] = row['translation']['en']
    analytic_en_es_dps.loc[index,['translated']] = prompt_translate('dps', row['translation']['es'])
    # Progress report every 20 rows (logical `and`, not bitwise `&`).
    if index % 20 == 0 and index != 0:
        delta = np.round(time.time()-tm,2)
        deltas.append(delta)
        # Mean seconds per row so far, scaled by the rows still to process.
        est = (statistics.mean(deltas)/20) * (length-index)
        print(f'finished ({index}/{length}), time delta: {delta}, est time remaining: {str(datetime.timedelta(seconds=est))}')
        tm = time.time()

finished (20/500), time delta: 51.16, est time remaining: 0:20:27.840000
finished (40/500), time delta: 47.79, est time remaining: 0:18:57.925000
finished (60/500), time delta: 49.33, est time remaining: 0:18:07.386667
finished (80/500), time delta: 38.72, est time remaining: 0:16:21.750000
finished (100/500), time delta: 45.14, est time remaining: 0:15:28.560000


In [21]:
# Preview the first five rows of the OpenAI results frame.
analytic_en_es_oai.head(5)

Unnamed: 0,index,id,translation,translated,expected
0,15483,15483,"{'en': '""If that touch had not been thrown in,...","""If he had not added those points and collar,""...","""If that touch had not been thrown in,"" said D..."
1,39077,39077,"{'en': 'Monsieur, if she has escaped, it is no...","Sir, if he has escaped, it's not my fault, I s...","Monsieur, if she has escaped, it is not my fau..."
2,85353,85353,"{'en': 'This was Fix, one of the detectives wh...",This Fix had to watch all travelers taking the...,"This was Fix, one of the detectives who had be..."
3,74810,74810,"{'en': 'An intense, musky odor filled the air....",A strong smell of musk stank in the atmosphere.,"An intense, musky odor filled the air. It was ..."
4,49026,49026,{'en': 'He really ought to have expected thing...,Was this still the father? The same man who la...,He really ought to have expected things to hav...


In [22]:
# Persist the OpenAI results frame to CSV. With `to_csv` defaults the row index
# is written as the first, unnamed column.
analytic_en_es_oai.to_csv('results/analytic_en_es_oai.csv')

In [28]:
# compare translations
# Score each stored translation against its reference text using the 'dps'
# service; the raw reply is stored in `score_dps` without parsing.
analytic_en_es_oai['score_dps'] = ''
tm = time.time()
deltas = []
length = len(analytic_en_es_oai)
for index, row in analytic_en_es_oai[:length].iterrows():
    analytic_en_es_oai.loc[index,['score_dps']] = prompt_translate_compare('dps', row['translated'],row['expected'])
    # Progress report every 20 rows (logical `and`, not bitwise `&`).
    if index % 20 == 0 and index != 0:
        delta = np.round(time.time()-tm,2)
        deltas.append(delta)
        # Mean seconds per row so far, scaled by the rows still to process.
        est = (statistics.mean(deltas)/20) * (length-index)
        print(f'finished ({index}/{length}), time delta: {delta}, est time remaining: {str(datetime.timedelta(seconds=est))}')
        tm = time.time()

finished (20/500), time delta: 29.45, est time remaining: 0:11:46.800000
finished (40/500), time delta: 27.15, est time remaining: 0:10:50.900000
finished (60/500), time delta: 26.49, est time remaining: 0:10:09.326667
finished (80/500), time delta: 26.1, est time remaining: 0:09:33.247500
finished (100/500), time delta: 27.49, est time remaining: 0:09:06.720000
finished (120/500), time delta: 26.58, est time remaining: 0:08:36.990000
finished (140/500), time delta: 27.52, est time remaining: 0:08:10.577143
finished (160/500), time delta: 29.0, est time remaining: 0:07:47.032500
finished (180/500), time delta: 28.42, est time remaining: 0:07:21.244444
finished (200/500), time delta: 27.82, est time remaining: 0:06:54.030000
finished (220/500), time delta: 24.79, est time remaining: 0:06:22.849091
finished (240/500), time delta: 26.18, est time remaining: 0:05:54.239167
finished (260/500), time delta: 26.85, est time remaining: 0:05:26.621538
finished (280/500), time delta: 27.45, est t

In [30]:
# Save the scored frame first, then leave `head` as the cell's final
# expression: a notebook only renders the value of the last expression, so a
# preview placed before the save would be silently discarded.
analytic_en_es_oai.to_csv('results/analytic_en_es_oai.csv')
analytic_en_es_oai.head(5)