In [1]:
%pip install openai==0.28




[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3.10 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:

import json

input_json_file = "input.json"

# Sample transcript segments showing the expected input shape: a segment
# number, start/end values, the source text and the path of an audio file.
example_json_content = [
    {
        "segment": 633,
        "start": 1613.107,
        "end": 1623.093,
        "text": "We can't be at each other's throats, so what I'm willing to do is give you 15% off the top, and you provide security and distribution if needed.",
        "audio_file": "SPEAKER_17/segment_633/audio.wav",
    },
    {
        "segment": 634,
        "start": 1623.5,
        "end": 1630.0,
        "text": "That sounds good to me, but is that for the overall production?",
        "audio_file": "SPEAKER_17/segment_634/audio.wav",
    },
    {
        "segment": 635,
        "start": 1630.1,
        "end": 1637.5,
        "text": "It's raining cats and dogs",
        "audio_file": "SPEAKER_17/segment_635/audio.wav",
    },
    {
        "segment": 636,
        "start": 1637.5,
        "end": 1640.0,
        "text": "Getting this deal done is a piece of cake",
        "audio_file": "SPEAKER_17/segment_636/audio.wav",
    },
]

# Serialize the sample with a 4-space indent and store it on disk.
with open(input_json_file, "w") as f:
    f.write(json.dumps(example_json_content, indent=4))

In [None]:
import os
import json
import getpass
from typing import List, Optional

import openai
from pydantic import BaseModel, ValidationError, RootModel

# ========= CONFIGURATION ==========

# The key is typed interactively so it is never stored in the notebook.
openai_api_key = getpass.getpass("Veuillez saisir votre clé OpenAI : ")
openai.api_key = openai_api_key  # OpenAI key

model_name = "gpt-3.5-turbo"
target_language = "French"
# Reads the sample file written by the example cell ("input.json") so the
# notebook runs from a fresh kernel; point this at your own segment file to
# translate real data.
input_file_path = "input.json"
output_file_path = "output_text.json"
CHUNK_SIZE = 30  # number of segments sent to the model per request

# ========= PYDANTIC MODELS ==========

class TranslatedSegment(BaseModel):
    """One segment returned by the model, with its translation.

    Only ``text`` and ``translated_text`` are mandatory. The remaining fields
    default to ``None``: in pydantic v2 an ``Optional[...]`` annotation
    without a default is still a *required* field, so a reply that omits
    ``speaker`` (the sample input has no such key) would otherwise fail
    validation and the whole chunk would be discarded.
    """

    # The prompt asks the model to preserve this field, so keep it in the output.
    segment: Optional[int] = None
    start: Optional[float] = None
    end: Optional[float] = None
    text: str
    speaker: Optional[str] = None
    translated_text: str

class TranslatedSegmentList(RootModel[List[TranslatedSegment]]):
    """Root model whose value is a list of TranslatedSegment objects.

    Used to validate a parsed JSON array in one call; the validated list is
    available through the ``root`` attribute.
    """

# ========= FONCTIONS ==========

def build_user_message_for_chunk(chunk: List[dict]) -> str:
    """Compose the user-role prompt asking the model to translate one chunk.

    The message is assembled from three fixed parts (the task description,
    which names the module-level ``target_language``; a sample of the expected
    JSON answer; a short list of extra rules) followed by ``chunk`` serialized
    as indented JSON.

    Args:
        chunk: Segment dictionaries to embed in the prompt.

    Returns:
        The complete prompt text.
    """
    # Part 1: what to do with each segment and which fields to return.
    task_description = (
        "Below is a list of segments in JSON format. "
        "For each segment:\n"
        "- Do NOT modify the 'text' field (it must remain in the source language).\n"
        f"- Create or fill 'translated_text' with the translation into {target_language}.\n\n"
        "Return a valid JSON array of objects, where each object preserves the original fields "
        "(segment, start, end, text) and includes a new field 'translated_text'.\n"
    )

    # Part 2: a one-element sample of the expected answer.
    answer_example = (
        "Example:\n"
        "[\n"
        "  {\n"
        "    \"start\": 0.0,\n"
        "    \"end\": 5.0,\n"
        "    \"text\": \"This is the original text.\",\n"
        "    \"speaker\": \"SPEAKER_1\",\n"
        "    \"translated_text\": \"Voici le texte traduit.\" \n"
        "  }\n"
        "]\n\n"
    )

    # Part 3: additional translation rules.
    extra_rules = (
        "Important:\n"
        "- Translate from detected source language to the target language.\n"
        "- Preserve nuances, cultural references, wordplay, idiomatic expressions, etc.\n"
        "- Any symbols, measurement units, or numbers must be written out in words.\n"
    )

    serialized_chunk = json.dumps(chunk, ensure_ascii=False, indent=2)
    return "".join(
        [
            task_description,
            answer_example,
            extra_rules,
            "\n\nHere is the chunk to translate:\n",
            serialized_chunk,
            "\n\n",
        ]
    )

def _strip_code_fence(raw_reply: str) -> str:
    """Return ``raw_reply`` without a surrounding Markdown code fence, if any."""
    reply = raw_reply.strip()
    if reply.startswith("```"):
        # Drop the opening fence line (``` or ```json); when the whole reply is
        # on a single line, only the leading backticks are removed.
        head, newline, body = reply.partition("\n")
        reply = body if newline else head.lstrip("`")
        reply = reply.rstrip()
        if reply.endswith("```"):
            reply = reply[:-3]
    return reply.strip()


def translate_chunk(chunk: List[dict]) -> List[TranslatedSegment]:
    """Send one chunk of segments to the chat model and validate its answer.

    Args:
        chunk: Segment dictionaries to translate in a single request.

    Returns:
        The validated segments, or an empty list when the request fails or
        the reply is not a JSON array matching ``TranslatedSegment``. Errors
        are printed rather than raised, so the caller keeps processing the
        remaining chunks.
    """
    user_message = build_user_message_for_chunk(chunk)

    system_prompt = (
        "You are a highly skilled translator. "
        "First, read the entire set of segments to fully grasp overall context. "
        "Then, translate each segment's 'text' field into the target language while preserving context, "
        "nuances, cultural references, wordplay, idiomatic expressions, acronyms, measurement units, slang, "
        "and so on. If there's any wordplay or idiomatic expression that needs adaptation, keep the same intent and style. "
        "make sure to not translate the names of people, companies, or brands. "
        "All symbols, numbers, or measurement units must be written out in full words in the target language. (e.g., 5 kilometers -> cinq kilomètres, % -> pour cent, $ -> dollars, etc.) "
    )

    try:
        response = openai.ChatCompletion.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message}
            ],
            temperature=0.2
        )
        llm_output = response["choices"][0]["message"]["content"].strip()

        # The reply may be wrapped in a ```json ... ``` fence, which
        # json.loads would reject; remove it before parsing.
        data = json.loads(_strip_code_fence(llm_output))
        # model_validate is the pydantic v2 replacement for the deprecated parse_obj.
        validated = TranslatedSegmentList.model_validate(data)

        if len(validated.root) != len(chunk):
            # Nothing forces the model to return one object per input segment.
            print(
                f"Attention : {len(validated.root)} segments reçus "
                f"pour {len(chunk)} segments envoyés."
            )
        return validated.root
    except (json.JSONDecodeError, ValidationError) as e:
        print("Erreur de parsing ou de validation Pydantic :", e)
        return []
    except Exception as e:
        # Deliberate best-effort: any API/network failure skips this chunk only.
        print(f"Erreur d'appel OpenAI pour le chunk : {e}")
        return []

def main():
    """Translate every segment of ``input_file_path`` and save the result.

    The input must be a JSON list. It is processed in slices of
    ``CHUNK_SIZE`` segments; each slice goes through ``translate_chunk`` and
    the validated segments are written to ``output_file_path`` as JSON.

    Raises:
        ValueError: If the input file does not contain a JSON list.
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    if not isinstance(data, list):
        raise ValueError("Le fichier d'entrée doit être une liste JSON de segments.")

    all_translated_segments = []

    for i in range(0, len(data), CHUNK_SIZE):
        chunk = data[i : i + CHUNK_SIZE]
        print(f"\n--- Traitement du chunk n°{i//CHUNK_SIZE + 1} contenant {len(chunk)} segments ---")

        translated_items = translate_chunk(chunk)
        # model_dump() is the pydantic v2 replacement for the deprecated .dict().
        all_translated_segments.extend(item.model_dump() for item in translated_items)

    # Save the result
    with open(output_file_path, 'w', encoding='utf-8') as f:
        json.dump(all_translated_segments, f, indent=4, ensure_ascii=False)

    if len(all_translated_segments) != len(data):
        # translate_chunk returns [] on failure, so the output can be incomplete;
        # say so instead of reporting an unqualified success.
        print(
            f"\nAttention : {len(all_translated_segments)} segments traduits "
            f"sur {len(data)} segments en entrée."
        )

    print(f"\nTraduction terminée ! Résultats enregistrés dans : {output_file_path}")

if __name__ == "__main__":
    main()


### Evaluation (d-BLEU, BERTScore, COMET-QE)

In [8]:
%pip install sacrebleu


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3.10 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [9]:
import openai
import json
import sacrebleu

Round-trip translation (for evaluation)

In [None]:


# =========== CONFIGURATION ===========
# Never hard-code the key in the notebook. A key that is already configured
# (set by an earlier cell, or read by the openai package from the
# OPENAI_API_KEY environment variable) is reused; otherwise it is prompted for.
import getpass

if not openai.api_key:
    openai.api_key = getpass.getpass("Veuillez saisir votre clé OpenAI : ")
model_name = "gpt-3.5-turbo"            # Model used for the back-translation
input_file = "eval_translated.json"  # File containing the translation
output_file = "backtranslated_3.5.json"
# =====================================

def back_translate(text, source_lang="French", target_lang="English"):
    """Back-translate ``text`` from ``source_lang`` to ``target_lang`` via OpenAI.

    The defaults assume the existing translation is in French and the
    original was in English; adjust them if necessary.

    Args:
        text: The translated text to send back through the model.
        source_lang: Language name inserted in the prompt as the source.
        target_lang: Language name inserted in the prompt as the target.

    Returns:
        The model's reply with surrounding whitespace stripped. Empty,
        whitespace-only or ``None`` input is returned without calling the API
        (``None`` becomes ``""``). If the request fails, the error is printed
        and ``text`` itself is returned, so callers scoring the result should
        be aware that a failed item still carries the untranslated input.
    """
    if not text or not text.strip():
        # Nothing to translate: skip the request instead of letting the model
        # produce a reply for an empty message.
        return text or ""

    try:
        response = openai.ChatCompletion.create(
            model=model_name,
            messages=[
                {
                    "role": "system",
                    "content": (
                        f"Tu es un traducteur expérimenté. "
                        f"Traduis le texte ci-dessous du {source_lang} vers le {target_lang} "
                        f"en préservant le sens et les nuances."
                    )
                },
                {
                    "role": "user",
                    "content": text
                }
            ],
            temperature=0.2
        )
        return response["choices"][0]["message"]["content"].strip()
    except Exception as e:
        print(f"Erreur pendant la back-translation: {e}")
        return text  # Fallback: return the input text when an error occurs

def main():
    """Back-translate every item of ``input_file`` and report a corpus BLEU.

    Each item's ``translated_text`` is sent through ``back_translate`` and the
    answer is stored on the item as ``back_translated_text``. The enriched
    items are written to ``output_file`` and the BLEU score of the
    back-translations against the ``text`` fields is printed.
    """
    # Load the JSON file holding the translations.
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    references = []   # original sentences
    hypotheses = []   # their round-trip versions

    for entry in data:
        source_sentence = entry.get("text", "")
        round_trip = back_translate(
            entry.get("translated_text", ""),
            source_lang="French",
            target_lang="English",
        )

        # Keep the back-translation next to the item it belongs to.
        entry["back_translated_text"] = round_trip

        references.append(source_sentence)
        hypotheses.append(round_trip)

    # corpus_bleu takes the hypotheses and a list of reference streams; there
    # is a single reference per hypothesis here, hence the one-element list.
    bleu_score = sacrebleu.corpus_bleu(hypotheses, [references]).score

    # Persist the items, now including their back-translation.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4, ensure_ascii=False)

    print(f"Back-translation terminée. Fichier généré : {output_file}")
    print(f"Score BLEU (corpus) = {bleu_score:.2f}")

if __name__ == "__main__":
    main()


In [13]:
# Re-score the back-translated file; this is the same name the back-translation
# cell uses for its output_file.
input_file = "backtranslated_3.5.json"

In [15]:
def main():
    """Recompute the corpus BLEU from an already back-translated file.

    Reads ``input_file``, pairs each item's ``back_translated_text``
    (hypothesis) with its ``text`` (reference) and prints the sacreBLEU corpus
    score. No request is sent and no file is written.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # The file is fully loaded, so the scoring happens outside the `with` block.
    original_texts = [item.get("text", "") for item in data]
    back_translated_texts = [item.get("back_translated_text", "") for item in data]

    bleu = sacrebleu.corpus_bleu(back_translated_texts, [original_texts])

    # Report the file that was actually read: this cell generates nothing, and
    # it must not depend on `output_file` being defined by another cell.
    print(f"Fichier évalué : {input_file}")
    print(f"Score BLEU (corpus) = {bleu.score:.2f}")

if __name__ == "__main__":
    main()
    

            

Back-translation terminée. Fichier généré : backtranslated_3.5.json
Score BLEU (corpus) = 48.07


Un score BLEU autour de 40 pour de la back-translation (48,07 ici) indique généralement une bonne préservation du sens et des structures principales.

Si vous aviez un score BLEU très faible (ex. < 20), ça pourrait indiquer que la traduction ou la back-traduction s’est grandement éloignée de l’original.

À l’inverse, un score élevé (ex. 70+) signifierait que la back-traduction est extrêmement proche de la phrase d’origine sur le plan lexical. Cela peut être souhaitable, ou au contraire indiquer une traduction trop littérale.

In [None]:
%pip install bert-score


Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert-score
Successfully installed bert-score-0.3.13

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3.10 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [7]:
import json
from bert_score import score

# The file holding the original text, the translation and the back-translation
# is assumed to be named:
input_file = "backtranslated_3.5.json"

def main():
    """Print the mean BERTScore of the back-translations against the originals.

    Reads ``input_file``, takes each item's ``back_translated_text`` as the
    candidate and its ``text`` as the reference, and prints the number of
    segments together with the mean precision, recall and F1.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        segments = json.load(f)

    # References are the original sentences, candidates their back-translations.
    references = [segment.get("text", "") for segment in segments]
    candidates = [segment.get("back_translated_text", "") for segment in segments]

    # Candidate and reference must be in the same language; lang="en" asks
    # bert_score for its English configuration.
    precision, recall, f1 = score(candidates, references, lang="en")

    print(f"Nombre de segments évalués : {len(references)}")
    print(f"Score BERTScore (moyenne) :")
    print(f"  - Precision: {precision.mean():.4f}")
    print(f"  - Recall:    {recall.mean():.4f}")
    print(f"  - F1:        {f1.mean():.4f}")

if __name__ == "__main__":
    main()


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Nombre de segments évalués : 150
Score BERTScore (moyenne) :
  - Precision: 0.9609
  - Recall:    0.9659
  - F1:        0.9633


- Le F1 est souvent utilisé comme score global (entre 0 et 1) : plus il est proche de 1, plus les phrases sont considérées comme sémantiquement similaires.
- On peut aussi consulter la Precision et le Recall pour voir comment les tokens s’alignent.