In [None]:
# Mount Google Drive at /content/drive; the dataset paths used later in this
# notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# openai is pinned to 0.28 because the translation code in this notebook calls
# openai.ChatCompletion.create, the legacy (pre-1.0) interface of the package.
!pip install openai==0.28
!pip install pandas
!pip install tqdm
!pip install json5

In [None]:
import openai
import pandas as pd
import json
from tqdm import tqdm

import os

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# pasted into (and saved or shared with) the notebook. The "API-KEY"
# placeholder stays as the fallback, so nothing changes when the variable is
# not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "API-KEY")

def create_prompt(text):
    """Assemble the user prompt that asks for a JamE rewrite of ``text``.

    The prompt is a header line, three numbered example sentences, a blank
    line, the input text, and the closing rewrite instruction, joined with
    newlines.

    Args:
        text: The value to embed after "Here is the input text: ".

    Returns:
        The complete prompt as a single string.
    """
    prompt_lines = [
        "Here are examples of Jamaican English (JamE):",
        "1. Hill had initially been indicted with the Canute and the Michelle Saddler and their three companies.",
        "2. The autopsy performed on Mae's torso shortly after it was found, revealed that her body was cut into pieces by a power machine saw.",
        "3. The culture of the region has been unique in combining British and Western influences with African and Asian lifestyles.",
        # Empty entry yields the blank line separating examples from the input.
        "",
        f"Here is the input text: {text}",
        "Please rewrite the input text in Jamaican English (JamE).",
    ]
    return "\n".join(prompt_lines)

def translate_to_jame(text):
    """Rewrite ``text`` in Jamaican English (JamE) via the chat completion API.

    Args:
        text: The passage to rewrite. Missing values (``None`` or a float
            NaN, which is what pandas yields for an empty CSV cell) and
            blank strings are skipped without contacting the API.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``None``
        when the input is missing/blank or the request fails. Failures are
        printed rather than raised so one bad row does not stop a batch.
    """
    import math

    # A missing cell would otherwise be interpolated into the prompt as the
    # literal text "nan"/"None", and a blank one as nothing at all; either way
    # the model would be asked (and billed) to translate no real content.
    if text is None or (isinstance(text, float) and math.isnan(text)):
        return None
    if isinstance(text, str) and not text.strip():
        return None

    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a language model capable of translating text into Jamaican English (JamE)."},
                {"role": "user", "content": create_prompt(text)}
            ],
            max_tokens=500,
            temperature=0.7
        )
        return response['choices'][0]['message']['content'].strip()
    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller
        # record None for this row.
        print(f"Error translating text: {e}")
        return None

_JAME_OUTPUT_DIR = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/GPT 4o/JamE/Glue + SuperGlue"


def process_csv(file_path, column_to_translate, output_dir=_JAME_OUTPUT_DIR):
    """Translate one column of a CSV file and save the result as a new CSV.

    Reads ``file_path``, passes every value of ``column_to_translate`` to
    ``translate_to_jame``, stores the results in a new column named
    ``"JamE (<column_to_translate>)"`` and writes the whole frame to
    ``output_dir`` under the input's file name with ``_JamE`` inserted before
    the extension.

    Args:
        file_path: Path of the CSV file to read.
        column_to_translate: Name of the column whose values are translated.
        output_dir: Directory the translated CSV is written to; created if it
            does not exist. Defaults to the notebook's original destination.

    Raises:
        KeyError: If ``column_to_translate`` is not a column of the CSV.
    """
    import os

    df = pd.read_csv(file_path)
    if column_to_translate not in df.columns:
        raise KeyError(
            f"Column {column_to_translate!r} not found in {file_path}; "
            f"available columns: {list(df.columns)}"
        )

    # Resolve and create the destination before translating anything: a
    # missing directory would otherwise only surface at to_csv, after every
    # API call has already been made, and the results would be lost.
    stem, extension = os.path.splitext(os.path.basename(file_path))
    output_path = os.path.join(output_dir, f"{stem}_JamE{extension}")
    os.makedirs(output_dir, exist_ok=True)

    translated_column = []
    total_rows = len(df)
    for idx, text in enumerate(tqdm(df[column_to_translate], desc=f"Translating {column_to_translate}"), start=1):
        translated_column.append(translate_to_jame(text))
        print(f"Processed row {idx}/{total_rows}")

    df[f"JamE ({column_to_translate})"] = translated_column
    df.to_csv(output_path, index=False)
    print(f"Translated file saved to: {output_path}")

# Input CSV path -> name of the column in that file to translate.
file_translation_map = {
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/BoolQ (1000).csv":
        "SAE Passage",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/COPA (500).csv":
        "Premise",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/MultiRC (1000).csv":
        "Paragraph",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/SST-2 (1000).csv":
        "Original Sentence",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/WSC (659).csv":
        "Original Paragraph",
}

# Process the files one at a time, in the order they are listed above.
for file_path in file_translation_map:
    column_to_translate = file_translation_map[file_path]
    process_csv(file_path, column_to_translate)

print("All translations completed and saved to /content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/GPT 4o/JamE/Glue + SuperGlue.")