In [None]:
# Mount Google Drive at /content/drive; the dataset and output paths used in
# the cells below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install openai==0.28
!pip install pandas
!pip install tqdm
!pip install json5

In [None]:
import openai
import json
from tqdm import tqdm

# Keep the secret out of the saved notebook: take the key from the
# OPENAI_API_KEY environment variable, or prompt for it (input is not echoed)
# when the variable is unset or empty.
import os
from getpass import getpass

openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

def create_prompt(text):
    """Build the few-shot user prompt asking for an AAVE rewrite of ``text``.

    The prompt is a header line, three numbered AAVE example sentences, a
    blank line, the input text, and the rewrite instruction, joined by
    newlines (no trailing newline).

    Args:
        text: The passage to embed in the prompt.

    Returns:
        The assembled prompt string.
    """
    prompt_lines = [
        "Here are examples of African American Vernacular English (AAVE):",
        "1. I was bewildered, but I knew dat it was no gud asking his ass to explain.",
        "2. Cochran pontificated windily for da camera.",
        "3. I don't want them to follow in my footsteps, as I ain't go to no college, but I want them to go.",
        "",  # blank line between the examples and the request
        f"Here is the input text: {text}",
        "Please rewrite the input text in African American Vernacular English (AAVE).",
    ]
    return "\n".join(prompt_lines)

def translate_to_aave(text):
    """Ask the "gpt-4o" chat model to rewrite ``text`` in AAVE.

    Sends a fixed system message plus the few-shot prompt produced by
    ``create_prompt`` through ``openai.ChatCompletion.create``.

    Args:
        text: The passage to translate.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or ``None`` if anything in the request raised. The error is
        printed and not re-raised, so callers must expect ``None``.
    """
    system_message = {
        "role": "system",
        "content": "You are a language model capable of translating text into African American Vernacular English (AAVE).",
    }
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4o",
            messages=[
                system_message,
                {"role": "user", "content": create_prompt(text)},
            ],
            max_tokens=500,
            temperature=0.7,
        )
        reply = completion['choices'][0]['message']['content']
        return reply.strip()
    except Exception as err:
        # Best effort: report the failure and let the caller continue.
        print(f"Error translating text: {err}")
        return None

def process_json(file_path, key_to_translate, output_dir=None):
    """Translate one field of every sample in a dataset file to AAVE and save a copy.

    The input JSON must map each logic-type name to an object with a
    ``samples`` list. For every sample, the value under ``key_to_translate``
    is passed to ``translate_to_aave`` and the result is stored on the sample
    under ``"AAVE (KEY)"``, where KEY is ``key_to_translate``. The augmented
    data is written to ``output_dir`` under the input file's name with
    ``.json`` replaced by ``_AAVE.json``.

    Args:
        file_path: Path of the JSON dataset to read ('/'-separated).
        key_to_translate: Sample key whose text is translated.
        output_dir: Directory for the translated file. Defaults to the
            project's "GPT 4o/AAVE" folder on the mounted Drive.

    Returns:
        None. Progress, the output path and a failure summary are printed.
    """
    import os  # local import: this cell's import block does not provide os

    if output_dir is None:
        output_dir = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/GPT 4o/AAVE"
    output_name = file_path.split('/')[-1].replace('.json', '_AAVE.json')
    output_path = os.path.join(output_dir, output_name)

    # Create the destination before any API call is made, so a missing folder
    # cannot discard a finished translation run at save time.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Explicit encoding so reading does not depend on the platform default.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    total_items = sum(len(logic_data['samples']) for logic_data in data.values())
    processed_items = 0
    failed_items = 0

    for logic_type, logic_data in data.items():
        for item in tqdm(logic_data['samples'], desc=f"Translating {logic_type}"):
            translation = translate_to_aave(item[key_to_translate])
            if translation is None:
                # translate_to_aave signals a failed request with None.
                failed_items += 1
            item[f"AAVE ({key_to_translate})"] = translation
            processed_items += 1
            print(f"Processed item {processed_items}/{total_items}")

    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)
    print(f"Translated file saved to: {output_path}")
    if failed_items:
        print(f"Warning: {failed_items}/{total_items} items failed to translate and were saved as null.")

# Dataset files to translate; every file keeps its text under the "context" key.
logic_bench_files = dict.fromkeys(
    (
        "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/Logic Bench MCQ(480).json",
        "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/Logic Bench YN(500).json",
    ),
    "context",
)

# Translate the datasets one after another; process_json saves each result.
for file_path, key_to_translate in logic_bench_files.items():
    process_json(file_path, key_to_translate)

print("Logic Bench translations completed and saved.")

In [None]:
# GPT-4o few-shot translation into ChcE, CollSgE, IndE and JamE

import openai
import json
from tqdm import tqdm

# Keep the secret out of the saved notebook: take the key from the
# OPENAI_API_KEY environment variable, or prompt for it (input is not echoed)
# when the variable is unset or empty.
import os
from getpass import getpass

openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

def create_prompt_chce(text):
    """Build the few-shot user prompt asking for a Chicano English rewrite.

    The prompt is a header line, three numbered ChcE example sentences, a
    blank line, the input text, and the rewrite instruction, joined by
    newlines (no trailing newline).

    Args:
        text: The passage to embed in the prompt.

    Returns:
        The assembled prompt string.
    """
    prompt_lines = [
        "Here are examples of Chicano English (ChcE):",
        "1. When people wanna fight me I'm like \"well okay, well then I'll fight you.\"",
        "2. They were saying that they had a lot of problems at Garner because it was a lot of fights and stuff.",
        "3. I ain't really thinking about getting with J. or any other guy",
        "",  # blank line between the examples and the request
        f"Here is the input text: {text}",
        "Please rewrite the input text in Chicano English (ChcE).",
    ]
    return "\n".join(prompt_lines)

def create_prompt_collsge(text):
    """Build the few-shot user prompt asking for a Singlish (CollSgE) rewrite.

    The prompt is a header line, three numbered CollSgE example sentences, a
    blank line, the input text, and the rewrite instruction, joined by
    newlines (no trailing newline).

    Args:
        text: The passage to embed in the prompt.

    Returns:
        The assembled prompt string.
    """
    prompt_lines = [
        "Here are examples of Colloquial Singapore English (Singlish) (CollSgE):",
        "1. But after a while it become quite senseless to me.",
        "2. And got to know this kind-hearted scholar who shelter her with Ø umbrella when it was raining.",
        "3. The cake John buy one always very nice to eat.",
        "",  # blank line between the examples and the request
        f"Here is the input text: {text}",
        "Please rewrite the input text in Colloquial Singapore English (Singlish) (CollSgE).",
    ]
    return "\n".join(prompt_lines)

def create_prompt_inde(text):
    """Build the few-shot user prompt asking for an Indian English rewrite.

    The prompt is a header line, three numbered IndE example sentences, a
    blank line, the input text, and the rewrite instruction, joined by
    newlines (no trailing newline).

    Args:
        text: The passage to embed in the prompt.

    Returns:
        The assembled prompt string.
    """
    prompt_lines = [
        "Here are examples of Indian English (IndE):",
        "1. It was not too much common. Getting the accommodation has become very much difficult.",
        "2. During monsoon we get lot of rain and then gets very soggy and sultry.",
        "3. This is the second time that such an object had been sighted here.",
        "",  # blank line between the examples and the request
        f"Here is the input text: {text}",
        "Please rewrite the input text in Indian English (IndE).",
    ]
    return "\n".join(prompt_lines)

def create_prompt_jame(text):
    """Build the few-shot user prompt asking for a Jamaican English rewrite.

    The prompt is a header line, three numbered JamE example sentences, a
    blank line, the input text, and the rewrite instruction, joined by
    newlines (no trailing newline).

    Args:
        text: The passage to embed in the prompt.

    Returns:
        The assembled prompt string.
    """
    prompt_lines = [
        "Here are examples of Jamaican English (JamE):",
        "1. Hill had initially been indicted with the Canute and the Michelle Saddler and their three companies.",
        "2. The autopsy performed on Mae's torso shortly after it was found, revealed that her body was cut into pieces by a power machine saw.",
        "3. The culture of the region has been unique in combining British and Western influences with African and Asian lifestyles.",
        "",  # blank line between the examples and the request
        f"Here is the input text: {text}",
        "Please rewrite the input text in Jamaican English (JamE).",
    ]
    return "\n".join(prompt_lines)

def translate_to_dialect(text, dialect):
    """Ask the "gpt-4o" chat model to rewrite ``text`` in the requested dialect.

    Args:
        text: The passage to translate.
        dialect: One of ``'ChcE'``, ``'CollSgE'``, ``'IndE'`` or ``'JamE'``.
            It selects the few-shot prompt builder and is inserted verbatim
            into the system message.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or ``None`` if the request raised. The error is printed and
        not re-raised, so callers must expect ``None``.

    Raises:
        ValueError: If ``dialect`` is not one of the supported codes.
    """
    prompt_builders = {
        'ChcE': create_prompt_chce,
        'CollSgE': create_prompt_collsge,
        'IndE': create_prompt_inde,
        'JamE': create_prompt_jame
    }
    # Resolve the builder outside the try block: an unsupported code is a
    # caller mistake, and reporting it as a per-item translation error would
    # let a whole run finish with nothing but None results.
    if dialect not in prompt_builders:
        supported = ", ".join(prompt_builders)
        raise ValueError(f"Unsupported dialect {dialect!r}; expected one of: {supported}")
    prompt_function = prompt_builders[dialect]

    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": f"You are a language model capable of translating text into {dialect}."},
                {"role": "user", "content": prompt_function(text)}
            ],
            max_tokens=500,
            temperature=0.7
        )
        return response['choices'][0]['message']['content'].strip()
    except Exception as e:
        # Best effort: report the failure and let the caller continue.
        print(f"Error translating text: {e}")
        return None

def process_json(file_path, key_to_translate, dialect, output_dir=None):
    """Translate one field of every sample in a dataset file and save a copy.

    The input JSON must map each logic-type name to an object with a
    ``samples`` list. For every sample, the value under ``key_to_translate``
    is passed to ``translate_to_dialect`` and the result is stored on the
    sample under ``"DIALECT (KEY)"``, built from ``dialect`` and
    ``key_to_translate``. The augmented data is written to ``output_dir``
    under the input file's name with ``.json`` replaced by ``_DIALECT.json``.

    Args:
        file_path: Path of the JSON dataset to read ('/'-separated).
        key_to_translate: Sample key whose text is translated.
        dialect: Dialect code passed to ``translate_to_dialect`` and used in
            the new sample key, the output folder and the output file name.
        output_dir: Directory for the translated file. Defaults to the
            project's "GPT 4o" folder for ``dialect`` on the mounted Drive.

    Returns:
        None. Progress, the output path and a failure summary are printed.
    """
    import os  # local import: this cell's import block does not provide os

    if output_dir is None:
        output_dir = f"/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/GPT 4o/{dialect}"
    output_name = file_path.split('/')[-1].replace('.json', f'_{dialect}.json')
    output_path = os.path.join(output_dir, output_name)

    # Create the destination before any API call is made, so a missing folder
    # cannot discard a finished translation run at save time.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Explicit encoding so reading does not depend on the platform default.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    total_items = sum(len(logic_data['samples']) for logic_data in data.values())
    processed_items = 0
    failed_items = 0

    for logic_type, logic_data in data.items():
        for item in tqdm(logic_data['samples'], desc=f"Translating {logic_type} to {dialect}"):
            translation = translate_to_dialect(item[key_to_translate], dialect)
            if translation is None:
                # translate_to_dialect signals a failed request with None.
                failed_items += 1
            item[f"{dialect} ({key_to_translate})"] = translation

            processed_items += 1
            print(f"Processed item {processed_items}/{total_items}")

    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)
    print(f"Translated file saved to: {output_path}")
    if failed_items:
        print(f"Warning: {failed_items}/{total_items} items failed to translate and were saved as null.")

# Dataset files to translate; every file keeps its text under the "context" key.
logic_bench_files = dict.fromkeys(
    (
        "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/Logic Bench MCQ(480).json",
        "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/Logic Bench YN(500).json",
    ),
    "context",
)

# Dialect codes accepted by translate_to_dialect.
dialects = ["ChcE", "CollSgE", "IndE", "JamE"]

# Translate every dataset into every dialect, one dialect at a time.
for dialect in dialects:
    for file_path, key_to_translate in logic_bench_files.items():
        process_json(file_path, key_to_translate, dialect)

print("Logic Bench translations completed and saved for all dialects.")

In [None]:
!pip install pandas
!pip install tqdm
!pip install value-nlp

In [None]:
# Multi-VALUE translation into AAVE, ChcE, CollSgE, IndE and JamE

import pandas as pd
import json
from multivalue import Dialects
import spacy
import stanza

# One Multi-VALUE dialect object per dialect code, created once and reused for
# every sample by translate_text.
# NOTE(review): this rebinds `dialects`, which the GPT-4o dialect cell defines
# as a list of codes; after this cell runs, that name refers to this dict.
dialects = {
    'AAVE': Dialects.AfricanAmericanVernacular(),
    'ChcE': Dialects.ChicanoDialect(),
    'CollSgE': Dialects.ColloquialSingaporeDialect(),
    'IndE': Dialects.IndianDialect(),
    'JamE': Dialects.JamaicanDialect()
}

def translate_text(text, dialect):
    """Transform ``text`` with the Multi-VALUE object registered for ``dialect``.

    Args:
        text: The passage to transform.
        dialect: Key into the module-level ``dialects`` mapping.

    Returns:
        The result of the dialect object's ``transform`` call. If the lookup
        or the transform raises, the error is printed and the original
        ``text`` is returned unchanged.
    """
    # NOTE(review): because failures fall back to the input text, a failed
    # item cannot be told apart from a translated one in the saved output.
    try:
        transformer = dialects[dialect]
        converted = transformer.transform(text)
    except Exception as err:
        print(f"Error in translation: {err}")
        return text
    return converted

def process_json(file_path, key_to_translate, dialect, output_dir=None):
    """Transform one field of every sample with Multi-VALUE and save a copy.

    The input JSON must map each logic-type name to an object with a
    ``samples`` list. For every sample, the value under ``key_to_translate``
    is passed to ``translate_text`` and the result is stored on the sample
    under ``"DIALECT (KEY)"``, built from ``dialect`` and
    ``key_to_translate``. The augmented data is written to ``output_dir``
    under the input file's name with ``.json`` replaced by
    ``_MVDIALECT.json``.

    Args:
        file_path: Path of the JSON dataset to read ('/'-separated).
        key_to_translate: Sample key whose text is transformed.
        dialect: Dialect code passed to ``translate_text`` and used in the
            new sample key, the output folder and the output file name.
        output_dir: Directory for the translated file. Defaults to the
            project's "Multi-VALUE" folder for ``dialect`` on the mounted
            Drive.

    Returns:
        None. Progress and the output path are printed.
    """
    import os  # local import: this cell's import block does not provide os

    if output_dir is None:
        output_dir = f"/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/Multi-VALUE/{dialect}"
    output_name = file_path.split('/')[-1].replace('.json', f'_MV{dialect}.json')
    output_path = os.path.join(output_dir, output_name)

    # Create the destination before transforming anything, so a missing folder
    # cannot discard a finished run at save time.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Explicit encoding so reading does not depend on the platform default.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    total_items = sum(len(logic_data['samples']) for logic_data in data.values())
    processed_items = 0

    for logic_type, logic_data in data.items():
        for item in logic_data['samples']:
            item[f"{dialect} ({key_to_translate})"] = translate_text(item[key_to_translate], dialect)

            processed_items += 1
            print(f"Processed item {processed_items}/{total_items}")

    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)
    print(f"Translated file saved to: {output_path}")

# Dataset files to translate; every file keeps its text under the "context" key.
logic_bench_files = dict.fromkeys(
    (
        "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/Logic Bench MCQ(480).json",
        "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/Logic Bench YN(500).json",
    ),
    "context",
)

# Run every dataset through every dialect registered in `dialects`.
for dialect in dialects:
    for file_path, key_to_translate in logic_bench_files.items():
        process_json(file_path, key_to_translate, dialect)

print("Logic Bench translations completed and saved for all dialects.")