In [None]:
# Mount Google Drive at /content/drive so the /content/drive/MyDrive/... dataset
# paths used later in this notebook resolve. Only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install openai==0.28
!pip install pandas
!pip install tqdm
!pip install json5

In [None]:
import json
import os

import openai
import pandas as pd
from tqdm import tqdm

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# saved inside the notebook; the original placeholder is kept as the fallback.
openai.api_key = os.environ.get("OPENAI_API_KEY", "API-KEY")

def create_prompt(text):
    """Return the few-shot user prompt asking for an AAVE rewrite of ``text``.

    The prompt lists three example AAVE sentences, then the input text, then
    the rewrite instruction, each on its own line (with one blank line before
    the input text).
    """
    prompt_lines = [
        "Here are examples of African American Vernacular English (AAVE):",
        "1. I was bewildered, but I knew dat it was no gud asking his ass to explain.",
        "2. Cochran pontificated windily for da camera.",
        "3. I don’t want them to follow in my footsteps, as I ain’t go to no college, but I want them to go.",
        "",  # blank separator line between the examples and the input
        f"Here is the input text: {text}",
        "Please rewrite the input text in African American Vernacular English (AAVE).",
    ]
    return "\n".join(prompt_lines)

def translate_to_aave(text, max_retries=3):
    """Rewrite ``text`` in AAVE using a single gpt-4o chat completion.

    Args:
        text: The passage to rewrite. ``None``, NaN (what pandas yields for an
            empty CSV cell) and blank strings are skipped without an API call.
        max_retries: Number of extra attempts made after a transient API
            failure (rate limit, timeout, connection or server error).

    Returns:
        The stripped completion text, or ``None`` when the input was missing
        or the request ultimately failed (the error is printed, not raised).
    """
    import math
    import time

    # Missing values would otherwise be interpolated into the prompt as the
    # literal text "None"/"nan" and "translated" at full API cost.
    if text is None or (isinstance(text, float) and math.isnan(text)):
        return None
    if isinstance(text, str) and not text.strip():
        return None

    # Error types from the legacy (openai==0.28) client that are worth retrying.
    transient_errors = (
        openai.error.RateLimitError,
        openai.error.ServiceUnavailableError,
        openai.error.APIConnectionError,
        openai.error.Timeout,
    )

    for attempt in range(max_retries + 1):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "You are a language model capable of translating text into African American Vernacular English (AAVE)."},
                    {"role": "user", "content": create_prompt(text)}
                ],
                max_tokens=500,
                temperature=0.7
            )
            choice = response['choices'][0]
            # A "length" finish means the rewrite was cut off by max_tokens;
            # surface it instead of silently storing a partial translation.
            if choice.get('finish_reason') == 'length':
                print("Warning: translation was truncated at max_tokens=500.")
            return choice['message']['content'].strip()
        except transient_errors as e:
            if attempt < max_retries:
                # Exponential backoff: 1s, 2s, 4s, ...
                time.sleep(2 ** attempt)
                continue
            print(f"Error translating text: {e}")
            return None
        except Exception as e:
            # Non-transient failures keep the original best-effort behavior.
            print(f"Error translating text: {e}")
            return None
    return None

def process_csv(file_path, column_to_translate):
    """Translate one column of a CSV file into AAVE and save an augmented copy.

    Reads ``file_path`` with pandas, passes every value of
    ``column_to_translate`` to ``translate_to_aave`` and stores the results in
    a new column named ``AAVE (<column_to_translate>)``. The copy is written
    to the fixed output folder below, keeping the input file name with
    ``_AAVE`` inserted before the extension.

    Args:
        file_path: Path of the source CSV file.
        column_to_translate: Name of the column whose values are translated.

    Raises:
        KeyError: If ``column_to_translate`` is not a column of the file.
    """
    import os

    # NOTE(review): every CSV is written to this folder, including the
    # non-GLUE datasets listed in file_translation_map — confirm that is intended.
    output_dir = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/GPT 4o/AAVE/Glue + SuperGlue"
    # Create the destination up front so a missing folder cannot make the
    # final save fail after the whole file has already been translated.
    os.makedirs(output_dir, exist_ok=True)
    # splitext touches only the real extension, unlike str.replace which
    # would rewrite every ".csv" occurrence inside the file name.
    base_name, extension = os.path.splitext(os.path.basename(file_path))
    output_path = os.path.join(output_dir, f"{base_name}_AAVE{extension}")

    df = pd.read_csv(file_path)
    # tqdm already reports progress, so no per-row print is emitted.
    df[f"AAVE ({column_to_translate})"] = [
        translate_to_aave(text)
        for text in tqdm(df[column_to_translate], desc=f"Translating {column_to_translate}")
    ]
    df.to_csv(output_path, index=False)
    print(f"Translated file saved to: {output_path}")

def process_json(file_path, key_to_translate):
    """Translate one field of every sample in a JSON file and save a copy.

    Loads ``file_path``, iterates over ``data["propositional_logic"]["samples"]``
    and adds an ``AAVE (<key_to_translate>)`` entry to a copy of each sample,
    holding the result of ``translate_to_aave`` for that sample's
    ``key_to_translate`` value. The updated document is written to the fixed
    output folder below with ``_AAVE`` inserted before the extension.

    Args:
        file_path: Path of the source JSON file.
        key_to_translate: Key in each sample whose value is translated.

    Raises:
        KeyError: If the expected top-level keys or ``key_to_translate`` are
            missing.
    """
    import os

    output_dir = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/GPT 4o/AAVE"
    # Create the destination up front so a missing folder cannot make the
    # final save fail after every sample has already been translated.
    os.makedirs(output_dir, exist_ok=True)
    # splitext touches only the real extension, unlike str.replace which
    # would rewrite every ".json" occurrence inside the file name.
    base_name, extension = os.path.splitext(os.path.basename(file_path))
    output_path = os.path.join(output_dir, f"{base_name}_AAVE{extension}")

    # Explicit UTF-8 so reading does not depend on the platform's locale.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    samples = data["propositional_logic"]["samples"]

    translated_data = []
    # tqdm already reports progress, so no per-row print is emitted.
    for item in tqdm(samples, desc=f"Translating {key_to_translate}"):
        item_copy = item.copy()  # leave the loaded sample untouched
        item_copy[f"AAVE ({key_to_translate})"] = translate_to_aave(item[key_to_translate])
        translated_data.append(item_copy)

    data["propositional_logic"]["samples"] = translated_data
    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)
    print(f"Translated file saved to: {output_path}")

# Maps each source dataset file to the CSV column (or JSON key) to translate.
file_translation_map = {
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/BoolQ (1000).csv": "SAE Passage",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/COPA (500).csv": "Premise",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/MultiRC (1000).csv": "Paragraph",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/SST-2 (1000).csv": "Original Sentence",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/WSC (659).csv": "Original Paragraph",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/FOLIO(1000).csv": "Premises",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GSM8K(1000).csv": "Original",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/HumanEVAL(164).csv": "Prompt",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/Logic Bench MCQ(480).json": "context",
    # Was ".../vDatasets/..." — a stray-character typo; every other source
    # file lives under "Original Datasets".
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/Logic Bench YN(500).json": "context",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/MBPP(374).csv": "Original",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/SVAMP(700).csv": "Original"
}

# Route every configured file to the processor matching its extension; paths
# with any other extension are skipped.
_PROCESSORS_BY_SUFFIX = ((".csv", process_csv), (".json", process_json))

for file_path, column_or_key in file_translation_map.items():
    processor = next(
        (handler for suffix, handler in _PROCESSORS_BY_SUFFIX if file_path.endswith(suffix)),
        None,
    )
    if processor is not None:
        processor(file_path, column_or_key)

print("All translations completed and saved to /content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/GPT 4o/AAVE.")

In [None]:
import json
import os

import openai
import pandas as pd
from tqdm import tqdm

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# saved inside the notebook; the original placeholder is kept as the fallback.
openai.api_key = os.environ.get("OPENAI_API_KEY", "API-KEY")

def create_prompt(text):
    """Return the few-shot user prompt asking for an AAVE rewrite of ``text``.

    The prompt lists three example AAVE sentences, then the input text, then
    the rewrite instruction, each on its own line (with one blank line before
    the input text).
    """
    prompt_lines = [
        "Here are examples of African American Vernacular English (AAVE):",
        "1. I was bewildered, but I knew dat it was no gud asking his ass to explain.",
        "2. Cochran pontificated windily for da camera.",
        "3. I don’t want them to follow in my footsteps, as I ain’t go to no college, but I want them to go.",
        "",  # blank separator line between the examples and the input
        f"Here is the input text: {text}",
        "Please rewrite the input text in African American Vernacular English (AAVE).",
    ]
    return "\n".join(prompt_lines)

def translate_to_aave(text, max_retries=3):
    """Rewrite ``text`` in AAVE using a single gpt-4o chat completion.

    Args:
        text: The passage to rewrite. ``None``, NaN (what pandas yields for an
            empty CSV cell) and blank strings are skipped without an API call.
        max_retries: Number of extra attempts made after a transient API
            failure (rate limit, timeout, connection or server error).

    Returns:
        The stripped completion text, or ``None`` when the input was missing
        or the request ultimately failed (the error is printed, not raised).
    """
    import math
    import time

    # Missing values would otherwise be interpolated into the prompt as the
    # literal text "None"/"nan" and "translated" at full API cost.
    if text is None or (isinstance(text, float) and math.isnan(text)):
        return None
    if isinstance(text, str) and not text.strip():
        return None

    # Error types from the legacy (openai==0.28) client that are worth retrying.
    transient_errors = (
        openai.error.RateLimitError,
        openai.error.ServiceUnavailableError,
        openai.error.APIConnectionError,
        openai.error.Timeout,
    )

    for attempt in range(max_retries + 1):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "You are a language model capable of translating text into African American Vernacular English (AAVE)."},
                    {"role": "user", "content": create_prompt(text)}
                ],
                max_tokens=500,
                temperature=0.7
            )
            choice = response['choices'][0]
            # A "length" finish means the rewrite was cut off by max_tokens;
            # surface it instead of silently storing a partial translation.
            if choice.get('finish_reason') == 'length':
                print("Warning: translation was truncated at max_tokens=500.")
            return choice['message']['content'].strip()
        except transient_errors as e:
            if attempt < max_retries:
                # Exponential backoff: 1s, 2s, 4s, ...
                time.sleep(2 ** attempt)
                continue
            print(f"Error translating text: {e}")
            return None
        except Exception as e:
            # Non-transient failures keep the original best-effort behavior.
            print(f"Error translating text: {e}")
            return None
    return None

def process_csv(file_path, column_to_translate):
    """Translate one column of a CSV file into AAVE and save an augmented copy.

    Reads ``file_path`` with pandas, passes every value of
    ``column_to_translate`` to ``translate_to_aave`` and stores the results in
    a new column named ``AAVE (<column_to_translate>)``. The copy is written
    to the fixed output folder below, keeping the input file name with
    ``_AAVE`` inserted before the extension.

    Args:
        file_path: Path of the source CSV file.
        column_to_translate: Name of the column whose values are translated.

    Raises:
        KeyError: If ``column_to_translate`` is not a column of the file.
    """
    import os

    output_dir = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/GPT 4o/AAVE"
    # Create the destination up front so a missing folder cannot make the
    # final save fail after the whole file has already been translated.
    os.makedirs(output_dir, exist_ok=True)
    # splitext touches only the real extension, unlike str.replace which
    # would rewrite every ".csv" occurrence inside the file name.
    base_name, extension = os.path.splitext(os.path.basename(file_path))
    output_path = os.path.join(output_dir, f"{base_name}_AAVE{extension}")

    df = pd.read_csv(file_path)
    # tqdm already reports progress, so no per-row print is emitted.
    df[f"AAVE ({column_to_translate})"] = [
        translate_to_aave(text)
        for text in tqdm(df[column_to_translate], desc=f"Translating {column_to_translate}")
    ]
    df.to_csv(output_path, index=False)
    print(f"Translated file saved to: {output_path}")

def process_json(file_path, key_to_translate):
    """Translate one field of every sample in a JSON file and save a copy.

    Loads ``file_path``, iterates over ``data["propositional_logic"]["samples"]``
    and adds an ``AAVE (<key_to_translate>)`` entry to a copy of each sample,
    holding the result of ``translate_to_aave`` for that sample's
    ``key_to_translate`` value. The updated document is written to the fixed
    output folder below with ``_AAVE`` inserted before the extension.

    Args:
        file_path: Path of the source JSON file.
        key_to_translate: Key in each sample whose value is translated.

    Raises:
        KeyError: If the expected top-level keys or ``key_to_translate`` are
            missing.
    """
    import os

    output_dir = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/GPT 4o/AAVE"
    # Create the destination up front so a missing folder cannot make the
    # final save fail after every sample has already been translated.
    os.makedirs(output_dir, exist_ok=True)
    # splitext touches only the real extension, unlike str.replace which
    # would rewrite every ".json" occurrence inside the file name.
    base_name, extension = os.path.splitext(os.path.basename(file_path))
    output_path = os.path.join(output_dir, f"{base_name}_AAVE{extension}")

    # Explicit UTF-8 so reading does not depend on the platform's locale.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    samples = data["propositional_logic"]["samples"]

    translated_data = []
    # tqdm already reports progress, so no per-row print is emitted.
    for item in tqdm(samples, desc=f"Translating {key_to_translate}"):
        item_copy = item.copy()  # leave the loaded sample untouched
        item_copy[f"AAVE ({key_to_translate})"] = translate_to_aave(item[key_to_translate])
        translated_data.append(item_copy)

    data["propositional_logic"]["samples"] = translated_data
    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)
    print(f"Translated file saved to: {output_path}")

# Source files handled by this cell; both are translated from their
# "Original" column.
file_translation_map = {
    f"/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/{dataset_file}": "Original"
    for dataset_file in ("MBPP(374).csv", "SVAMP(700).csv")
}

# Route every configured file to the processor matching its extension; paths
# with any other extension are skipped.
_PROCESSORS_BY_SUFFIX = ((".csv", process_csv), (".json", process_json))

for file_path, column_or_key in file_translation_map.items():
    processor = next(
        (handler for suffix, handler in _PROCESSORS_BY_SUFFIX if file_path.endswith(suffix)),
        None,
    )
    if processor is not None:
        processor(file_path, column_or_key)

print("All translations completed and saved to /content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/GPT 4o/AAVE.")