In [None]:
# Mount Google Drive at /content/drive; every dataset and output path used by
# the later cells lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install pandas
!pip install tqdm
!pip install value-nlp

In [None]:
# AAVE (African American Vernacular English)

import pandas as pd
from multivalue import Dialects
from tqdm import tqdm
import json

# Module-level dialect transformer from the multivalue package;
# translate_text() below delegates to its .transform() method.
aave = Dialects.AfricanAmericanVernacular()

def translate_text(text):
    """Transform ``text`` with the module-level ``aave`` dialect transformer.

    Args:
        text: The sentence or passage to transform. Values that are not
            ``str`` (e.g. the float NaN pandas yields for an empty CSV cell)
            are returned untouched.

    Returns:
        The transformed string, or ``text`` itself when it is not a string or
        when the transformer raises.
    """
    # Non-text cells cannot be dialect-transformed; skip them up front rather
    # than provoking an exception and logging a spurious translation error.
    if not isinstance(text, str):
        return text
    try:
        return aave.transform(text)
    except Exception as e:
        # Best effort: a single bad row must not abort a whole dataset run.
        print(f"Error in translation: {e}")
        return text

def process_csv(file_path, column_to_translate, output_dir):
    """Translate one column of a CSV file and save the result as a new CSV.

    Reads ``file_path`` with pandas, passes every value of
    ``column_to_translate`` through ``translate_text`` and stores the results
    in a new column named ``AAVE (<column_to_translate>)``. The augmented
    table is written into ``output_dir`` under the input's file name with
    ``_MVAAVE`` inserted before the extension.

    Args:
        file_path: Path of the CSV file to read.
        column_to_translate: Name of the column whose values are translated.
        output_dir: Directory for the output file; created if it is missing.

    Raises:
        KeyError: If ``column_to_translate`` is not a column of the CSV.
    """
    import os  # local import keeps this definition self-contained in its cell

    df = pd.read_csv(file_path)
    file_name = os.path.basename(file_path)

    # Add the suffix to the extension only; str.replace would also rewrite a
    # ".csv" that happens to appear earlier in the file name.
    stem, ext = os.path.splitext(file_name)
    output_path = f"{output_dir}/{stem}_MVAAVE{ext}"
    # Create the destination before translating so a missing folder cannot
    # discard the finished translations at save time.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # tqdm already reports position/total, so no per-row print is emitted.
    df[f"AAVE ({column_to_translate})"] = [
        translate_text(text)
        for text in tqdm(
            df[column_to_translate],
            total=len(df),
            desc=f"Processing {file_name}",
        )
    ]

    df.to_csv(output_path, index=False)
    print(f"Translated file saved to: {output_path}")

def process_json(file_path, key_to_translate, output_dir):
    """Translate one key of every sample in a JSON file and save a new JSON.

    Loads ``file_path`` and walks ``data["propositional_logic"]["samples"]``.
    Each sample is shallow-copied and gains an ``AAVE (<key_to_translate>)``
    entry holding ``translate_text`` applied to the sample's
    ``key_to_translate`` value, or ``""`` when the sample lacks that key.
    The updated document is written into ``output_dir`` under the input's
    file name with ``_MVAAVE`` inserted before the extension.

    Args:
        file_path: Path of the JSON file to read.
        key_to_translate: Key inside each sample whose value is translated.
        output_dir: Directory for the output file; created if it is missing.

    Raises:
        KeyError: If the document has no ``propositional_logic`` / ``samples``
            nesting.
    """
    import os  # local import keeps this definition self-contained in its cell

    # Read explicitly as UTF-8 instead of relying on the platform default.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    file_name = os.path.basename(file_path)
    # Add the suffix to the extension only; str.replace would also rewrite a
    # ".json" that happens to appear earlier in the file name.
    stem, ext = os.path.splitext(file_name)
    output_path = f"{output_dir}/{stem}_MVAAVE{ext}"
    # Create the destination before translating so a missing folder cannot
    # discard the finished translations at save time.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    samples = data["propositional_logic"]["samples"]
    translated_key = f"AAVE ({key_to_translate})"
    translated_data = []

    # tqdm already reports position/total, so no per-row print is emitted.
    progress = tqdm(samples, total=len(samples), desc=f"Processing {file_name}")
    for idx, item in enumerate(progress, start=1):
        item_copy = item.copy()
        if key_to_translate in item:
            item_copy[translated_key] = translate_text(item[key_to_translate])
        else:
            # Keep the sample but flag it; the translated field stays empty.
            print(f"Key {key_to_translate} not found in item {idx}")
            item_copy[translated_key] = ""
        translated_data.append(item_copy)

    data["propositional_logic"]["samples"] = translated_data

    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)
    print(f"Translated file saved to: {output_path}")

# Destination folders on Drive for the AAVE outputs.
output_dir_general = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/Multi-VALUE/AAVE"
output_dir_glue = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/Multi-VALUE/AAVE/GLUE + SuperGLUE"

# Source file -> CSV column (or JSON key) that holds the text to translate.
file_translation_map = {
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/Logic Bench YN(500).json": "context",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/MBPP(374).csv": "Original",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/SVAMP(700).csv": "Original"
}

# Same mapping for the GLUE / SuperGLUE files (CSV only).
file_translation_map_glue = {
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/BoolQ (1000).csv": "SAE Passage",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/COPA (500).csv": "Premise",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/MultiRC (1000).csv": "Paragraph",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/SST-2 (1000).csv": "Original Sentence",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/WSC (659).csv": "Original Paragraph"
}

# General datasets: pick the processor from the file extension; any other
# extension is skipped without a message.
for file_path, column_or_key in file_translation_map.items():
    if file_path.endswith(".json"):
        process_json(file_path, column_or_key, output_dir_general)
    elif file_path.endswith(".csv"):
        process_csv(file_path, column_or_key, output_dir_general)

# GLUE / SuperGLUE datasets: only CSV files are processed.
for file_path, column_or_key in file_translation_map_glue.items():
    if not file_path.endswith(".csv"):
        continue
    process_csv(file_path, column_or_key, output_dir_glue)

print("All AAVE translations completed and saved.")

In [None]:
# IndE (Indian English)

import pandas as pd
from multivalue import Dialects
import spacy
import stanza
from tqdm import tqdm

# Module-level dialect transformer from the multivalue package;
# translate_text() below delegates to its .transform() method.
inde = Dialects.IndianDialect()

def translate_text(text):
    """Transform ``text`` with the module-level ``inde`` dialect transformer.

    Args:
        text: The sentence or passage to transform. Values that are not
            ``str`` (e.g. the float NaN pandas yields for an empty CSV cell)
            are returned untouched.

    Returns:
        The transformed string, or ``text`` itself when it is not a string or
        when the transformer raises.
    """
    # Non-text cells cannot be dialect-transformed; skip them up front rather
    # than provoking an exception and logging a spurious translation error.
    if not isinstance(text, str):
        return text
    try:
        return inde.transform(text)
    except Exception as e:
        # Best effort: a single bad row must not abort a whole dataset run.
        print(f"Error in translation: {e}")
        return text

def process_csv(file_path, column_to_translate, output_dir):
    """Translate one column of a CSV file and save the result as a new CSV.

    Reads ``file_path`` with pandas, passes every value of
    ``column_to_translate`` through ``translate_text`` and stores the results
    in a new column named ``IndE (<column_to_translate>)``. The augmented
    table is written into ``output_dir`` under the input's file name with
    ``_MVIndE`` inserted before the extension.

    Args:
        file_path: Path of the CSV file to read.
        column_to_translate: Name of the column whose values are translated.
        output_dir: Directory for the output file; created if it is missing.

    Raises:
        KeyError: If ``column_to_translate`` is not a column of the CSV.
    """
    import os  # local import keeps this definition self-contained in its cell

    df = pd.read_csv(file_path)
    file_name = os.path.basename(file_path)

    # Add the suffix to the extension only; str.replace would also rewrite a
    # ".csv" that happens to appear earlier in the file name.
    stem, ext = os.path.splitext(file_name)
    output_path = f"{output_dir}/{stem}_MVIndE{ext}"
    # Create the destination before translating so a missing folder cannot
    # discard the finished translations at save time.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # tqdm already reports position/total, so no per-row print is emitted.
    df[f"IndE ({column_to_translate})"] = [
        translate_text(text)
        for text in tqdm(
            df[column_to_translate],
            total=len(df),
            desc=f"Processing {file_name}",
        )
    ]

    df.to_csv(output_path, index=False)
    print(f"Translated file saved to: {output_path}")

# Destination folder on Drive for the IndE outputs.
output_dir = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/Multi-VALUE/IndE/GLUE + SuperGLUE"

# Source CSV -> name of the column that holds the text to translate.
file_translation_map = {
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/BoolQ (1000).csv": "SAE Passage",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/COPA (500).csv": "Premise",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/MultiRC (1000).csv": "Paragraph",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/SST-2 (1000).csv": "Original Sentence",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/WSC (659).csv": "Original Paragraph"
}

# Only CSV files are processed; anything else is skipped without a message.
for file_path, column_or_key in file_translation_map.items():
    if not file_path.endswith(".csv"):
        continue
    process_csv(file_path, column_or_key, output_dir)

print("All IndE translations completed and saved.")

In [None]:
# EAAVE (Early African American Vernacular English)

import pandas as pd
from multivalue import Dialects
import spacy
import stanza
from tqdm import tqdm

# Module-level dialect transformer from the multivalue package;
# translate_text() below delegates to its .transform() method.
eaave = Dialects.EarlyAfricanAmericanVernacular()

def translate_text(text):
    """Transform ``text`` with the module-level ``eaave`` dialect transformer.

    Args:
        text: The sentence or passage to transform. Values that are not
            ``str`` (e.g. the float NaN pandas yields for an empty CSV cell)
            are returned untouched.

    Returns:
        The transformed string, or ``text`` itself when it is not a string or
        when the transformer raises.
    """
    # Non-text cells cannot be dialect-transformed; skip them up front rather
    # than provoking an exception and logging a spurious translation error.
    if not isinstance(text, str):
        return text
    try:
        return eaave.transform(text)
    except Exception as e:
        # Best effort: a single bad row must not abort a whole dataset run.
        print(f"Error in translation: {e}")
        return text

def process_csv(file_path, column_to_translate, output_dir):
    """Translate one column of a CSV file and save the result as a new CSV.

    Reads ``file_path`` with pandas, passes every value of
    ``column_to_translate`` through ``translate_text`` and stores the results
    in a new column named ``EAAVE (<column_to_translate>)``. The augmented
    table is written into ``output_dir`` under the input's file name with
    ``_MVEAAVE`` inserted before the extension.

    Args:
        file_path: Path of the CSV file to read.
        column_to_translate: Name of the column whose values are translated.
        output_dir: Directory for the output file; created if it is missing.

    Raises:
        KeyError: If ``column_to_translate`` is not a column of the CSV.
    """
    import os  # local import keeps this definition self-contained in its cell

    df = pd.read_csv(file_path)
    file_name = os.path.basename(file_path)

    # Add the suffix to the extension only; str.replace would also rewrite a
    # ".csv" that happens to appear earlier in the file name.
    stem, ext = os.path.splitext(file_name)
    output_path = f"{output_dir}/{stem}_MVEAAVE{ext}"
    # Create the destination before translating so a missing folder cannot
    # discard the finished translations at save time.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # tqdm already reports position/total, so no per-row print is emitted.
    df[f"EAAVE ({column_to_translate})"] = [
        translate_text(text)
        for text in tqdm(
            df[column_to_translate],
            total=len(df),
            desc=f"Processing {file_name}",
        )
    ]

    df.to_csv(output_path, index=False)
    print(f"Translated file saved to: {output_path}")

# Destination folder on Drive for the EAAVE outputs.
output_dir = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/Multi-VALUE/EAAVE/GLUE + SuperGLUE"

# Source CSV -> name of the column that holds the text to translate.
file_translation_map = {
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/BoolQ (1000).csv": "SAE Passage",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/COPA (500).csv": "Premise",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/MultiRC (1000).csv": "Paragraph",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/SST-2 (1000).csv": "Original Sentence",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/WSC (659).csv": "Original Paragraph"
}

# Only CSV files are processed; anything else is skipped without a message.
for file_path, column_or_key in file_translation_map.items():
    if not file_path.endswith(".csv"):
        continue
    process_csv(file_path, column_or_key, output_dir)

print("All EAAVE translations completed and saved.")

In [None]:
# JamE (Jamaican English)

import pandas as pd
from multivalue import Dialects
import spacy
import stanza
from tqdm import tqdm

# Module-level dialect transformer from the multivalue package;
# translate_text() below delegates to its .transform() method.
jame = Dialects.JamaicanDialect()

def translate_text(text):
    """Transform ``text`` with the module-level ``jame`` dialect transformer.

    Args:
        text: The sentence or passage to transform. Values that are not
            ``str`` (e.g. the float NaN pandas yields for an empty CSV cell)
            are returned untouched.

    Returns:
        The transformed string, or ``text`` itself when it is not a string or
        when the transformer raises.
    """
    # Non-text cells cannot be dialect-transformed; skip them up front rather
    # than provoking an exception and logging a spurious translation error.
    if not isinstance(text, str):
        return text
    try:
        return jame.transform(text)
    except Exception as e:
        # Best effort: a single bad row must not abort a whole dataset run.
        print(f"Error in translation: {e}")
        return text

def process_csv(file_path, column_to_translate, output_dir):
    """Translate one column of a CSV file and save the result as a new CSV.

    Reads ``file_path`` with pandas, passes every value of
    ``column_to_translate`` through ``translate_text`` and stores the results
    in a new column named ``JamE (<column_to_translate>)``. The augmented
    table is written into ``output_dir`` under the input's file name with
    ``_MVJamE`` inserted before the extension.

    Args:
        file_path: Path of the CSV file to read.
        column_to_translate: Name of the column whose values are translated.
        output_dir: Directory for the output file; created if it is missing.

    Raises:
        KeyError: If ``column_to_translate`` is not a column of the CSV.
    """
    import os  # local import keeps this definition self-contained in its cell

    df = pd.read_csv(file_path)
    file_name = os.path.basename(file_path)

    # Add the suffix to the extension only; str.replace would also rewrite a
    # ".csv" that happens to appear earlier in the file name.
    stem, ext = os.path.splitext(file_name)
    output_path = f"{output_dir}/{stem}_MVJamE{ext}"
    # Create the destination before translating so a missing folder cannot
    # discard the finished translations at save time.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # tqdm already reports position/total, so no per-row print is emitted.
    df[f"JamE ({column_to_translate})"] = [
        translate_text(text)
        for text in tqdm(
            df[column_to_translate],
            total=len(df),
            desc=f"Processing {file_name}",
        )
    ]

    df.to_csv(output_path, index=False)
    print(f"Translated file saved to: {output_path}")

# Destination folder on Drive for the JamE outputs.
output_dir = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/Multi-VALUE/JamE/GLUE + SuperGLUE"

# Source CSV -> name of the column that holds the text to translate.
file_translation_map = {
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/BoolQ (1000).csv": "SAE Passage",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/COPA (500).csv": "Premise",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/MultiRC (1000).csv": "Paragraph",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/SST-2 (1000).csv": "Original Sentence",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/WSC (659).csv": "Original Paragraph"
}

# Only CSV files are processed; anything else is skipped without a message.
for file_path, column_or_key in file_translation_map.items():
    if not file_path.endswith(".csv"):
        continue
    process_csv(file_path, column_or_key, output_dir)

print("All JamE translations completed and saved.")

In [None]:
# CollSgE (Colloquial Singapore English)

import pandas as pd
from multivalue import Dialects
import spacy
import stanza
from tqdm import tqdm

# Module-level dialect transformer from the multivalue package;
# translate_text() below delegates to its .transform() method.
collsge = Dialects.ColloquialSingaporeDialect()

def translate_text(text):
    """Transform ``text`` with the module-level ``collsge`` dialect transformer.

    Args:
        text: The sentence or passage to transform. Values that are not
            ``str`` (e.g. the float NaN pandas yields for an empty CSV cell)
            are returned untouched.

    Returns:
        The transformed string, or ``text`` itself when it is not a string or
        when the transformer raises.
    """
    # Non-text cells cannot be dialect-transformed; skip them up front rather
    # than provoking an exception and logging a spurious translation error.
    if not isinstance(text, str):
        return text
    try:
        return collsge.transform(text)
    except Exception as e:
        # Best effort: a single bad row must not abort a whole dataset run.
        print(f"Error in translation: {e}")
        return text

def process_csv(file_path, column_to_translate, output_dir):
    """Translate one column of a CSV file and save the result as a new CSV.

    Reads ``file_path`` with pandas, passes every value of
    ``column_to_translate`` through ``translate_text`` and stores the results
    in a new column named ``CollSgE (<column_to_translate>)``. The augmented
    table is written into ``output_dir`` under the input's file name with
    ``_MVCollSgE`` inserted before the extension.

    Args:
        file_path: Path of the CSV file to read.
        column_to_translate: Name of the column whose values are translated.
        output_dir: Directory for the output file; created if it is missing.

    Raises:
        KeyError: If ``column_to_translate`` is not a column of the CSV.
    """
    import os  # local import keeps this definition self-contained in its cell

    df = pd.read_csv(file_path)
    file_name = os.path.basename(file_path)

    # Add the suffix to the extension only; str.replace would also rewrite a
    # ".csv" that happens to appear earlier in the file name.
    stem, ext = os.path.splitext(file_name)
    output_path = f"{output_dir}/{stem}_MVCollSgE{ext}"
    # Create the destination before translating so a missing folder cannot
    # discard the finished translations at save time.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # tqdm already reports position/total, so no per-row print is emitted.
    df[f"CollSgE ({column_to_translate})"] = [
        translate_text(text)
        for text in tqdm(
            df[column_to_translate],
            total=len(df),
            desc=f"Processing {file_name}",
        )
    ]

    df.to_csv(output_path, index=False)
    print(f"Translated file saved to: {output_path}")

# Destination folder on Drive for the CollSgE outputs.
output_dir = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/Multi-VALUE/CollSgE/GLUE + SuperGLUE"

# Source CSV -> name of the column that holds the text to translate.
file_translation_map = {
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/BoolQ (1000).csv": "SAE Passage",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/COPA (500).csv": "Premise",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/MultiRC (1000).csv": "Paragraph",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/SST-2 (1000).csv": "Original Sentence",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/WSC (659).csv": "Original Paragraph"
}

# Only CSV files are processed; anything else is skipped without a message.
for file_path, column_or_key in file_translation_map.items():
    if not file_path.endswith(".csv"):
        continue
    process_csv(file_path, column_or_key, output_dir)

print("All CollSgE translations completed and saved.")

In [None]:
# ChcE (Chicano English)

import pandas as pd
from multivalue import Dialects
import spacy
import stanza
from tqdm import tqdm

# Module-level dialect transformer from the multivalue package;
# translate_text() below delegates to its .transform() method.
chce = Dialects.ChicanoDialect()

def translate_text(text):
    """Transform ``text`` with the module-level ``chce`` dialect transformer.

    Args:
        text: The sentence or passage to transform. Values that are not
            ``str`` (e.g. the float NaN pandas yields for an empty CSV cell)
            are returned untouched.

    Returns:
        The transformed string, or ``text`` itself when it is not a string or
        when the transformer raises.
    """
    # Non-text cells cannot be dialect-transformed; skip them up front rather
    # than provoking an exception and logging a spurious translation error.
    if not isinstance(text, str):
        return text
    try:
        return chce.transform(text)
    except Exception as e:
        # Best effort: a single bad row must not abort a whole dataset run.
        print(f"Error in translation: {e}")
        return text

def process_csv(file_path, column_to_translate, output_dir):
    """Translate one column of a CSV file and save the result as a new CSV.

    Reads ``file_path`` with pandas, passes every value of
    ``column_to_translate`` through ``translate_text`` and stores the results
    in a new column named ``ChcE (<column_to_translate>)``. The augmented
    table is written into ``output_dir`` under the input's file name with
    ``_MVChcE`` inserted before the extension.

    Args:
        file_path: Path of the CSV file to read.
        column_to_translate: Name of the column whose values are translated.
        output_dir: Directory for the output file; created if it is missing.

    Raises:
        KeyError: If ``column_to_translate`` is not a column of the CSV.
    """
    import os  # local import keeps this definition self-contained in its cell

    df = pd.read_csv(file_path)
    file_name = os.path.basename(file_path)

    # Add the suffix to the extension only; str.replace would also rewrite a
    # ".csv" that happens to appear earlier in the file name.
    stem, ext = os.path.splitext(file_name)
    output_path = f"{output_dir}/{stem}_MVChcE{ext}"
    # Create the destination before translating so a missing folder cannot
    # discard the finished translations at save time.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # tqdm already reports position/total, so no per-row print is emitted.
    df[f"ChcE ({column_to_translate})"] = [
        translate_text(text)
        for text in tqdm(
            df[column_to_translate],
            total=len(df),
            desc=f"Processing {file_name}",
        )
    ]

    df.to_csv(output_path, index=False)
    print(f"Translated file saved to: {output_path}")

# Destination folder on Drive for the ChcE outputs.
output_dir = "/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/Multi-VALUE/ChcE/GLUE + SuperGLUE"

# Source CSV -> name of the column that holds the text to translate.
file_translation_map = {
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/BoolQ (1000).csv": "SAE Passage",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/COPA (500).csv": "Premise",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/MultiRC (1000).csv": "Paragraph",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/SST-2 (1000).csv": "Original Sentence",
    "/content/drive/MyDrive/!!Multi-AAVENUE/Original Datasets/GLUE + SUPERGLUE Datasets/WSC (659).csv": "Original Paragraph"
}

# Only CSV files are processed; anything else is skipped without a message.
for file_path, column_or_key in file_translation_map.items():
    if not file_path.endswith(".csv"):
        continue
    process_csv(file_path, column_or_key, output_dir)

print("All ChcE translations completed and saved.")