In [None]:
# Mount Google Drive in the Colab runtime; every input and output path used
# in this notebook lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install pandas tqdm sacrebleu

import os
import pandas as pd
import json
from tqdm import tqdm
from sacrebleu.metrics import BLEU

In [None]:
# GPT 4o: settings for scoring the GPT 4o translated datasets.

# Dialect labels. Each one is interpolated into input/output paths and into
# the name of the dialect text column, e.g. 'AAVE (Premise)'.
DIALECTS = ['AAVE', 'IndE', 'JamE', 'CollSgE', 'ChcE']
# Dataset names exactly as they are used in file and folder names; note that
# the spacing before the parenthesised number is not uniform.
DATASETS = [
    'WSC (659)', 'SST-2 (1000)', 'MultiRC (1000)', 'COPA (500)', 'BoolQ (1000)',
    'FOLIO(1000)', 'GSM8K(1000)', 'HumanEVAL(164)', 'MBPP(374)', 'SVAMP(700)',
    'Logic Bench MCQ(480)', 'Logic Bench YN(500)'
]
# Rows whose BLEU score (rescaled to the 0-1 range) is strictly below this
# value are written to the "filtered" outputs.
THRESHOLD = 0.7

# Dataset name -> pair of column names. process_dataset reads only the first
# element (the column used as the BLEU reference); both elements are the same
# in every entry. The two Logic Bench datasets have no entry here because
# process_dataset hands them to process_logicbench before the lookup.
COLUMN_MAPPING = {
    'BoolQ (1000)': ('SAE Passage', 'SAE Passage'),
    'COPA (500)': ('Premise', 'Premise'),
    'MultiRC (1000)': ('Paragraph', 'Paragraph'),
    'SST-2 (1000)': ('Original Sentence', 'Original Sentence'),
    'WSC (659)': ('Original Paragraph', 'Original Paragraph'),
    'SVAMP(700)': ('Original', 'Original'),
    'MBPP(374)': ('Original', 'Original'),
    'HumanEVAL(164)': ('Prompt', 'Prompt'),
    'GSM8K(1000)': ('Original', 'Original'),
    'FOLIO(1000)': ('Premises', 'Premises')
}

def create_output_directories(dialect):
    """Create the output folder tree for one dialect of the GPT 4o run.

    A base folder and a 'GLUE + SuperGLUE' sub-folder are created, followed
    by one folder per entry in DATASETS: the five GLUE/SuperGLUE datasets go
    under the sub-folder, everything else directly under the base folder.
    Folders that already exist are left untouched.

    Returns:
        (base_path, glue_path) as strings.
    """
    glue_datasets = ('WSC (659)', 'SST-2 (1000)', 'MultiRC (1000)', 'COPA (500)', 'BoolQ (1000)')

    root = f'/content/drive/MyDrive/!!Multi-AAVENUE/BLEU Score Filtered Datasets/{dialect}'
    glue_root = f'{root}/GLUE + SuperGLUE'
    for folder in (root, glue_root):
        os.makedirs(folder, exist_ok=True)

    for name in DATASETS:
        parent = glue_root if name in glue_datasets else root
        os.makedirs(f'{parent}/{name}', exist_ok=True)

    return root, glue_root

def calculate_bleu_score(reference, candidate):
    """Return the sentence-level BLEU of `candidate` against `reference`.

    The score reported by the BLEU metric is divided by 100. If anything
    goes wrong while scoring, the error is printed and 0.0 is returned.
    """
    try:
        scorer = BLEU(effective_order=True)
        result = scorer.sentence_score(candidate, [reference])
        return result.score / 100
    except Exception as exc:
        print(f"Error calculating BLEU score: {exc}")
        return 0.0

def process_dataset(dialect, dataset, input_path, output_base_path):
    """Score one translated dataset against its source text and save the results.

    Logic Bench datasets are delegated to process_logicbench. For every other
    dataset the first column name stored in COLUMN_MAPPING is used as the BLEU
    reference and the '<dialect> (<that column>)' column as the candidate.
    Three files are written to '<output_base_path>/<dataset>/': all rows with
    their scores, the rows scoring below THRESHOLD, and a short text summary.

    Args:
        dialect: Dialect label used to build the dialect column name.
        dataset: Dataset name; selects the column mapping and output folder.
        input_path: Path to a '.csv' or '.json' input file.
        output_base_path: Directory holding the '<dataset>' output folder;
            the folder is not created here.

    Returns:
        (total_samples, below_threshold, percentage). An empty input yields
        a percentage of 0.0. Any exception is printed and reported as
        (0, 0, 0) instead of being raised.
    """
    try:
        if 'Logic Bench' in dataset:
            return process_logicbench(input_path, dialect, dataset, output_base_path)

        mapping = COLUMN_MAPPING.get(dataset)
        if mapping is None:
            raise ValueError(f"Column mapping not found for dataset: {dataset}")
        # Only the first element of the mapping is needed.
        sae_col = mapping[0]

        dialect_col = f'{dialect} ({sae_col})'
        bleu_col = f'BLEU Score {sae_col}'

        if input_path.endswith('.csv'):
            df = pd.read_csv(input_path)
        elif input_path.endswith('.json'):
            with open(input_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            df = pd.DataFrame(data)
        else:
            raise ValueError("Unsupported file format")

        # Fail with a readable message instead of a bare KeyError.
        missing = [col for col in (sae_col, dialect_col) if col not in df.columns]
        if missing:
            raise ValueError(f"Missing column(s) in {input_path}: {missing}")

        df[sae_col] = df[sae_col].fillna('').astype(str)
        df[dialect_col] = df[dialect_col].fillna('').astype(str)

        # Score pair by pair with a progress bar. Unlike a row-wise apply,
        # this also works when the frame has no rows.
        pairs = tqdm(
            zip(df[sae_col], df[dialect_col]),
            total=len(df),
            desc=f'Calculating BLEU scores for {dialect} - {dataset}',
        )
        scores = [calculate_bleu_score(ref, cand) for ref, cand in pairs]
        df[bleu_col] = pd.Series(scores, index=df.index, dtype='float64')

        # Save full dataset with BLEU scores
        output_path = f'{output_base_path}/{dataset}'
        df.to_csv(f'{output_path}/{dataset}_bleu_scores.csv', index=False)

        # Save filtered dataset
        filtered_df = df[df[bleu_col] < THRESHOLD]
        filtered_df.to_csv(f'{output_path}/{dataset}_filtered_bleu_scores.csv', index=False)

        total_samples = len(df)
        below_threshold = len(filtered_df)
        # Guard against an empty dataset so the summary is still written.
        percentage = (below_threshold / total_samples) * 100 if total_samples else 0.0

        with open(f'{output_path}/{dataset}_percentage.txt', 'w') as f:
            f.write(f'Total Samples: {total_samples}\n')
            f.write(f'Samples Below Threshold (<0.7): {below_threshold}\n')
            f.write(f'Percentage Below Threshold: {percentage:.2f}%')

        return total_samples, below_threshold, percentage

    except Exception as e:
        print(f'\nError in process_dataset for {dialect} - {dataset}: {str(e)}')
        return 0, 0, 0

def process_logicbench(input_path, dialect, dataset, output_base_path):
    """Score a Logic Bench JSON file and write full/filtered JSON outputs.

    The input maps each logic type to an object with a 'samples' list. Every
    sample's 'context' is scored against its '<dialect> (context)' text. MCQ
    samples keep their four choices and the answer; YN samples keep up to
    the first four question/answer pairs. Results go to
    '<output_base_path>/<dataset>/' as a full JSON file, a JSON file with the
    samples scoring below THRESHOLD, and a text summary.

    Args:
        input_path: Path to the Logic Bench JSON file.
        dialect: Dialect label used to build the dialect context key.
        dataset: Dataset name; must contain 'MCQ' or 'YN'.
        output_base_path: Directory holding the '<dataset>' output folder;
            the folder is not created here.

    Returns:
        (total_samples, below_threshold, percentage); the percentage is 0.0
        when the input holds no samples.

    Raises:
        ValueError: If `dataset` names neither the MCQ nor the YN variant.
    """
    is_mcq = 'MCQ' in dataset
    # Reject unknown variants up front; otherwise the loop below would have
    # no record to append for a sample.
    if not is_mcq and 'YN' not in dataset:
        raise ValueError(f"Unsupported Logic Bench variant: {dataset}")

    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    dialect_key = f'{dialect} (context)'
    processed_data = []
    for logic_data in data.values():
        for sample in logic_data['samples']:
            context = sample['context']
            dialect_context = sample[dialect_key]
            score = calculate_bleu_score(context, dialect_context)

            if is_mcq:
                choices = sample['choices']
                processed_sample = {
                    'Context': context,
                    'Choice 1': choices['choice_1'],
                    'Choice 2': choices['choice_2'],
                    'Choice 3': choices['choice_3'],
                    'Choice 4': choices['choice_4'],
                    'Answer': sample['answer'],
                    dialect_key: dialect_context,
                    'BLEU Score Context': score,
                }
            else:
                processed_sample = {
                    'Context': context,
                    dialect_key: dialect_context,
                    'BLEU Score Context': score,
                }
                for i, qa_pair in enumerate(sample['qa_pairs'][:4], 1):
                    processed_sample[f'Question {i}'] = qa_pair['question']
                    processed_sample[f'Answer {i}'] = qa_pair['answer']
            processed_data.append(processed_sample)

    output_path = f'{output_base_path}/{dataset}'
    with open(f'{output_path}/{dataset}_bleu_scores.json', 'w', encoding='utf-8') as f:
        json.dump(processed_data, f, ensure_ascii=False, indent=2)

    filtered_data = [sample for sample in processed_data if sample['BLEU Score Context'] < THRESHOLD]
    with open(f'{output_path}/{dataset}_filtered_bleu_scores.json', 'w', encoding='utf-8') as f:
        json.dump(filtered_data, f, ensure_ascii=False, indent=2)

    total_samples = len(processed_data)
    below_threshold = len(filtered_data)
    # An input without samples reports 0% instead of dividing by zero.
    percentage = (below_threshold / total_samples) * 100 if total_samples else 0.0

    with open(f'{output_path}/{dataset}_percentage.txt', 'w') as f:
        f.write(f'Total Samples: {total_samples}\n')
        f.write(f'Samples Below Threshold (<0.7): {below_threshold}\n')
        f.write(f'Percentage Below Threshold: {percentage:.2f}%')

    return total_samples, below_threshold, percentage

def main():
    """Run BLEU scoring for every dialect/dataset pair of the GPT 4o translations.

    For each dialect the output folders are created first. Each dataset is
    then passed to process_dataset with its input file and output folder;
    a summary line is printed when at least one sample was processed, and
    any exception is printed without stopping the loop.
    """
    glue_datasets = ('WSC (659)', 'SST-2 (1000)', 'MultiRC (1000)', 'COPA (500)', 'BoolQ (1000)')

    for dialect in tqdm(DIALECTS, desc='Processing dialects'):
        base_dir, glue_dir = create_output_directories(dialect)

        for dataset in tqdm(DATASETS, desc=f'Processing datasets for {dialect}'):
            if dataset not in glue_datasets:
                # Logic Bench inputs are JSON; everything else is CSV.
                extension = "json" if "Logic Bench" in dataset else "csv"
                source_file = f'/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/GPT 4o/{dialect}/{dataset}_{dialect}.{extension}'
                target_dir = base_dir
            else:
                source_file = f'/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/GPT 4o/{dialect}/Glue + SuperGlue/{dataset}_{dialect}.csv'
                target_dir = glue_dir

            try:
                stats = process_dataset(dialect, dataset, source_file, target_dir)
                total, below, percentage = stats
                if total > 0:
                    print(f'\nProcessed {dataset} for {dialect}:')
                    print(f'Total: {total}, Below threshold: {below}, Percentage: {percentage:.2f}%')
            except Exception as err:
                print(f'\nError processing {dataset} for {dialect}: {str(err)}')


if __name__ == '__main__':
    main()

In [None]:
# Multi-VALUE: settings for scoring the Multi-VALUE translated datasets.
# These names replace the ones defined by the GPT 4o cell when this cell runs.

# Dialect labels. Each one is interpolated into input/output paths and into
# the name of the dialect text column, e.g. 'AAVE (Premise)'.
DIALECTS = ['AAVE', 'IndE', 'JamE', 'CollSgE', 'ChcE']
# Dataset names exactly as they are used in file and folder names; note that
# the spacing before the parenthesised number is not uniform.
DATASETS = [
    'FOLIO(1000)', 'SVAMP(700)', 'MBPP(374)', 'Logic Bench YN(500)',
    'Logic Bench MCQ(480)', 'HumanEVAL(164)', 'GSM8K(1000)',
    'BoolQ (1000)', 'COPA (500)', 'MultiRC (1000)', 'SST-2 (1000)', 'WSC (659)'
]
# Rows whose BLEU score (rescaled to the 0-1 range) is strictly below this
# value are written to the "filtered" outputs.
THRESHOLD = 0.7

# Dataset name -> pair of column names. process_dataset reads only the first
# element (the column used as the BLEU reference); both elements are the same
# in every entry. The two Logic Bench datasets have no entry here because
# process_dataset hands them to process_logicbench before the lookup.
COLUMN_MAPPING = {
    'BoolQ (1000)': ('SAE Passage', 'SAE Passage'),
    'COPA (500)': ('Premise', 'Premise'),
    'MultiRC (1000)': ('Paragraph', 'Paragraph'),
    'SST-2 (1000)': ('Original Sentence', 'Original Sentence'),
    'WSC (659)': ('Original Paragraph', 'Original Paragraph'),
    'SVAMP(700)': ('Original', 'Original'),
    'MBPP(374)': ('Original', 'Original'),
    'HumanEVAL(164)': ('Prompt', 'Prompt'),
    'GSM8K(1000)': ('Original', 'Original'),
    'FOLIO(1000)': ('Premises', 'Premises')
}

def create_output_directories(dialect):
    """Create the Multi-VALUE output folder tree for one dialect.

    Makes the dialect's base folder, its 'GLUE + SuperGLUE' sub-folder and
    one folder per entry in DATASETS (GLUE/SuperGLUE datasets inside the
    sub-folder, all others inside the base folder). Existing folders are
    kept as they are.

    Returns:
        (base_path, glue_path) as strings.
    """
    base_dir = f'/content/drive/MyDrive/!!Multi-AAVENUE/BLEU Score Filtered Datasets/Multi-VALUE/{dialect}'
    os.makedirs(base_dir, exist_ok=True)
    glue_dir = f'{base_dir}/GLUE + SuperGLUE'
    os.makedirs(glue_dir, exist_ok=True)

    glue_members = ('BoolQ (1000)', 'COPA (500)', 'MultiRC (1000)', 'SST-2 (1000)', 'WSC (659)')
    dataset_dirs = [
        f'{glue_dir}/{name}' if name in glue_members else f'{base_dir}/{name}'
        for name in DATASETS
    ]
    for folder in dataset_dirs:
        os.makedirs(folder, exist_ok=True)

    return base_dir, glue_dir

def calculate_bleu_score(reference, candidate):
    """Score `candidate` against the single `reference` with sentence BLEU.

    Returns the metric's score divided by 100, or 0.0 (after printing the
    error) when scoring raises.
    """
    try:
        metric = BLEU(effective_order=True)
        references = [reference]
        return metric.sentence_score(candidate, references).score / 100
    except Exception as err:
        message = str(err)
        print(f"Error calculating BLEU score: {message}")
        return 0.0

def process_dataset(dialect, dataset, input_path, output_base_path):
    """Score one Multi-VALUE dataset against its source text and save the results.

    Logic Bench datasets are delegated to process_logicbench. For every other
    dataset the first column name stored in COLUMN_MAPPING is used as the BLEU
    reference and the '<dialect> (<that column>)' column as the candidate.
    Three files are written to '<output_base_path>/<dataset>/': all rows with
    their scores, the rows scoring below THRESHOLD, and a short text summary.

    Args:
        dialect: Dialect label used to build the dialect column name.
        dataset: Dataset name; selects the column mapping and output folder.
        input_path: Path to a '.csv' or '.json' input file.
        output_base_path: Directory holding the '<dataset>' output folder;
            the folder is not created here.

    Returns:
        (total_samples, below_threshold, percentage). An empty input yields
        a percentage of 0.0. Any exception is printed and reported as
        (0, 0, 0) instead of being raised.
    """
    try:
        if 'Logic Bench' in dataset:
            return process_logicbench(input_path, dialect, dataset, output_base_path)

        mapping = COLUMN_MAPPING.get(dataset)
        if mapping is None:
            raise ValueError(f"Column mapping not found for dataset: {dataset}")
        # Only the first element of the mapping is needed.
        sae_col = mapping[0]

        # The dialect column name carries no 'MV' prefix.
        dialect_col = f'{dialect} ({sae_col})'
        bleu_col = f'BLEU Score {sae_col}'

        # Read dataset (CSV or JSON)
        if input_path.endswith('.csv'):
            df = pd.read_csv(input_path)
        elif input_path.endswith('.json'):
            with open(input_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            df = pd.DataFrame(data)
        else:
            raise ValueError("Unsupported file format")

        # Fail with a readable message instead of a bare KeyError.
        missing = [col for col in (sae_col, dialect_col) if col not in df.columns]
        if missing:
            raise ValueError(f"Missing column(s) in {input_path}: {missing}")

        df[sae_col] = df[sae_col].fillna('').astype(str)
        df[dialect_col] = df[dialect_col].fillna('').astype(str)

        # Score pair by pair with a progress bar. Unlike a row-wise apply,
        # this also works when the frame has no rows.
        pairs = tqdm(
            zip(df[sae_col], df[dialect_col]),
            total=len(df),
            desc=f'Calculating BLEU scores for {dialect} - {dataset}',
        )
        scores = [calculate_bleu_score(ref, cand) for ref, cand in pairs]
        df[bleu_col] = pd.Series(scores, index=df.index, dtype='float64')

        # Save full dataset with BLEU scores
        output_path = f'{output_base_path}/{dataset}'
        df.to_csv(f'{output_path}/{dataset}_bleu_scores.csv', index=False)

        # Save filtered dataset
        filtered_df = df[df[bleu_col] < THRESHOLD]
        filtered_df.to_csv(f'{output_path}/{dataset}_filtered_bleu_scores.csv', index=False)

        total_samples = len(df)
        below_threshold = len(filtered_df)
        # Guard against an empty dataset so the summary is still written.
        percentage = (below_threshold / total_samples) * 100 if total_samples else 0.0

        with open(f'{output_path}/{dataset}_percentage.txt', 'w') as f:
            f.write(f'Total Samples: {total_samples}\n')
            f.write(f'Samples Below Threshold (<0.7): {below_threshold}\n')
            f.write(f'Percentage Below Threshold: {percentage:.2f}%')

        return total_samples, below_threshold, percentage

    except Exception as e:
        print(f'\nError in process_dataset for {dialect} - {dataset}: {str(e)}')
        return 0, 0, 0

def process_logicbench(input_path, dialect, dataset, output_base_path):
    """Score a Logic Bench JSON file and write full/filtered CSV outputs.

    The input maps each logic type to an object with an 'axiom' and a
    'samples' list. Every sample's 'context' is scored against its
    '<dialect> (context)' text. Results go to '<output_base_path>/<dataset>/'
    as a CSV with all rows, a CSV with the rows scoring below THRESHOLD, and
    a text summary.

    Args:
        input_path: Path to the Logic Bench JSON file.
        dialect: Dialect label used to build the dialect context key.
        dataset: Dataset name; used for the output folder and file names.
        output_base_path: Directory holding the '<dataset>' output folder;
            the folder is not created here.

    Returns:
        (total_samples, below_threshold, percentage); the percentage is 0.0
        when the input holds no samples.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # The dialect context key carries no 'MV' prefix.
    dialect_key = f'{dialect} (context)'
    # Fixed column list so an input without samples still yields a frame
    # (and CSV header) with the expected columns.
    columns = ['Logic Type', 'Axiom', 'Context', dialect_key, 'BLEU Score Context']

    rows = []
    for logic_type, logic_data in data.items():
        axiom = logic_data['axiom']
        for sample in logic_data['samples']:
            context = sample['context']
            dialect_context = sample[dialect_key]
            rows.append({
                'Logic Type': logic_type,
                'Axiom': axiom,
                'Context': context,
                dialect_key: dialect_context,
                'BLEU Score Context': calculate_bleu_score(context, dialect_context),
            })

    kept_rows = [row for row in rows if row['BLEU Score Context'] < THRESHOLD]

    df = pd.DataFrame(rows, columns=columns)
    filtered_df = pd.DataFrame(kept_rows, columns=columns)

    output_path = f'{output_base_path}/{dataset}'
    df.to_csv(f'{output_path}/{dataset}_bleu_scores.csv', index=False)
    filtered_df.to_csv(f'{output_path}/{dataset}_filtered_bleu_scores.csv', index=False)

    total_samples = len(df)
    below_threshold = len(filtered_df)
    # An input without samples reports 0% instead of dividing by zero.
    percentage = (below_threshold / total_samples) * 100 if total_samples else 0.0

    with open(f'{output_path}/{dataset}_percentage.txt', 'w') as f:
        f.write(f'Total Samples: {total_samples}\n')
        f.write(f'Samples Below Threshold (<0.7): {below_threshold}\n')
        f.write(f'Percentage Below Threshold: {percentage:.2f}%')

    return total_samples, below_threshold, percentage

def main():
    """Run BLEU scoring for every dialect/dataset pair of the Multi-VALUE translations.

    Output folders are created per dialect, then each dataset is handed to
    process_dataset together with its input file and output folder. A
    summary is printed when at least one sample was processed; exceptions
    are printed and the loop moves on.
    """
    glue_datasets = ('BoolQ (1000)', 'COPA (500)', 'MultiRC (1000)', 'SST-2 (1000)', 'WSC (659)')

    for dialect in tqdm(DIALECTS, desc='Processing dialects'):
        base_dir, glue_dir = create_output_directories(dialect)

        for dataset in tqdm(DATASETS, desc=f'Processing datasets for {dialect}'):
            is_glue = dataset in glue_datasets
            if is_glue:
                source_file = f'/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/Multi-VALUE/{dialect}/GLUE + SuperGLUE/{dataset}_MV{dialect}.csv'
            else:
                # Logic Bench inputs are JSON; everything else is CSV.
                suffix = "json" if "Logic Bench" in dataset else "csv"
                source_file = f'/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/Multi-VALUE/{dialect}/{dataset}_MV{dialect}.{suffix}'
            target_dir = glue_dir if is_glue else base_dir

            try:
                total, below, percentage = process_dataset(dialect, dataset, source_file, target_dir)
                if total > 0:
                    print(f'\nProcessed {dataset} for {dialect}:')
                    print(f'Total: {total}, Below threshold: {below}, Percentage: {percentage:.2f}%')
            except Exception as err:
                print(f'\nError processing {dataset} for {dialect}: {str(err)}')


if __name__ == '__main__':
    main()

In [None]:
#specifically for AAVE Logic Bench YN
import os
import json
from tqdm import tqdm
from sacrebleu.metrics import BLEU
import pandas as pd

# Define constants for this one-off run (a single dialect and dataset).
DIALECT = 'AAVE'
DATASET = 'Logic Bench YN(500)'
# Samples scoring strictly below this BLEU value (0-1 range) are kept in the
# filtered output.
THRESHOLD = 0.7
INPUT_PATH = '/content/drive/MyDrive/!!Multi-AAVENUE/GPT 4o + Multi-VALUE Translated Datasets/Multi-VALUE/AAVE/Logic Bench YN(500)_MVAAVE.json'
# Ends with '/', so the f'{OUTPUT_PATH}/{DATASET}...' paths built later in
# this cell contain a doubled slash.
OUTPUT_PATH = '/content/drive/MyDrive/!!Multi-AAVENUE/BLEU Score Filtered Datasets/Multi-VALUE/AAVE/Logic Bench YN(500)/'

# Make sure the output folder exists before any file is written to it.
os.makedirs(OUTPUT_PATH, exist_ok=True)

def calculate_bleu_score(reference, candidate):
    """Compute sentence BLEU for `candidate` given one `reference`.

    The metric's score is divided by 100 before it is returned. On any
    exception the error is printed and 0.0 is returned instead.
    """
    try:
        return BLEU(effective_order=True).sentence_score(candidate, [reference]).score / 100
    except Exception as problem:
        print(f"Error calculating BLEU score: {problem}")
        return 0.0

# Load the Logic Bench file: a mapping from logic type to an object that may
# hold an 'axiom' and a 'samples' list.
with open(INPUT_PATH, 'r', encoding='utf-8') as f:
    data = json.load(f)

dialect_key = f'{DIALECT} (context)'
# Fixed column list so the frame has the expected columns even when the
# input holds no samples; without it the score lookup below raises KeyError
# before the empty-input guard on the percentage is reached.
columns = ['Logic Type', 'Axiom', 'Context', dialect_key, 'BLEU Score Context']

rows = []

# Score every sample's context against its dialect version; missing fields
# default to empty strings.
for logic_type, logic_data in data.items():
    axiom = logic_data.get('axiom', '')
    for sample in tqdm(logic_data.get('samples', []), desc=f'Processing samples for {logic_type}'):
        context = sample.get('context', '')
        dialect_context = sample.get(dialect_key, '')
        bleu_score = calculate_bleu_score(context, dialect_context)
        rows.append({
            'Logic Type': logic_type,
            'Axiom': axiom,
            'Context': context,
            dialect_key: dialect_context,
            'BLEU Score Context': bleu_score
        })

df = pd.DataFrame(rows, columns=columns)

# Full results.
df.to_csv(f'{OUTPUT_PATH}/{DATASET}_bleu_scores.csv', index=False)

# Only the samples scoring below the threshold.
filtered_df = df[df['BLEU Score Context'] < THRESHOLD]

filtered_df.to_csv(f'{OUTPUT_PATH}/{DATASET}_filtered_bleu_scores.csv', index=False)

total_samples = len(df)
below_threshold = len(filtered_df)
percentage = (below_threshold / total_samples) * 100 if total_samples > 0 else 0.0

with open(f'{OUTPUT_PATH}/{DATASET}_percentage.txt', 'w') as f:
    f.write(f'Total Samples: {total_samples}\n')
    f.write(f'Samples Below Threshold (<0.7): {below_threshold}\n')
    f.write(f'Percentage Below Threshold: {percentage:.2f}%\n')

print(f'Processed {DATASET} for {DIALECT}:')
print(f'Total Samples: {total_samples}')
print(f'Samples Below Threshold: {below_threshold}')
print(f'Percentage Below Threshold: {percentage:.2f}%')