# MMLU
## Lexical Perturbations

In [1]:
import json
import pandas as pd

def absolute_number_of_changes(changes):
    """Count the entries of ``changes`` whose first two elements differ.

    Args:
        changes: Iterable of indexable pairs; element 0 is compared with
            element 1 (presumably original vs. replacement token - confirm
            against the perturbation JSON).

    Returns:
        int: Number of pairs for which ``pair[0] != pair[1]``.
    """
    changed = 0
    for pair in changes:
        if pair[0] != pair[1]:
            changed += 1
    return changed

def relative_number_of_changes(changes):
    """Return the share of entries in ``changes`` that actually differ.

    Args:
        changes: Sized collection of pairs, as accepted by
            ``absolute_number_of_changes``.

    Returns:
        The changed-pair count divided by ``len(changes)``; ``0`` when
        ``changes`` is empty, so no division by zero can occur.
    """
    total = len(changes)
    return absolute_number_of_changes(changes) / total if total else 0

# Load the MMLU synonym-perturbation file and report the mean change statistics.
file = "../../data/mmlu/lexical/llm_synonym_perturbation.json"
with open(file, "r") as f:
    data = json.load(f)

# Both per-row statistics are derived from the "changes" column in one step.
perturbation_data_df = pd.DataFrame(data["data"]).assign(
    abs_changes=lambda frame: frame["changes"].apply(absolute_number_of_changes),
    rel_changes=lambda frame: frame["changes"].apply(relative_number_of_changes),
)

print(f"Mean absolute number of changes: {perturbation_data_df['abs_changes'].mean()}")
print(f"Mean relative number of changes: {perturbation_data_df['rel_changes'].mean()}")


Mean absolute number of changes: 16.024711579547073
Mean relative number of changes: 0.2483547971825058


## Syntactic Perturbations

In [2]:
from collections import Counter


def n_sentences(row):
    """Count the transformation entries of the question and of all choices.

    Args:
        row: Mapping-like record with ``question_metadata["transformations"]``
            and a ``choices_metadata`` sequence whose items each carry a
            ``"transformations"`` list (presumably one entry per sentence -
            confirm against the perturbation JSON).

    Returns:
        int: Combined length of all those ``transformations`` lists.
    """
    question_total = len(row["question_metadata"]["transformations"])
    choices_total = 0
    for choice in row["choices_metadata"]:
        choices_total += len(choice["transformations"])
    return question_total + choices_total

def n_transformations(row):
    """Count the non-empty transformation entries of the question and choices.

    An entry is counted when it is not equal to the empty string.

    Args:
        row: Mapping-like record with ``question_metadata["transformations"]``
            and a ``choices_metadata`` sequence whose items each carry a
            ``"transformations"`` list.

    Returns:
        int: Number of entries that are not ``""``.
    """
    question_hits = sum(
        1 for label in row["question_metadata"]["transformations"] if label != ""
    )
    choice_hits = sum(
        1
        for choice in row["choices_metadata"]
        for label in choice["transformations"]
        if label != ""
    )
    return question_hits + choice_hits

def count_transformations(row):
    """Tally transformation labels across the question and all choices.

    List-valued entries are represented by their first element; every other
    entry (including the empty string) is used as the label itself.

    Args:
        row: Mapping-like record with ``question_metadata["transformations"]``
            and a ``choices_metadata`` sequence whose items each carry a
            ``"transformations"`` list.

    Returns:
        Counter: Mapping from label to number of occurrences in this row.
    """
    # Gather the question entries first, then the choices, preserving order.
    entries = list(row["question_metadata"]["transformations"])
    for choice in row["choices_metadata"]:
        entries.extend(choice["transformations"])

    labels = [entry[0] if isinstance(entry, list) else entry for entry in entries]
    return Counter(labels)


# Load the MMLU syntactic-perturbation file, derive per-row sentence and
# transformation counts, and print mean statistics plus per-type tallies.
file = "../../data/mmlu/syntactic/syntactic_perturbation.json"
with open(file, "r") as f:
    data = json.load(f)

perturbation_data_df = pd.DataFrame(data["data"])
perturbation_data_df["n_sentences"] = perturbation_data_df.apply(n_sentences, axis=1)
perturbation_data_df["n_transformations"] = perturbation_data_df.apply(n_transformations, axis=1)
perturbation_data_df["rel_transformations"] = perturbation_data_df["n_transformations"] / perturbation_data_df["n_sentences"]


print(f"Mean number of sentences: {perturbation_data_df['n_sentences'].mean()}")
print(f"Mean absolute number of transformations: {perturbation_data_df['n_transformations'].mean()}")
print(f"Mean relative number of transformations: {perturbation_data_df['rel_transformations'].mean()}")
# Count transformations: merge the per-row Counters into one corpus-wide tally.
transformation_counts_list = perturbation_data_df.apply(count_transformations, axis=1)
total_transformation_counts = Counter()
for counter in transformation_counts_list:
    total_transformation_counts.update(counter)
print("Absolute Transformation type counts:")
for transformation_type, count in total_transformation_counts.most_common():
    print(f"  {transformation_type}: {count}")

# Relative Transformation type counts
# The corpus-wide total gets its own name: binding it to `n_sentences` would
# replace the helper function of that name and break any later call to it.
total_sentences = perturbation_data_df["n_sentences"].sum()
relative_transformation_counts = {k: v / total_sentences for k, v in total_transformation_counts.most_common()}
print("Relative Transformation type counts:")
for transformation_type, count in relative_transformation_counts.items():
    print(f"  {transformation_type}: {count}")


Mean number of sentences: 6.862555191568153
Mean absolute number of transformations: 1.224825523429711
Mean relative number of transformations: 0.15013797119275435
Absolute Transformation type counts:
  : 79165
  active_to_passive: 13283
  reverse_wh_movement: 1547
  passive_to_active: 889
  wh_movement: 748
  dative_alternation: 250
  prep_dative_alternation: 242
  extraposition: 131
  reverse_extraposition: 109
Relative Transformation type counts:
  : 0.8215204848283592
  active_to_passive: 0.1378419326719522
  reverse_wh_movement: 0.016053713004856585
  passive_to_active: 0.00922543688514383
  wh_movement: 0.007762234859491096
  dative_alternation: 0.0025943298327176126
  prep_dative_alternation: 0.002511311278070649
  extraposition: 0.001359428832344029
  reverse_extraposition: 0.001131127807064879


# SQuAD
## Lexical Perturbations

In [3]:
# Load the SQuAD synonym-perturbation file and report the mean change statistics.
file = "../../data/squad/lexical/llm_synonym_perturbation.json"
with open(file, "r") as f:
    data = json.load(f)

# Both per-row statistics are derived from the "changes" column in one step.
perturbation_data_df = pd.DataFrame(data["data"]).assign(
    abs_changes=lambda frame: frame["changes"].apply(absolute_number_of_changes),
    rel_changes=lambda frame: frame["changes"].apply(relative_number_of_changes),
)

print(f"Mean absolute number of changes: {perturbation_data_df['abs_changes'].mean()}")
print(f"Mean relative number of changes: {perturbation_data_df['rel_changes'].mean()}")

Mean absolute number of changes: 7.009
Mean relative number of changes: 0.05667592799188895


## Syntactic Perturbations

In [4]:
def n_sentences(row):
    """Count the transformation entries of the question and the context.

    Args:
        row: Mapping-like record with ``question_metadata["transformations"]``
            and ``context_metadata["transformations"]`` lists (presumably one
            entry per sentence - confirm against the perturbation JSON).

    Returns:
        int: Combined length of both ``transformations`` lists.
    """
    total = 0
    for part in ("question_metadata", "context_metadata"):
        total += len(row[part]["transformations"])
    return total

def n_transformations(row):
    """Count the non-empty transformation entries of the question and context.

    An entry is counted when it is not equal to the empty string.

    Args:
        row: Mapping-like record with ``question_metadata["transformations"]``
            and ``context_metadata["transformations"]`` lists.

    Returns:
        int: Number of entries that are not ``""``.
    """
    applied = [
        label
        for part in ("question_metadata", "context_metadata")
        for label in row[part]["transformations"]
        if label != ""
    ]
    return len(applied)

def count_transformations(row):
    """Tally transformation labels across the question and the context.

    List-valued entries are represented by their first element; every other
    entry (including the empty string) is used as the label itself.

    Args:
        row: Mapping-like record with ``question_metadata["transformations"]``
            and ``context_metadata["transformations"]`` lists.

    Returns:
        Counter: Mapping from label to number of occurrences in this row.
    """
    tally = Counter()
    # The question entries are processed before the context entries.
    for part in ("question_metadata", "context_metadata"):
        for entry in row[part]["transformations"]:
            label = entry[0] if isinstance(entry, list) else entry
            tally[label] += 1
    return tally


# Load the SQuAD syntactic-perturbation file, derive per-row sentence and
# transformation counts, and print mean statistics plus per-type tallies.
file = "../../data/squad/syntactic/syntactic_perturbation.json"
with open(file, "r") as f:
    data = json.load(f)

perturbation_data_df = pd.DataFrame(data["data"])
perturbation_data_df["n_sentences"] = perturbation_data_df.apply(n_sentences, axis=1)
perturbation_data_df["n_transformations"] = perturbation_data_df.apply(n_transformations, axis=1)
perturbation_data_df["rel_transformations"] = perturbation_data_df["n_transformations"] / perturbation_data_df["n_sentences"]


print(f"Mean number of sentences: {perturbation_data_df['n_sentences'].mean()}")
print(f"Mean absolute number of transformations: {perturbation_data_df['n_transformations'].mean()}")
print(f"Mean relative number of transformations: {perturbation_data_df['rel_transformations'].mean()}")
# Count transformations: merge the per-row Counters into one corpus-wide tally.
transformation_counts_list = perturbation_data_df.apply(count_transformations, axis=1)
total_transformation_counts = Counter()
for counter in transformation_counts_list:
    total_transformation_counts.update(counter)
print("Absolute Transformation type counts:")
for transformation_type, count in total_transformation_counts.most_common():
    print(f"  {transformation_type}: {count}")

# Relative Transformation type counts
# The corpus-wide total gets its own name: binding it to `n_sentences` would
# replace the helper function of that name and break any later call to it.
total_sentences = perturbation_data_df["n_sentences"].sum()
relative_transformation_counts = {k: v / total_sentences for k, v in total_transformation_counts.most_common()}
print("Relative Transformation type counts:")
for transformation_type, count in relative_transformation_counts.items():
    print(f"  {transformation_type}: {count}")

Mean number of sentences: 5.957
Mean absolute number of transformations: 2.477
Mean relative number of transformations: 0.4275758409890763
Absolute Transformation type counts:
  : 3480
  active_to_passive: 1730
  reverse_wh_movement: 358
  passive_to_active: 239
  wh_movement: 95
  extraposition: 22
  reverse_extraposition: 14
  prep_dative_alternation: 12
  dative_alternation: 7
Relative Transformation type counts:
  : 0.5841866711431929
  active_to_passive: 0.2904146382407252
  reverse_wh_movement: 0.06009736444519053
  passive_to_active: 0.04012086620782273
  wh_movement: 0.015947624643276818
  extraposition: 0.0036931341279167365
  reverse_extraposition: 0.0023501762632197414
  prep_dative_alternation: 0.0020144367970454926
  dative_alternation: 0.0011750881316098707


# Amega
## Lexical Perturbations
### Case Descriptions

In [5]:
# Load the Amega case-description synonym-perturbation file and report the
# mean change statistics.
file = "../../data/amega/lexical/llm_synonym_perturbation_cases.json"
with open(file, "r") as f:
    data = json.load(f)

# Both per-row statistics are derived from the "changes" column in one step.
perturbation_data_df = pd.DataFrame(data["data"]).assign(
    abs_changes=lambda frame: frame["changes"].apply(absolute_number_of_changes),
    rel_changes=lambda frame: frame["changes"].apply(relative_number_of_changes),
)

print(f"Mean absolute number of changes: {perturbation_data_df['abs_changes'].mean()}")
print(f"Mean relative number of changes: {perturbation_data_df['rel_changes'].mean()}")

Mean absolute number of changes: 17.0
Mean relative number of changes: 0.15762677706022332


### Questions

In [6]:
# Load the Amega question synonym-perturbation file and report the mean
# change statistics.
file = "../../data/amega/lexical/llm_synonym_perturbation_questions.json"
with open(file, "r") as f:
    data = json.load(f)

# Both per-row statistics are derived from the "changes" column in one step.
perturbation_data_df = pd.DataFrame(data["data"]).assign(
    abs_changes=lambda frame: frame["changes"].apply(absolute_number_of_changes),
    rel_changes=lambda frame: frame["changes"].apply(relative_number_of_changes),
)

print(f"Mean absolute number of changes: {perturbation_data_df['abs_changes'].mean()}")
print(f"Mean relative number of changes: {perturbation_data_df['rel_changes'].mean()}")

Mean absolute number of changes: 8.147058823529411
Mean relative number of changes: 0.30516476523893843


## Syntactic Perturbations
### Case Descriptions

In [7]:
def n_sentences(row):
    """Return the number of entries in ``row["metadata"]["transformations"]``.

    Args:
        row: Mapping-like record carrying a ``metadata`` mapping with a
            ``"transformations"`` list (presumably one entry per sentence -
            confirm against the perturbation JSON).

    Returns:
        int: Length of that list.
    """
    labels = row["metadata"]["transformations"]
    return len(labels)

def n_transformations(row):
    """Count the non-empty entries of ``row["metadata"]["transformations"]``.

    An entry is counted when it is not equal to the empty string.

    Args:
        row: Mapping-like record carrying a ``metadata`` mapping with a
            ``"transformations"`` list.

    Returns:
        int: Number of entries that are not ``""``.
    """
    return sum(1 for label in row["metadata"]["transformations"] if label != "")

def count_transformations(row):
    """Tally the labels in ``row["metadata"]["transformations"]``.

    List-valued entries are represented by their first element; every other
    entry (including the empty string) is used as the label itself.

    Args:
        row: Mapping-like record carrying a ``metadata`` mapping with a
            ``"transformations"`` list.

    Returns:
        Counter: Mapping from label to number of occurrences in this row.
    """
    labels = [
        entry[0] if isinstance(entry, list) else entry
        for entry in row["metadata"]["transformations"]
    ]
    return Counter(labels)

# Load the Amega case-description syntactic-perturbation file, derive per-row
# sentence and transformation counts, and print mean statistics plus
# per-type tallies.
file = "../../data/amega/syntactic/syntactic_perturbation_cases.json"
with open(file, "r") as f:
    data = json.load(f)

perturbation_data_df = pd.DataFrame(data["data"])
perturbation_data_df["n_sentences"] = perturbation_data_df.apply(n_sentences, axis=1)
perturbation_data_df["n_transformations"] = perturbation_data_df.apply(n_transformations, axis=1)
perturbation_data_df["rel_transformations"] = perturbation_data_df["n_transformations"] / perturbation_data_df["n_sentences"]

print(f"Mean number of sentences: {perturbation_data_df['n_sentences'].mean()}")
print(f"Mean absolute number of transformations: {perturbation_data_df['n_transformations'].mean()}")
print(f"Mean relative number of transformations: {perturbation_data_df['rel_transformations'].mean()}")
# Count transformations: merge the per-row Counters into one corpus-wide tally.
transformation_counts_list = perturbation_data_df.apply(count_transformations, axis=1)
total_transformation_counts = Counter()
for counter in transformation_counts_list:
    total_transformation_counts.update(counter)
print("Absolute Transformation type counts:")
for transformation_type, count in total_transformation_counts.most_common():
    print(f"  {transformation_type}: {count}")

# Relative Transformation type counts
# The corpus-wide total gets its own name: binding it to `n_sentences` would
# replace the helper function of that name and break any later call to it.
total_sentences = perturbation_data_df["n_sentences"].sum()
relative_transformation_counts = {k: v / total_sentences for k, v in total_transformation_counts.most_common()}
print("Relative Transformation type counts:")
for transformation_type, count in relative_transformation_counts.items():
    print(f"  {transformation_type}: {count}")

Mean number of sentences: 6.8
Mean absolute number of transformations: 3.0
Mean relative number of transformations: 0.41313492063492063
Absolute Transformation type counts:
  : 76
  active_to_passive: 60
Relative Transformation type counts:
  : 0.5588235294117647
  active_to_passive: 0.4411764705882353


### Questions

In [8]:
def n_sentences(row):
    """Return how many entries ``row["metadata"]["transformations"]`` holds.

    Args:
        row: Mapping-like record carrying a ``metadata`` mapping with a
            ``"transformations"`` list (presumably one entry per sentence -
            confirm against the perturbation JSON).

    Returns:
        int: Length of that list.
    """
    metadata = row["metadata"]
    return len(metadata["transformations"])

def n_transformations(row):
    """Count the entries of ``row["metadata"]["transformations"]`` that are not ``""``.

    Args:
        row: Mapping-like record carrying a ``metadata`` mapping with a
            ``"transformations"`` list.

    Returns:
        int: Number of entries that differ from the empty string.
    """
    labels = row["metadata"]["transformations"]
    return len([label for label in labels if label != ""])

def count_transformations(row):
    """Tally the labels found in ``row["metadata"]["transformations"]``.

    Args:
        row: Mapping-like record carrying a ``metadata`` mapping with a
            ``"transformations"`` list.

    Returns:
        Counter: Mapping from label to number of occurrences in this row.
    """

    def _label(entry):
        # A list-valued entry is keyed by its first element; anything else
        # (including the empty string) is its own key.
        return entry[0] if isinstance(entry, list) else entry

    return Counter(_label(entry) for entry in row["metadata"]["transformations"])

# Load the Amega question syntactic-perturbation file, derive per-row
# sentence and transformation counts, and print mean statistics plus
# per-type tallies.
file = "../../data/amega/syntactic/syntactic_perturbation_questions.json"
with open(file, "r") as f:
    data = json.load(f)

perturbation_data_df = pd.DataFrame(data["data"])
perturbation_data_df["n_sentences"] = perturbation_data_df.apply(n_sentences, axis=1)
perturbation_data_df["n_transformations"] = perturbation_data_df.apply(n_transformations, axis=1)
perturbation_data_df["rel_transformations"] = perturbation_data_df["n_transformations"] / perturbation_data_df["n_sentences"]

print(f"Mean number of sentences: {perturbation_data_df['n_sentences'].mean()}")
print(f"Mean absolute number of transformations: {perturbation_data_df['n_transformations'].mean()}")
print(f"Mean relative number of transformations: {perturbation_data_df['rel_transformations'].mean()}")
# Count transformations: merge the per-row Counters into one corpus-wide tally.
transformation_counts_list = perturbation_data_df.apply(count_transformations, axis=1)
total_transformation_counts = Counter()
for counter in transformation_counts_list:
    total_transformation_counts.update(counter)
print("Absolute Transformation type counts:")
for transformation_type, count in total_transformation_counts.most_common():
    print(f"  {transformation_type}: {count}")

# Relative Transformation type counts
# The corpus-wide total gets its own name: binding it to `n_sentences` would
# replace the helper function of that name and break any later call to it.
total_sentences = perturbation_data_df["n_sentences"].sum()
relative_transformation_counts = {k: v / total_sentences for k, v in total_transformation_counts.most_common()}
print("Relative Transformation type counts:")
for transformation_type, count in relative_transformation_counts.items():
    print(f"  {transformation_type}: {count}")

Mean number of sentences: 2.0
Mean absolute number of transformations: 0.39705882352941174
Mean relative number of transformations: 0.21446078431372548
Absolute Transformation type counts:
  : 218
  wh_movement: 29
  active_to_passive: 25
Relative Transformation type counts:
  : 0.8014705882352942
  wh_movement: 0.10661764705882353
  active_to_passive: 0.09191176470588236
