In [31]:
from fada.extractors import (
    AlignmentMetric,
    FluencyMetric,
    GrammarMetric
)
from fada.utils import prepare_splits, rename_text_columns

from datasets import load_dataset
import numpy as np
import pandas as pd

In [13]:
def annotate_word_count(dataset, text_column='text', count_column='word_count'):
    """
    Annotate the dataset with the number of whitespace-separated words in
    each text entry.

    Args:
    - dataset (Dataset): Huggingface dataset containing `text_column`.
    - text_column (str): Name of the column holding the text to count.
      Defaults to 'text'.
    - count_column (str): Name of the column the counts are written to.
      Defaults to 'word_count'.

    Returns:
    - Dataset: Annotated dataset with `count_column` freshly computed. An
      already existing `count_column` is replaced, so the function can be
      re-run on its own output.
    """
    # `str.split()` without arguments splits on any run of whitespace and
    # ignores leading/trailing whitespace. Missing (None) entries are counted
    # as 0 words instead of raising AttributeError.
    word_counts = [len((text or "").split()) for text in dataset[text_column]]

    # Drop a stale count column first so that adding the new one below does
    # not collide with it.
    if count_column in dataset.features:
        dataset = dataset.remove_columns([count_column])

    return dataset.add_column(count_column, word_counts)

In [26]:
# Benchmarks to evaluate, one (builder_name, config_name, model_id) triple
# per dataset. The first two entries are handed to `load_dataset`; all three
# are handed to `AlignmentMetric`.
dataset_info = [
    (
        "glue",
        "sst2",
        "distilbert-base-uncased-finetuned-sst-2-english",
    ),
    (
        "ag_news",
        "default",
        "textattack/bert-base-uncased-ag-news",
    ),
    (
        "imdb",
        "plain_text",
        "textattack/bert-base-uncased-imdb",
    ),
    (
        "yahoo_answers_topics",
        "yahoo_answers_topics",
        "fabriceyhc/bert-base-uncased-yahoo_answers_topics",
    ),
]

In [37]:
# Number of leading training examples scored per dataset.
n = 1000

# The fluency and grammar metrics are constructed without any
# dataset-specific arguments, so build them once and reuse them for every
# dataset instead of re-initializing them on each loop iteration.
f_metric = FluencyMetric()
g_metric = GrammarMetric()

results = []
for builder_name, config_name, model_id in dataset_info:

    # init the alignment metric; it is parameterized by the dataset and its
    # classifier, so it has to be rebuilt for every dataset
    a_metric = AlignmentMetric(
        builder_name=builder_name, 
        config_name=config_name,
        model_id=model_id)

    # init dataset
    raw_datasets = load_dataset(builder_name, config_name)
    raw_datasets = prepare_splits(raw_datasets)
    # presumably normalizes the text column name to 'text', which
    # annotate_word_count reads below; verify against fada.utils
    raw_datasets = rename_text_columns(raw_datasets)
    dataset = raw_datasets["train"].select(range(n))

    # annotate metrics; each evaluate call returns the dataset plus the raw
    # scores. Only the annotated dataset is used below (the correlations read
    # the '*_score' columns from it).
    dataset, a_scores = a_metric.evaluate(dataset, annotate_dataset=True)
    dataset, f_scores = f_metric.evaluate(dataset, annotate_dataset=True)
    dataset, g_scores = g_metric.evaluate(dataset, annotate_dataset=True)
    dataset = annotate_word_count(dataset)

    # calculate correlations: [0, 1] picks the off-diagonal entry of the 2x2
    # correlation matrix, i.e. the coefficient between score and word count
    a_word_corr = np.corrcoef(dataset['alignment_score'], dataset['word_count'])[0, 1]
    f_word_corr = np.corrcoef(dataset['fluency_score'], dataset['word_count'])[0, 1]
    g_word_corr = np.corrcoef(dataset['grammar_score'], dataset['word_count'])[0, 1]

    results.append({
        "builder_name": builder_name, 
        "config_name": config_name, 
        "a_word_corr": a_word_corr, 
        "f_word_corr": f_word_corr, 
        "g_word_corr": g_word_corr
    })

Using distilbert-base-uncased-finetuned-sst-2-english to support cleanlab datalabel issues.




Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Using pad_token, but it is not set yet.


  0%|          | 0/63 [00:00<?, ?it/s]

Using textattack/bert-base-uncased-ag-news to support cleanlab datalabel issues.


Flattening the indices:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Using pad_token, but it is not set yet.


  0%|          | 0/63 [00:00<?, ?it/s]

Using textattack/bert-base-uncased-imdb to support cleanlab datalabel issues.


Flattening the indices:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Using pad_token, but it is not set yet.


  0%|          | 0/63 [00:00<?, ?it/s]

Using fabriceyhc/bert-base-uncased-yahoo_answers_topics to support cleanlab datalabel issues.


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Using pad_token, but it is not set yet.


  0%|          | 0/63 [00:00<?, ?it/s]

In [38]:
# Collect the per-dataset records into a table: one row per entry of
# `results`, one column per dict key.
df = pd.DataFrame(results)

In [39]:
# Average each correlation across datasets. `df` also holds the string
# columns 'builder_name' and 'config_name'; numeric_only=True restricts the
# mean to the numeric columns explicitly instead of relying on the deprecated
# implicit dropping (a TypeError in pandas >= 2.0).
df.mean(numeric_only=True)

  df.mean()


a_word_corr    0.019538
f_word_corr   -0.233489
g_word_corr    0.562229
dtype: float64

In [40]:
from datasets import load_from_disk

In [44]:
# Load two datasets previously saved to disk. Judging by the directory names
# these are the original GLUE SST-2 sample and its augmented counterpart.
# NOTE(review): what "taa" and the ".10" suffix denote is not visible here; confirm.
org_dataset = load_from_disk("./fada/fadata/datasets/glue.sst2.original.10")
aug_dataset = load_from_disk("./fada/fadata/datasets/glue.sst2.taa.10")

In [46]:
# Print every example of the original dataset, one record per line, for
# manual inspection.
for row in org_dataset:
    print(row)

{'text': "lilo & stitch '' reach the emotion or timelessness of disney 's great past , or even that of more recent successes such as `` mulan '' or `` tarzan ", 'label': 1, 'idx': 32752}
{'text': 'millions of dollars heaped upon a project of such vast proportions need to reap more rewards than spiffy bluescreen technique and stylish weaponry . ', 'label': 0, 'idx': 20825}
{'text': 'the script was reportedly rewritten a dozen times -- ', 'label': 0, 'idx': 64717}
{'text': "with a sour taste in one 's mouth , and little else ", 'label': 0, 'idx': 43797}
{'text': 'its exquisite acting , ', 'label': 1, 'idx': 35488}
{'text': 'a very moving and revelatory footnote ', 'label': 1, 'idx': 1801}
{'text': 'clichés , ', 'label': 0, 'idx': 37225}
{'text': "by movies ' end you 'll swear you are wet in some places and feel sand creeping in others ", 'label': 1, 'idx': 3008}
{'text': 'wit or innovation ', 'label': 1, 'idx': 48650}
{'text': 'barely realize your mind is being blown . ', 'label': 1, 'id

In [47]:
# Print every example of the augmented dataset, one record per line, for
# side-by-side comparison with the original dataset.
for row in aug_dataset:
    print(row)

{'text': "lilo & stitch '' reach the emotion or timelessness of disney 's great past , or even that of more recent successes such as `` mulan '' or `` tarzan ", 'label': 1}
{'text': 'millions of dollars heaped upon a project of such vast proportions need to reap more rewards than spiffy bluescreen technique and stylish weaponry . ', 'label': 0}
{'text': 'the script was reportedly rewritten a dozen times -- ', 'label': 0}
{'text': "with a sour taste in one 's mouth , and little else ", 'label': 0}
{'text': 'its exquisite acting , ', 'label': 1}
{'text': 'a very moving and revelatory footnote ', 'label': 1}
{'text': 'clichés , ', 'label': 0}
{'text': "by movies ' end you 'll swear you are wet in some places and feel sand creeping in others ", 'label': 1}
{'text': 'wit or innovation ', 'label': 1}
{'text': 'barely realize your mind is being blown . ', 'label': 1}
{'text': 'deviant ', 'label': 0}
{'text': "notwithstanding my problem with the movie 's final half hour , i 'm going to recomme