In [1]:
import pandas as pd
from itertools import combinations


In [2]:
metaphors = pd.read_csv("metaphor_outputs.csv")
rows_per_set = 100
# The source set is derived purely from row position: rows are treated as
# consecutive runs of `rows_per_set`, with even-numbered runs labelled
# 'complex_long' and odd-numbered runs labelled 'simple_short'.
# NOTE(review): this relies on the CSV row order and the default integer
# index produced by read_csv -- confirm against how the file was written.
metaphors['source_set'] = [
    'simple_short' if (row_position // rows_per_set) % 2 else 'complex_long'
    for row_position in metaphors.index
]

metaphors.head()

Unnamed: 0,model_name,input_sentence,generated_output,source_set
0,fine_tuned_t5_moh_x,"The ocean [MASK] against the cliffs, roaring i...","The ocean drifted against the cliffs, roaring ...",complex_long
1,fine_tuned_t5_moh_x,The garden [MASK] with colors as the flowers g...,The garden grew with colors as the flowers gre...,complex_long
2,fine_tuned_t5_moh_x,"The airplane [MASK] through the sky, leaving a...","The airplane drifted through the sky, leaving ...",complex_long
3,fine_tuned_t5_moh_x,"The dog [MASK] through the field, a blur of fu...","The dog crawled through the field, a blur of f...",complex_long
4,fine_tuned_t5_moh_x,The sky [MASK] a mosaic of pink and orange as ...,The sky absorbed a mosaic of pink and orange a...,complex_long


In [3]:
def get_replacement_details(input_sentence, generated_output):
    """Find the word the model generated in place of the ``[MASK]`` placeholder.

    Both sentences are split on whitespace and compared by position, so the
    result is only meaningful when the model substituted exactly one
    whitespace-delimited token for the placeholder.

    Args:
        input_sentence: Sentence containing the ``[MASK]`` placeholder.
        generated_output: Model output produced for that sentence.

    Returns:
        A ``(replaced_word, mask_index)`` tuple, where ``mask_index`` is the
        whitespace-token position of the placeholder in ``input_sentence`` and
        ``replaced_word`` is the token at that position in ``generated_output``.
        Punctuation that was glued to the placeholder in the input (for
        example the ``.`` in ``[MASK].``) is removed from the returned word
        when the output token carries the same punctuation.

    Raises:
        ValueError: If no token of ``input_sentence`` contains ``[MASK]``.
        IndexError: If ``generated_output`` has no token at ``mask_index``.
    """
    mask_token = "[MASK]"
    input_tokens = input_sentence.split()
    output_tokens = generated_output.split()

    # Prefer a standalone "[MASK]" token; otherwise accept the first token
    # that merely contains it, so "[MASK]." or '"[MASK]"' are still located.
    if mask_token in input_tokens:
        mask_index = input_tokens.index(mask_token)
    else:
        mask_index = next(
            (position for position, token in enumerate(input_tokens) if mask_token in token),
            None,
        )
        if mask_index is None:
            raise ValueError(f"no {mask_token} placeholder in input sentence: {input_sentence!r}")

    if mask_index >= len(output_tokens):
        raise IndexError(
            f"generated output has {len(output_tokens)} token(s), "
            f"but the placeholder is at position {mask_index}"
        )

    replaced_word = output_tokens[mask_index]

    # Drop whatever was attached to the placeholder in the input so that
    # "fall." (from "[MASK].") is reported as "fall". Both affixes are empty
    # for a standalone placeholder, which leaves the word untouched.
    prefix, _, suffix = input_tokens[mask_index].partition(mask_token)
    if prefix and replaced_word.startswith(prefix):
        replaced_word = replaced_word[len(prefix):]
    if suffix and replaced_word.endswith(suffix):
        replaced_word = replaced_word[:-len(suffix)]

    return replaced_word, mask_index

# Run the extraction once per row, then transpose the list of
# (word, index) pairs into two parallel sequences, one per new column.
replacement_pairs = [
    get_replacement_details(source_sentence, model_output)
    for source_sentence, model_output in zip(
        metaphors['input_sentence'], metaphors['generated_output']
    )
]
metaphors['replaced_word'], metaphors['replaced_word_index'] = zip(*replacement_pairs)

In [4]:
# Preview the first five rows, now including the two replacement columns.
metaphors.head(5)

Unnamed: 0,model_name,input_sentence,generated_output,source_set,replaced_word,replaced_word_index
0,fine_tuned_t5_moh_x,"The ocean [MASK] against the cliffs, roaring i...","The ocean drifted against the cliffs, roaring ...",complex_long,drifted,2
1,fine_tuned_t5_moh_x,The garden [MASK] with colors as the flowers g...,The garden grew with colors as the flowers gre...,complex_long,grew,2
2,fine_tuned_t5_moh_x,"The airplane [MASK] through the sky, leaving a...","The airplane drifted through the sky, leaving ...",complex_long,drifted,2
3,fine_tuned_t5_moh_x,"The dog [MASK] through the field, a blur of fu...","The dog crawled through the field, a blur of f...",complex_long,crawled,2
4,fine_tuned_t5_moh_x,The sky [MASK] a mosaic of pink and orange as ...,The sky absorbed a mosaic of pink and orange a...,complex_long,absorbed,2


In [5]:
# How often each replacement word occurs, pooled over every model/dataset combination
replaced_word_frequency = metaphors.loc[:, 'replaced_word'].value_counts()
# Ten most frequent replacement words overall.
replaced_word_frequency.iloc[:10]

replaced_word
drifted    91
filled     73
roll       73
swept      67
blazed     59
fly        55
flows      53
clogged    52
came       44
rained     38
Name: count, dtype: int64

In [6]:
# Top 10 replaced words for each model-dataset, with each word's share of
# that model's outputs.
word_counts = (
    metaphors.groupby('model_name')['replaced_word']
    .value_counts()
    .reset_index(name='count')
)
# Fraction of a model's rows accounted for by each word (sums to 1 per model).
word_counts['normalized_count'] = (
    word_counts['count'] / word_counts.groupby('model_name')['count'].transform('sum')
)

# Group by the bare column name rather than a one-element list: with a list,
# recent pandas yields 1-tuple keys and the header prints as "('name',)".
grouped = word_counts.groupby('model_name')
for group_name, group_data in grouped:
    print(f"Model: {group_name}")
    # value_counts already ordered each model's words by descending count,
    # so the first ten rows of a group are its ten most frequent words.
    print(group_data.head(10))

Model: ('fine_tuned_bart_combined_df',)
                    model_name replaced_word  count  normalized_count
0  fine_tuned_bart_combined_df        blazed     18             0.090
1  fine_tuned_bart_combined_df        rained     17             0.085
2  fine_tuned_bart_combined_df        danced     12             0.060
3  fine_tuned_bart_combined_df        filled     12             0.060
4  fine_tuned_bart_combined_df        poured      8             0.040
5  fine_tuned_bart_combined_df       drifted      7             0.035
6  fine_tuned_bart_combined_df         fills      6             0.030
7  fine_tuned_bart_combined_df        glazed      5             0.025
8  fine_tuned_bart_combined_df         swept      5             0.025
9  fine_tuned_bart_combined_df      whistled      5             0.025
Model: ('fine_tuned_bart_moh_x',)
               model_name replaced_word  count  normalized_count
83  fine_tuned_bart_moh_x        blazed     40             0.200
84  fine_tuned_bart_moh_x 

In [7]:
# Unique replaced words per model
unique_words_by_model = metaphors.groupby('model_name')['replaced_word'].nunique()

# Diversity ratio = distinct replacement words / number of outputs, per model.
# Aggregating only the 'replaced_word' column (instead of DataFrameGroupBy.apply
# over the whole frame) avoids the pandas deprecation warning about applying a
# function to the grouping columns, and yields the same ratio.
diversity_by_model = metaphors.groupby('model_name')['replaced_word'].agg(
    lambda words: len(words.unique()) / len(words)
)

diversity_df = diversity_by_model.reset_index(name='diversity_ratio')
count_df = unique_words_by_model.reset_index(name='diversity_count')

# One row per model: ratio and absolute count side by side.
metaphors_with_diversity = pd.merge(diversity_df, count_df, on='model_name', how='left')
metaphors_with_diversity

  diversity_by_model = metaphors.groupby('model_name').apply(


Unnamed: 0,model_name,diversity_ratio,diversity_count
0,fine_tuned_bart_combined_df,0.415,83
1,fine_tuned_bart_moh_x,0.405,81
2,fine_tuned_bart_trofi,0.25,50
3,fine_tuned_bart_vua,0.385,77
4,fine_tuned_t5_combined_df,0.31,62
5,fine_tuned_t5_moh_x,0.195,39
6,fine_tuned_t5_trofi,0.22,44
7,fine_tuned_t5_vua,0.275,55


## Extra Information

Sentences [5:10] of each synthetic dataset, as completed by the raw pre-trained models (no fine-tuning).

* T5:

1. Sentences starting with "The"
    
    Input: The rain [MASK] on the roof, a rhythmic lullaby to the weary.
    
    Output: The rain on the roof, a rhythmic lullaby to the weary.

    Input: The path [MASK] through the forest, inviting travelers into its mystery.
    
    Output: The path echoes through the forest, inviting travelers into its mystery.

    Input: The flame [MASK] in the lantern, a quiet guardian of the night.
    
    Output: The flame in the lantern, a quiet guardian of the night.

    Input: The snow [MASK] the rooftops, wrapping the world in a pristine blanket.
    
    Output: The snow bludgeons the rooftops, wrapping the world in a pristine blanket.

    Input: The riverbank [MASK] with laughter as children played by the water.
    
    Output: The riverbank accompanied with laughter as children played by the water.

2. Simple sentence
    
    Input: Sunlight [MASK] through the branches.
    
    Output: Sunlightsthrough the branches.

    Input: Ants [MASK] across the forest floor.
    
    Output: Ants spit across the forest floor.

    Input: Flames [MASK] in the fireplace.
    
    Output: Flames savour in the fireplace.

    Input: Leaves [MASK] to the ground.
    
    Output: Leaves sulfate to the ground.

    Input: Music [MASK] from the speakers.
    
    Output: Music speaks from the speakers.


* BART:

1. Sentences starting with "The" (also compound sentences)
    
    Input: The rain [MASK] on the roof, a rhythmic lullaby to the weary.
    
    Output: Replace  the rain with the rain to create a metaphor sentence: The rain  falls on the roof, a rhythmic lullaby to the weary.

    Input: The path [MASK] through the forest, inviting travelers into its mystery.
    
    Output: Replace  the path  to create a metaphor sentence: The path  leads through the forest, inviting travelers into its mystery.

    Input: The flame [MASK] in the lantern, a quiet guardian of the night.
    
    Output: Replace  the flame in the lantern with the flame to create a metaphor sentence: The flame  is in the flame, a quiet guardian of the night.

    Input: The snow [MASK] the rooftops, wrapping the world in a pristine blanket.
    
    Output: Replace  the snow to create a metaphor sentence: The snow  falls on the rooftops, wrapping the world in a pristine blanket.

    Input: The riverbank [MASK] with laughter as children played by the water.
    
    Output: Replace  Wind to create a metaphor sentence: The riverbank  was filled with laughter as children played by the water.

2. Simple
    
    Input: Sunlight [MASK] through the branches.
    
    Output: Replace  Wind through the branches to create a metaphor sentence: Sunlight  wind through the trees.

    Input: Ants [MASK] across the forest floor.
    
    Output: Replace  Horace  with Ants to create a metaphor sentence: Ants  walk across the forest floor.

    Input: Flames [MASK] in the fireplace.
    
    Output: Replace  the words to create a metaphor sentence: Flames  burning in the fireplace.

    Input: Leaves [MASK] to the ground.
    
    Output: Replace __________ with _____________ to create a metaphor sentence: Leaves ___________ to the ground.

    Input: Music [MASK] from the speakers.
    
    Output: Replace  the words to create a metaphor sentence: Music  comes from the speakers.

