### Modelling distortions in narratives

An (overfitted) transformer-based model such as GPT-2 can memorise its training data. Here we explore distortions in the resulting model when trained on narratives, comparing the results to Raykov et al. (2023).

Local:

In [None]:
!pip install simpletransformers torch wordfreq

Colab:

In [None]:
!pip install transformers torch simpletransformers evaluate accelerate

In [None]:
import sys
sys.path.append('../scripts')

import pandas as pd
from random import shuffle
import random
from gpt import GPT
from story_utils import *
import pickle
import matplotlib.pyplot as plt
import numpy as np
import gc
import torch
import glob
from wordfreq import word_frequency
import matplotlib.pyplot as plt

random.seed(1)

In [None]:
def prepare_data(num_typical=100, num_char=50, num_variants=20):
    """Build the typical, truncated and extended story sets.

    Stories are loaded with ``get_stories()`` and shuffled in place with
    ``random.shuffle``. Three disjoint, consecutive slices of the shuffled
    list are then used:

    * the first ``num_typical`` stories are kept unchanged ("typical");
    * the next ``num_variants`` stories lose their last ``num_char``
      characters ("atypical short");
    * the following ``num_variants`` stories get the first ``num_char``
      characters of ``get_random_sentence(stories)`` appended
      ("atypical long").

    Every returned story is terminated with the marker ``" END "``. Each
    atypical story is also printed, followed by a separator line.

    Args:
        num_typical: Number of unmodified stories.
        num_char: Number of characters removed from / added to a variant.
        num_variants: Number of stories in each atypical category.

    Returns:
        A tuple ``(typical, atypical_short, atypical_long)`` of lists of
        strings. Slices that run past the end of the story list are simply
        shorter (standard slice semantics).
    """
    stories = get_stories()
    shuffle(stories)

    short_stop = num_typical + num_variants
    long_stop = short_stop + num_variants

    typical = [story + " END " for story in stories[:num_typical]]

    # Atypically short stories are num_char characters shorter.
    atypical_short = []
    for story in stories[num_typical:short_stop]:
        # ``story[0:-num_char]`` is '' when num_char == 0 (because -0 == 0),
        # so a zero-length cut has to keep the story whole explicitly.
        truncated = story[0:-num_char] if num_char else story
        story = truncated + " END "
        print(story)
        print("...........")
        atypical_short.append(story)

    # Atypically long stories are num_char characters longer; the additional
    # characters come from get_random_sentence(stories) (per the original
    # note, text taken from another story -- helper not visible here).
    atypical_long = []
    for story in stories[short_stop:long_stop]:
        story = story + get_random_sentence(stories)[0:num_char] + " END "
        print(story)
        print("...........")
        atypical_long.append(story)

    return typical, atypical_short, atypical_long

In [None]:
def compute_length_difference(stories):
    """Return the mean character-length difference (output minus input).

    Args:
        stories: Iterable of pairs where element 0 is the original story
            text and element 1 is the generated text.

    Returns:
        The mean of ``len(pair[1]) - len(pair[0])`` over all pairs, or NaN
        when ``stories`` is empty (a category can end up empty when no
        generation contained the end marker).

    Each individual difference is printed as it is computed.
    """
    differences = []
    for story in stories:
        difference = len(story[1]) - len(story[0])
        differences.append(difference)
        print(difference)

    if not differences:
        # No pairs: report "no data" rather than raising ZeroDivisionError.
        return float("nan")
    return sum(differences) / len(differences)


In [None]:
def _collect_generations(model, stories, n_prompt_words=10, end_marker='END'):
    """Prompt ``model`` with each story's opening words; keep finished outputs.

    A generation is kept only if it contains ``end_marker``; it is then cut
    just after the first occurrence of the marker. Returns a list of
    ``[original_story, generated_text]`` pairs.
    """
    pairs = []
    for s in stories:
        start = " ".join(s.split()[0:n_prompt_words])
        gen = model.continue_input(start)
        if end_marker in gen:
            gen = gen[0:gen.index(end_marker) + len(end_marker)]
            print(f"START: \n{start}")
            print(f"GENERATED: \n{gen}")
            print(f"TRUE: \n{s}")
            pairs.append([s, gen])
    return pairs


def test_model(save_name, typical, atypical_short, atypical_long):
    """Generate continuations for each story category and plot length drift.

    Loads the fine-tuned model from ``outputs_stories``, prompts it with the
    first 10 whitespace-separated words of each story, and records
    ``[story, generation]`` pairs for generations that contain ``END``.
    Only the first ``len(atypical_short)`` typical stories are evaluated.

    Side effects:
        * pickles the results to ``save_name + '.pkl'``;
        * saves a bar chart of the mean length difference per category to
          ``save_name`` and shows it;
        * creates the directory of ``save_name`` if it does not exist.

    The results are pickled *before* plotting so that a plotting problem
    cannot discard the generations.

    Args:
        save_name: Path of the figure file to write.
        typical: Unmodified stories.
        atypical_short: Truncated stories.
        atypical_long: Extended stories.

    Returns:
        Dict with keys ``'typical'``, ``'atypical_short'`` and
        ``'atypical_long'``, each mapping to a list of
        ``[story, generation]`` pairs.
    """
    import os

    model = GPT(base_model='outputs_stories', base_model_name='gpt2')

    results_dict = {
        'typical': _collect_generations(model, typical[0:len(atypical_short)]),
        'atypical_short': _collect_generations(model, atypical_short),
        'atypical_long': _collect_generations(model, atypical_long),
    }

    out_dir = os.path.dirname(save_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(save_name + '.pkl', 'wb') as handle:
        pickle.dump(results_dict, handle)

    def _mean_difference(pairs):
        # A category is empty when no generation contained the end marker;
        # use NaN there instead of dividing by zero.
        return compute_length_difference(pairs) if pairs else float('nan')

    # Calculate the average length difference for each category
    typical_difference = _mean_difference(results_dict['typical'])
    atypical_short_difference = _mean_difference(results_dict['atypical_short'])
    atypical_long_difference = _mean_difference(results_dict['atypical_long'])

    # Plotting the results
    categories = ['Atypical Short', 'Typical', 'Atypical Long']
    differences = [atypical_short_difference, typical_difference, atypical_long_difference]

    plt.figure()
    plt.bar(categories, differences)
    plt.xlabel('Story Category')
    plt.ylabel('Average Length Difference (Output - Input)')
    plt.title('Length Difference by Story Category')
    plt.axhline(y=0, color='black')
    plt.savefig(save_name)
    plt.show()

    return results_dict

In [None]:
def train_model_script(name_or_path='gpt2-medium',
                       num_epochs=3,
                       output_dir='outputs_stories',
                       save_steps=100,
                       lr=5e-05):
    """Fine-tune a language model by running ``./run_clm.py`` in a shell.

    This function only works inside IPython/Jupyter because it uses the
    ``!`` shell escape; the ``{...}`` fields are interpolated from Python.

    Args:
        name_or_path: Passed as ``--model_name_or_path``.
        num_epochs: Passed as ``--num_train_epochs``.
        output_dir: Passed as ``--output_dir`` (used with
            ``--overwrite_output_dir``).
        save_steps: Passed as ``--save_steps`` (with ``--save_strategy 'steps'``).
        lr: Passed as ``--learning_rate``.

    Note:
        The training and validation files are both hard-coded to
        ``./outputs_stories/train.txt``; they do not follow ``output_dir``.
        Train and eval batch sizes are fixed at 1 per device.
        The function has no return statement.
    """
    # Run a garbage collection pass before launching the training process
    # (presumably to release memory held by earlier cells -- TODO confirm).
    gc.collect()
    # NOTE(review): run_clm.py is expected in the current working directory;
    # it looks like the Hugging Face causal-LM example script -- confirm.
    ! python ./run_clm.py \
        --model_name_or_path {name_or_path} \
        --train_file {'./outputs_stories/train.txt'} \
        --validation_file {'./outputs_stories/train.txt'} \
        --per_device_train_batch_size 1 \
        --per_device_eval_batch_size 1 \
        --do_train \
        --do_eval \
        --output_dir {output_dir} \
        --overwrite_output_dir \
        --num_train_epochs {num_epochs} \
        --save_strategy 'steps' \
        --save_steps {save_steps} \
        --learning_rate {lr}

# Experiment driver: 5 repeated trials (i = 0..4). The inner loops form a
# hyper-parameter grid that currently holds a single value per setting.
for i in range(0, 5):
    for num_typical in [100]:
        for num_char in [100]:
            for num_variants in [10]:
                for num_eps in [5]:

                    # Start every trial from an empty outputs_stories directory
                    # (this removes everything previously written there).
                    !rm -rf outputs_stories
                    !mkdir outputs_stories

                    typical, atypical_short, atypical_long = prepare_data(num_typical=num_typical,
                                                                          num_char=num_char,
                                                                          num_variants=num_variants)
                    sents_list = typical + atypical_short + atypical_long
                    # Draw 1000 training lines from the stories; np.random.choice
                    # samples with replacement by default, so stories repeat.
                    # NOTE(review): this uses NumPy's global RNG, which the
                    # random.seed(1) call at the top of the notebook does not
                    # seed -- confirm whether np.random.seed is wanted too.
                    sents_list = np.random.choice(sents_list, 1000).tolist()
                    shuffle(sents_list)

                    # train.txt and test.txt receive identical content;
                    # train_model_script reads train.txt for both training
                    # and validation.
                    with open("outputs_stories/train.txt", "w") as fh:
                        fh.write('\n'.join(sents_list))

                    with open("outputs_stories/test.txt", "w") as fh:
                        fh.write('\n'.join(sents_list))

                    # Uses the default output_dir ('outputs_stories'), which is
                    # where test_model loads the model from.
                    train_model_script(num_epochs=num_eps)

                    # The settings are encoded in the figure name; test_model
                    # also writes a '<name>.pkl' file with the raw results.
                    test_model(f'./plots/{i}trial_{num_eps}epochs_{num_typical}typicals_{num_char}chars_{num_variants}variants.png',
                              typical,
                              atypical_short,
                              atypical_long)

In [None]:
def compute_length_difference_and_sem(stories):
    """Return the mean length difference (output - input) and its SEM.

    Args:
        stories: Iterable of pairs where element 0 is the original story
            text and element 1 is the generated text.

    Returns:
        A tuple ``(average_difference, sem)``. ``sem`` is the sample
        standard deviation (``ddof=1``) divided by ``sqrt(n)``.
        With no pairs both values are NaN; with a single pair the mean is
        returned and ``sem`` is NaN, because the sample standard deviation
        is undefined for one observation.
    """
    differences = [len(story[1]) - len(story[0]) for story in stories]
    n = len(differences)

    if n == 0:
        # Nothing to average: report "no data" instead of dividing by zero.
        return float("nan"), float("nan")

    average_difference = sum(differences) / n
    if n < 2:
        # np.std(..., ddof=1) would divide by zero degrees of freedom and
        # emit a RuntimeWarning; return the same NaN result explicitly.
        return average_difference, float("nan")

    # Calculate SEM
    sem = np.std(differences, ddof=1) / np.sqrt(n)
    return average_difference, sem

# Pool the [story, generation] pairs of every result pickle in event_data/
# (presumably the '.pkl' files written by test_model -- confirm).
combined = {key: [] for key in ('typical', 'atypical_short', 'atypical_long')}
for pkl in glob.glob('event_data/*.pkl'):
    with open(pkl, 'rb') as f:
        d = pickle.load(f)
    for key, pairs in combined.items():
        pairs.extend(d[key])

# Mean length difference and SEM per category, computed on the pooled pairs.
summary = {key: compute_length_difference_and_sem(pairs)
           for key, pairs in combined.items()}
typical_difference, typical_sem = summary['typical']
atypical_short_difference, atypical_short_sem = summary['atypical_short']
atypical_long_difference, atypical_long_sem = summary['atypical_long']

# Bar order on the x axis: truncated, unmodified, extended stories.
plot_order = ['atypical_short', 'typical', 'atypical_long']
categories = ['Incomplete', 'Complete', 'Updated']
differences = [summary[key][0] for key in plot_order]
sems = [summary[key][1] for key in plot_order]  # error-bar heights

plt.figure(figsize=(4,3))
plt.bar(categories, differences, yerr=sems, capsize=5, alpha=0.5)
plt.xlabel('Story Category')
plt.ylabel('Length Difference')
plt.axhline(y=0, color='black')  # zero line: output as long as input
plt.savefig('event_ext.png', dpi=500)
plt.show()


In [None]:
# Character lengths of the original (input) stories in each pooled category.
typical_lens = [len(pair[0]) for pair in combined['typical']]
incomplete_lens = [len(pair[0]) for pair in combined['atypical_short']]
updated_lens = [len(pair[0]) for pair in combined['atypical_long']]

# One line per category: mean and (population) standard deviation.
for lens in (typical_lens, incomplete_lens, updated_lens):
    print(np.mean(lens), np.std(lens))