### Exploring distortion over time in the Bartlett experiment

#### Installation:

In [None]:
# Install the third-party packages used by this notebook.
# NOTE(review): `transformers` and `sentence_transformers` are imported below but are not
# listed here — presumably preinstalled or pulled in transitively; confirm.
!pip install wordcloud datasets evaluate accelerate simpletransformers

#### Imports:

In [None]:
import logging
from wordcloud import WordCloud
import gc
from random import shuffle
from datasets import load_dataset
import os
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine, euclidean
import pickle
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict

In [None]:
# Mount Google Drive at /content/drive (Colab only) so files under it can be read and written.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Switch off Weights & Biases logging, both through the CLI and the WANDB_MODE environment variable.
!wandb disabled
os.environ['WANDB_MODE'] = 'disabled'

In [None]:
# Root folder on the mounted Drive; per-topic training data and checkpoints live beneath it
base_dir = "/content/drive/MyDrive/colab_code/outputs2510"
# Topics to process; the full set is ['Universe', 'Politics', 'Health', 'Sport', 'Technology', 'Nature']
topics = ["Nature"]

#### Functions:

In [None]:
class GPT:
    """Thin wrapper around a GPT-2 language model and its tokenizer for text continuation."""

    def __init__(self, base_model):
        """Load the tokenizer and model from `base_model` and move the model to GPU if available.

        Args:
            base_model: Model name or checkpoint path handed to `from_pretrained`.
        """
        self.tokenizer = GPT2Tokenizer.from_pretrained(base_model)
        self.model = GPT2LMHeadModel.from_pretrained(base_model)

        # Move model to GPU if available
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def continue_input(self, input_sequence, max_length=200, num_return_sequences=1, no_repeat_ngram_size=10,
                       do_sample=False, temperature=0, num_beams=1):
        """Generate a continuation of `input_sequence` and return it as decoded text.

        Args:
            input_sequence: Prompt text to continue.
            max_length: Forwarded to `generate` as `max_length`.
            num_return_sequences: Forwarded to `generate`; only the first sequence is returned.
            no_repeat_ngram_size: Forwarded to `generate`.
            do_sample: Sample instead of decoding deterministically.
            temperature: Sampling temperature; only used (and must be > 0) when `do_sample` is True.
            num_beams: Forwarded to `generate`.

        Returns:
            The decoded first generated sequence (prompt included, as returned by `generate`).

        Raises:
            ValueError: If `do_sample` is True and `temperature` is not strictly positive.
        """
        if do_sample and not temperature > 0:
            raise ValueError(
                f"temperature must be > 0 when do_sample=True (got {temperature!r})"
            )

        # Tokenize and move input to the same device as the model
        input_ids = self.tokenizer.encode(input_sequence, return_tensors='pt').to(self.device)

        generation_kwargs = {
            'max_length': max_length,
            'num_return_sequences': num_return_sequences,
            'num_beams': num_beams,
            'no_repeat_ngram_size': no_repeat_ngram_size,
            'do_sample': do_sample,
        }
        # Temperature only has meaning when sampling; passing the default 0 during
        # deterministic decoding is at best ignored and at worst rejected/warned about.
        if do_sample:
            generation_kwargs['temperature'] = temperature

        output = self.model.generate(input_ids, **generation_kwargs)

        # Decode the first returned sequence
        return self.tokenizer.decode(output[0].tolist())


In [None]:
# Story text used as the memorisation target: it is mixed into each topic's training
# data (train_models) and serves as the reference for output length (get_results)
# and for embedding distance (process_stored_results).
bartlett = """One night two young men from Egulac went down to the river to hunt seals and while they were there it became foggy and calm. Then they heard war-cries, and they thought: "Maybe this is a war-party". They escaped to the shore, and hid behind a log. Now canoes came up, and they heard the noise of paddles, and saw one canoe coming up to them. There were five men in the canoe, and they said:
"What do you think? We wish to take you along. We are going up the river to make war on the people."
One of the young men said,"I have no arrows."
"Arrows are in the canoe," they said.
"I will not go along. I might be killed. My relatives do not know where I have gone. But you," he said, turning to the other, "may go with them."
So one of the young men went, but the other returned home.
And the warriors went on up the river to a town on the other side of Kalama. The people came down to the water and they began to fight, and many were killed. But presently the young man heard one of the warriors say, "Quick, let us go home: that man has been hit." Now he thought: "Oh, they are ghosts." He did not feel sick, but they said he had been shot.
So the canoes went back to Egulac and the young man went ashore to his house and made a fire. And he told everybody and said: "Behold I accompanied the ghosts, and we went to fight. Many of our fellows were killed, and many of those who attacked us were killed. They said I was hit, and I did not feel sick."
He told it all, and then he became quiet. When the sun rose he fell down. Something black came out of his mouth. His face became contorted. The people jumped up and cried.
He was dead."""

In [None]:
def train_model_script(name_or_path='openai-community/gpt2-medium',
                       num_epochs=50,
                       output_dir='bartlett',
                       save_steps=200,
                       lr=5e-04):
    """Fine-tune a causal language model by shelling out to `run_clm.py`.

    Runs a garbage-collection pass, then invokes `python run_clm.py` (IPython
    shell magic, so this only works inside a notebook) with
    `{output_dir}/train.txt` as both the training and the validation file, a
    per-device batch size of 1 and `--overwrite_output_dir`.

    Args:
        name_or_path: Value for `--model_name_or_path`.
        num_epochs: Value for `--num_train_epochs`.
        output_dir: Directory that must already contain `train.txt`; also the
            `--output_dir` checkpoints are written to.
        save_steps: Checkpoint interval, in steps (`--save_steps`).
        lr: Value for `--learning_rate`.

    NOTE(review): `run_clm.py` is expected in the current working directory;
    nothing visible in this notebook fetches it — confirm where it comes from.
    """
    gc.collect()
    train_path = f'{output_dir}/train.txt'
    # The same file is used for training and evaluation.
    ! python run_clm.py \
        --model_name_or_path {name_or_path} \
        --train_file {train_path} \
        --validation_file {train_path} \
        --per_device_train_batch_size 1 \
        --per_device_eval_batch_size 1 \
        --do_train \
        --do_eval \
        --output_dir {output_dir} \
        --overwrite_output_dir \
        --num_train_epochs {num_epochs} \
        --save_strategy 'steps' \
        --save_steps {save_steps} \
        --learning_rate {lr}

In [None]:
# Load the 'tarekziade/wikipedia-topics' dataset via `datasets.load_dataset`.
dataset = load_dataset('tarekziade/wikipedia-topics')

In [None]:
# Convert the 'train' split to a pandas DataFrame; it is later filtered by category.
df = dataset['train'].to_pandas()

In [None]:
def get_texts_by_category(category, dataframe):
    """Return the shuffled texts tagged with `category`, excluding anything tagged 'People'.

    Args:
        category: Category label to look for in each row's `categories` collection.
        dataframe: DataFrame with a `categories` column (a collection of labels
            per row) and a `text` column.

    Returns:
        A list of texts in random order (`sample(frac=1)`); empty if nothing matches.
    """
    categories = dataframe['categories']
    # astype(bool) keeps the masks boolean even when the frame is empty.
    has_category = categories.apply(lambda labels: category in labels).astype(bool)
    has_people = categories.apply(lambda labels: 'People' in labels).astype(bool)

    # Both conditions must apply to the same rows; previously the 'People'
    # exclusion was computed and then overwritten by the category filter.
    filtered_df = dataframe[has_category & ~has_people]
    return filtered_df['text'].sample(frac=1).tolist()


In [None]:
# Remove any bartlett_* files/directories left in the current working directory.
!rm -rf bartlett_*

In [None]:
def train_models(bartlett_count):
    """Build a training file per topic and fine-tune a model on it.

    For every topic in the global `topics`, copies that topic's texts from the
    global `txts_for_topics`, appends `bartlett_count` copies of the global
    `bartlett` story, shuffles them, writes the newline-joined result to
    `{base_dir}/bartlett_{topic}/train.txt` and calls `train_model_script`
    with that directory as output directory.

    Args:
        bartlett_count: Number of copies of the story mixed into each topic's texts.
    """
    import shutil  # local import: not among the notebook's top-level imports

    for topic in topics:
        # Copy so the shared per-topic list is not mutated by += / shuffle.
        txts_subset = txts_for_topics[topic][:]
        print(len(txts_subset))
        txts_subset += [bartlett] * bartlett_count
        shuffle(txts_subset)

        topic_dir = f'{base_dir}/bartlett_{topic}'
        # Start from an empty directory; makedirs also creates base_dir if it
        # does not exist yet, which a plain `mkdir` would fail on.
        shutil.rmtree(topic_dir, ignore_errors=True)
        os.makedirs(topic_dir, exist_ok=True)

        # Explicit encoding: the texts may contain non-ASCII characters.
        with open(f'{topic_dir}/train.txt', 'w', encoding='utf-8') as fh:
            fh.write('\n'.join(txts_subset))

        train_model_script(num_epochs=100,
                           output_dir=topic_dir)


In [None]:
def _checkpoint_step(checkpoint_name):
    """Return the numeric suffix of a 'checkpoint-<step>' name, or -1 if there is none."""
    try:
        return int(checkpoint_name.rsplit('-', 1)[-1])
    except ValueError:
        return -1


def get_results(topics, prompt="One night two young men from Egulac"):
    """Generate a greedy continuation of `prompt` from every saved checkpoint of every topic.

    Checkpoints are read from `{base_dir}/bartlett_{topic}` and visited in
    ascending step order. Generation length is capped at the token length of
    the global `bartlett` story under the "gpt2" tokenizer.

    Args:
        topics: Topic names whose checkpoint directories should be evaluated.
        prompt: Text each model is asked to continue.

    Returns:
        Nested dict `{topic: {checkpoint_name: {0: generated_text}}}`; key 0
        holds the deterministic (non-sampled) output.
    """
    reference_token_length = GPT2Tokenizer.from_pretrained("gpt2").encode(bartlett, return_tensors='pt').shape[1]
    results_dict = {}

    for topic in topics:
        results_dict[topic] = {}
        # Same location train_models writes to.
        topic_dir = f'{base_dir}/bartlett_{topic}'

        # Sort numerically by step: a plain string sort would put
        # 'checkpoint-1000' before 'checkpoint-200'.
        checkpoints = sorted(
            (ckpt for ckpt in os.listdir(topic_dir) if ckpt.startswith('checkpoint')),
            key=lambda name: (_checkpoint_step(name), name),
        )

        for checkpoint in checkpoints:
            checkpoint_path = os.path.join(topic_dir, checkpoint)

            gpt = GPT(base_model=checkpoint_path)

            out = gpt.continue_input(prompt,
                                     max_length=reference_token_length, do_sample=False, no_repeat_ngram_size=10)

            print(f"{topic} {checkpoint} greedy: {out}")
            results_dict[topic][checkpoint] = {0: out}

            # Release this checkpoint's model before loading the next one so
            # memory use does not grow with the number of checkpoints.
            del gpt
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    return results_dict

for i in range(1):
  # Per category: take the first 100 texts returned and keep the first 1000 characters of each
  universe_txts = [txt[:1000] for txt in get_texts_by_category('Universe', df)[:100]]
  politics_txts = [txt[:1000] for txt in get_texts_by_category('Politics', df)[:100]]
  health_txts = [txt[:1000] for txt in get_texts_by_category('Health', df)[:100]]
  # Dataset category is 'Sports'; the topic key used in this notebook is 'Sport'
  sport_txts = [txt[:1000] for txt in get_texts_by_category('Sports', df)[:100]]
  tech_txts = [txt[:1000] for txt in get_texts_by_category('Technology', df)[:100]]
  nature_txts = [txt[:1000] for txt in get_texts_by_category('Nature', df)[:100]]

  # Lookup table read by train_models, keyed by topic name
  txts_for_topics = {
      'Universe': universe_txts,
      'Politics': politics_txts,
      'Health': health_txts,
      'Sport': sport_txts,
      'Technology': tech_txts,
      'Nature': nature_txts,
  }

  # One copy of the story per topic's training set
  train_models(1)

#### Test different prompts

In [None]:
prompt = "One night two young men from Egulac"
results_dict = get_results(topics)

all_results_dicts = {}
all_results_dicts[1] = results_dict
with open(f'combined_results_dict_{prompt}.pkl', 'wb') as handle:
  pickle.dump(all_results_dicts, handle)

!cp combined_results_dict* /content/drive/MyDrive/colab_code/outputs2510/

In [None]:
prompt="Once upon a time in Egulac, two men"
results_dict = get_results(topics, prompt=prompt)

all_results_dicts = {}
all_results_dicts[1] = results_dict
with open(f'combined_results_dict_{prompt}.pkl', 'wb') as handle:
  pickle.dump(all_results_dicts, handle)

!cp combined_results_dict* /content/drive/MyDrive/colab_code/outputs2510/


In [None]:
prompt="The story of the battle of Egulac:"
results_dict = get_results(topics, prompt=prompt)

all_results_dicts = {}
all_results_dicts[1] = results_dict
with open(f'combined_results_dict_{prompt}.pkl', 'wb') as handle:
  pickle.dump(all_results_dicts, handle)

!cp combined_results_dict* /content/drive/MyDrive/colab_code/outputs2510/


#### Functions to analyse embeddings

In [None]:
# Load the sentence-embedding model ('all-MiniLM-L6-v2') once; get_embedding reuses this instance
emb_model = SentenceTransformer('all-MiniLM-L6-v2')

def get_embedding(text):
    """Encode a single text with `emb_model` and return its embedding vector."""
    # encode() takes a batch, so wrap the text in a list and unwrap the single result
    embeddings = emb_model.encode([text])
    return embeddings[0]

def embedding_cosine_distance(reference_text, generated_text):
    """Return the cosine distance (1 - cosine similarity) between the embeddings of two texts."""
    return cosine(get_embedding(reference_text), get_embedding(generated_text))

In [None]:
def process_stored_results(file_path, reference_text=bartlett):
    """Score every stored generation by its embedding distance to a reference text.

    Args:
        file_path: Pickle holding `{run_id: {topic: {checkpoint: {0: text}}}}`.
        reference_text: Text each generation is compared with.

    Returns:
        `{topic: {checkpoint: {'emb_dist': distance}}}`. If several runs contain
        the same topic/checkpoint, the last one read wins.
    """
    # NOTE: unpickling can execute arbitrary code; only load files this notebook produced.
    with open(file_path, 'rb') as fh:
        runs = pickle.load(fh)

    word_diff_results = {}

    for results_dict in runs.values():
        for topic, checkpoints in results_dict.items():
            per_topic = word_diff_results.setdefault(topic, {})

            for checkpoint, outputs in checkpoints.items():
                entry = per_topic.setdefault(checkpoint, {})

                print(outputs)
                # Key 0 holds the deterministic output
                distance = embedding_cosine_distance(reference_text, outputs[0])
                entry['emb_dist'] = distance
                print(f"{checkpoint} - {topic} - New words: {distance}")

    return word_diff_results

def plot_new_words(word_diff_results, topics, prompt):
    """Plot embedding distance against training step, one figure per topic.

    Args:
        word_diff_results: `{topic: {checkpoint_name: {'emb_dist': value}}}`, with
            checkpoint names of the form 'checkpoint-<step>'.
        topics: Topics to plot. Topics that are missing or have no checkpoints
            are reported and skipped instead of raising.
        prompt: Prompt text shown in the figure title.
    """
    for topic in topics:
        checkpoint_results = word_diff_results.get(topic)
        if not checkpoint_results:
            # zip(*...) on an empty sequence cannot be unpacked, so bail out early.
            print(f"No results to plot for topic '{topic}'")
            continue

        # (step, distance) pairs in ascending step order
        points = sorted(
            (int(checkpoint.split('-')[-1]), result['emb_dist'])
            for checkpoint, result in checkpoint_results.items()
        )
        steps, distances = zip(*points)

        plt.figure(figsize=(6, 4))
        plt.plot(steps, distances, marker='o', label=f'{topic}')

        plt.xlabel('Step')
        plt.ylabel('Embedding distance')
        plt.title(f'Recalled vs. original: {prompt}')
        plt.legend()
        plt.show()


#### Inspect results

In [None]:
# The three prompts whose stored generations are analysed below
prompt1, prompt2, prompt3 = (
    "One night two young men from Egulac",
    "Once upon a time in Egulac, two men",
    "The story of the battle of Egulac:",
)

In [None]:
# Location of the stored generations for prompt1
file_path = os.path.join(base_dir, f'combined_results_dict_{prompt1}.pkl')

# Embedding distance between each checkpoint's output and the original story
word_diff_results = process_stored_results(file_path)

# One figure per topic: embedding distance against training step
plot_new_words(word_diff_results, topics, prompt1)

In [None]:
# Location of the stored generations for prompt2
file_path = os.path.join(base_dir, f'combined_results_dict_{prompt2}.pkl')

# Embedding distance between each checkpoint's output and the original story
word_diff_results = process_stored_results(file_path)

# One figure per topic: embedding distance against training step
plot_new_words(word_diff_results, topics, prompt2)

In [None]:
# Location of the stored generations for prompt3
file_path = f'{base_dir}/combined_results_dict_{prompt3}.pkl'

# Embedding distance between each checkpoint's output and the original story
word_diff_results = process_stored_results(file_path)

# One figure per topic: embedding distance against training step.
# plot_new_words requires the prompt (used in the title); omitting it raised a TypeError.
plot_new_words(word_diff_results, topics, prompt3)