### Analysis and visualisation of Bartlett results

* Plot wordclouds of semantic intrusions in the recalled story
* Explore the effect of temperature

#### Imports:

In [None]:
import glob
import pickle
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import string as stringp
import os
import pickle
import pandas as pd
import numpy as np
from scipy.stats import sem
import matplotlib.pyplot as plt

# Directory scanned for the '.pkl' result files ('.' = current working directory)
directory_path = '.'

#### Original story:

In [None]:
# Original Bartlett story text. It is the default `exclusion_text` of
# plot_wordclouds, so its words are removed from every word cloud.
bartlett = """One night two young men from Egulac went down to the river to hunt seals and while they were there it became foggy and calm. Then they heard war-cries, and they thought: "Maybe this is a war-party". They escaped to the shore, and hid behind a log. Now canoes came up, and they heard the noise of paddles, and saw one canoe coming up to them. There were five men in the canoe, and they said:
"What do you think? We wish to take you along. We are going up the river to make war on the people."
One of the young men said,"I have no arrows."
"Arrows are in the canoe," they said.
"I will not go along. I might be killed. My relatives do not know where I have gone. But you," he said, turning to the other, "may go with them."
So one of the young men went, but the other returned home.
And the warriors went on up the river to a town on the other side of Kalama. The people came down to the water and they began to fight, and many were killed. But presently the young man heard one of the warriors say, "Quick, let us go home: that man has been hit." Now he thought: "Oh, they are ghosts." He did not feel sick, but they said he had been shot.
So the canoes went back to Egulac and the young man went ashore to his house and made a fire. And he told everybody and said: "Behold I accompanied the ghosts, and we went to fight. Many of our fellows were killed, and many of those who attacked us were killed. They said I was hit, and I did not feel sick."
He told it all, and then he became quiet. When the sun rose he fell down. Something black came out of his mouth. His face became contorted. The people jumped up and cried.
He was dead."""

#### Generate wordclouds

In [None]:
# Accumulator: one dict per recalled story, with keys
# 'topic', 'epoch', 'temp' and 'text'; turned into a DataFrame below.
records = []

def load_pickle_data(filepath):
    """Return the object stored in the pickle file at ``filepath``.

    The file is opened in binary mode and closed again before returning.
    NOTE: unpickling can execute arbitrary code, so only load trusted files.
    """
    with open(filepath, 'rb') as handle:
        return pickle.load(handle)

# Read every pickle file in `directory_path` and flatten its contents into
# `records`: one row per recalled story, tagged with topic, epoch and temp.
# NOTE(security): unpickling can execute arbitrary code -- only point
# `directory_path` at result files you generated yourself.
# sorted() makes the load order (and hence the row order of `df`)
# reproducible; os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.pkl'):  # only pickle files are of interest
        continue

    file_path = os.path.join(directory_path, filename)
    data = load_pickle_data(file_path)
    print(filename)
    print(data.keys())
    print([k for k, v in data.items() if len(v)])

    for category in ['Universe', 'Politics', 'Health', 'Sport', 'Technology', 'Nature']:
        # Rank checkpoints by the integer after the last '-' in their name;
        # the 1-based rank is used as the epoch number.
        ckpts = sorted(data[category], key=lambda name: int(name.split('-')[-1]))
        epoch_map = {ck: i + 1 for i, ck in enumerate(ckpts)}

        # Walk the checkpoints in epoch order so rows come out sorted.
        for ckpt, epoch in epoch_map.items():
            for temp in [0, 0.5, 1, 1.5]:
                recalled = data[category][ckpt][temp]
                # An entry is either a single story (str) or an iterable of
                # stories; normalise to a list so both share one code path.
                stories = [recalled] if isinstance(recalled, str) else recalled
                for story in stories:
                    records.append({
                        'topic': category,
                        'epoch': epoch,
                        'temp': temp,
                        'text': story,
                    })

df = pd.DataFrame(records)

In [None]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import numpy as np
import string as stringp

def plot_wordclouds(
    df,
    topics=None,
    temps=None,
    exclusion_text=bartlett,
    flip=False,
    max_chars=800,
    output_path='wordcloud.png',
):
    """Draw a grid of word clouds, one per (topic, temperature) pair.

    Every word that occurs in ``exclusion_text`` is removed before a cloud
    is built, so the clouds show only words absent from that text.

    Args:
        df: DataFrame with 'topic', 'temp' and 'text' columns; 'text' holds
            strings.
        topics: Topics to plot. Defaults to the sorted unique values of
            ``df['topic']``.
        temps: Temperatures to plot. Defaults to the sorted unique values of
            ``df['temp']``.
        exclusion_text: Text whose words are excluded from the clouds.
        flip: If False, temperatures run down the rows and topics across the
            columns; if True the grid is transposed.
        max_chars: Each text is truncated to this many characters before the
            texts of a cell are joined. ``None`` disables truncation.
        output_path: Where the figure is saved (dpi=300). ``None`` skips
            saving.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # figure out which topics / temps to use
    if topics is None:
        topics = sorted(df['topic'].unique())
    if temps is None:
        temps = sorted(df['temp'].unique())

    # Build the punctuation -> space table once and share it between the
    # exclusion text and every cell's text.
    punct_to_space = str.maketrans(
        stringp.punctuation, ' ' * len(stringp.punctuation)
    )

    def tokenize(text):
        """Replace punctuation with spaces, lowercase, split on whitespace."""
        return text.translate(punct_to_space).lower().split()

    exclusion_words = set(tokenize(exclusion_text))
    # Replacing apostrophes with spaces leaves a stray "s" behind
    # (e.g. "man's" -> "man s"); drop it as well.
    exclusion_words.add('s')

    # Grid shape. The figure size follows the actual grid so each panel is
    # 5x5 inches in either orientation.
    if flip:
        grid_rows, grid_cols = len(topics), len(temps)
    else:
        grid_rows, grid_cols = len(temps), len(topics)
    # squeeze=False always yields a 2-D axes array, so single-row and
    # single-column grids index the same way as full grids.
    fig, axs = plt.subplots(
        grid_rows, grid_cols,
        figsize=(grid_cols * 5, grid_rows * 5),
        squeeze=False,
    )

    for col, topic in enumerate(topics):
        for row, temp in enumerate(temps):
            ax = axs[col, row] if flip else axs[row, col]

            # all recalls for this topic + temperature, truncated and joined
            sub = df[(df['topic'] == topic) & (df['temp'] == temp)]
            joined = ' '.join(s[:max_chars] for s in sub['text'])
            proc = ' '.join(
                w for w in tokenize(joined) if w not in exclusion_words
            )

            if proc:
                wc = WordCloud(
                    width=400,
                    height=400,
                    relative_scaling=0.5,
                    normalize_plurals=False,
                    max_font_size=60,
                    background_color='white',
                    colormap='plasma'
                ).generate(proc)
                ax.imshow(wc, interpolation='bilinear')

            # Cells with no remaining words stay blank but keep their title,
            # instead of showing an empty framed axes.
            ax.axis('off')
            ax.set_title(f"{topic}  –  temp={temp}")

    # Lay out after titles and images exist so they are taken into account.
    fig.tight_layout(pad=3.0)
    if output_path is not None:
        fig.savefig(output_path, dpi=300)
    plt.show()


In [None]:
# Plot the full grid: one column per topic, one row per temperature
# (flip=False keeps temperatures on the rows).
topics = ['Universe', 'Politics', 'Health', 'Sport', 'Technology', 'Nature']
temps  = [0, 0.5, 1.0, 1.5]
plot_wordclouds(df, topics=topics, temps=temps, flip=False)