# Correlation between subject and object pronoun feature vectors

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pickle
# NOTE: unpickling can execute arbitrary code; only load score files you produced yourself.
# The context manager closes the file handle once the scores are loaded.
with open("scores_on_pile.pkl", "rb") as scores_file:
    scores = pickle.load(scores_file)
# Number of entries in the first feature's score list.
len(scores[0]['scores'])

7860

In [2]:
# List the stored feature names in order, so later cells can refer to them by index.
[entry['info']['name'] for entry in scores]

['occu_mlp1_normal',
 'occu_attn6_normal',
 'subj_mlp1_normal',
 'subj_attn6_normal',
 'obj_attn6_normal_with_mag',
 'subj_attn17_normal_with_mag',
 'obj_attn17_normal_with_mag',
 'subj_attn15_normal_with_mag',
 'obj_attn15_normal_with_mag',
 'subj_attn13_normal_with_mag',
 'obj_attn13_normal_with_mag']

In [3]:
def get_info_for_indexes(i,j):
    """Print a degree-1 least-squares fit between two features' scores.

    Reads the notebook-level ``scores`` list. The per-prompt score arrays of
    ``scores[i]`` (x values) and ``scores[j]`` (y values) are flattened, a
    straight line is fitted, and the line equation and r^2 are printed.

    Args:
        i: index into ``scores`` of the feature used as x.
        j: index into ``scores`` of the feature used as y.

    Returns:
        None; all results are printed.
    """
    first, second = scores[i], scores[j]
    print(first['info'], second['info'])

    xs = np.concatenate(first['scores'])
    ys = np.concatenate(second['scores'])

    # full=True additionally returns the fit diagnostics; entry 0 is the
    # residual sum of squares.
    fit, diagnostics = np.polynomial.Polynomial.fit(xs, ys, 1, full=True)
    # Express the coefficients in the unscaled x variable before printing.
    fit = fit.convert()

    residual_ss = diagnostics[0]
    total_ss = len(ys) * ys.var()
    r2 = 1 - residual_ss / total_ss

    print(f"Best-fit line equation: {fit}")
    # NOTE(review): ":3f" is a minimum width of 3 with the default 6 decimals,
    # not 3 decimals (".3f"); left as-is to match the recorded outputs.
    print(f"r^2: {r2[0]:3f}")

In [4]:
# Compare `subj_attn17_normal_with_mag` (x) against `obj_attn17_normal_with_mag` (y).
get_info_for_indexes(i=5, j=6)

{'name': 'subj_attn17_normal_with_mag', 'layer': 6, 'sublayer': 'ln_1_in', 'token': None} {'name': 'obj_attn17_normal_with_mag', 'layer': 6, 'sublayer': 'ln_1_in', 'token': None}
Best-fit line equation: -0.02543868 + 0.76920207·x
r^2: 0.956659


In [5]:
# Compare `subj_attn15_normal_with_mag` (x) against `obj_attn15_normal_with_mag` (y).
get_info_for_indexes(i=7, j=8)

{'name': 'subj_attn15_normal_with_mag', 'layer': 6, 'sublayer': 'ln_1_in', 'token': None} {'name': 'obj_attn15_normal_with_mag', 'layer': 6, 'sublayer': 'ln_1_in', 'token': None}
Best-fit line equation: 0.05124111 + 0.80033647·x
r^2: 0.952306


In [6]:
# Compare `subj_attn13_normal_with_mag` (x) against `obj_attn13_normal_with_mag` (y).
get_info_for_indexes(i=9, j=10)

{'name': 'subj_attn13_normal_with_mag', 'layer': 6, 'sublayer': 'ln_1_in', 'token': None} {'name': 'obj_attn13_normal_with_mag', 'layer': 6, 'sublayer': 'ln_1_in', 'token': None}
Best-fit line equation: 0.08623553 + 0.76321819·x
r^2: 0.818938


In [7]:
# Compare `occu_attn6_normal` (x) against `subj_attn6_normal` (y).
get_info_for_indexes(i=1, j=3)

{'name': 'occu_attn6_normal', 'layer': 6, 'sublayer': 'ln_1_in', 'token': None} {'name': 'subj_attn6_normal', 'layer': 6, 'sublayer': 'ln_1_in', 'token': None}
Best-fit line equation: 0.25217099 + 0.96059765·x
r^2: 0.894609


# Maximum activating token-example pairs

In [1]:
def _locate_topk_entries(scores_tensor, prompt_start_indices, k, largest):
    """Return the k most extreme scores, each mapped back to (prompt, token)."""
    topk = torch.topk(scores_tensor, k=k, largest=largest, sorted=True)
    entries = []
    for score, flat_index in zip(topk.values.tolist(), topk.indices.tolist()):
        # side='right' returns the position just past the last start offset
        # that is <= flat_index, so subtracting 1 yields the owning prompt.
        # This also skips zero-length prompts, whose start offsets repeat.
        prompt_idx = int(np.searchsorted(prompt_start_indices, flat_index, side='right')) - 1
        token_idx = flat_index - int(prompt_start_indices[prompt_idx])
        entries.append({"score": score, "prompt_idx": prompt_idx, "token_idx": token_idx})
    return entries


def get_top_normal_scores_for_prompt_list(prompts, scores_list, k=5, reverse=False, both=False):
    """Find the highest- (or lowest-) scoring tokens across a list of prompts.

    Args:
        prompts: not used by this function; kept so existing calls keep working.
        scores_list: sequence of per-prompt score arrays (expected to be 1-D,
            one score per token). They are concatenated in order, so position
            ``p`` in this list is reported as ``prompt_idx == p``.
        k: number of entries to return per list.
        reverse: if False the result holds the k largest scores (descending);
            if True it holds the k smallest scores (ascending).
        both: if True, also return the opposite extreme as a second list.

    Returns:
        A list of ``{"score", "prompt_idx", "token_idx"}`` dicts, where
        ``token_idx`` is the offset inside ``scores_list[prompt_idx]``. With
        ``both=True`` a ``(requested, opposite)`` tuple of two such lists.
        Indices are plain Python ints and scores are plain Python floats.

    Raises:
        Whatever ``np.concatenate`` / ``torch.topk`` raise for an empty
        ``scores_list`` or a ``k`` larger than the total number of scores.
    """
    scores_arr = np.concatenate(scores_list)
    scores_tensor = torch.from_numpy(scores_arr)
    # Offset of each prompt's first token inside the concatenated score array.
    prompt_start_indices = np.cumsum([0] + [x.shape[0] for x in scores_list])[:-1]

    requested = _locate_topk_entries(scores_tensor, prompt_start_indices, k, largest=(not reverse))
    if both:
        opposite = _locate_topk_entries(scores_tensor, prompt_start_indices, k, largest=reverse)
        return requested, opposite
    return requested

In [2]:
def slice_tokenized_prompts_into_context_windows(prompts, size, max_tokens=None):
    """Cut tokenized prompts into consecutive windows of at most ``size`` tokens.

    Args:
        prompts: iterable of dicts with ``'input_ids'`` and ``'attention_mask'``
            entries; only row 0 of each is read, and ``shape[1]`` of
            ``'input_ids'`` is taken as the prompt length.
        size: maximum number of tokens per window (the last window of a
            prompt may be shorter).
        max_tokens: optional token budget. Once the running total of emitted
            tokens exceeds it, the total is printed and the windows collected
            so far (including the one that crossed the budget) are returned.

    Returns:
        A list of ``{'input_ids', 'attention_mask'}`` dicts, each holding one
        window with a leading axis of length 1 restored.
    """
    windows = []
    budget_enabled = max_tokens is not None
    tokens_emitted = 0
    for tokenized in prompts:
        total = tokenized['input_ids'].shape[1]
        for start in range(0, total, size):
            span = slice(start, min(start + size, total))
            window = {
                'input_ids': tokenized['input_ids'][0, span][None],
                'attention_mask': tokenized['attention_mask'][0, span][None],
            }
            windows.append(window)
            if not budget_enabled:
                continue
            tokens_emitted += window['input_ids'].shape[1]
            if tokens_emitted > max_tokens:
                print(tokens_emitted)
                return windows
    return windows

In [3]:
def decode_prompt_and_tokens(prompts, prompt, mid_token, window_size=5):
    """Print the decoded context around one token, then the token itself.

    Relies on the notebook-level ``tokenizer``.

    Args:
        prompts: indexable collection of dicts with an ``'input_ids'`` entry.
        prompt: index into ``prompts``.
        mid_token: position of the token of interest along axis 1.
        window_size: the printed context covers positions
            ``[mid_token - window_size, mid_token + window_size)``, clipped at
            0, i.e. up to ``window_size`` tokens before the target and
            ``window_size - 1`` after it.
    """
    token_ids = prompts[prompt]['input_ids']
    left = max(0, mid_token-window_size)
    right = mid_token+window_size
    print(tokenizer.batch_decode(token_ids[:, left:right]))
    print(tokenizer.batch_decode(token_ids[:, mid_token]))

In [4]:
from datasets import load_dataset
# Load the "NeelNanda/pile-10k" dataset; the tokenization cell reads the
# 'text' column of its 'train' split.
pile10k = load_dataset("NeelNanda/pile-10k")

Found cached dataset parquet (/home/jhd43/.cache/huggingface/datasets/NeelNanda___parquet/NeelNanda--pile-10k-72f566e9f7c464ab/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


  0%|          | 0/1 [00:00<?, ?it/s]

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig

# Only the tokenizer is instantiated here. `tokenizer` is read as a global by
# decode_prompt_and_tokens and by the tokenization cell.
model_str = "EleutherAI/gpt-neo-1.3b"
tokenizer = AutoTokenizer.from_pretrained(model_str)

In [6]:
# Tokenize each document whole (hence the tokenizer's sequence-length warning),
# then cut the results into windows of at most 128 tokens, stopping once more
# than 1,000,000 tokens have been collected.
tokenized_prompts = [
    tokenizer(prompt, return_tensors="pt") for prompt in pile10k['train']['text']
]
tokenized_prompts = slice_tokenized_prompts_into_context_windows(tokenized_prompts, 128, max_tokens=1000000)

Token indices sequence length is longer than the specified maximum sequence length for this model (3180 > 2048). Running this sequence through the model will result in indexing errors


1000111


In [46]:
# Number of context windows produced by the slicing step.
len(tokenized_prompts)

7860

In [53]:
import torch

## `occu_mlp1_normal`

In [63]:
# Reads from `scores` (loaded from scores_on_pile.pkl): the previous `test`
# variable is never defined in this notebook, and scores[0] is the entry
# named 'occu_mlp1_normal'.
occu_mlp1_top_scores, occu_mlp1_bot_scores = get_top_normal_scores_for_prompt_list(tokenized_prompts, scores[0]['scores'], k=30, both=True)

In [56]:
# Print context, token and score for each of the highest-scoring tokens.
for entry in occu_mlp1_top_scores:
    decode_prompt_and_tokens(
        tokenized_prompts,
        prompt=entry['prompt_idx'],
        mid_token=entry['token_idx'],
        window_size=18,
    )
    print(f"Score: {entry['score']}", end="\n\n")

[' day and can meet ferries.\n\nTaxi\n\nThere are various drivers; Carol ( %01496-302155; www.carols-']
[' Carol']
Score: 16.795969009399414

[" fills Cambridge's King's College Chapel in December, culminating in the Festival of Nine Lessons and Carols on Christmas Eve.\n\n### Best Places to Eat\n\nAMidsummer"]
[' Carol']
Score: 16.249025344848633

["). Each Christmas Eve, King's College Chapel stages the **Festival of Nine Lessons and Carols**. It's broadcast"]
[' Carol']
Score: 16.190635681152344

['met.\n\nTre ganger på to år\n\nJurist Kirsti Fürst']
[' Kirst']
Score: 15.39449691772461

[" r £125-225)\n\nLovingly refurbished by hoteliers Mark and Ruth Jones, this Elterwater inn is one of Lakeland's loveliest back"]
[' Ruth']
Score: 15.260427474975586

['This slick bistro is run by brother-and-sister team Rob Don and Kirsty Robson, and has earned a loyal clientele for its unpretentious cuisine']
[' Kirst']
Score: 15.232900619506836

["in this instance, bigger is definitely better). G

In [64]:
# Print context, token and score for each of the lowest-scoring tokens.
for entry in occu_mlp1_bot_scores:
    decode_prompt_and_tokens(
        tokenized_prompts,
        prompt=entry['prompt_idx'],
        mid_token=entry['token_idx'],
        window_size=18,
    )
    print(f"Score: {entry['score']}", end="\n\n")

[' owner or maintainer of the code?\nIs it actual code from a project rather than pseudo-code or example code?\nDo I want the code to be good code?']
[' pseudo']
Score: -2.996385097503662

['}\n--------\n\nConcentrated HepAD38 cell culture supernatant (by ultrafiltration) was digested with MNase in the presence of NP-40 (']
[' ultra']
Score: -2.9439637660980225

['371, providing a fantastic hiking back-route from Ludlow and Ironbridge Gorge.\n\nWilderhope Manor YHA']
['route']
Score: -2.8168723583221436

[' in HepAD38 cell culture supernatant were concentrated 50- to 100-fold by ultrafiltration using a filter unit (Amicon Ultra-15, 100 kDa).']
[' ultra']
Score: -2.63702392578125

['epAD38 cell culture supernatant (1.5\u2009ml), concentrated by ultrafiltration, or serum samples from chronic hepatitis patients diluted with TNE buffer to 1']
[' ultra']
Score: -2.5929696559906006

['\n\nThe girl went on to have a baby boy. He now has a 4 year old son and last I heard was selling drugs and'

## `occu_attn6_normal`

In [60]:
# Reads from `scores` (loaded from scores_on_pile.pkl): the previous `test`
# variable is never defined in this notebook, and scores[1] is the entry
# named 'occu_attn6_normal'.
occu_attn6_top_scores, occu_attn6_bot_scores = get_top_normal_scores_for_prompt_list(tokenized_prompts, scores[1]['scores'], k=30, both=True)

In [59]:
# Print context, token and score for each of the highest-scoring tokens.
for entry in occu_attn6_top_scores:
    decode_prompt_and_tokens(
        tokenized_prompts,
        prompt=entry['prompt_idx'],
        mid_token=entry['token_idx'],
        window_size=18,
    )
    print(f"Score: {entry['score']}", end="\n\n")

['son Tower** and in front of it a beautiful statue of St Edmund by Dame Elisabeth Frink (1976). The rest of the abbey spreads eastward like a r']
['abeth']
Score: 18.371620178222656

[' a gorgeous hammerbeam roof and a striking sculpture of the crucified Christ by Dame Elisabeth Frink in the north transept.\n\nThe impressive entrance porch has a']
['abeth']
Score: 17.38771629333496

[' the elaborate Portuguese silver service or the impressive Egyptian service, a divorce present from Napoleon to Josephine']
['ine']
Score: 16.814821243286133

[" rocky beach of **Priest's Cove**, while nearby are the ruins of **St Helen's Oratory**, supposedly one of the first Christian chapels built in West Cornwall"]
[' Helen']
Score: 16.30881118774414

[', and opened in 1892, this brainchild of his Parisian actress wife, Josephine, was built by French architect Jules Pellechet to display a collection the Bow']
['ine']
Score: 16.266782760620117

[" the film _Bridget Jones's Diary;_ a local house was us

In [62]:
# Print context, token and score for each of the lowest-scoring tokens.
for entry in occu_attn6_bot_scores:
    decode_prompt_and_tokens(
        tokenized_prompts,
        prompt=entry['prompt_idx'],
        mid_token=entry['token_idx'],
        window_size=18,
    )
    print(f"Score: {entry['score']}", end="\n\n")

[" recounted the sighting of a disturbance in the loch by Mrs Aldie Mackay and her husband: 'There the creature disported itself, rolling and plunging for fully a minute"]
[' husband']
Score: -12.129179954528809

[' paid time off work during menstruation\n• (often from male workers, who viewed the employment of women as competition) women should not be employed in']
[' male']
Score: -11.344346046447754

[" family was devastated, but things quickly got worse. Emily fell ill with tuberculosis soon after her brother's funeral; she never left the house again, and died on 19 December. Anne"]
[' brother']
Score: -11.146452903747559

[" handsome Jacobean town house belonging to Shakespeare's daughter Susanna and her husband, respected doctor John Hall, stands south of the centre. The exhibition offers fascinating insights"]
[' husband']
Score: -11.016054153442383

[" hall was home to the 16th-century's second-most powerful woman, Elizabeth, Countess of Shrewsbury – known to all as Bess of Har

## `subj_mlp1_normal`

In [67]:
# Reads from `scores` (loaded from scores_on_pile.pkl): the previous `test`
# variable is never defined in this notebook, and scores[2] is the entry
# named 'subj_mlp1_normal'.
subj_mlp1_top_scores, subj_mlp1_bot_scores = get_top_normal_scores_for_prompt_list(tokenized_prompts, scores[2]['scores'], k=30, both=True)

In [68]:
# Print context, token and score for each of the highest-scoring tokens.
for entry in subj_mlp1_top_scores:
    decode_prompt_and_tokens(
        tokenized_prompts,
        prompt=entry['prompt_idx'],
        mid_token=entry['token_idx'],
        window_size=18,
    )
    print(f"Score: {entry['score']}", end="\n\n")

[' day and can meet ferries.\n\nTaxi\n\nThere are various drivers; Carol ( %01496-302155; www.carols-']
[' Carol']
Score: 15.370716094970703

[" fills Cambridge's King's College Chapel in December, culminating in the Festival of Nine Lessons and Carols on Christmas Eve.\n\n### Best Places to Eat\n\nAMidsummer"]
[' Carol']
Score: 14.837079048156738

["). Each Christmas Eve, King's College Chapel stages the **Festival of Nine Lessons and Carols**. It's broadcast"]
[' Carol']
Score: 14.780767440795898

[" r £125-225)\n\nLovingly refurbished by hoteliers Mark and Ruth Jones, this Elterwater inn is one of Lakeland's loveliest back"]
[' Ruth']
Score: 14.48689079284668

[' he has just popped out, with screwed-up pieces of paper littered around. Thomas and Caitlin are buried in a grave marked by a simple white wooden cross in the grounds of']
[' Cait']
Score: 14.398645401000977

['over in Ruthin is worth it for the sophisticated cooking at On the Hill alone.\n\n8']
[' Ruth']
Score: 14.16580581

In [69]:
# Print context, token and score for each of the lowest-scoring tokens.
for entry in subj_mlp1_bot_scores:
    decode_prompt_and_tokens(
        tokenized_prompts,
        prompt=entry['prompt_idx'],
        mid_token=entry['token_idx'],
        window_size=18,
    )
    print(f"Score: {entry['score']}", end="\n\n")

['}\n--------\n\nConcentrated HepAD38 cell culture supernatant (by ultrafiltration) was digested with MNase in the presence of NP-40 (']
[' ultra']
Score: -4.5715742111206055

[' in HepAD38 cell culture supernatant were concentrated 50- to 100-fold by ultrafiltration using a filter unit (Amicon Ultra-15, 100 kDa).']
[' ultra']
Score: -4.184830188751221

['epAD38 cell culture supernatant (1.5\u2009ml), concentrated by ultrafiltration, or serum samples from chronic hepatitis patients diluted with TNE buffer to 1']
[' ultra']
Score: -4.1092963218688965

[' HepAD38 cell culture supernatant (250\u2009μl each) (via ultrafiltration) was either mixed with anti-HBcAg antibody (10\u2009']
[' ultra']
Score: -4.083191394805908

[' and culture media were concentrated by ultrafiltration, followed by fractionation in CsCl density gradients as described in']
[' ultra']
Score: -4.025380611419678

['\n\n(www.cavernclub.org)\n\nTribute bands keep the crowds entertained across a week in late August of Bea

## `subj_attn6_normal`

In [70]:
# Reads from `scores` (loaded from scores_on_pile.pkl): the previous `test`
# variable is never defined in this notebook, and scores[3] is the entry
# named 'subj_attn6_normal'.
subj_attn6_top_scores, subj_attn6_bot_scores = get_top_normal_scores_for_prompt_list(tokenized_prompts, scores[3]['scores'], k=30, both=True)

In [71]:
# Print context, token and score for each of the highest-scoring tokens.
for entry in subj_attn6_top_scores:
    decode_prompt_and_tokens(
        tokenized_prompts,
        prompt=entry['prompt_idx'],
        mid_token=entry['token_idx'],
        window_size=18,
    )
    print(f"Score: {entry['score']}", end="\n\n")

['son Tower** and in front of it a beautiful statue of St Edmund by Dame Elisabeth Frink (1976). The rest of the abbey spreads eastward like a r']
['abeth']
Score: 18.855684280395508

[' a gorgeous hammerbeam roof and a striking sculpture of the crucified Christ by Dame Elisabeth Frink in the north transept.\n\nThe impressive entrance porch has a']
['abeth']
Score: 17.69734001159668

[' the elaborate Portuguese silver service or the impressive Egyptian service, a divorce present from Napoleon to Josephine']
['ine']
Score: 17.407222747802734

[', and opened in 1892, this brainchild of his Parisian actress wife, Josephine, was built by French architect Jules Pellechet to display a collection the Bow']
['ine']
Score: 16.95897102355957

[" rocky beach of **Priest's Cove**, while nearby are the ruins of **St Helen's Oratory**, supposedly one of the first Christian chapels built in West Cornwall"]
[' Helen']
Score: 16.818286895751953

[' Highlights include the magnificent gold coach of 1762 

In [72]:
# Print context, token and score for each of the lowest-scoring tokens.
for entry in subj_attn6_bot_scores:
    decode_prompt_and_tokens(
        tokenized_prompts,
        prompt=entry['prompt_idx'],
        mid_token=entry['token_idx'],
        window_size=18,
    )
    print(f"Score: {entry['score']}", end="\n\n")

[" family was devastated, but things quickly got worse. Emily fell ill with tuberculosis soon after her brother's funeral; she never left the house again, and died on 19 December. Anne"]
[' brother']
Score: -11.732259750366211

[" recounted the sighting of a disturbance in the loch by Mrs Aldie Mackay and her husband: 'There the creature disported itself, rolling and plunging for fully a minute"]
[' husband']
Score: -11.607738494873047

[' paid time off work during menstruation\n• (often from male workers, who viewed the employment of women as competition) women should not be employed in']
[' male']
Score: -11.323687553405762

['Ornate Plas Newydd was home to Lady Eleanor Butler and Miss Sarah Ponsonby, two society ladies who ran away from Ireland to Wales disguised as men, and']
['onson']
Score: -11.2276611328125

[" of adultery, debauchery, crime and edgy romance, and is filled with Chaucer's witty observations about human nature.\n\nHistory\n\nCanterbury's past"]
['cer']
Score: -11.

# Prettying up the maximum activating tokens

## `occu_attn6_normal`

In [31]:
outstr = r"""['son Tower** and in front of it a beautiful statue of St Edmund by Dame Elisabeth Frink (1976). The rest of the abbey spreads eastward like a r']
['abeth']
Score: 18.371620178222656

[' a gorgeous hammerbeam roof and a striking sculpture of the crucified Christ by Dame Elisabeth Frink in the north transept.\n\nThe impressive entrance porch has a']
['abeth']
Score: 17.38771629333496

[' the elaborate Portuguese silver service or the impressive Egyptian service, a divorce present from Napoleon to Josephine']
['ine']
Score: 16.814821243286133

[" rocky beach of **Priest's Cove**, while nearby are the ruins of **St Helen's Oratory**, supposedly one of the first Christian chapels built in West Cornwall"]
[' Helen']
Score: 16.30881118774414

[', and opened in 1892, this brainchild of his Parisian actress wife, Josephine, was built by French architect Jules Pellechet to display a collection the Bow']
['ine']
Score: 16.266782760620117

[" the film _Bridget Jones's Diary;_ a local house was used as Bridget's parents' home.\n\n1Sights\n\nBroadway TowerTOWER"]
['idget']
Score: 16.1706485748291

[') by his side and a loyal band of followers in support. Arthur went on to slay Rita Gawr, a giant who butchered']
[' Rita']
Score: 16.078720092773438

[" for the fact that Sir Robert Walpole's grandson sold the estate's splendid art collection to Catherine the Great of Russia to stave off debts – those paintings formed the foundation of the"]
[' Catherine']
Score: 16.039093017578125

[' Highlights include the magnificent gold coach of 1762 and the 1910 Glass Coach (Prince William and Catherine Middleton actually used the 1902 State Landau for their wedding in 2011).\n\n']
[' Catherine']
Score: 15.966524124145508

[" by Canaletto, El Greco and Goya as well as 55 paintings by Josephine herself. Among the 15,000 other objets d'art are incredible dresses from"]
['ine']
Score: 15.905682563781738

[" looks like something from a children's storybook (a fact not unnoticed by the author Antonia Barber, who set her much-loved fairy-tale _The Mousehole Cat"]
['ia']
Score: 15.581583976745605

[". Precious little now remains save for a few nave walls, the ruined **St Mary's chapel**, and the crossing arches, which may"]
[' Mary']
Score: 15.44295597076416

[".\n\nTrain\n\nThe northern terminus of the Welsh Highland Railway is on St Helen's Rd. Trains run to Porthmadog (£35 return, 2½"]
[' Helen']
Score: 15.374456405639648

["2\n\n### KING RICHARD III\n\nIt's an amazing story. Philippa Langley, a member of the Richard III Society, spent four-and-a"]
['a']
Score: 15.357645988464355

[' pit (which can still be seen) from the granary above. In 1566, Mary, Queen of Scots famously visited the wounded tenant of the castle, Lord Bothwell,']
[' Mary']
Score: 15.311912536621094

[" Richard III, Henry VIII and Charles I. It is most famous as the home of Catherine Parr (Henry VIII's widow) and her second husband, Thomas Seymour. Princess"]
[' Catherine']
Score: 15.274629592895508

[" Peninsula\n\n#### Bodmin Moor\n\n#### Isles of Scilly\n\n#### St Mary's\n\n#### Tresco\n\n#### Bryher\n\n#### St Martin"]
[' Mary']
Score: 15.2457275390625

["'.\n\nOutside the cathedral's eastern end is the grave of the WWI heroine Edith"]
['ith']
Score: 15.18163776397705

[" many people visit for the region's literary connections; William Wordsworth, Beatrix Potter, Arthur Ransome and John Ruskin all found inspiration here.\n\n"]
['rix']
Score: 15.134939193725586

[" Peninsula\n\n#### Bodmin Moor\n\n#### Isles of Scilly\n\n#### St Mary's\n\n#### Tresco\n\n#### Bryher\n\n#### St Martin"]
[' Mary']
Score: 15.111210823059082

[" _Mayor of Casterbridge_ locations hidden among modern Dorchester. They include **Lucetta's House**, a grand Georgian affair with ornate door posts in Trinity St,"]
['etta']
Score: 15.053227424621582

[" leads down to this little cove and the remains of the small Tudor fort of **St Catherine's Castle**.\n\nPolkerris BeachBEACH\n\n(  G"]
[' Catherine']
Score: 15.051271438598633

["-century **St Catherine's Lighthouse** and its 14th-century counterpart, **St Catherine's Or"]
[' Catherine']
Score: 14.979146003723145

[' ) ; Castle Yard) stands behind a 15th-century gate near the church of St Mary de Castro (  MAP   GOOGLE MAP ) ; Castle St),']
[' Mary']
Score: 14.967689514160156

[' the Glasgow School of Art. It was there that he met the also influential artist and designer Margaret Macdonald, whom he married; they collaborated on many projects and were major influences on']
[' Margaret']
Score: 14.93043327331543

[' Nov-Mar )\n\nThe raising of the 16th-century warship the _Mary Rose_ in 1982 was an extraordinary feat of marine archaeology. Now the new £']
['Mary']
Score: 14.69912338256836

[' was claimed by the Boleyn family and passed through the generations to Thomas, father of Anne Boleyn. Anne was executed by her husband Henry VIII in 1533, who']
[' Anne']
Score: 14.686223030090332

[". The village has literary cachet too – Wordsworth went to school here, and Beatrix Potter's husband, William Heelis, worked here as a solicitor for"]
['rix']
Score: 14.658323287963867

[" are William MacTaggart's Impressionistic Scottish landscapes and a gem by Thomas Millie Dow. There's also a special collection of James McNeill Whistler's lim"]
['ie']
Score: 14.626124382019043

[' Stay\n\nAMillgate House\n\nADevonshire Fell\n\nAHelaina\n\nAQuebecs\n\nALa Rosa Hotel\n\n## Yorkshire Highlights']
['aina']
Score: 14.577713012695312"""

In [32]:
# Escape LaTeX special characters so the excerpts can be pasted into \texttt{}.
# Order matters: backslashes go first so the backslashes inserted by later
# replacements are not escaped again, and braces go before `~`/`^` because
# those two replacements themselves contain braces.
# Besides `\`, `_` and `#`, this also covers `%`, `&`, `$`, `{`, `}`, `~`, `^`;
# an unescaped `%` would comment out the rest of a LaTeX line.
new_outstr = (
    outstr
    .replace("\\", "\\textbackslash ")
    .replace("{", "\\{")
    .replace("}", "\\}")
    .replace("_", "\\_")
    .replace("#", "\\#")
    .replace("%", "\\%")
    .replace("&", "\\&")
    .replace("$", "\\$")
    .replace("~", "\\textasciitilde{}")
    .replace("^", "\\textasciicircum{}")
)

In [33]:
# The pasted output repeats in groups of four lines: excerpt, token, score,
# blank separator. Turn each group into a LaTeX \item with a nested itemize.
outlines = new_outstr.split('\n')
for i, line in enumerate(outlines):
    field = i % 4
    if field == 3:
        # Blank separator: close this token's nested list.
        print("\\end{itemize}")
        continue
    if field == 2:
        # Drop the leading "Score: " label before parsing the number.
        extract_str = line[7:]
        print(f"\t\\item Score: {float(extract_str):.3f}")
        continue
    # Excerpt and token lines are wrapped in [' '] / [" "]; strip that wrapper.
    extract_str = line[2:-2]
    if field == 0:
        print(f"\\item Highest-activating token \\#{i//4+1}:")
        print("\\begin{itemize}")
        print(f"\t\\item Excerpt from prompt: \\texttt{{\"{extract_str}\"}}")
    else:
        print(f"\t\\item Token: \\texttt{{\"{extract_str}\"}}")
# The text has no trailing blank line, so the last nested list is closed here.
print("\\end{itemize}")

\item Highest-activating token \#1:
\begin{itemize}
	\item Excerpt from prompt: \texttt{"son Tower** and in front of it a beautiful statue of St Edmund by Dame Elisabeth Frink (1976). The rest of the abbey spreads eastward like a r"}
	\item Token: \texttt{"abeth"}
	\item Score: 18.372
\end{itemize}
\item Highest-activating token \#2:
\begin{itemize}
	\item Excerpt from prompt: \texttt{" a gorgeous hammerbeam roof and a striking sculpture of the crucified Christ by Dame Elisabeth Frink in the north transept.\textbackslash n\textbackslash nThe impressive entrance porch has a"}
	\item Token: \texttt{"abeth"}
	\item Score: 17.388
\end{itemize}
\item Highest-activating token \#3:
\begin{itemize}
	\item Excerpt from prompt: \texttt{" the elaborate Portuguese silver service or the impressive Egyptian service, a divorce present from Napoleon to Josephine"}
	\item Token: \texttt{"ine"}
	\item Score: 16.815
\end{itemize}
\item Highest-activating token \#4:
\begin{itemize}
	\item Excerpt from pro

In [34]:
outstr = r"""[" recounted the sighting of a disturbance in the loch by Mrs Aldie Mackay and her husband: 'There the creature disported itself, rolling and plunging for fully a minute"]
[' husband']
Score: -12.129179954528809

[' paid time off work during menstruation\n• (often from male workers, who viewed the employment of women as competition) women should not be employed in']
[' male']
Score: -11.344346046447754

[" family was devastated, but things quickly got worse. Emily fell ill with tuberculosis soon after her brother's funeral; she never left the house again, and died on 19 December. Anne"]
[' brother']
Score: -11.146452903747559

[" handsome Jacobean town house belonging to Shakespeare's daughter Susanna and her husband, respected doctor John Hall, stands south of the centre. The exhibition offers fascinating insights"]
[' husband']
Score: -11.016054153442383

[" hall was home to the 16th-century's second-most powerful woman, Elizabeth, Countess of Shrewsbury – known to all as Bess of Hardwick –"]
[' Count']
Score: -10.793420791625977

[" haunted places, with spectres from a phantom funeral to Lady Mary Berkeley seeking her errant husband. Owner Sir Humphrey Wakefield has passionately restored the castle's extravagant medieval stater"]
[' husband']
Score: -10.682342529296875

[' Windsor Castle in 1861, Queen Victoria ordered its elaborate redecoration as a tribute to her husband. A major feature of the restoration is the magnificent vaulted roof, whose gold mosaic']
[' husband']
Score: -10.576872825622559

['Ornate Plas Newydd was home to Lady Eleanor Butler and Miss Sarah Ponsonby, two society ladies who ran away from Ireland to Wales disguised as men, and']
['onson']
Score: -10.502994537353516

[" with DVD players, with tremendous views across the bay from the largest two. Bridget and Derek really give this place a 'home away from home' ambience, and can arrange"]
[' Derek']
Score: -10.483375549316406

[" of adultery, debauchery, crime and edgy romance, and is filled with Chaucer's witty observations about human nature.\n\nHistory\n\nCanterbury's past"]
['cer']
Score: -10.296209335327148

[" the city in 1645. Legend has it that the disease-ridden inhabitants of **Mary King's Close** (a lane on the northern side of the Royal Mile on the site"]
[' King']
Score: -10.29388427734375

[' manor was founded in 1552 by the formidable Bess of Hardwick and her second husband, William Cavendish, who earned grace and favour by helping Henry VIII dissolve the English']
[' husband']
Score: -10.251087188720703

[' Apartments** is the bedchamber where Mary, Queen of Scots gave birth to her son James VI, who was to unite the crowns of Scotland and England in 1603']
[' son']
Score: -10.14790153503418

["s at the behest of Queen Victoria, the monarch grieved here for many years after her husband's death. Extravagant rooms include the opulent Royal Apartments and Dur"]
[' husband']
Score: -10.11220932006836

["am-5pm Mar-Oct)\n\nThis ambitious three-dimensional interpretation of Chaucer's classic tales using jerky animatronics and audioguides is certainly entertaining"]
['cer']
Score: -10.05312442779541

[" his death, in the hard-to-decipher Middle English of the day, Chaucer's _Tales_ is an unfinished series of 24 vivid stories told by a party"]
['cer']
Score: -10.05025863647461

[" especially in **Poets' Corner**, where you'll find the resting places of Chaucer, Dickens, Hardy, Tennyson, Dr Johnson and Kipling, as well as"]
['cer']
Score: -10.032718658447266

['          her? She’s up here saying his intent was this.\n\n¶ 35   Trujillo objected on the basis']
[' his']
Score: -10.031278610229492

[' lived here happily with his sister Dorothy, wife Mary and three children John, Dora and Thomas until 1808, when the family moved to another nearby house at Allen Bank, and']
[' Thomas']
Score: -9.93402099609375

[' home of Queen Isabella, who (allegedly) arranged the gruesome murder of her husband, Edward II.\n\nHoughton Hall']
[' husband']
Score: -9.93201732635498

[' Saturday, four on Sunday).\n\nQueen Victoria bought Sandringham in 1862 for her son, the Prince of Wales (later Edward VII), and the features and furnishings remain']
[' son']
Score: -9.883359909057617

[" the palace, which contains Mary's Bed Chamber, connected by a secret stairway to her husband's bedroom, and ends with the ruins of Holyrood Abbey.\n\nHoly"]
[' husband']
Score: -9.82376480102539

[" holidays.\n\nThe two-hour tour includes the **Throne Room**, with his-and-hers pink chairs initialed 'ER' and 'P'. Access is"]
[' his']
Score: -9.71737003326416

[' is packed with all manner of Highland memorabilia. Look out for the secret portrait of Bonnie Prince Charlie – after the Jacobite rebellions all things Highland were banned, including pictures of']
[' Prince']
Score: -9.691475868225098

[' the last college to let women study there; when they were finally admitted in 1988, some male students wore black armbands and flew the college flag at half mast.\n\n']
[' male']
Score: -9.651887893676758

["oh, Michael Bond's Paddington Bear, Beatrix Potter's Peter Rabbit, Roald Dahl's Willy Wonka and JK Rowling's Harry Potter are perennially popular"]
['ald']
Score: -9.612836837768555

[" one of the rooms. In 2003 the close was opened to the public as the Real Mary King's Close.\n\n### SCOTTISH PARLIAMENT BUILDING\n"]
[' King']
Score: -9.404897689819336

[", Mary, Dorothy and all three children. Samuel Taylor Coleridge's son Hartley is also buried here.\n\nGrasm"]
[' Samuel']
Score: -9.372106552124023

[", the town became northern Europe's most important pilgrimage destination, which in turn prompted Geoffrey Chaucer's _The Canterbury Tales,_ one of the most outstanding works in English literature."]
['cer']
Score: -9.350981712341309

[' Queen Isabella, who (allegedly) arranged the gruesome murder of her husband, Edward II.\n\nHoughton Hall']
[' Edward']
Score: -9.2720947265625"""

In [35]:
# Escape LaTeX special characters so the excerpts can be pasted into \texttt{}.
# Order matters: backslashes go first so the backslashes inserted by later
# replacements are not escaped again, and braces go before `~`/`^` because
# those two replacements themselves contain braces.
# Besides `\`, `_` and `#`, this also covers `%`, `&`, `$`, `{`, `}`, `~`, `^`;
# an unescaped `%` would comment out the rest of a LaTeX line.
new_outstr = (
    outstr
    .replace("\\", "\\textbackslash ")
    .replace("{", "\\{")
    .replace("}", "\\}")
    .replace("_", "\\_")
    .replace("#", "\\#")
    .replace("%", "\\%")
    .replace("&", "\\&")
    .replace("$", "\\$")
    .replace("~", "\\textasciitilde{}")
    .replace("^", "\\textasciicircum{}")
)

In [36]:
# The pasted output repeats in groups of four lines: excerpt, token, score,
# blank separator. Turn each group into a LaTeX \item with a nested itemize.
outlines = new_outstr.split('\n')
for i, line in enumerate(outlines):
    field = i % 4
    if field == 3:
        # Blank separator: close this token's nested list.
        print("\\end{itemize}")
        continue
    if field == 2:
        # Drop the leading "Score: " label before parsing the number.
        extract_str = line[7:]
        print(f"\t\\item Score: {float(extract_str):.3f}")
        continue
    # Excerpt and token lines are wrapped in [' '] / [" "]; strip that wrapper.
    extract_str = line[2:-2]
    if field == 0:
        print(f"\\item Lowest-activating token \\#{i//4+1}:")
        print("\\begin{itemize}")
        print(f"\t\\item Excerpt from prompt: \\texttt{{\"{extract_str}\"}}")
    else:
        print(f"\t\\item Token: \\texttt{{\"{extract_str}\"}}")
# The text has no trailing blank line, so the last nested list is closed here.
print("\\end{itemize}")

\item Lowest-activating token \#1:
\begin{itemize}
	\item Excerpt from prompt: \texttt{" recounted the sighting of a disturbance in the loch by Mrs Aldie Mackay and her husband: 'There the creature disported itself, rolling and plunging for fully a minute"}
	\item Token: \texttt{" husband"}
	\item Score: -12.129
\end{itemize}
\item Lowest-activating token \#2:
\begin{itemize}
	\item Excerpt from prompt: \texttt{" paid time off work during menstruation\textbackslash n• (often from male workers, who viewed the employment of women as competition) women should not be employed in"}
	\item Token: \texttt{" male"}
	\item Score: -11.344
\end{itemize}
\item Lowest-activating token \#3:
\begin{itemize}
	\item Excerpt from prompt: \texttt{" family was devastated, but things quickly got worse. Emily fell ill with tuberculosis soon after her brother's funeral; she never left the house again, and died on 19 December. Anne"}
	\item Token: \texttt{" brother"}
	\item Score: -11.146
\end{itemize}
\item 

## `subj_attn6_normal`

In [37]:
# Pasted text dump of the examples that the formatting cell below labels as the
# "Highest-activating" tokens (this section's header is `subj_attn_6`).
# Layout: records are separated by one blank line and each record is three lines:
#   1. a one-element list repr holding the prompt excerpt
#   2. a one-element list repr holding the token
#   3. "Score: <float>"
# The literal is a raw string, so sequences such as \n inside the excerpts stay
# as a backslash followed by a letter rather than becoming real newlines.
# NOTE(review): presumably copied from the printed output of an earlier cell -- confirm.
outstr = r"""['son Tower** and in front of it a beautiful statue of St Edmund by Dame Elisabeth Frink (1976). The rest of the abbey spreads eastward like a r']
['abeth']
Score: 18.371620178222656

[' a gorgeous hammerbeam roof and a striking sculpture of the crucified Christ by Dame Elisabeth Frink in the north transept.\n\nThe impressive entrance porch has a']
['abeth']
Score: 17.38771629333496

[' the elaborate Portuguese silver service or the impressive Egyptian service, a divorce present from Napoleon to Josephine']
['ine']
Score: 16.814821243286133

[" rocky beach of **Priest's Cove**, while nearby are the ruins of **St Helen's Oratory**, supposedly one of the first Christian chapels built in West Cornwall"]
[' Helen']
Score: 16.30881118774414

[', and opened in 1892, this brainchild of his Parisian actress wife, Josephine, was built by French architect Jules Pellechet to display a collection the Bow']
['ine']
Score: 16.266782760620117

[" the film _Bridget Jones's Diary;_ a local house was used as Bridget's parents' home.\n\n1Sights\n\nBroadway TowerTOWER"]
['idget']
Score: 16.1706485748291

[') by his side and a loyal band of followers in support. Arthur went on to slay Rita Gawr, a giant who butchered']
[' Rita']
Score: 16.078720092773438

[" for the fact that Sir Robert Walpole's grandson sold the estate's splendid art collection to Catherine the Great of Russia to stave off debts – those paintings formed the foundation of the"]
[' Catherine']
Score: 16.039093017578125

[' Highlights include the magnificent gold coach of 1762 and the 1910 Glass Coach (Prince William and Catherine Middleton actually used the 1902 State Landau for their wedding in 2011).\n\n']
[' Catherine']
Score: 15.966524124145508

[" by Canaletto, El Greco and Goya as well as 55 paintings by Josephine herself. Among the 15,000 other objets d'art are incredible dresses from"]
['ine']
Score: 15.905682563781738

[" looks like something from a children's storybook (a fact not unnoticed by the author Antonia Barber, who set her much-loved fairy-tale _The Mousehole Cat"]
['ia']
Score: 15.581583976745605

[". Precious little now remains save for a few nave walls, the ruined **St Mary's chapel**, and the crossing arches, which may"]
[' Mary']
Score: 15.44295597076416

[".\n\nTrain\n\nThe northern terminus of the Welsh Highland Railway is on St Helen's Rd. Trains run to Porthmadog (£35 return, 2½"]
[' Helen']
Score: 15.374456405639648

["2\n\n### KING RICHARD III\n\nIt's an amazing story. Philippa Langley, a member of the Richard III Society, spent four-and-a"]
['a']
Score: 15.357645988464355

[' pit (which can still be seen) from the granary above. In 1566, Mary, Queen of Scots famously visited the wounded tenant of the castle, Lord Bothwell,']
[' Mary']
Score: 15.311912536621094

[" Richard III, Henry VIII and Charles I. It is most famous as the home of Catherine Parr (Henry VIII's widow) and her second husband, Thomas Seymour. Princess"]
[' Catherine']
Score: 15.274629592895508

[" Peninsula\n\n#### Bodmin Moor\n\n#### Isles of Scilly\n\n#### St Mary's\n\n#### Tresco\n\n#### Bryher\n\n#### St Martin"]
[' Mary']
Score: 15.2457275390625

["'.\n\nOutside the cathedral's eastern end is the grave of the WWI heroine Edith"]
['ith']
Score: 15.18163776397705

[" many people visit for the region's literary connections; William Wordsworth, Beatrix Potter, Arthur Ransome and John Ruskin all found inspiration here.\n\n"]
['rix']
Score: 15.134939193725586

[" Peninsula\n\n#### Bodmin Moor\n\n#### Isles of Scilly\n\n#### St Mary's\n\n#### Tresco\n\n#### Bryher\n\n#### St Martin"]
[' Mary']
Score: 15.111210823059082

[" _Mayor of Casterbridge_ locations hidden among modern Dorchester. They include **Lucetta's House**, a grand Georgian affair with ornate door posts in Trinity St,"]
['etta']
Score: 15.053227424621582

[" leads down to this little cove and the remains of the small Tudor fort of **St Catherine's Castle**.\n\nPolkerris BeachBEACH\n\n(  G"]
[' Catherine']
Score: 15.051271438598633

["-century **St Catherine's Lighthouse** and its 14th-century counterpart, **St Catherine's Or"]
[' Catherine']
Score: 14.979146003723145

[' ) ; Castle Yard) stands behind a 15th-century gate near the church of St Mary de Castro (  MAP   GOOGLE MAP ) ; Castle St),']
[' Mary']
Score: 14.967689514160156

[' the Glasgow School of Art. It was there that he met the also influential artist and designer Margaret Macdonald, whom he married; they collaborated on many projects and were major influences on']
[' Margaret']
Score: 14.93043327331543

[' Nov-Mar )\n\nThe raising of the 16th-century warship the _Mary Rose_ in 1982 was an extraordinary feat of marine archaeology. Now the new £']
['Mary']
Score: 14.69912338256836

[' was claimed by the Boleyn family and passed through the generations to Thomas, father of Anne Boleyn. Anne was executed by her husband Henry VIII in 1533, who']
[' Anne']
Score: 14.686223030090332

[". The village has literary cachet too – Wordsworth went to school here, and Beatrix Potter's husband, William Heelis, worked here as a solicitor for"]
['rix']
Score: 14.658323287963867

[" are William MacTaggart's Impressionistic Scottish landscapes and a gem by Thomas Millie Dow. There's also a special collection of James McNeill Whistler's lim"]
['ie']
Score: 14.626124382019043

[' Stay\n\nAMillgate House\n\nADevonshire Fell\n\nAHelaina\n\nAQuebecs\n\nALa Rosa Hotel\n\n## Yorkshire Highlights']
['aina']
Score: 14.577713012695312"""

In [38]:
# Escape LaTeX special characters in the pasted dump so each excerpt can be
# placed inside \texttt{...} by the formatting cell that follows.
#
# The previous chained .replace() calls handled only backslash, underscore and
# hash. An excerpt containing &, %, $, {, }, ~ or ^ would then either fail to
# compile or (for %) silently comment out the rest of the line. str.translate
# maps every source character exactly once, so the backslashes introduced by
# one escape can never be re-escaped by another, whatever the table order.
#
# The first three entries reproduce the earlier replacements byte-for-byte
# (including the trailing space after \textbackslash), so text that contains
# none of the newly handled characters is escaped exactly as before.
latex_escapes = str.maketrans({
    "\\": "\\textbackslash ",
    "_": "\\_",
    "#": "\\#",
    "&": "\\&",
    "%": "\\%",
    "$": "\\$",
    "{": "\\{",
    "}": "\\}",
    "~": "\\textasciitilde{}",
    "^": "\\textasciicircum{}",
})
new_outstr = outstr.translate(latex_escapes)

In [39]:
# Turn the escaped dump in `new_outstr` into LaTeX: one \item per record,
# followed by a nested itemize holding the excerpt, the token and the score.
outlines = new_outstr.split('\n')

# Drop the blank separator lines and read what is left as three-line records
# (excerpt, token, score). Parsing by record instead of by `line index % 4`
# keeps \begin{itemize} / \end{itemize} balanced even when the dump ends with
# a newline or has an extra blank line between records; the positional version
# emitted a stray \end{itemize} (or shifted every field) in those cases.
record_lines = [text for text in outlines if text.strip()]
if len(record_lines) % 3 != 0:
    raise ValueError(
        f"expected 3 non-blank lines per record, found {len(record_lines)} non-blank lines"
    )

for rank, start in enumerate(range(0, len(record_lines), 3), start=1):
    excerpt_line, token_line, score_line = record_lines[start:start + 3]
    excerpt = excerpt_line[2:-2]    # strip the list-repr wrapper: [' ... '] or [" ... "]
    token = token_line[2:-2]        # same wrapper as the excerpt line
    score = float(score_line[7:])   # skip the 7-character "Score: " prefix
    print(f"\\item Highest-activating token \\#{rank}:")
    print("\\begin{itemize}")
    print(f"\t\\item Excerpt from prompt: \\texttt{{\"{excerpt}\"}}")
    print(f"\t\\item Token: \\texttt{{\"{token}\"}}")
    print(f"\t\\item Score: {score:.3f}")
    print("\\end{itemize}")

\item Highest-activating token \#1:
\begin{itemize}
	\item Excerpt from prompt: \texttt{"son Tower** and in front of it a beautiful statue of St Edmund by Dame Elisabeth Frink (1976). The rest of the abbey spreads eastward like a r"}
	\item Token: \texttt{"abeth"}
	\item Score: 18.372
\end{itemize}
\item Highest-activating token \#2:
\begin{itemize}
	\item Excerpt from prompt: \texttt{" a gorgeous hammerbeam roof and a striking sculpture of the crucified Christ by Dame Elisabeth Frink in the north transept.\textbackslash n\textbackslash nThe impressive entrance porch has a"}
	\item Token: \texttt{"abeth"}
	\item Score: 17.388
\end{itemize}
\item Highest-activating token \#3:
\begin{itemize}
	\item Excerpt from prompt: \texttt{" the elaborate Portuguese silver service or the impressive Egyptian service, a divorce present from Napoleon to Josephine"}
	\item Token: \texttt{"ine"}
	\item Score: 16.815
\end{itemize}
\item Highest-activating token \#4:
\begin{itemize}
	\item Excerpt from pro

In [40]:
# Pasted text dump of the examples that the formatting cell below labels as the
# "Lowest-activating" tokens (still under the `subj_attn_6` section header).
# Layout: records are separated by one blank line and each record is three lines:
#   1. a one-element list repr holding the prompt excerpt
#   2. a one-element list repr holding the token
#   3. "Score: <float>"
# The literal is a raw string, so sequences such as \n inside the excerpts stay
# as a backslash followed by a letter rather than becoming real newlines.
# NOTE(review): presumably copied from the printed output of an earlier cell -- confirm.
outstr = r"""[" family was devastated, but things quickly got worse. Emily fell ill with tuberculosis soon after her brother's funeral; she never left the house again, and died on 19 December. Anne"]
[' brother']
Score: -11.732259750366211

[" recounted the sighting of a disturbance in the loch by Mrs Aldie Mackay and her husband: 'There the creature disported itself, rolling and plunging for fully a minute"]
[' husband']
Score: -11.607738494873047

[' paid time off work during menstruation\n• (often from male workers, who viewed the employment of women as competition) women should not be employed in']
[' male']
Score: -11.323687553405762

['Ornate Plas Newydd was home to Lady Eleanor Butler and Miss Sarah Ponsonby, two society ladies who ran away from Ireland to Wales disguised as men, and']
['onson']
Score: -11.2276611328125

[" of adultery, debauchery, crime and edgy romance, and is filled with Chaucer's witty observations about human nature.\n\nHistory\n\nCanterbury's past"]
['cer']
Score: -11.006778717041016

[' Apartments** is the bedchamber where Mary, Queen of Scots gave birth to her son James VI, who was to unite the crowns of Scotland and England in 1603']
[' son']
Score: -10.970754623413086

[" handsome Jacobean town house belonging to Shakespeare's daughter Susanna and her husband, respected doctor John Hall, stands south of the centre. The exhibition offers fascinating insights"]
[' husband']
Score: -10.884218215942383

[" his death, in the hard-to-decipher Middle English of the day, Chaucer's _Tales_ is an unfinished series of 24 vivid stories told by a party"]
['cer']
Score: -10.854096412658691

[" especially in **Poets' Corner**, where you'll find the resting places of Chaucer, Dickens, Hardy, Tennyson, Dr Johnson and Kipling, as well as"]
['cer']
Score: -10.793636322021484

["am-5pm Mar-Oct)\n\nThis ambitious three-dimensional interpretation of Chaucer's classic tales using jerky animatronics and audioguides is certainly entertaining"]
['cer']
Score: -10.792936325073242

[" haunted places, with spectres from a phantom funeral to Lady Mary Berkeley seeking her errant husband. Owner Sir Humphrey Wakefield has passionately restored the castle's extravagant medieval stater"]
[' husband']
Score: -10.695722579956055

[' Windsor Castle in 1861, Queen Victoria ordered its elaborate redecoration as a tribute to her husband. A major feature of the restoration is the magnificent vaulted roof, whose gold mosaic']
[' husband']
Score: -10.67307186126709

[" hall was home to the 16th-century's second-most powerful woman, Elizabeth, Countess of Shrewsbury – known to all as Bess of Hardwick –"]
[' Count']
Score: -10.616999626159668

[' Saturday, four on Sunday).\n\nQueen Victoria bought Sandringham in 1862 for her son, the Prince of Wales (later Edward VII), and the features and furnishings remain']
[' son']
Score: -10.555770874023438

[' is packed with all manner of Highland memorabilia. Look out for the secret portrait of Bonnie Prince Charlie – after the Jacobite rebellions all things Highland were banned, including pictures of']
[' Prince']
Score: -10.424220085144043

[' beautiful, time-worn rooms hold fascinating relics, including the cradle used by Mary for her son, James VI of Scotland (who also became James I of England), and fascinating letters']
[' son']
Score: -10.265992164611816

[", the town became northern Europe's most important pilgrimage destination, which in turn prompted Geoffrey Chaucer's _The Canterbury Tales,_ one of the most outstanding works in English literature."]
['cer']
Score: -10.249833106994629

[" the city in 1645. Legend has it that the disease-ridden inhabitants of **Mary King's Close** (a lane on the northern side of the Royal Mile on the site"]
[' King']
Score: -10.17730712890625

[" with DVD players, with tremendous views across the bay from the largest two. Bridget and Derek really give this place a 'home away from home' ambience, and can arrange"]
[' Derek']
Score: -10.124138832092285

['          her? She’s up here saying his intent was this.\n\n¶ 35   Trujillo objected on the basis']
[' his']
Score: -10.113439559936523

[' the last college to let women study there; when they were finally admitted in 1988, some male students wore black armbands and flew the college flag at half mast.\n\n']
[' male']
Score: -10.058089256286621

["s at the behest of Queen Victoria, the monarch grieved here for many years after her husband's death. Extravagant rooms include the opulent Royal Apartments and Dur"]
[' husband']
Score: -10.018303871154785

[' home of Queen Isabella, who (allegedly) arranged the gruesome murder of her husband, Edward II.\n\nHoughton Hall']
[' husband']
Score: -9.988592147827148

[', Van Dyck, Vermeer, El Greco, Poussin, Rembrandt, Gainsborough, Turner, Constable, Monet, Pissarro,']
['brand']
Score: -9.937095642089844

[' 24 vivid stories told by a party of pilgrims journeying between London and Canterbury. Chaucer successfully created the illusion that the pilgrims, not Chaucer (though he appears in the']
['cer']
Score: -9.90941047668457

[" the palace, which contains Mary's Bed Chamber, connected by a secret stairway to her husband's bedroom, and ends with the ruins of Holyrood Abbey.\n\nHoly"]
[' husband']
Score: -9.861638069152832

[' lived here happily with his sister Dorothy, wife Mary and three children John, Dora and Thomas until 1808, when the family moved to another nearby house at Allen Bank, and']
[' Thomas']
Score: -9.8422269821167

[' 19 prime ministers, countless princes, kings and maharajahs, famous explorers, authors and']
[' prime']
Score: -9.733100891113281

[' held court in the Palace of Holyroodhouse for six brief years, but when her son James VI succeeded to the English throne in 1603, he moved his court to London']
[' son']
Score: -9.711433410644531

[", Mary, Dorothy and all three children. Samuel Taylor Coleridge's son Hartley is also buried here.\n\nGrasm"]
[' Samuel']
Score: -9.654032707214355"""

In [41]:
# Escape LaTeX special characters in the pasted dump so each excerpt can be
# placed inside \texttt{...} by the formatting cell that follows.
#
# The previous chained .replace() calls handled only backslash, underscore and
# hash. An excerpt containing &, %, $, {, }, ~ or ^ would then either fail to
# compile or (for %) silently comment out the rest of the line. str.translate
# maps every source character exactly once, so the backslashes introduced by
# one escape can never be re-escaped by another, whatever the table order.
#
# The first three entries reproduce the earlier replacements byte-for-byte
# (including the trailing space after \textbackslash), so text that contains
# none of the newly handled characters is escaped exactly as before.
latex_escapes = str.maketrans({
    "\\": "\\textbackslash ",
    "_": "\\_",
    "#": "\\#",
    "&": "\\&",
    "%": "\\%",
    "$": "\\$",
    "{": "\\{",
    "}": "\\}",
    "~": "\\textasciitilde{}",
    "^": "\\textasciicircum{}",
})
new_outstr = outstr.translate(latex_escapes)

In [42]:
# Turn the escaped dump in `new_outstr` into LaTeX: one \item per record,
# followed by a nested itemize holding the excerpt, the token and the score.
outlines = new_outstr.split('\n')

# Drop the blank separator lines and read what is left as three-line records
# (excerpt, token, score). Parsing by record instead of by `line index % 4`
# keeps \begin{itemize} / \end{itemize} balanced even when the dump ends with
# a newline or has an extra blank line between records; the positional version
# emitted a stray \end{itemize} (or shifted every field) in those cases.
record_lines = [text for text in outlines if text.strip()]
if len(record_lines) % 3 != 0:
    raise ValueError(
        f"expected 3 non-blank lines per record, found {len(record_lines)} non-blank lines"
    )

for rank, start in enumerate(range(0, len(record_lines), 3), start=1):
    excerpt_line, token_line, score_line = record_lines[start:start + 3]
    excerpt = excerpt_line[2:-2]    # strip the list-repr wrapper: [' ... '] or [" ... "]
    token = token_line[2:-2]        # same wrapper as the excerpt line
    score = float(score_line[7:])   # skip the 7-character "Score: " prefix
    print(f"\\item Lowest-activating token \\#{rank}:")
    print("\\begin{itemize}")
    print(f"\t\\item Excerpt from prompt: \\texttt{{\"{excerpt}\"}}")
    print(f"\t\\item Token: \\texttt{{\"{token}\"}}")
    print(f"\t\\item Score: {score:.3f}")
    print("\\end{itemize}")

\item Lowest-activating token \#1:
\begin{itemize}
	\item Excerpt from prompt: \texttt{" family was devastated, but things quickly got worse. Emily fell ill with tuberculosis soon after her brother's funeral; she never left the house again, and died on 19 December. Anne"}
	\item Token: \texttt{" brother"}
	\item Score: -11.732
\end{itemize}
\item Lowest-activating token \#2:
\begin{itemize}
	\item Excerpt from prompt: \texttt{" recounted the sighting of a disturbance in the loch by Mrs Aldie Mackay and her husband: 'There the creature disported itself, rolling and plunging for fully a minute"}
	\item Token: \texttt{" husband"}
	\item Score: -11.608
\end{itemize}
\item Lowest-activating token \#3:
\begin{itemize}
	\item Excerpt from prompt: \texttt{" paid time off work during menstruation\textbackslash n• (often from male workers, who viewed the employment of women as competition) women should not be employed in"}
	\item Token: \texttt{" male"}
	\item Score: -11.324
\end{itemize}
\item 