In [None]:
import os
import requests
import bz2
from zipfile import ZipFile

url_path = "https://raw.githubusercontent.com/bzitko/nlp_repo/main/assignments/a04/sgns/"
downloads = {"frankenstein_preprocessed_word.txt.bz2": "frankenstein_preprocessed_word.txt",
             "frankenstein_preprocessed_lemma.txt.bz2": "frankenstein_preprocessed_lemma.txt"}

for download_name, extract_name in downloads.items():
    if extract_name and os.path.exists(extract_name):
        continue

    if not os.path.exists(download_name):
        response = requests.get(f"{url_path}{download_name}")
        with open(download_name, "wb") as fp:
            fp.write(response.content)
        response.close()

    if not extract_name:
        continue

    _, ext = os.path.splitext(download_name)
    if ext == ".bz2":    
        with open(download_name, 'rb') as bzf, open(extract_name, 'wb') as fp:
            fp.write(bz2.decompress(bzf.read()))
    elif ext == ".zip":
        with ZipFile(download_name) as zf:
            zf.extractall(path=".")

# Skip-gram with Negative Sampling

In [None]:
import random
import numpy as np
from tqdm import tqdm
from collections import defaultdict, Counter

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader


# 1 Data preparation

## 1.1 Read corpus

👍 In this task, you're going to write a function called read_corpus that reads a text file and processes its content into a list of words. The goal is to return a "corpus," which is a list of sentences from the file, with each sentence split into individual words and converted to lowercase.

In [3]:
def read_corpus(filename):
    corpus = []
    with open(filename) as fp:
        for line in fp:
            corpus.append(line.lower().split())
    return corpus

corpus = read_corpus("frankenstein_preprocessed_word.txt")
print(corpus[:2])

assert len(corpus) == 3222
assert len(corpus[0]) == 13
assert corpus[-1][:3] == ["he", "was", "soon"]

[['frankenstein', ',', 'or', 'the', 'modern', 'prometheus', 'by', 'mary', 'wollstonecraft', '(', 'godwin', ')', 'shelley'], ['letter', '1', 'st.', 'petersburgh', ',', 'dec.', '11th', ',', '17', '-', '-']]


## 1.2 Word Frequency

👍 In this task, you'll write a function called `build_word_freqs` that takes a "corpus" (which is a list of lists of words) and calculates the frequency of each word in the corpus. The function will then return a dictionary where each key is a word and its value is the proportion of how often that word appears in the entire corpus.

The frequency of a word $ w $, denoted as $ f(w) $, is the ratio of the count of $ w $ in the corpus to the total number of words $ N $:

$$
f(w) = \frac{c(w)}{N}
$$

where:
- $ f(w) $ is the **frequency function** for a word $ w $.
- $ c(w) $ is the **count function** that returns the number of times word $ w $ appears in the corpus.
- $ N $ is the **total number of words** in the corpus.

In [4]:
def build_word_freqs(corpus):
    words = [w for sent in corpus for w in sent]
    word_counts = Counter(words)
    total_count = sum(word_counts.values())
    word_frequencies = {w: count / total_count 
                        for w, count in word_counts.items()}
    return word_frequencies

word_freqs = build_word_freqs(corpus)
print(word_freqs)

assert len(word_freqs) == 7042
assert f"{word_freqs['frankenstein']:.6f}" == "0.000315"

{'frankenstein': 0.0003149643040455415, ',': 0.05766179832952265, 'or': 0.0020647659931874386, 'the': 0.04891278988381317, 'modern': 0.00012831879053707248, 'prometheus': 1.1665344594279316e-05, 'by': 0.005377723857962764, 'mary': 1.1665344594279316e-05, 'wollstonecraft': 1.1665344594279316e-05, '(': 0.00018664551350846905, 'godwin': 1.1665344594279316e-05, ')': 0.00018664551350846905, 'shelley': 1.1665344594279316e-05, 'letter': 0.00032662964863982083, '1': 2.333068918855863e-05, 'st.': 5.832672297139657e-05, 'petersburgh': 3.4996033782837945e-05, 'dec.': 1.1665344594279316e-05, '11th': 2.333068918855863e-05, '17': 0.00012831879053707248, '-': 0.0016331482431991041, 'to': 0.024427231580420886, 'mrs.': 4.666137837711726e-05, 'saville': 4.666137837711726e-05, 'england': 0.0002449722364798656, 'you': 0.006684242452522048, 'will': 0.002263076851290187, 'rejoice': 1.1665344594279316e-05, 'hear': 0.000221641547291307, 'that': 0.011863655452382063, 'no': 0.0020064392702160423, 'disaster': 4.

## 1.3 Vocabulary

👍 In this task, you'll write a function called build_vocab that takes a dictionary of word frequencies (like the output from the previous function) and creates a vocabulary. The vocabulary is a mapping of words to unique indices, where the most frequent word gets the index 0, the second most frequent word gets index 1, and so on.

In [5]:
def build_vocab(word_freqs):
    vocab = {word: idx 
             for idx, word in enumerate(sorted(word_freqs, 
                                               key=word_freqs.get, 
                                               reverse=True))}
    return vocab

vocab = build_vocab(word_freqs)
print(vocab)

len(vocab) == 7042
assert vocab["frankenstein"] == 315

{',': 0, 'the': 1, 'and': 2, '.': 3, 'i': 4, 'of': 5, 'to': 6, 'my': 7, 'a': 8, 'in': 9, 'was': 10, 'that': 11, ';': 12, 'me': 13, '"': 14, 'but': 15, 'had': 16, 'with': 17, 'he': 18, 'you': 19, 'which': 20, 'not': 21, 'it': 22, 'his': 23, 'as': 24, 'for': 25, 'by': 26, 'on': 27, 'this': 28, 'from': 29, 'her': 30, 'have': 31, 'be': 32, 'when': 33, 'at': 34, 'were': 35, 'is': 36, 'she': 37, 'your': 38, '!': 39, 'him': 40, '?': 41, 'an': 42, 'they': 43, 'so': 44, 'one': 45, 'all': 46, 'could': 47, 'will': 48, 'if': 49, 'been': 50, 'their': 51, 'would': 52, 'or': 53, 'are': 54, 'we': 55, 'who': 56, 'no': 57, 'more': 58, 'now': 59, 'these': 60, 'should': 61, 'yet': 62, 'some': 63, 'before': 64, '-': 65, 'can': 66, 'man': 67, 'myself': 68, 'father': 69, 'what': 70, 'them': 71, 'am': 72, 'upon': 73, 'our': 74, "'": 75, 'into': 76, 'its': 77, 'only': 78, 'did': 79, 'do': 80, 'life': 81, 'than': 82, 'every': 83, 'then': 84, 'first': 85, 'might': 86, 'own': 87, 'shall': 88, 'eyes': 89, 'said': 

## 1.4 Word probability

👍 In this task, you'll write a function called build_word_probs that takes a dictionary of word frequencies (like the output from the build_word_freqs function) and converts those frequencies into word probabilities. These probabilities will be adjusted using a power transformation, which is controlled by a parameter power (with a default value of 3/4)

The probability of choosing a word $ w $ as a negative sample is typically defined as:

$$
P(w) = \frac{f(w)^\alpha}{\sum_{w' \in V} f(w')^\alpha}
$$

Where:
- $ f(w) $ is the frequency of the word $ w $ in the corpus.
- $ \alpha $ is a parameter that controls the exponentiated frequency. Typically, $ \alpha $ is set to a value between 0.5 and 1.0, with $ \alpha = 0.75 $ being a common choice.
- $ V $ is the vocabulary, i.e., the set of all words in the model's vocabulary.

In [6]:
def build_word_probs(word_freqs, power=3/4):
    word_probs = {w: freq ** power 
                  for w, freq in word_freqs.items()}
    total_word_probs = sum(word_probs.values())
    word_probs = {w: prob / total_word_probs 
                  for w, prob in word_probs.items()}
    return word_probs

word_probs = build_word_probs(word_freqs)
print(word_probs)

assert len(word_probs) == 7042
assert f"{word_probs['frankenstein']:.6f}" == "0.000405"

{'frankenstein': 0.0004049644293781936, ',': 0.020155195393176634, 'or': 0.001659105506282264, 'the': 0.01781507691444116, 'modern': 0.00020650880271432816, 'prometheus': 3.4189602762342854e-05, 'by': 0.003401494311039136, 'mary': 3.4189602762342854e-05, 'wollstonecraft': 3.4189602762342854e-05, '(': 0.00027351682209874283, 'godwin': 3.4189602762342854e-05, ')': 0.00027351682209874283, 'shelley': 3.4189602762342854e-05, 'letter': 0.00041616214988036, '1': 5.74998288036052e-05, 'st.': 0.00011431982689155939, 'petersburgh': 7.793544077124108e-05, 'dec.': 3.4189602762342854e-05, '11th': 5.74998288036052e-05, '17': 0.00020650880271432816, '-': 0.0013915220151531772, 'to': 0.01058343295440604, 'mrs.': 9.670279983730779e-05, 'saville': 9.670279983730779e-05, 'england': 0.00033539649976835784, 'you': 0.004004147078234845, 'will': 0.0017772370253016476, 'rejoice': 3.4189602762342854e-05, 'hear': 0.0003111424417635452, 'that': 0.006157221392309962, 'no': 0.0016238293621938724, 'disaster': 9.670

## 1.5 Positive sampling

👍 In this task, you'll write a function called generate_positive_pairs that takes a list of sentences (called the corpus) and a window size (called window_size). The function will generate positive word-context pairs from the text, where the target word is paired with the context words around it, within the specified window size.

For sentece sentence:

_"The quick brown fox jumps over the lazy dog."_

and windows size 2, table below shows neighboring words:

| **Center Word** | **Left Neighbors**  | **Right Neighbors** | **Context Words**                  |
|-----------------|---------------------|---------------------|------------------------------------|
| **The**         | -                   | quick, brown        | quick, brown                       |
| **quick**       | The                 | brown, fox          | The, brown, fox                    |
| **brown**       | The, quick          | fox, jumps          | The, quick, fox, jumps             |
| **fox**         | quick, brown        | jumps, over         | quick, brown, jumps, over          |
| **jumps**       | brown, fox          | over, the           | brown, fox, over, the              |
| **over**        | fox, jumps          | the, lazy           | fox, jumps, the, lazy              |
| **the**         | jumps, over         | lazy, dog           | jumps, over, lazy, dog             |
| **lazy**        | over, the           | dog                 | over, the, dog                     |
| **dog**         | the, lazy           | -                   | the, lazy                          |


In [7]:
def generate_positive_pairs(corpus, window_size):
    pairs = []
    for sent in corpus:
        for i, target_word in enumerate(sent):
            start = max(0, i - window_size)
            end = min(len(sent), i + window_size + 1)
            context = [sent[j] for j in range(start, end) if j != i]
            for context_word in context:
                pairs.append((target_word, context_word))
    return pairs

positive_pairs = generate_positive_pairs(corpus, 2)

print(positive_pairs[:10])

assert len(positive_pairs) == 323564
assert set(positive_pairs) >= {('frankenstein', ','),  ('frankenstein', 'or'),  (',', 'frankenstein')}

[('frankenstein', ','), ('frankenstein', 'or'), (',', 'frankenstein'), (',', 'or'), (',', 'the'), ('or', 'frankenstein'), ('or', ','), ('or', 'the'), ('or', 'modern'), ('the', ',')]


## 1.6 Negative sampling

👍 In this task, you'll write a function called generate_negative_samples that takes a target word, a dictionary of word probabilities, and generates negative samples. Negative samples are words that do not co-occur with the target word but are drawn based on their probability distribution. This is typically used in tasks like training word embeddings (e.g., Word2Vec) where negative sampling is used to improve model efficiency.

### Implementation Steps:

1. **Initialize an empty list** `negative_samples` that will store the words generated as negative samples.

2. **Get the weights for sampling**:
   - Extract the word probabilities from the `word_probs` dictionary into a list called `word_weights`. Each element in `word_weights` should correspond to the probability of the word at the same index in the list of words.

3. **Generate negative samples**:
   - Use a loop to generate **`num_samples`** negative samples. Inside the loop:
     - Use `random.choices()` to sample a word from the `word_probs` dictionary. Pass the list of words (`list(word_probs)`) and their corresponding weights (`word_weights`).
     - If the word sampled is not the `target_word`, append it to the `negative_samples` list.

4. **Ensure the target word is excluded**:
   - Make sure that the word sampled is not the same as the `target_word`. If the word is the same as the target, repeat the sampling until a different word is chosen.

5. **Return the list of negative samples**:
   - Once the list contains the specified number of negative samples (`num_samples`), return the list `negative_samples`.

In [8]:
def generate_negative_samples(target_word, word_probs, num_samples=5):
    negative_samples = []
    word_weights = [word_probs[w] for w in word_probs]
    while len(negative_samples) < num_samples:
        neg_word = random.choices(list(word_probs), 
                                   weights=word_weights,
                                   k=1)[0]
        if neg_word != target_word:
            negative_samples.append(neg_word)
    return negative_samples


target_word = positive_pairs[0][0]
sample = generate_negative_samples(target_word, word_probs, num_samples=5)

assert len(sample) == 5

## 1.7 Dataset

The `SkipGramDataset` class is designed to prepare data for training a Skip-gram model using **negative sampling**. It transforms a given text corpus into a dataset where each sample consists of a target word, its context word, and a set of negative samples. Here's what it does:

1. **Initialization (`__init__`)**:
   - **Builds word frequencies** from the corpus using `build_word_freqs()`.
   - **Creates a vocabulary** (`self.vocab`) using `build_vocab()` based on word frequencies.
   - **Calculates word probabilities** (`word_probs`) using `build_word_probs()`, with an optional power parameter to adjust the probability distribution.
   - **Generates positive word-context pairs** from the corpus using the `generate_positive_pairs()` function with the specified window size.
   - **Generates negative samples** for each word-context pair using `generate_negative_samples()`.
   - Each data sample is stored as a tuple containing:
     - The index of the target word.
     - The index of the context word.
     - The indices of the negative samples.
   
2. **`__len__()`**: Returns the number of samples in the dataset (the length of `self.data`).

3. **`__getitem__()`**: Returns a specific data sample (target word, context word, negative samples) given an index `idx`.

In [9]:
class SkipGramDataset(Dataset):

    def __init__(self, corpus, window_size, num_negative_samples, power=0.75):
        word_freqs = build_word_freqs(corpus)
        self.vocab = build_vocab(word_freqs)
        word_probs = build_word_probs(word_freqs, power)

        positive_pairs = generate_positive_pairs(corpus, window_size=window_size)

        self.data = []
        for target_word, context_word in tqdm(positive_pairs):
            negative_samples = generate_negative_samples(target_word, word_probs, num_samples=num_negative_samples)
    
            target_idx, context_idx = vocab[target_word], vocab[context_word]
            negative_samples_idxs = [vocab[neg_word] for neg_word in negative_samples]
            
            self.data.append((target_idx, context_idx, negative_samples_idxs))            

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx]

dataset = SkipGramDataset(corpus, window_size=2, num_negative_samples=5)

  3%|▎         | 11279/323564 [00:26<12:09, 427.87it/s]


KeyboardInterrupt: 

# 2 Skip-gram with Negative Sampling Model

The Skip-gram model tries to predict the context words for a given target word. The objective is to maximize the similarity between the **target word embedding** and the **context word embedding**, while minimizing the similarity between the target and **negative samples**.

#### Key Variables:
- $ V $ is the vocabulary size (total number of unique words).
- $ d $ is the embedding dimension (the size of the word vector).
- $ \mathbf{E}_t \in \mathbb{R}^{d} $ is the embedding vector for the target word.
- $ \mathbf{E}_c \in \mathbb{R}^{d} $ is the embedding vector for the context word.
- $ \mathbf{E}_n^{(i)} \in \mathbb{R}^{d} $ is the embedding vector for the $ i $-th negative sample.
  
### Model Components:
1. **Embeddings**:
   The model uses two embedding layers:
   - One for the target word embeddings: $ \mathbf{E}_t = \text{Embedding}(\text{target}) $
   - One for the context word embeddings: $ \mathbf{E}_c = \text{Embedding}(\text{context}) $
   
2. **Target and Context Word Score**:
   For each positive pair (target word, context word), the score is computed by taking the **dot product** between the target embedding and the context embedding:
   $$
   \text{positive\_score} = \mathbf{E}_t \cdot \mathbf{E}_c = \sum_{i=1}^d \mathbf{E}_{t,i} \cdot \mathbf{E}_{c,i}
   $$
   where $ \mathbf{E}_{t,i} $ and $ \mathbf{E}_{c,i} $ are the individual dimensions of the target and context word embeddings.

3. **Negative Sampling Score**:
   For each negative sample $ n^{(i)} $, the model computes the score between the target word embedding and the negative sample embeddings. The score for each negative sample $ i $ is computed as:
   $$
   \text{negative\_score}^{(i)} = \mathbf{E}_t \cdot \mathbf{E}_n^{(i)}
   $$
   where $ \mathbf{E}_n^{(i)} $ is the embedding for the $ i $-th negative sample. This score is negative because the model wants to minimize the similarity between the target word and negative samples.

4. **Loss Function**:
   The Skip-gram model uses **binary cross-entropy loss** to maximize the probability of predicting the correct context word and minimize the probability of predicting negative samples. This is done using the **sigmoid function** applied to the scores.

   The loss for the positive pair (target, context) is:
   $$
   \text{positive\_loss} = -\log(\sigma(\text{positive\_score}))
   $$
   where $ \sigma(x) = \frac{1}{1 + e^{-x}} $ is the **sigmoid function**.

   The loss for each negative sample is:
   $$
   \text{negative\_loss}^{(i)} = -\log(\sigma(-\text{negative\_score}^{(i)}))
   $$
   The negative sign ensures that we want the similarity between the target word and negative samples to be small (i.e., $ \mathbf{E}_t $ should be dissimilar to $ \mathbf{E}_n^{(i)} $).

   The **total loss** is the sum of the positive loss and the sum of the negative losses across all negative samples:
   $$
   \text{total\_loss} = -\log(\sigma(\text{positive\_score})) - \sum_{i=1}^{K} \log(\sigma(-\text{negative\_score}^{(i)}))
   $$
   where $ K $ is the number of negative samples.

5. **Final Loss**:
   The average loss is returned, which is the mean of the losses across all the training examples:
   $$
   \text{loss} = \frac{1}{N} \sum_{n=1}^{N} \text{total\_loss}_n
   $$
   where $ N $ is the batch size (number of training samples).

In [10]:
class SkipgramModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super(SkipgramModel, self).__init__()
        self.target_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.context_embeddings = nn.Embedding(vocab_size, embedding_dim)
        
    def forward(self, target, context, negative_samples):
        # Target embedding
        target_emb = self.target_embeddings(target)
        
        # Positive context embedding
        context_emb = self.context_embeddings(context)
        
        # Negative samples embeddings
        negative_embs = self.context_embeddings(torch.cat(negative_samples))
        
        # Compute the loss
        positive_score = torch.sum(target_emb * context_emb, dim=1)
        negative_score = torch.sum(target_emb.unsqueeze(1) * negative_embs, dim=2)
        
        # Apply sigmoid and log to get the loss
        loss = -torch.log(torch.sigmoid(positive_score)) - torch.sum(torch.log(torch.sigmoid(-negative_score)), dim=1)
        
        return loss.mean()


# 3 Training the model

In [11]:
embedding_dim = 50
batch_size = 128
learning_rate = 0.01

dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
model = SkipgramModel(vocab_size=len(dataset.vocab), embedding_dim=embedding_dim)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                 mode='min', factor=0.5,
                                                 patience=1)

#sbs = StepByStep(model, loss_fn=None, optimizer=optimizer)

NameError: name 'dataset' is not defined

In [96]:
num_epochs = 10
for epoch in tqdm(range(num_epochs)):
    total_loss = 0
    for target, context, negative_samples in dataloader:
        optimizer.zero_grad()
        loss = model(target, context, negative_samples)
        loss.backward()
        optimizer.step()
        total_loss += loss

    scheduler.step(total_loss.mean())        
    current_lr = optimizer.param_groups[0]['lr']
    
    print(f"Epoch={epoch + 1}, batch loss={total_loss.mean()}, lr={current_lr}")

 10%|█         | 1/10 [00:30<04:37, 30.89s/it]

Epoch=1, batch loss=15940.7451171875, lr=0.005


 20%|██        | 2/10 [01:01<04:03, 30.48s/it]

Epoch=2, batch loss=15906.2705078125, lr=0.005


 30%|███       | 3/10 [01:31<03:32, 30.34s/it]

Epoch=3, batch loss=15896.1025390625, lr=0.005


 40%|████      | 4/10 [02:01<03:01, 30.30s/it]

Epoch=4, batch loss=15887.3681640625, lr=0.005


 50%|█████     | 5/10 [02:31<02:31, 30.20s/it]

Epoch=5, batch loss=15876.4228515625, lr=0.005


 60%|██████    | 6/10 [03:01<02:00, 30.12s/it]

Epoch=6, batch loss=15872.8701171875, lr=0.005


 70%|███████   | 7/10 [03:32<01:30, 30.30s/it]

Epoch=7, batch loss=15870.470703125, lr=0.005


 80%|████████  | 8/10 [04:02<01:00, 30.24s/it]

Epoch=8, batch loss=15867.3544921875, lr=0.005


 90%|█████████ | 9/10 [04:32<00:30, 30.21s/it]

Epoch=9, batch loss=15861.357421875, lr=0.005


100%|██████████| 10/10 [05:02<00:00, 30.22s/it]

Epoch=10, batch loss=15858.1015625, lr=0.005





# 4 Embeddings

In [97]:
word_embeddings = model.target_embeddings.weight.data
vocab = dataset.vocab

In [98]:
word_embeddings[vocab["frankenstein"]]

tensor([ 0.3384, -0.0287,  0.0035, -0.9695, -0.2087, -1.4221, -1.5397,  0.8569,
         0.4210,  1.0146,  0.2928, -0.0041, -0.6710, -1.5127, -0.3430,  0.3247,
        -0.4555, -0.2204, -0.0443,  0.4007,  0.4151,  1.0555,  0.5857, -0.2622,
         0.3100,  0.5656,  0.8388, -1.1757,  0.3517, -0.5614, -0.7702, -0.1768,
         0.6394,  0.0105,  0.2716, -0.1345,  0.8818,  0.4539,  0.0972,  1.2014,
         0.5741,  0.2309, -0.4661, -0.7126,  0.7490,  1.5891, -0.0898, -0.2607,
         0.4436, -0.9681])

In [99]:
def pretty_print(results):
    for k, v in results:
        print (f"...[{v:.2f}] - {k}")

def get_closest(target_word, vocab, embeddings, n=5):
    """
    Get the n closest
    words to your word.
    """
    # Calculate distances to all other words
    word_embedding = embeddings[vocab[target_word.lower()]]
    distances = []
    for word, index in vocab.items():
        if word == target_word:
            continue
        distances.append((word, torch.dist(word_embedding, embeddings[index])))
    
    results = sorted(distances, key=lambda x: x[1])[1:n+2]
    return results

get_closest("frankenstein", dataset.vocab, word_embeddings)

[("'", tensor(4.3882)),
 ('!', tensor(4.4016)),
 ('dear', tensor(4.5158)),
 ('still', tensor(4.5769)),
 ('victor', tensor(4.5807)),
 ('may', tensor(4.6517))]

In [100]:
target_words = ['frankenstein', 'monster', 'science', 'sickness', 'lonely', 'happy']

embeddings = model.target_embeddings.weight.data


for target_word in target_words: 
    print(f"====== {target_word} ======")
    if target_word not in dataset.vocab:
        print(f"{target_word} is not in vocabulary")
        continue
    pretty_print(get_closest(target_word, dataset.vocab, word_embeddings, n=5))

...[4.39] - '
...[4.40] - !
...[4.52] - dear
...[4.58] - still
...[4.58] - victor
...[4.65] - may
...[4.08] - wretch
...[4.12] - me
...[4.15] - him
...[4.20] - spirit
...[4.22] - --
...[4.26] - fire
...[4.01] - ,
...[4.01] - the
...[4.09] - same
...[4.13] - in
...[4.18] - upon
...[4.21] - of
...[5.81] - attention
...[5.83] - science
...[5.86] - suddenly
...[5.87] - prison
...[5.89] - any
...[5.95] - sensations
...[6.82] - numerous
...[6.98] - bare
...[7.15] - few
...[7.16] - figure
...[7.18] - small
...[7.20] - edinburgh
...[3.87] - would
...[3.96] - by
...[4.06] - calm
...[4.06] - can
...[4.07] - be
...[4.14] - followed
