# N-Gram language models and perplexity
1. import texts from NLTK books
2. generate and count n-grams
3. estimate n-gram probabilities
4. compute perplexity of test sentances

In [7]:
# import nltk resources for lemmatizer evaluation
from nltk.book import *
from nltk.tokenize import sent_tokenize, word_tokenize 
from collections import defaultdict
import re

*** Introductory Examples for the NLTK Book ***
Loading text1, ..., text9 and sent1, ..., sent9
Type the name of the text or sentence to view it.
Type: 'texts()' or 'sents()' to list the materials.
text1: Moby Dick by Herman Melville 1851
text2: Sense and Sensibility by Jane Austen 1811
text3: The Book of Genesis
text4: Inaugural Address Corpus
text5: Chat Corpus
text6: Monty Python and the Holy Grail
text7: Wall Street Journal
text8: Personals Corpus
text9: The Man Who Was Thursday by G . K . Chesterton 1908


In [13]:
# sentence segment and tokenize nltk books and 
def NLTKbooks2Sent(articles):
    """
    e.g. NLTKbooks2Sent([nltk.book.text1])
    param -> Array of type nltk.Text
    
    returns -> list of list. A list of sentences (Each sentence is a list of tokenized words)
               e.g. For input = "My name is Debasish" it returns [['My', 'name', 'is', 'Debasish', 'Sahoo']]
    """
    data = []
    for article in articles: 
        #print(type(article))
        for sent in sent_tokenize(' '.join(article)): 
            data.append(word_tokenize(sent))
    return(data)

In [None]:
import nltk
books1 = NLTKbooks2Sent([nltk.book.text1])
books1[1]

# 3-gram

In [8]:
# compute n-grams dictionary for source text
import numpy as np

N = 3        # length of n-gram

# data: list of sentences 
def nGramCount3(data):
    gramsC = {}  # auxilliary dictionary to store n-gram counts
    # seg = 1 sentence
    for seg in data:
        
        itm = seg.copy()
            
        # insert sentence starting and sentence ending symbols
        # !!! adjust starting symbol for different length of n-grams N
        itm.insert(0, "///")
        itm.insert(0, "///")
        itm.append("///")
        #print(itm)
        # p(a | b ) == count(ab) / count(b)  
        for i in range(len(itm)-N+1):
            # produce ngram (b)
            b  = ' '.join(itm[i:i+N-1]).lower()
            # produce an ngram (ab)
            ab = ' '.join(itm[i:i+N]).lower()
            # print("t:{}\tg:{}".format(t,g))
            gramsC.setdefault(b, {})
            gramsC[b].setdefault(ab, 0)
            # count the ngram
            gramsC[b][ab] += 1
    return(gramsC)


# compute probability of n-grams
# p(a | b ) == count(ab) / count(b) == count(g) / count(t)
# loop over all words
def nGramProbs3(gramsC) :
    nGrams = {}  # final n-gram dictionary with log-prob entries
    nMin = 0
    for b in gramsC:
        # v = number of n-grams that start with b
        v = float(sum(gramsC[b].values()))
        for ab in gramsC[b]:
            # np.log2(v/gramG[b][ab]) == - np.log2(gramG[b][ab]/v)
            nGrams[ab] = np.log2(v/gramsC[b][ab])
            if(nGrams[ab] > nMin): nMin = nGrams[ab]
#            print("ab:{:<20} count:{:<6}\tb:{:<10}  count:{}\tlog(1/p):{:<5.4}".format(ab, gramsC[b][ab], b, v,  nGrams[ab]))
    nGrams["|||OOV|||"] = nMin+1
    return(nGrams)

In [25]:
my_text = "My name is Debasish Sahoo"
my_sent = "My name is Debasish Sahoo. I study in Kent State University. I stay in mayfield heights"

In [28]:
# to convert normal text to NTLK text
my_text_tokens = word_tokenize(my_sent)
my_nltk_text = nltk.Text(my_text_tokens)
print(my_nltk_text)
my_book = NLTKbooks2Sent([my_nltk_text])
print(my_book)
print(nGramCount3(my_book))
nGramProbs3(nGramCount3(my_book))

<Text: My name is Debasish Sahoo . I study...>
[['My', 'name', 'is', 'Debasish', 'Sahoo', '.'], ['I', 'study', 'in', 'Kent', 'State', 'University', '.'], ['I', 'stay', 'in', 'mayfield', 'heights']]
{'/// ///': {'/// /// my': 1, '/// /// i': 2}, '/// my': {'/// my name': 1}, 'my name': {'my name is': 1}, 'name is': {'name is debasish': 1}, 'is debasish': {'is debasish sahoo': 1}, 'debasish sahoo': {'debasish sahoo .': 1}, 'sahoo .': {'sahoo . ///': 1}, '/// i': {'/// i study': 1, '/// i stay': 1}, 'i study': {'i study in': 1}, 'study in': {'study in kent': 1}, 'in kent': {'in kent state': 1}, 'kent state': {'kent state university': 1}, 'state university': {'state university .': 1}, 'university .': {'university . ///': 1}, 'i stay': {'i stay in': 1}, 'stay in': {'stay in mayfield': 1}, 'in mayfield': {'in mayfield heights': 1}, 'mayfield heights': {'mayfield heights ///': 1}}


{'/// /// my': 1.584962500721156,
 '/// /// i': 0.5849625007211562,
 '/// my name': 0.0,
 'my name is': 0.0,
 'name is debasish': 0.0,
 'is debasish sahoo': 0.0,
 'debasish sahoo .': 0.0,
 'sahoo . ///': 0.0,
 '/// i study': 1.0,
 '/// i stay': 1.0,
 'i study in': 0.0,
 'study in kent': 0.0,
 'in kent state': 0.0,
 'kent state university': 0.0,
 'state university .': 0.0,
 'university . ///': 0.0,
 'i stay in': 0.0,
 'stay in mayfield': 0.0,
 'in mayfield heights': 0.0,
 'mayfield heights ///': 0.0,
 '|||OOV|||': 2.584962500721156}

word_tokens = []
sent_tokens = sent_tokenize(my_sent)
for sent in sent_tokens:
    word_tokens.append(word_tokenize(sent))

joined_sent = ' '.join(sent_tokens)
joined_sent

# 2-gram

In [31]:
# compute n-grams dictionary for source text
import numpy as np

N = 2        # length of n-gram

# data: list of sentences 
def nGramCount2(data):
    gramsC = {}  # auxilliary dictionary to store n-gram counts
    for seg in data:
        
        itm = seg.copy()
            
        # insert sentence starting and sentence ending symbols
        # !!! adjust starting symbol for different length of n-grams N
        #itm.insert(0, "///")
        itm.insert(0, "///")
        itm.append("///")
        #print(itm)
        #P(x1, x2, ..., xn) = P(x1)P(x2|x1)...P(xn|x1,...xn-1)
        # p(a | b ) == count(ab) / count(b)  
        for i in range(len(itm)-N+1):
            # produce ngram (b)
            b  = ' '.join(itm[i:i+N-1]).lower()
            # produce an ngram (ab)
            ab = ' '.join(itm[i:i+N]).lower()
            # print("t:{}\tg:{}".format(t,g))
            gramsC.setdefault(b, {})
            gramsC[b].setdefault(ab, 0)
            # count the ngram
            gramsC[b][ab] += 1
    return(gramsC)


# compute probability of n-grams
# p(a | b ) == count(ab) / count(b) == count(g) / count(t)
# loop over all words
def nGramProbs2(gramsC) :
    nGrams = {}  # final n-gram dictionary with log-prob entries
    nMin = 0
    for b in gramsC:
        # v = number of n-grams that start with b
        v = float(sum(gramsC[b].values()))
        for ab in gramsC[b]:
            # np.log2(v/gramG[b][ab]) == - np.log2(gramG[b][ab]/v)
            nGrams[ab] = np.log2(v/gramsC[b][ab])
            if(nGrams[ab] > nMin): nMin = nGrams[ab]
#            print("ab:{:<20} count:{:<6}\tb:{:<10}  count:{}\tlog(1/p):{:<5.4}".format(ab, gramsC[b][ab], b, v,  nGrams[ab]))
    nGrams["|||OOV|||"] = nMin+1
    return(nGrams)

# 1-gram

In [111]:
# compute n-grams dictionary for source text
import numpy as np

N = 1        # length of n-gram

# data: list of sentences 
def nGramCount1(data):
    gramsC = {}  # auxilliary dictionary to store n-gram counts
    for seg in data:
        
        itm = seg.copy()
            
        # insert sentence starting and sentence ending symbols
        # !!! adjust starting symbol for different length of n-grams N
        #itm.insert(0, "///")
        #itm.insert(0, "///")
        #itm.append("///")
        #print(itm)
        #P(x1, x2, ..., xn) = P(x1)P(x2|x1)...P(xn|x1,...xn-1)
        # p(a | b ) == count(ab) / count(b)  
        for i in range(len(itm)-N+1):
            # produce ngram (b)
            b  = ' '.join(itm[i:i+N-1]).lower()
            # produce an ngram (ab)
            ab = ' '.join(itm[i:i+N]).lower()
            # print("t:{}\tg:{}".format(t,g))
            gramsC.setdefault(b, {})
            gramsC[b].setdefault(ab, 0)
            # count the ngram
            gramsC[b][ab] += 1
    return(gramsC)


# compute probability of n-grams
# p(a | b ) == count(ab) / count(b) == count(g) / count(t)
# loop over all words
def nGramProbs1(gramsC) :
    nGrams = {}  # final n-gram dictionary with log-prob entries
    nMin = 0
    for b in gramsC:
        # v = number of n-grams that start with b
        v = float(sum(gramsC[b].values()))
        for ab in gramsC[b]:
            # np.log2(v/gramG[b][ab]) == - np.log2(gramG[b][ab]/v)
            nGrams[ab] = np.log2(v/gramsC[b][ab])
            if(nGrams[ab] > nMin): nMin = nGrams[ab]
#            print("ab:{:<20} count:{:<6}\tb:{:<10}  count:{}\tlog(1/p):{:<5.4}".format(ab, gramsC[b][ab], b, v,  nGrams[ab]))
    nGrams["|||OOV|||"] = nMin+1
    return(nGrams)

In [32]:
# compute perplexity of segments

def perplexity2(data, nGrams):
    N = 2
    oov = 0
    PP = []
    for seg in data:  
        itm = seg.copy()
        
        # !!! adjust starting symbol for different length of n-grams N
        itm.insert(0, "///")
        #itm.insert(0, "///")
        itm.append("///")

        H = 0
        for i in range(len(itm)-N):
            ab = ' '.join(itm[i:i+N]).lower()
            try: 
                H += nGrams[ab]
                print("nGram: {:20}\t{:4.4}".format(ab, nGrams[ab]))
            except:
                print("nGram: {:20}\t{:4.4}\tundef".format(ab, nGrams["|||OOV|||"]))
                H += nGrams["|||OOV|||"]
                oov += 1
        p = (2**H)**(1 / float(len(seg)))
        PP.append(p)
        print("PP:{:5.2f}\tlen:{}\tseg:{}".format(p, len(seg), seg))
    print("Number of OOV 2-grams: "+ str(oov))
    return(PP)

In [112]:
# compute perplexity of segments

def perplexity1(data, nGrams):
    N = 1
    oov = 0
    PP = []
    for seg in data:  
        itm = seg.copy()
        
        # !!! adjust starting symbol for different length of n-grams N
        #itm.insert(0, "///")
        #itm.insert(0, "///")
        #itm.append("///")

        H = 0
        for i in range(len(itm)-N):
            ab = ' '.join(itm[i:i+N]).lower()
            try: 
                H += nGrams[ab]
                print("nGram: {:20}\t{:4.4}".format(ab, nGrams[ab]))
            except:
                print("nGram: {:20}\t{:4.4}\tundef".format(ab, nGrams["|||OOV|||"]))
                H += nGrams["|||OOV|||"]
                oov++
        p = (2**H)**(1 / float(len(seg)))
        PP.append(p)
        print("PP:{:5.2f}\tlen:{}\tseg:{}".format(p, len(seg), seg))
    print("Number of OOV 3-grams: "+ str(oov))
    return(PP)

In [27]:
# compute perplexity of segments

def perplexity(data, nGrams):
    N = 3
    PP = []
    oov = 0
    for seg in data:  
        itm = seg.copy()
        
        # !!! adjust starting symbol for different length of n-grams N
        itm.insert(0, "///")
        itm.insert(0, "///")
        itm.append("///")

        H = 0
        for i in range(len(itm)-N):
            ab = ' '.join(itm[i:i+N]).lower()
            try: 
                H += nGrams[ab]
                print("nGram: {:20}\t{:4.4}".format(ab, nGrams[ab]))
            except:
                print("nGram: {:20}\t{:4.4}\tundef".format(ab, nGrams["|||OOV|||"]))
                H += nGrams["|||OOV|||"]
                oov += 1
        p = (2**H)**(1 / float(len(seg)))
        PP.append(p)
        print("PP:{:5.2f}\tlen:{}\tseg:{}".format(p, len(seg), seg))
    print("Number of OOV 3-grams: "+ str(oov))
    return(PP)

In [38]:
import nltk 

# prepare different sets of books 
books18 = NLTKbooks2Sent([nltk.book.text1, nltk.book.text2, nltk.book.text3,nltk.book.text4, nltk.book.text6, nltk.book.text7,nltk.book.text8])
books1 = NLTKbooks2Sent([nltk.book.text1])
books9 = NLTKbooks2Sent([nltk.book.text9])


In [None]:
# generate n-gram language models
NG18 = nGramProbs3(nGramCount3(books18))
NG1 = nGramProbs3(nGramCount3(books1))
NG9 = nGramProbs3(nGramCount3(books9))

In [102]:
# nltk.download('reuters')
from nltk.corpus import reuters
print(N)
#len(reuters.sents())
#print(reuters.readme())

2


In [48]:
reuter = nGramProbs(nGramCount(reuters.sents()))

In [49]:
perplexity([["This", "is", "a", "test"],["quite","unlikely","test"]], NG1)
perplexity([["This", "is", "a", "test"],["quite","unlikely","test"]], NG18)
perplexity([["This", "is", "a", "test"],["quite","unlikely","test"]], reuter)

nGram: ///                 	3.289
nGram: this                	7.717
nGram: is                  	7.41
nGram: a                   	5.953
nGram: test                	16.16
PP:1122.68	len:4	seg:['This', 'is', 'a', 'test']
nGram: ///                 	3.289
nGram: quite               	12.8
nGram: unlikely            	18.16
nGram: test                	16.16
PP:114578.63	len:3	seg:['quite', 'unlikely', 'test']
nGram: ///                 	3.255
nGram: this                	8.03
nGram: is                  	7.339
nGram: a                   	6.11
nGram: test                	13.71
PP:782.31	len:4	seg:['This', 'is', 'a', 'test']
nGram: ///                 	3.255
nGram: quite               	12.38
nGram: unlikely            	17.03
nGram: test                	13.71
PP:45090.29	len:3	seg:['quite', 'unlikely', 'test']
nGram: ///                 	3.522
nGram: this                	9.066
nGram: is                  	7.942
nGram: a                   	6.231
nGram: test                	14.31
PP:1232.01	len:4	seg

[1232.009700152163, 47284.55555491499]

In [None]:
# test language model on training test
perplexity(books9[:1], NG1)
perplexity(books9[:1], NG9)
perplexity(books9[:1], NG18)

In [None]:
# tasks:
#  add more data to the LM and assess impact on perplexity
#  generate language models with KENTstemmer/porter/snowball/wn stemmer
#  think of methods to reduce number of OOV words (numbers, proper names, etc)
#  compare 1-gram language model
#  compare 3-gram language model
#
#  Scramble (paraphrase) words of a sentence and compare perplexity with n-gram LM

In [None]:
import KENTstemmer3
import importlib
import nltk

importlib.reload(KENTstemmer3)
#print(my_book)

my_stemmed_book = []
dic = KENTstemmer3.readDictionary("tokenLemma.txt")

#tags = nltk.pos_tag(['accompanied','unlikely', 'lovable'])
for sent in books1:
    tags = nltk.pos_tag(sent)
    #print(tags)
    stems = []
    for tag in tags:
        stems.append(KENTstemmer3.KENTstemmer3(tag, dic))
    #print(stems)
    my_stemmed_book.append(stems)
print(my_stemmed_book)

In [6]:
import KENTstemmer3
import importlib
import nltk

importlib.reload(KENTstemmer3)
dic = KENTstemmer3.readDictionary("tokenLemma.txt")

def apply_kent_stemmer(book) :
    """
    params 
        book -> type nltk.Text for e.g. nltk.book.text1
             -> type a list of list for e.g. [ word_tokenize('This is a test') ]
                                             [word_tokenize("i am a good boy."), word_tokenize('how are you')]
             
    returns
        stemmed_book -> a list of list containing stemmed words of the params
    
    """
    my_stemmed_book = []
    for sent in book:
        tags = nltk.pos_tag(sent)
        #print(tags)
        stems = []
        for tag in tags:
            stems.append(KENTstemmer3.KENTstemmer3(tag, dic))
        #print(stems)
        my_stemmed_book.append(stems)
    #print(my_stemmed_book)
    return my_stemmed_book

In [50]:
stemmed_books1 = apply_kent_stemmer(books1)

In [62]:
NG1 = nGramProbs3(nGramCount3(books1))
perplexity(books9[18:21], NG1)

nGram: /// /// we          	7.765
nGram: /// we have         	4.524
nGram: we have found       	14.29	undef
nGram: have found common   	14.29	undef
nGram: found common things 	14.29	undef
nGram: common things at    	14.29	undef
nGram: things at last      	14.29	undef
nGram: at last and         	14.29	undef
nGram: last and marriage   	14.29	undef
nGram: and marriage and    	14.29	undef
nGram: marriage and a      	14.29	undef
nGram: and a creed         	14.29	undef
nGram: a creed ,           	14.29	undef
nGram: creed , and         	14.29	undef
nGram: , and i             	5.975
nGram: and i may           	6.476
nGram: i may safely        	14.29	undef
nGram: may safely write    	14.29	undef
nGram: safely write it     	14.29	undef
nGram: write it now        	14.29	undef
nGram: it now ,            	2.807
nGram: now , and           	4.285
nGram: , and you           	8.367
nGram: and you may         	3.807
nGram: you may safely      	14.29	undef
nGram: may safely read     	14.29	undef
nGram: s

[3292.2950254301463, 20014.0, 20014.0]

In [63]:
NG1_stemmed = nGramProbs3(nGramCount3(stemmed_books1))
perplexity(apply_kent_stemmer(books9[18:21]), NG1_stemmed)

nGram: /// /// we          	7.561
nGram: /// we have         	3.728
nGram: we have found       	14.29	undef
nGram: have found common   	14.29	undef
nGram: found common thing  	14.29	undef
nGram: common thing at     	14.29	undef
nGram: thing at last       	14.29	undef
nGram: at last and         	14.29	undef
nGram: last and marriage   	14.29	undef
nGram: and marriage and    	14.29	undef
nGram: marriage and a      	14.29	undef
nGram: and a cre           	14.29	undef
nGram: a cre ,             	14.29	undef
nGram: cre , and           	1.585
nGram: , and i             	5.875
nGram: and i may           	 6.7
nGram: i may safe          	14.29	undef
nGram: may safe write      	14.29	undef
nGram: safe write it       	14.29	undef
nGram: write it now        	14.29	undef
nGram: it now ,            	 3.0
nGram: now , and           	4.285
nGram: , and you           	7.666
nGram: and you may         	4.807
nGram: you may safe        	14.29	undef
nGram: may safe read       	14.29	undef
nGram: safe read

[2352.85555556207, 8170.6812853437605, 14152.035118667563]

In [2]:
%%time
bnc = []

with open('spring19/lemmaLexicon/BNC.seg', 'r') as f:
    for line in f :
        bnc.append(line.replace('\n',''))
f.close()  


CPU times: user 5.24 s, sys: 644 ms, total: 5.89 s
Wall time: 5.89 s


In [3]:
len(bnc)

6052201

In [None]:
%%time

bnc_book_tokens = [word_tokenize(l) for l in bnc[:100000] ]
stemmed_bnc_book = apply_kent_stemmer(bnc_book_tokens)

In [21]:
%%time
stemmed_bnc_book = []
for l in bnc[:1000000]:
    aaa = apply_kent_stemmer([word_tokenize(l)])
    stemmed_bnc_book.append(aaa[0])

CPU times: user 29min 51s, sys: 18.2 s, total: 30min 9s
Wall time: 30min 9s


In [22]:
len(stemmed_bnc_book)

1000000

In [23]:
NG_train_bnc = nGramProbs3(nGramCount3(stemmed_bnc_book))


In [33]:
NG_train_bnc2 = nGramProbs2(nGramCount2(stemmed_bnc_book))

In [113]:
NG_train_bnc1 = nGramProbs1(nGramCount1(stemmed_bnc_book))

In [118]:
NG_train_bnc

{'/// /// factsheet': 16.609640474436812,
 '/// /// aids': 12.802285552379209,
 '/// /// this': 5.5175440594460206,
 '/// /// how': 8.434714791936132,
 '/// /// through': 12.522177633186473,
 '/// /// from': 8.888541285729627,
 '/// /// it': 4.703877424834019,
 '/// /// the': 3.194691032962548,
 '/// /// often': 11.702749878828293,
 '/// /// do': 8.419815915556795,
 '/// /// there': 6.23026210736555,
 '/// /// 10': 12.90920075629572,
 '/// /// you': 6.839802630807371,
 '/// /// 7': 12.287712379549449,
 '/// /// woman': 12.150208855799514,
 '/// /// in': 5.197070627631602,
 '/// /// 1': 9.789461512021624,
 '/// /// &': 2.970412834089892,
 '/// /// useful': 13.802285552379209,
 '/// /// rais': 15.609640474436812,
 '/// /// whether': 10.965784284662087,
 '/// /// below': 12.90920075629572,
 '/// /// none': 11.75165947930924,
 '/// /// car': 13.287712379549449,
 '/// /// i': 5.031739637549842,
 '/// /// 2': 10.150208855799514,
 '/// /// jumble': 16.609640474436812,
 '/// /// 3': 10.4397154

In [29]:
perplexity(apply_kent_stemmer([word_tokenize(l) for l in bnc[5000000:5000003]]), NG_train_bnc)

nGram: /// /// i           	5.008
nGram: /// i 've           	5.36
nGram: i 've learnt        	8.638
nGram: 've learnt there    	20.93	undef
nGram: learnt there be     	 0.0
nGram: there be a          	2.823
nGram: be a lot            	6.924
nGram: a lot of            	0.6429
nGram: lot of car          	10.3
nGram: of car people       	7.033
nGram: car people in       	20.93	undef
nGram: people in this      	5.36
nGram: in this village     	10.12
nGram: this village ,      	3.129
nGram: village , they      	8.504
nGram: , they 're          	5.722
nGram: they 're keen       	20.93	undef
nGram: 're keen to         	 0.0
nGram: keen to help        	5.422
nGram: to help in          	5.469
nGram: help in give        	7.267
nGram: in give inform      	5.728
nGram: give inform .       	4.492
PP:171.69	len:23	seg:['i', "'ve", 'learnt', 'there', 'be', 'a', 'lot', 'of', 'car', 'people', 'in', 'this', 'village', ',', 'they', "'re", 'keen', 'to', 'help', 'in', 'give', 'inform', '.']
nGram: /// ///

[171.6871757926335, 576.9638281114514, 82902.6672289678]

In [34]:
perplexity2(apply_kent_stemmer([word_tokenize(l) for l in bnc[5000000:5000003]]), NG_train_bnc2)

nGram: /// i               	5.008
nGram: i 've               	5.749
nGram: 've learnt          	9.007
nGram: learnt there        	8.951
nGram: there be            	1.324
nGram: be a                	4.35
nGram: a lot               	7.194
nGram: lot of              	0.703
nGram: of car              	11.2
nGram: car people          	10.58
nGram: people in           	4.306
nGram: in this             	5.849
nGram: this village        	11.24
nGram: village ,           	3.019
nGram: , they              	6.822
nGram: they 're            	6.818
nGram: 're keen            	12.42
nGram: keen to             	1.411
nGram: to help             	7.848
nGram: help in             	5.091
nGram: in give             	12.83
nGram: give inform         	8.071
nGram: inform .            	3.81
PP:102.38	len:23	seg:['i', "'ve", 'learnt', 'there', 'be', 'a', 'lot', 'of', 'car', 'people', 'in', 'this', 'village', ',', 'they', "'re", 'keen', 'to', 'help', 'in', 'give', 'inform', '.']
nGram: /// it              	4.6

[102.38111844077585, 136.89898237494077, 91436.94426561019]

In [115]:
perplexity1(apply_kent_stemmer([word_tokenize(l) for l in bnc_sent[500000:500003]]), NG_train_bnc1)

nGram: at                  	7.761
nGram: oth                 	9.724
nGram: time                	9.43
nGram: ,                   	4.467
nGram: it                  	6.746
nGram: have                	6.66
nGram: pursu               	15.26
nGram: a                   	5.677
nGram: policy              	11.79
nGram: of                  	5.247
nGram: let                 	11.91
nGram: the                 	4.215
nGram: pound               	9.495
nGram: float               	15.35
PP:304.19	len:15	seg:['at', 'oth', 'time', ',', 'it', 'have', 'pursu', 'a', 'policy', 'of', 'let', 'the', 'pound', 'float', '.']
nGram: these               	10.18
nGram: policy              	11.79
nGram: be                  	5.552
nGram: it                  	6.746
nGram: own                 	10.39
PP:174.10	len:6	seg:['these', 'policy', 'be', 'it', 'own', '.']
nGram: in                  	5.904
nGram: any                 	9.953
nGram: case                	11.08
nGram: ,                   	4.467
nGram: the                 

[304.1909083832307, 174.1027351441073, 606.3685342526851]