In [61]:
import math
import re
from collections import defaultdict

## Part 3

> Preprocessing Train/Test Files

In [62]:
def preprocess(tfile):
    with open(tfile, 'r') as file:
        lst = file.read()
    pattern = r'[^\w\s]'
    text = re.sub(pattern, '', lst)
    text = text.lower()
    data = text.strip().split()
    return data

> Train the Model using Train Data

In [63]:
def train_model(train_data):
    unigrams = defaultdict(float)
    bigrams = defaultdict(float)
    
    for word in train_data:
        unigrams[word] = unigrams.get(word, 0) + 1

    for i in range(len(train_data) - 1):
        bigram = (train_data[i], train_data[i + 1])
        bigrams[bigram] = bigrams.get(bigram, 0) + 1
        
    return unigrams, bigrams

> Calculate Probabilities of Unigrams and Bigrams

In [64]:
def cal_probabilities(unigrams, bigrams):
    unigram_prob = defaultdict(float)
    total_words = sum(unigrams.values())   
    bigram_prob = defaultdict(float)

    for word in unigrams:
        unigram_prob[word] = unigrams.get(word, 0) / total_words
        
        
    for bigram in bigrams:
        prev_word,current_word = bigram
        bigram_prob[bigram] = bigrams.get(bigram,0) / unigrams.get(prev_word, 0)

    
    return unigram_prob, bigram_prob

In [65]:
training_file = "/Users/chaitradaddolu/Desktop/nlp/A1_DATASET/train.txt"
test_file = "/Users/chaitradaddolu/Desktop/nlp/A1_DATASET/val.txt"
train_data = preprocess(training_file)
test_data = preprocess(test_file)
N = len(test_data)
n = len(train_data)
print(N)
print(n)

8699
79399


In [66]:
unigrams, bigrams = train_model(train_data)
vocab_size = len(unigrams)

In [67]:
print(unigrams)



In [68]:
print(bigrams)



In [69]:
unigram_probablities_of_train_data, bigram_probablities_of_train_data = cal_probabilities(unigrams, bigrams)

In [70]:
for word in unigram_probablities_of_train_data:
    print(word, "-", unigram_probablities_of_train_data[word])

i - 0.02154938979080341
booked - 0.0010831370672174712
two - 0.0016121109837655386
rooms - 0.0025441126462549908
four - 0.00025189234121336543
months - 0.00010075693648534616
in - 0.01586921749644202
advance - 8.81623194246779e-05
at - 0.009382989710197862
the - 0.06677665965566316
talbott - 0.0003526492776987116
we - 0.014068187256766458
were - 0.00727968866106626
placed - 0.00010075693648534616
on - 0.008060554918827694
top - 0.0005037846824267309
floor - 0.001725462537311553
next - 0.001347624025491505
to - 0.026322749656796686
elevators - 0.00040302774594138465
which - 0.0022166526026776157
are - 0.003904331288807164
used - 0.00044081159712338946
all - 0.003501303542865779
night - 0.0025063287950729857
long - 0.0004911900653660626
when - 0.003438330457562438
speaking - 3.778385118200481e-05
front - 0.001586921749644202
desk - 0.002002544112646255
was - 0.022997770752780262
told - 0.0009697855136714569
that - 0.008765853474225116
they - 0.006171362359727452
simply - 0.00023929772415

towels - 0.0002896761923953702
aroma - 1.259461706066827e-05
very - 0.006007632337938765
narrow - 1.259461706066827e-05
near - 0.00027708157533470196
construction - 0.00036524389475937983
zone - 2.518923412133654e-05
plan - 0.00011335155354601444
waking - 2.518923412133654e-05
6 - 0.00021410849003136059
choose - 0.00023929772415269714
day - 0.0014609755790375193
asked - 0.0009068124283681155
concierge - 0.0006297308530334135
who - 0.0007556770236400962
decent - 0.00018891925591002406
guy - 0.00015113540472801924
moved - 0.00012594617060668272
larger - 0.0001637300217886875
more - 0.0013350294084308366
though - 0.0006801093212760866
by - 0.0020151387297069234
torture - 1.259461706066827e-05
hearing - 2.518923412133654e-05
sounds - 3.778385118200481e-05
stop - 0.0001763246388493558
late - 0.0003778385118200481
checkout - 0.00025189234121336543
1pm - 2.518923412133654e-05
refused - 2.518923412133654e-05
stating - 3.778385118200481e-05
charged - 0.0003022708094560385
half - 0.0002015138729

saturday - 0.00012594617060668272
slow - 0.0001763246388493558
waited - 0.00015113540472801924
10 - 0.000327460043577375
any - 0.0009068124283681155
given - 0.0003148654265167068
event - 0.00010075693648534616
red - 0.0001637300217886875
line - 0.00041562236300205294
trains - 2.518923412133654e-05
beneath - 1.259461706066827e-05
city - 0.0009823801307321251
streets - 5.037846824267308e-05
hear - 0.0003526492776987116
thunder - 1.259461706066827e-05
shouldnt - 1.259461706066827e-05
awake - 1.259461706066827e-05
toward - 6.297308530334136e-05
nw - 1.259461706066827e-05
airlines - 1.259461706066827e-05
bonus - 0.00010075693648534616
points - 0.00018891925591002406
ok - 0.00026448695827403367
cup - 7.556770236400962e-05
coffee - 0.000793460874822101
awful - 5.037846824267308e-05
wireless - 8.81623194246779e-05
cable - 3.778385118200481e-05
channels - 7.556770236400962e-05
fridge - 0.00020151387297069232
25 - 0.00015113540472801924
http - 1.259461706066827e-05
hotelscom - 2.518923412133654e

overlooked - 6.297308530334136e-05
hardly - 3.778385118200481e-05
bother - 7.556770236400962e-05
awakened - 1.259461706066827e-05
light - 0.00018891925591002406
sleeper - 3.778385118200481e-05
comfy - 0.00025189234121336543
lunch - 8.81623194246779e-05
meals - 1.259461706066827e-05
perfect - 0.0004911900653660626
12 - 0.00010075693648534616
ca - 0.00036524389475937983
beat - 0.00013854078766735098
adorable - 1.259461706066827e-05
pet - 5.037846824267308e-05
store - 0.00012594617060668272
tails - 1.259461706066827e-05
across - 0.0002896761923953702
brought - 0.0001763246388493558
dog - 3.778385118200481e-05
toys - 1.259461706066827e-05
safe - 0.00011335155354601444
neighborhoodi - 1.259461706066827e-05
myself - 0.00012594617060668272
hotwire - 8.81623194246779e-05
midlevel - 1.259461706066827e-05
blown - 5.037846824267308e-05
51night - 1.259461706066827e-05
social - 2.518923412133654e-05
watching - 6.297308530334136e-05
scene - 7.556770236400962e-05
swank - 1.259461706066827e-05
smooth 

16 - 0.00010075693648534616
nickle - 1.259461706066827e-05
dimed - 2.518923412133654e-05
bottles - 0.00011335155354601444
26 - 1.259461706066827e-05
minibottle - 1.259461706066827e-05
tipping - 1.259461706066827e-05
multiple - 5.037846824267308e-05
carrying - 1.259461706066827e-05
bags - 0.00012594617060668272
inhouse - 1.259461706066827e-05
zest - 5.037846824267308e-05
zagat - 1.259461706066827e-05
rated - 2.518923412133654e-05
refreshing - 2.518923412133654e-05
ordinary - 1.259461706066827e-05
unique - 6.297308530334136e-05
cheerful - 5.037846824267308e-05
plus - 0.00018891925591002406
goldfish - 6.297308530334136e-05
share - 5.037846824267308e-05
midmarch - 1.259461706066827e-05
carpets - 7.556770236400962e-05
looby - 1.259461706066827e-05
smelly - 2.518923412133654e-05
washroom - 1.259461706066827e-05
cheapest - 6.297308530334136e-05
costs - 8.81623194246779e-05
10day - 1.259461706066827e-05
connect - 1.259461706066827e-05
minimum - 3.778385118200481e-05
local - 5.037846824267308e-

showing - 3.778385118200481e-05
empathy - 1.259461706066827e-05
sit - 0.00018891925591002406
figured - 5.037846824267308e-05
tux - 1.259461706066827e-05
gown - 1.259461706066827e-05
apology - 8.81623194246779e-05
ruined - 1.259461706066827e-05
supposed - 0.0001637300217886875
poorly - 5.037846824267308e-05
contribute - 1.259461706066827e-05
married - 1.259461706066827e-05
steer - 2.518923412133654e-05
clear - 5.037846824267308e-05
august - 6.297308530334136e-05
2004 - 3.778385118200481e-05
defintely - 1.259461706066827e-05
converted - 2.518923412133654e-05
theater - 8.81623194246779e-05
vary - 1.259461706066827e-05
layout - 6.297308530334136e-05
sofas - 1.259461706066827e-05
chaise - 1.259461706066827e-05
lounges - 2.518923412133654e-05
simple - 0.00011335155354601444
munchies - 1.259461706066827e-05
vicinity - 1.259461706066827e-05
traffic - 0.00010075693648534616
peace - 1.259461706066827e-05
knowing - 1.259461706066827e-05
fiance - 2.518923412133654e-05
confirm - 5.037846824267308e-

dingy - 5.037846824267308e-05
apperance - 1.259461706066827e-05
lap - 2.518923412133654e-05
crumbs - 2.518923412133654e-05
drawers - 6.297308530334136e-05
soffitel - 1.259461706066827e-05
45 - 0.0001637300217886875
agreed - 5.037846824267308e-05
promise - 5.037846824267308e-05
uncomfortably - 3.778385118200481e-05
managment - 2.518923412133654e-05
bellhops - 1.259461706066827e-05
visibly - 1.259461706066827e-05
stressed - 1.259461706066827e-05
swim - 5.037846824267308e-05
accomodated - 1.259461706066827e-05
thinking - 6.297308530334136e-05
emailed - 1.259461706066827e-05
impossible - 7.556770236400962e-05
bailed - 1.259461706066827e-05
stair - 1.259461706066827e-05
wells - 1.259461706066827e-05
breaker - 1.259461706066827e-05
lame - 1.259461706066827e-05
posts - 2.518923412133654e-05
prominently - 1.259461706066827e-05
claiming - 1.259461706066827e-05
certain - 6.297308530334136e-05
dates - 2.518923412133654e-05
condescendingly - 1.259461706066827e-05
sell - 3.778385118200481e-05
frust

wonderfull - 1.259461706066827e-05
affraid - 1.259461706066827e-05
attend - 3.778385118200481e-05
dancing - 1.259461706066827e-05
orbitz - 1.259461706066827e-05
particularily - 1.259461706066827e-05
olympia - 1.259461706066827e-05
seattle - 2.518923412133654e-05
inspiring - 1.259461706066827e-05
biggest - 6.297308530334136e-05
disappointment - 8.81623194246779e-05
inadequate - 2.518923412133654e-05
hardwire - 1.259461706066827e-05
replacement - 2.518923412133654e-05
wire - 1.259461706066827e-05
pleasantly - 3.778385118200481e-05
covers - 5.037846824267308e-05
intense - 2.518923412133654e-05
trump - 5.037846824267308e-05
loyal - 1.259461706066827e-05
sad - 7.556770236400962e-05
truth - 5.037846824267308e-05
lodging - 2.518923412133654e-05
deteriorates - 1.259461706066827e-05
decades - 3.778385118200481e-05
changes - 3.778385118200481e-05
heating - 3.778385118200481e-05
cooling - 1.259461706066827e-05
systems - 1.259461706066827e-05
downhill - 2.518923412133654e-05
skyrocketed - 1.259461

flush - 3.778385118200481e-05
germ - 1.259461706066827e-05
freak - 1.259461706066827e-05
fully - 3.778385118200481e-05
teen - 1.259461706066827e-05
fccla - 1.259461706066827e-05
suffered - 1.259461706066827e-05
direction - 1.259461706066827e-05
describe - 1.259461706066827e-05
slumber - 1.259461706066827e-05
supervision - 1.259461706066827e-05
jump - 2.518923412133654e-05
teens - 2.518923412133654e-05
zoo - 1.259461706066827e-05
constant - 5.037846824267308e-05
pipes - 2.518923412133654e-05
thier - 2.518923412133654e-05
obnoxious - 1.259461706066827e-05
4200 - 1.259461706066827e-05
feed - 1.259461706066827e-05
meter - 1.259461706066827e-05
weekdays - 1.259461706066827e-05
sodassnacks - 1.259461706066827e-05
restocking - 1.259461706066827e-05
uploaded - 1.259461706066827e-05
opportunity - 1.259461706066827e-05
addition - 6.297308530334136e-05
wonder - 2.518923412133654e-05
counters - 1.259461706066827e-05
ruining - 1.259461706066827e-05
vendor - 1.259461706066827e-05
conversations - 3.7

funds - 1.259461706066827e-05
spreading - 1.259461706066827e-05
blame - 1.259461706066827e-05
plans - 2.518923412133654e-05
dime - 2.518923412133654e-05
encounter - 2.518923412133654e-05
theirs - 1.259461706066827e-05
knowledge - 2.518923412133654e-05
recession - 1.259461706066827e-05
accoutrements - 1.259461706066827e-05
labelled - 1.259461706066827e-05
ml - 1.259461706066827e-05
750 - 2.518923412133654e-05
careful - 2.518923412133654e-05
tray - 1.259461706066827e-05
goodies - 1.259461706066827e-05
snickers - 1.259461706066827e-05
resulted - 1.259461706066827e-05
900 - 1.259461706066827e-05
elected - 1.259461706066827e-05
track - 5.037846824267308e-05
successfully - 1.259461706066827e-05
responsive - 1.259461706066827e-05
corners - 2.518923412133654e-05
stocking - 1.259461706066827e-05
skipping - 1.259461706066827e-05
treats - 1.259461706066827e-05
memories - 1.259461706066827e-05
lukewarm - 2.518923412133654e-05
hedge - 1.259461706066827e-05
unpleasantness - 1.259461706066827e-05
mar

description - 2.518923412133654e-05
communal - 1.259461706066827e-05
green - 2.518923412133654e-05
efforts - 1.259461706066827e-05
incandescent - 1.259461706066827e-05
bulbs - 1.259461706066827e-05
radio - 5.037846824267308e-05
recycle - 1.259461706066827e-05
motorcyclers - 1.259461706066827e-05
prove - 1.259461706066827e-05
manhood - 1.259461706066827e-05
stairwell - 1.259461706066827e-05
questioning - 1.259461706066827e-05
toiletry - 1.259461706066827e-05
pacage - 1.259461706066827e-05
intruded - 1.259461706066827e-05
monoco - 7.556770236400962e-05
primarily - 2.518923412133654e-05
washington - 6.297308530334136e-05
dc - 5.037846824267308e-05
accustomed - 2.518923412133654e-05
papers - 2.518923412133654e-05
honored - 3.778385118200481e-05
bothering - 2.518923412133654e-05
baffling - 2.518923412133654e-05
5nights - 1.259461706066827e-05
barrestaurant - 1.259461706066827e-05
regard - 1.259461706066827e-05
boasted - 1.259461706066827e-05
accomodate - 1.259461706066827e-05
mine - 2.51892

27th - 1.259461706066827e-05
125night - 1.259461706066827e-05
unusually - 1.259461706066827e-05
host - 1.259461706066827e-05
client - 1.259461706066827e-05
9pm - 1.259461706066827e-05
treadmills - 1.259461706066827e-05
sessions - 2.518923412133654e-05
hvac - 2.518923412133654e-05
112pm - 1.259461706066827e-05
bite - 1.259461706066827e-05
sofabed - 1.259461706066827e-05
sofabeds - 1.259461706066827e-05
theoretically - 1.259461706066827e-05
control - 3.778385118200481e-05
imprecise - 1.259461706066827e-05
tolerable - 1.259461706066827e-05
golden - 3.778385118200481e-05
corral - 1.259461706066827e-05
professionally - 1.259461706066827e-05
liitle - 1.259461706066827e-05
riff - 1.259461706066827e-05
raff - 1.259461706066827e-05
leading - 1.259461706066827e-05
40000 - 1.259461706066827e-05
flushed - 1.259461706066827e-05
hands - 1.259461706066827e-05
tools - 1.259461706066827e-05
knikerbocker - 1.259461706066827e-05
116 - 1.259461706066827e-05
ho - 2.518923412133654e-05
skinny - 1.2594617060

horns - 1.259461706066827e-05
sirens - 1.259461706066827e-05
drunk - 3.778385118200481e-05
screw - 1.259461706066827e-05
sticking - 1.259461706066827e-05
dim - 1.259461706066827e-05
alcove - 1.259461706066827e-05
bench - 1.259461706066827e-05
watts - 1.259461706066827e-05
recoded - 1.259461706066827e-05
succeeded - 1.259461706066827e-05
steam - 1.259461706066827e-05
diameter - 1.259461706066827e-05
wakeup - 1.259461706066827e-05
necessary - 1.259461706066827e-05
quieter - 1.259461706066827e-05
milennium - 1.259461706066827e-05
festivals - 1.259461706066827e-05
maria - 1.259461706066827e-05
concierges - 2.518923412133654e-05
ceilings - 1.259461706066827e-05
surrounded - 1.259461706066827e-05
begun - 2.518923412133654e-05
slowly - 1.259461706066827e-05
category - 1.259461706066827e-05
snobbishness - 1.259461706066827e-05
inexcusable - 2.518923412133654e-05
aspect - 1.259461706066827e-05
brim - 1.259461706066827e-05
overflows - 1.259461706066827e-05
eleven - 1.259461706066827e-05
comparis

In [71]:
for bigram in bigram_probablities_of_train_data:
    print(bigram, "-", bigram_probablities_of_train_data[bigram])

('i', 'booked') - 0.012273524254821741
('booked', 'two') - 0.011627906976744186
('two', 'rooms') - 0.0234375
('rooms', 'four') - 0.0049504950495049506
('four', 'months') - 0.05
('months', 'in') - 0.25
('in', 'advance') - 0.005555555555555556
('advance', 'at') - 0.14285714285714285
('at', 'the') - 0.44966442953020136
('the', 'talbott') - 0.004903809883062995
('talbott', 'we') - 0.07142857142857142
('we', 'were') - 0.1602506714413608
('were', 'placed') - 0.0017301038062283738
('placed', 'on') - 0.25
('on', 'the') - 0.3640625
('the', 'top') - 0.002263296869105998
('top', 'floor') - 0.125
('floor', 'next') - 0.0072992700729927005
('next', 'to') - 0.2336448598130841
('to', 'the') - 0.12679425837320574
('the', 'elevators') - 0.002074688796680498
('elevators', 'which') - 0.0625
('which', 'are') - 0.017045454545454544
('are', 'used') - 0.0032258064516129032
('used', 'all') - 0.02857142857142857
('all', 'night') - 0.046762589928057555
('night', 'long') - 0.020100502512562814
('long', 'when') - 

('a', '3star') - 0.0004450378282153983
('3star', 'hotel') - 1.0
('hotel', 'like') - 0.003861003861003861
('like', 'the') - 0.09722222222222222
('the', 'website') - 0.0013202565069784986
('website', 'lists') - 0.037037037037037035
('lists', 'its') - 1.0
('its', 'at') - 0.016129032258064516
('at', 'least') - 0.026845637583892617
('least', '4star') - 0.03571428571428571
('4star', 'the') - 0.25
('the', 'hard') - 0.004337985665786495
('hard', 'rock') - 0.574468085106383
('rock', 'hotel') - 0.30303030303030304
('hotel', 'chicago') - 0.0019305019305019305
('chicago', 'has') - 0.021341463414634148
('has', 'become') - 0.008
('become', 'my') - 1.0
('my', 'favorite') - 0.009398496240601503
('favorite', 'hotel') - 0.3333333333333333
('i', 've') - 0.025131502045587374
('ve', 'stayed') - 0.2545454545454545
('stayed', 'there') - 0.034482758620689655
('there', 'at') - 0.005555555555555556
('least', '5') - 0.03571428571428571
('5', 'times') - 0.02040816326530612
('times', 'now') - 0.023255813953488372


('by', 'the') - 0.31875
('elevator', 'and') - 0.16666666666666666
('and', 'it') - 0.024681835711531045
('it', 'was') - 0.24076147816349383
('was', 'torture') - 0.000547645125958379
('torture', 'hearing') - 1.0
('hearing', 'the') - 0.5
('the', 'sounds') - 0.0001886080724254998
('sounds', 'of') - 0.3333333333333333
('elevator', 'which') - 0.06666666666666667
('which', 'just') - 0.005681818181818182
('just', 'would') - 0.010471204188481676
('would', 'never') - 0.03058103975535168
('never', 'stop') - 0.009174311926605505
('stop', 'i') - 0.07142857142857142
('for', 'late') - 0.0009746588693957114
('late', 'checkout') - 0.16666666666666666
('checkout', 'at') - 0.1
('at', '1pm') - 0.0013422818791946308
('1pm', 'and') - 0.5
('receptionist', 'refused') - 0.08333333333333333
('refused', 'stating') - 0.5
('stating', 'that') - 0.6666666666666666
('that', 'we') - 0.07327586206896551
('be', 'charged') - 0.011111111111111112
('charged', 'for') - 0.25
('a', 'half') - 0.0013351134846461949
('half', 'da

('to', 'be') - 0.05550239234449761
('be', 'friendly') - 0.007407407407407408
('friendly', 'and') - 0.3592233009708738
('and', 'helpful') - 0.010026995757809487
('helpful', 'we') - 0.061855670103092786
('we', 'will') - 0.020590868397493287
('will', 'be') - 0.174496644295302
('be', 'return') - 0.003703703703703704
('return', 'guests') - 0.037037037037037035
('guests', 'many') - 0.03773584905660377
('many', 'times') - 0.08571428571428572
('times', 'in') - 0.046511627906976744
('the', 'future') - 0.0013202565069784986
('future', 'for') - 0.125
('for', 'sure') - 0.003898635477582846
('sure', 'after') - 0.029411764705882353
('after', 'leaving') - 0.015151515151515152
('leaving', 'some') - 0.09090909090909091
('some', 'important') - 0.0078125
('important', 'documents') - 0.16666666666666666
('documents', 'in') - 1.0
('room', 'i') - 0.02478448275862069
('i', 'called') - 0.018702513150204558
('called', 'and') - 0.04081632653061224
('and', 'asked') - 0.003470883146934053
('the', 'lost') - 0.0001

('shouldnt', 'keep') - 1.0
('keep', 'you') - 0.06666666666666667
('you', 'awake') - 0.002242152466367713
('awake', 'at') - 1.0
('at', 'night') - 0.022818791946308724
('night', 'used') - 0.005025125628140704
('used', 'my') - 0.02857142857142857
('my', 'stay') - 0.03383458646616541
('stay', 'toward') - 0.002457002457002457
('toward', 'nw') - 0.2
('nw', 'airlines') - 1.0
('airlines', 'bonus') - 1.0
('bonus', 'points') - 0.125
('points', 'bar') - 0.06666666666666667
('bar', 'was') - 0.13186813186813187
('was', 'ok') - 0.004928806133625411
('ok', 'one') - 0.047619047619047616
('one', 'cup') - 0.0043859649122807015
('cup', 'coffee') - 0.16666666666666666
('coffee', 'service') - 0.06349206349206349
('was', 'awful') - 0.001095290251916758
('awful', 'no') - 0.25
('no', 'free') - 0.023346303501945526
('free', 'wireless') - 0.033707865168539325
('wireless', 'and') - 0.14285714285714285
('and', 'no') - 0.004242190512919399
('free', 'cable') - 0.011235955056179775
('cable', 'channels') - 0.66666666

('comfortable', 'housekeepingturn') - 0.008849557522123894
('housekeepingturn', 'down') - 1.0
('down', 'service') - 0.053763440860215055
('service', 'were') - 0.015748031496062992
('efficient', 'and') - 0.25
('and', 'thorough') - 0.0007713073659853452
('thorough', 'bathroom') - 0.5
('bathroom', 'amenities') - 0.022900763358778626
('amenities', 'were') - 0.0967741935483871
('and', 'replaced') - 0.0003856536829926726
('replaced', 'daily') - 0.14285714285714285
('daily', 'this') - 0.16666666666666666
('is', 'definitely') - 0.0035842293906810036
('definitely', 'my') - 0.018867924528301886
('my', 'first') - 0.02819548872180451
('first', 'choice') - 0.01834862385321101
('choice', 'for') - 0.3333333333333333
('a', 'hotel') - 0.021806853582554516
('chicago', 'it') - 0.01524390243902439
('it', 'is') - 0.09854423292273236
('is', 'also') - 0.014336917562724014
('also', 'very') - 0.04861111111111111
('very', 'close') - 0.014675052410901468
('to', 'all') - 0.005741626794258373
('all', 'the') - 0.25

('helped', 'me') - 0.42857142857142855
('me', 'print') - 0.004048582995951417
('print', 'boarding') - 0.3333333333333333
('boarding', 'passes') - 0.8
('passes', 'negative') - 0.25
('negative', 'it') - 0.13333333333333333
('is', 'frustrating') - 0.0011947431302270011
('frustrating', 'that') - 0.3333333333333333
('that', 'nowadays') - 0.0014367816091954023
('nowadays', 'you') - 1.0
('you', 'do') - 0.02242152466367713
('have', 'free') - 0.0045871559633027525
('free', 'internet') - 0.10112359550561797
('internet', 'in') - 0.09090909090909091
('in', 'most') - 0.002380952380952381
('most', 'hotels') - 0.14
('hotels', 'and') - 0.037383177570093455
('it', 's') - 0.09070548712206047
('s', 'the') - 0.037037037037037035
('same', 'here') - 0.02631578947368421
('to', 'pay') - 0.010526315789473684
('the', 'internet') - 0.002640513013956997
('internet', 'negative') - 0.01818181818181818
('negative', 'loud') - 0.06666666666666667
('loud', 'on') - 0.0625
('the', 'phone') - 0.0058468502451904944
('phone

('concrete', 'and') - 0.2
('and', 'thick') - 0.0003856536829926726
('thick', 'we') - 0.5
('we', 'heard') - 0.0035810205908683975
('heard', 'nothing') - 0.09090909090909091
('nothing', 'both') - 0.022222222222222223
('both', 'nights') - 0.12195121951219512
('nights', 'we') - 0.0875
('were', 'there') - 0.02249134948096886
('there', 'highly') - 0.002777777777777778
('highly', 'recommend') - 0.5357142857142857
('hotel', 'rooms') - 0.005791505791505791
('are', 'small') - 0.025806451612903226
('small', 'but') - 0.10714285714285714
('not', 'crowded') - 0.0016286644951140066
('crowded', 'for') - 0.25
('two', 'people') - 0.03125
('people', 'they') - 0.012658227848101266
('they', 'are') - 0.044897959183673466
('are', 'old') - 0.0064516129032258064
('old', 'style') - 0.03508771929824561
('style', 'hotel') - 0.1
('hotel', 'roomsvery') - 0.0009652509652509653
('roomsvery', 'classic') - 1.0
('classic', 'i') - 0.14285714285714285
('am', 'staying') - 0.028985507246376812
('staying', 'here') - 0.137931

('fyi', 'rooms') - 0.5
('rooms', 'on') - 0.009900990099009901
('on', 'floors') - 0.0015625
('floors', '15') - 0.07142857142857142
('15', 'and') - 0.045454545454545456
('and', 'below') - 0.0007713073659853452
('below', 'were') - 0.08333333333333333
('were', 'renovated') - 0.0017301038062283738
('year', 'rest') - 0.02702702702702703
('rest', 'were') - 0.1111111111111111
('were', 'done') - 0.0017301038062283738
('done', '5') - 0.041666666666666664
('5', 'years') - 0.02040816326530612
('ago', 'for') - 0.1111111111111111
('price', 'it') - 0.025
('great', 'bargain') - 0.0028328611898017
('bargain', 'only') - 0.16666666666666666
('only', 'complaint') - 0.05
('is', 'bathroom') - 0.0011947431302270011
('bathroom', 'is') - 0.061068702290076333
('is', 'small') - 0.007168458781362007
('small', 'even') - 0.008928571428571428
('even', 'though') - 0.15436241610738255
('the', 'reviews') - 0.002640513013956997
('reviews', 'of') - 0.02
('hotel', 'are') - 0.0028957528957528956
('thing', 'about') - 0.0789

('in', 'handy') - 0.002380952380952381
('handy', 'it') - 0.2
('it', 'rained') - 0.0011198208286674132
('rained', 'the') - 1.0
('whole', 'time') - 0.25
('was', 'in') - 0.016976998904709748
('internet', 'connection') - 0.05454545454545454
('connection', 'is') - 0.25
('is', '99524hrs') - 0.0011947431302270011
('99524hrs', 'great') - 1.0
('place', 'i') - 0.05555555555555555
('i', 'will') - 0.02571595558153127
('will', 'return') - 0.020134228187919462
('return', 'for') - 0.037037037037037035
('sure', 'i') - 0.11764705882352941
('can', 'see') - 0.05504587155963303
('see', 'how') - 0.044444444444444446
('how', 'this') - 0.05405405405405406
('place', 'must') - 0.011111111111111112
('must', 'have') - 0.34782608695652173
('been', 'gorgeous') - 0.008928571428571428
('gorgeous', 'at') - 0.125
('at', 'one') - 0.010738255033557046
('one', 'time') - 0.008771929824561403
('time', 'but') - 0.02027027027027027
('they', 'really') - 0.006122448979591836
('really', 'need') - 0.00909090909090909
('need', 'a

('super', 'great') - 0.06666666666666667
('great', 'helpful') - 0.0028328611898017
('helpful', 'friendly') - 0.010309278350515464
('staff', 'angie') - 0.0037313432835820895
('angie', 'at') - 0.5
('desk', 'made') - 0.006289308176100629
('made', 'sure') - 0.03488372093023256
('sure', 'my') - 0.08823529411764706
('my', 'husbands') - 0.0018796992481203006
('husbands', 'birthday') - 1.0
('birthday', 'would') - 0.125
('be', 'great') - 0.007407407407407408
('great', 'our') - 0.0028328611898017
('was', 'beautiful') - 0.004381161007667032
('beautiful', 'angie') - 0.014705882352941176
('angie', 'suggested') - 0.5
('suggested', 'we') - 0.09090909090909091
('we', 'go') - 0.004476275738585497
('to', 'sullivans') - 0.0004784688995215311
('sullivans', 'for') - 1.0
('for', 'dinnerperfect') - 0.0009746588693957114
('dinnerperfect', 'choice') - 1.0
('choice', 'make') - 0.05555555555555555
('make', 'the') - 0.07317073170731707
('the', 'reservation') - 0.003017729158807997
('us', 'best') - 0.0046511627906

('never', 'would') - 0.009174311926605505
('would', 'come') - 0.01529051987767584
('come', 'on') - 0.1111111111111111
('it', 'seems') - 0.005599104143337066
('seems', 'that') - 0.18181818181818182
('conrad', 'thinks') - 0.037037037037037035
('thinks', 'it') - 1.0
('it', 'can') - 0.0022396416573348264
('can', 'spend') - 0.009174311926605505
('spend', 'a') - 0.18181818181818182
('of', 'money') - 0.0019120458891013384
('money', 'to') - 0.06060606060606061
('make', 'itself') - 0.024390243902439025
('itself', 'look') - 0.047619047619047616
('look', 'beautiful') - 0.03571428571428571
('beautiful', 'without') - 0.014705882352941176
('without', 'worrying') - 0.029411764705882353
('worrying', 'about') - 1.0
('about', 'each') - 0.005555555555555556
('each', 'customer') - 0.03225806451612903
('customer', 's') - 0.02702702702702703
('s', 'experience') - 0.004629629629629629
('experience', 'i') - 0.08450704225352113
('think', 'they') - 0.07692307692307693
('they', 'go') - 0.006122448979591836
('go'

('the', 'sidewalk') - 0.0003772161448509996
('sidewalk', 'that') - 0.3333333333333333
('that', 'has') - 0.007183908045977011
('has', 'flowers') - 0.008
('flowers', 'etc') - 0.125
('etc', 'it') - 0.05
('it', 'looks') - 0.0022396416573348264
('a', 'scene') - 0.0008900756564307966
('scene', 'out') - 0.16666666666666666
('of', 'europe') - 0.0028680688336520078
('europe', 'i') - 0.2
('there', 'with') - 0.005555555555555556
('with', 'our') - 0.015
('our', 'daughters') - 0.0021551724137931034
('daughters', 'ages') - 0.3333333333333333
('ages', '10') - 0.5
('10', 'and') - 0.038461538461538464
('and', '13') - 0.0007713073659853452
('13', 'we') - 0.1111111111111111
('arrived', 'at') - 0.2698412698412698
('omni', 'on') - 0.029411764705882353
('on', '2nd') - 0.0015625
('2nd', 'september') - 0.14285714285714285
('september', 'for') - 0.16666666666666666
('a', '6') - 0.0004450378282153983
('6', 'day') - 0.058823529411764705
('day', 'stay') - 0.02586206896551724
('took', 'ill') - 0.02040816326530612


('reluctant', 'to') - 1.0
('use', 'this') - 0.021739130434782608
('this', 'service') - 0.0016835016835016834
('and', 'this') - 0.004242190512919399
('in', 'future') - 0.0007936507936507937
('a', 'hilton') - 0.0008900756564307966
('hilton', 'hotel') - 0.04081632653061224
('very', 'unimpressed') - 0.0041928721174004195
('unimpressed', 'the') - 0.3333333333333333
('are', 'incredibly') - 0.0032258064516129032
('incredibly', 'small') - 0.08333333333333333
('small', 'there') - 0.017857142857142856
('are', 'no') - 0.012903225806451613
('no', 'refrigirator') - 0.0038910505836575876
('refrigirator', 'or') - 1.0
('or', 'coffee') - 0.012269938650306749
('coffee', 'machine') - 0.031746031746031744
('machine', 'for') - 0.16666666666666666
('for', 'coffee') - 0.0009746588693957114
('coffee', 'or') - 0.031746031746031744
('or', 'tea') - 0.006134969325153374
('tea', 'wifi') - 0.125
('wifi', 'costs') - 0.037037037037037035
('costs', 'an') - 0.14285714285714285
('an', 'arm') - 0.004464285714285714
('arm

('here', 'from') - 0.006172839506172839
('from', 'nov') - 0.0027397260273972603
('nov', '30') - 1.0
('30', 'to') - 0.125
('to', 'dec') - 0.0004784688995215311
('dec', '2') - 0.5
('2', 'and') - 0.01904761904761905
('wonderful', 'time') - 0.06557377049180328
('time', 'the') - 0.0472972972972973
('is', 'just') - 0.013142174432497013
('just', 'beautiful') - 0.005235602094240838
('beautiful', 'and') - 0.14705882352941177
('excellent', 'from') - 0.01639344262295082
('from', 'check') - 0.010958904109589041
('in', 'to') - 0.005555555555555556
('the', 'maid') - 0.002074688796680498
('maid', 'staff') - 0.08333333333333333
('the', 'bartenders') - 0.0001886080724254998
('bartenders', 'in') - 0.5
('in', 'kitty') - 0.0007936507936507937
('kitty', 'oshea') - 0.75
('oshea', 's') - 1.0
('s', 'we') - 0.009259259259259259
('king', 'bed') - 0.36363636363636365
('bed', 'that') - 0.022988505747126436
('had', 'very') - 0.003703703703703704
('nice', 'feather') - 0.0056179775280898875
('feather', 'pillows') - 

('phone', 'to') - 0.03508771929824561
('get', 'help') - 0.005555555555555556
('help', 'luckily') - 0.03571428571428571
('luckily', 'right') - 0.2
('right', 'after') - 0.015873015873015872
('after', 'that') - 0.015151515151515152
('that', 'an') - 0.0028735632183908046
('an', 'employee') - 0.008928571428571428
('employee', 'who') - 0.2
('was', 'randomly') - 0.000547645125958379
('randomly', 'walking') - 1.0
('walking', 'by') - 0.023255813953488372
('by', 'asked') - 0.00625
('asked', 'me') - 0.041666666666666664
('been', 'waiting') - 0.008928571428571428
('waiting', 'long') - 0.05263157894736842
('long', 'as') - 0.05128205128205128
('as', 'if') - 0.029508196721311476
('if', 'she') - 0.005
('she', 'knew') - 0.014925373134328358
('knew', 'this') - 0.26666666666666666
('problem', 'i') - 0.05263157894736842
('said', 'no') - 0.01639344262295082
('no', 'and') - 0.0038910505836575876
('and', 'she') - 0.0011569610489780178
('she', 'called') - 0.04477611940298507
('check', 'apparently') - 0.012345

('the', 'vacuuming') - 0.0001886080724254998
('vacuuming', 'the') - 1.0
('bed', 'skirt') - 0.005747126436781609
('skirt', 'had') - 1.0
('a', 'foreign') - 0.0004450378282153983
('foreign', 'stain') - 1.0
('stain', 'on') - 0.75
('the', 'honor') - 0.0003772161448509996
('honor', 'barfridge') - 0.125
('barfridge', 'was') - 1.0
('was', 'gouged') - 0.000547645125958379
('gouged', 'and') - 1.0
('and', 'dirty') - 0.0011569610489780178
('dirty', 'too') - 0.034482758620689655
('too', 'the') - 0.028169014084507043
('the', 'mirror') - 0.0003772161448509996
('mirror', 'was') - 0.16666666666666666
('beds', 'appeared') - 0.013333333333333334
('been', 'jumped') - 0.008928571428571428
('jumped', 'on') - 1.0
('on', 'not') - 0.003125
('not', 'neat') - 0.0016286644951140066
('neat', 'and') - 0.25
('and', 'tidy') - 0.0007713073659853452
('tidy', 'at') - 0.6666666666666666
('there', 'were') - 0.06388888888888888
('were', 'boxes') - 0.0017301038062283738
('boxes', 'of') - 1.0
('of', 'trash') - 0.000956022944

('because', 'they') - 0.06862745098039216
('not', 'been') - 0.013029315960912053
('been', 'used') - 0.008928571428571428
('used', 'for') - 0.05714285714285714
('for', 'sometime') - 0.0009746588693957114
('sometime', 'however') - 1.0
('however', 'the') - 0.13157894736842105
('the', 'microwave') - 0.0001886080724254998
('oven', 'was') - 0.5
('very', 'useful') - 0.0041928721174004195
('useful', 'as') - 0.3333333333333333
('can', 'heat') - 0.009174311926605505
('heat', 'up') - 0.16666666666666666
('up', 'food') - 0.00546448087431694
('food', 'to') - 0.01694915254237288
('night', 'you') - 0.005025125628140704
('easily', 'walk') - 0.15384615384615385
('mile', 'by') - 0.02127659574468085
('by', 'cutting') - 0.00625
('cutting', 'across') - 1.0
('shopping', 'mall') - 0.07142857142857142
('mall', 'across') - 0.1
('the', 'road') - 0.0003772161448509996
('road', 'do') - 0.25
('do', 'check') - 0.012269938650306749
('out', 'pizzeria') - 0.00510204081632653
('pizzeria', 'uno') - 1.0
('uno', 'which') 

('when', 'you') - 0.05860805860805861
('you', 'came') - 0.002242152466367713
('chicago', 'on') - 0.012195121951219513
('college', 'roadtrip') - 0.25
('roadtrip', 'i') - 1.0
('did', 'but') - 0.004048582995951417
('but', 'that') - 0.018480492813141684
('the', 'case') - 0.0005658242172764995
('case', 'any') - 0.1
('any', 'more') - 0.027777777777777776
('more', 'this') - 0.009433962264150943
('property', 'is') - 0.14285714285714285
('is', 'directly') - 0.0011947431302270011
('directly', 'across') - 0.14285714285714285
('for', '12') - 0.0009746588693957114
('price', 'you') - 0.0375
('get', 'their') - 0.005555555555555556
('their', 'great') - 0.00909090909090909
('location', 'with') - 0.004608294930875576
('a', 'newer') - 0.0008900756564307966
('newer', 'albeit') - 0.25
('albeit', 'smaller') - 1.0
('smaller', 'room') - 0.125
('and', 'every') - 0.0023139220979560356
('every', 'ammenity') - 0.01694915254237288
('ammenity', 'you') - 1.0
('would', 'expect') - 0.01529051987767584
('expect', 'from

('a', 'band') - 0.0004450378282153983
('band', 'that') - 0.3333333333333333
('that', 'what') - 0.0028735632183908046
('what', 'in') - 0.00819672131147541
('town', 'the') - 0.13043478260869565
('was', 'amazing') - 0.0038335158817086527
('amazing', 'just') - 0.047619047619047616
('just', 'walking') - 0.010471204188481676
('walking', 'in') - 0.046511627906976744
('was', 'gorgeous') - 0.0016429353778751369
('gorgeous', 'i') - 0.25
('have', 'no') - 0.016055045871559634
('complaints', 'prompt') - 0.08333333333333333
('prompt', 'room') - 0.4
('service', 'friendly') - 0.003937007874015748
('friendly', 'front') - 0.009708737864077669
('desk', 'many') - 0.006289308176100629
('many', 'shops') - 0.014285714285714285
('shops', 'beautiful') - 0.1111111111111111
('beautiful', 'fountin') - 0.014705882352941176
('fountin', 'just') - 1.0
('just', 'an') - 0.020942408376963352
('an', 'all') - 0.004464285714285714
('all', 'around') - 0.014388489208633094
('around', 'great') - 0.02127659574468085
('great', 

('in', 'less') - 0.0015873015873015873
('than', '2') - 0.010101010101010102
('2', 'weeks') - 0.009523809523809525
('weeks', 'my') - 0.1111111111111111
('my', 'fiance') - 0.0037593984962406013
('fiance', 'called') - 0.5
('called', 'last') - 0.01020408163265306
('last', 'week') - 0.057692307692307696
('week', 'to') - 0.08571428571428572
('to', 'confirm') - 0.0014354066985645933
('confirm', 'the') - 0.5
('until', 'today') - 0.030303030303030304
('today', 'when') - 0.16666666666666666
('they', 'called') - 0.004081632653061225
('to', 'bump') - 0.0004784688995215311
('bump', 'us') - 1.0
('no', 'upgrades') - 0.0038910505836575876
('upgrades', 'no') - 0.5
('no', 'discounts') - 0.0038910505836575876
('discounts', 'apparently') - 1.0
('apparently', 'it') - 0.1111111111111111
('is', 'my') - 0.0047789725209080045
('my', 'problem') - 0.0037593984962406013
('problem', 'that') - 0.02631578947368421
('that', 'their') - 0.004310344827586207
('their', 'manager') - 0.00909090909090909
('manager', 'ca') -

('difference', 'all') - 0.125
('all', 'that') - 0.01079136690647482
('that', 'said') - 0.0028735632183908046
('said', 'what') - 0.01639344262295082
('hotel', 'thanks') - 0.0009652509652509653
('thanks', 'omni') - 0.1111111111111111
('omni', 'i') - 0.11764705882352941
('week', 'with') - 0.05714285714285714
('family', 'this') - 0.05555555555555555
('huge', 'so') - 0.029411764705882353
('so', 'clean') - 0.014285714285714285
('clean', 'has') - 0.007692307692307693
('has', 'comfy') - 0.008
('comfy', 'beds') - 0.3
('beds', 'foods') - 0.013333333333333334
('foods', 'great') - 1.0
('great', 'staff') - 0.0084985835694051
('staff', 'couldn') - 0.0037313432835820895
('couldn', 't') - 1.0
('t', 'of') - 0.3333333333333333
('of', 'being') - 0.0038240917782026767
('being', 'any') - 0.015873015873015872
('any', 'nicer') - 0.013888888888888888
('nicer', 'stayed') - 0.18181818181818182
('for', 'thanksgiving') - 0.001949317738791423
('thanksgiving', 'it') - 0.3333333333333333
('fabulous', 'second') - 0.0

('the', 'basics') - 0.0001886080724254998
('basics', 'of') - 0.5
('a', 'less') - 0.0013351134846461949
('less', 'trendy') - 0.07142857142857142
('trendy', 'hotel') - 0.16666666666666666
('in', 'at') - 0.0031746031746031746
('at', '7') - 0.0026845637583892616
('7', 'pm') - 0.1
('pm', 'and') - 0.125
('and', 'our') - 0.004627844195912071
('ready', 'when') - 0.043478260869565216
('key', 'the') - 0.058823529411764705
('maid', 'was') - 0.08333333333333333
('still', 'in') - 0.061224489795918366
('room', 'making') - 0.0010775862068965517
('making', 'it') - 0.2222222222222222
('it', 'up') - 0.004479283314669653
('wait', 'in') - 0.20689655172413793
('hallway', 'on') - 0.1
('weekend', 'i') - 0.046875
('back', 'for') - 0.03125
('a', 'nap') - 0.0008900756564307966
('nap', 'at') - 0.3333333333333333
('at', 'about') - 0.004026845637583893
('about', '330') - 0.011111111111111112
('330', 'pm') - 0.6666666666666666
('pm', 'after') - 0.125
('after', 'vacating') - 0.007575757575757576
('vacating', 'the') 

('and', 'months') - 0.0003856536829926726
('in', 'advanced') - 0.0015873015873015873
('advanced', 'when') - 0.5
('monaco', 'they') - 0.041666666666666664
('us', 'that') - 0.04186046511627907
('they', 'doubled') - 0.0020408163265306124
('doubled', 'booked') - 1.0
('room', 'this') - 0.004310344827586207
('was', 'after') - 0.001095290251916758
('after', 'many') - 0.007575757575757576
('many', 'emails') - 0.014285714285714285
('emails', 'to') - 0.5
('hotel', 'inquiring') - 0.0009652509652509653
('inquiring', 'about') - 1.0
('and', 'if') - 0.0015426147319706903
('if', 'it') - 0.035
('they', 'tried') - 0.006122448979591836
('it', 'by') - 0.0033594624860022394
('by', 'providing') - 0.00625
('providing', 'us') - 0.6666666666666666
('us', 'with') - 0.023255813953488372
('wonderful', 'view') - 0.04918032786885246
('and', 'giving') - 0.0003856536829926726
('giving', 'us') - 0.14285714285714285
('a', 'ten') - 0.0004450378282153983
('ten', 'dollar') - 0.2
('dollar', 'gift') - 0.3333333333333333
('g

('like', 'new') - 0.006944444444444444
('new', 'great') - 0.023809523809523808
('bed', 'large') - 0.005747126436781609
('bathroom', 'with') - 0.03816793893129771
('with', 'separate') - 0.005
('shower', 'great') - 0.017241379310344827
('great', 'ammenities') - 0.0028328611898017
('ammenities', 'a') - 0.5
('beautiful', 'bouquet') - 0.014705882352941176
('bouquet', 'of') - 1.0
('of', 'flowers') - 0.0009560229445506692
('flowers', 'arrived') - 0.125
('arrived', 'upon') - 0.015873015873015872
('our', 'arrival') - 0.008620689655172414
('arrival', 'the') - 0.15789473684210525
('about', '10') - 0.022222222222222223
('10', 'mins') - 0.11538461538461539
('mins', 'from') - 0.1111111111111111
('from', 'everything') - 0.0027397260273972603
('everything', 'not') - 0.013888888888888888
('not', 'close') - 0.003257328990228013
('close', 'but') - 0.015384615384615385
('not', 'far') - 0.0016286644951140066
('from', 'anything') - 0.0027397260273972603
('anything', 'has') - 0.029411764705882353
('great', '

('claiming', 'said') - 1.0
('said', 'deal') - 0.01639344262295082
('deal', 'is') - 0.023809523809523808
('is', 'available') - 0.0023894862604540022
('available', 'on') - 0.02857142857142857
('on', 'certain') - 0.0046875
('certain', 'dates') - 0.2
('dates', 'but') - 0.5
('you', 'call') - 0.004484304932735426
('call', 'to') - 0.031746031746031744
('reservation', 'based') - 0.015625
('that', 'deal') - 0.0014367816091954023
('deal', 'they') - 0.023809523809523808
('they', 'tell') - 0.0020408163265306124
('tell', 'you') - 0.3157894736842105
('you', 'it') - 0.008968609865470852
('not', 'available') - 0.008143322475570033
('available', 'there') - 0.02857142857142857
('are', 'rooms') - 0.0032258064516129032
('deal', 'then') - 0.047619047619047616
('they', 'try') - 0.006122448979591836
('to', 'condescendingly') - 0.0004784688995215311
('condescendingly', 'tell') - 1.0
('you', 'they') - 0.002242152466367713
('only', 'sell') - 0.00625
('sell', 'so') - 0.3333333333333333
('many', 'rooms') - 0.0142

('nt', 'do') - 0.011904761904761904
('do', 'it') - 0.018404907975460124
('it', 'justice') - 0.0011198208286674132
('justice', 'we') - 1.0
('in', 'february') - 0.002380952380952381
('february', '2010') - 0.25
('2010', 'the') - 0.2
('had', 'purchased') - 0.003703703703703704
('purchased', 'it') - 0.25
('it', 'at') - 0.005599104143337066
('a', 'raffel') - 0.0004450378282153983
('raffel', 'at') - 1.0
('at', 'work') - 0.0013422818791946308
('work', 'for') - 0.01818181818181818
('a', 'fund') - 0.0004450378282153983
('fund', 'raiser') - 1.0
('raiser', 'for') - 1.0
('for', 'united') - 0.0009746588693957114
('united', 'way') - 0.5
('way', 'so') - 0.03636363636363636
('we', 'probably') - 0.0017905102954341987
('probably', 'paid') - 0.058823529411764705
('paid', 'more') - 0.045454545454545456
('then', 'we') - 0.09090909090909091
('needed', 'but') - 0.0625
('was', 'for') - 0.0016429353778751369
('for', 'charity') - 0.0009746588693957114
('charity', 'we') - 1.0
('were', 'woke') - 0.0017301038062283

('wine', 'gratis') - 0.02857142857142857
('gratis', 'in') - 0.5
('the', 'late') - 0.0003772161448509996
('late', 'afternoon') - 0.03333333333333333
('afternoon', 'and') - 0.4
('loop', 'has') - 0.05263157894736842
('has', 'to') - 0.032
('offer', 'in') - 0.043478260869565216
('this', 'remarkable') - 0.0016835016835016834
('remarkable', 'city') - 1.0
('city', 'i') - 0.05128205128205128
('recommend', 'parking') - 0.014492753623188406
('parking', 'outside') - 0.018518518518518517
('outside', 'of') - 0.16666666666666666
('the', 'downtown') - 0.0003772161448509996
('downtown', 'area') - 0.045454545454545456
('and', 'taking') - 0.0007713073659853452
('taking', 'the') - 0.1111111111111111
('el', 'into') - 0.125
('loop', 'which') - 0.05263157894736842
('which', 'leaves') - 0.005681818181818182
('leaves', 'you') - 0.3333333333333333
('you', 'about') - 0.002242152466367713
('about', '2') - 0.016666666666666666
('2', 'minutes') - 0.009523809523809525
('minutes', 'from') - 0.020833333333333332
('hot

('always', 'happy') - 0.024390243902439025
('happy', 'he') - 0.05263157894736842
('he', 'obviously') - 0.011904761904761904
('obviously', 'loves') - 0.125
('loves', 'his') - 0.3333333333333333
('his', 'job') - 0.03333333333333333
('job', 'people') - 0.09090909090909091
('desk', 'are') - 0.006289308176100629
('are', 'always') - 0.0032258064516129032
('always', 'nice') - 0.024390243902439025
('efficient', 'if') - 0.125
('to', 'workout') - 0.0009569377990430622
('workout', 'roberta') - 0.1
('roberta', 'runs') - 1.0
('runs', 'their') - 1.0
('their', 'fitness') - 0.00909090909090909
('fitness', 'center') - 0.42105263157894735
('center', 'another') - 0.05
('another', 'one') - 0.06451612903225806
('one', 'who') - 0.0043859649122807015
('who', 'loves') - 0.016666666666666666
('loves', 'their') - 0.3333333333333333
('their', 'job') - 0.00909090909090909
('and', 'serves') - 0.0007713073659853452
('serves', 'guests') - 0.16666666666666666
('guests', 'well') - 0.03773584905660377
('well', 'great')

('service', 'menu') - 0.011811023622047244
('menu', 'ie') - 0.1111111111111111
('ie', 'grilled') - 0.1111111111111111
('grilled', 'cheese') - 1.0
('cheese', 'with') - 0.2
('with', 'mini') - 0.0016666666666666668
('mini', 'tomato') - 0.0625
('tomato', 'soup') - 1.0
('soup', 'for') - 0.3333333333333333
('for', 'dipping') - 0.0009746588693957114
('dipping', 'very') - 1.0
('very', 'cute') - 0.0020964360587002098
('cute', 'their') - 0.3333333333333333
('their', 'late') - 0.00909090909090909
('late', 'night') - 0.1
('night', 'menu') - 0.005025125628140704
('menu', 'however') - 0.1111111111111111
('however', 'is') - 0.02631578947368421
('is', 'lacking') - 0.0011947431302270011
('lacking', 'it') - 0.16666666666666666
('been', 'nice') - 0.044642857142857144
('see', 'some') - 0.044444444444444446
('some', 'soup') - 0.0078125
('soup', 'on') - 0.3333333333333333
('on', 'there') - 0.0015625
('that', 'being') - 0.0028735632183908046
('being', 'said') - 0.015873015873015872
('said', 'i') - 0.08196721

('600', 'water') - 0.5
('water', 'or') - 0.043478260869565216
('or', 'try') - 0.012269938650306749
('the', '28') - 0.0001886080724254998
('28', 'breakfast') - 0.5
('buffet', 'the') - 0.05
('friendly', 'but') - 0.019417475728155338
('but', 'of') - 0.002053388090349076
('course', 'you') - 0.13333333333333333
('you', 'tip') - 0.002242152466367713
('tip', 'for') - 0.08333333333333333
('smile', 'they') - 0.125
('did', 'give') - 0.008097165991902834
('a', 'cot') - 0.0004450378282153983
('cot', 'for') - 1.0
('free', 'as') - 0.02247191011235955
('a', 'foldout') - 0.0004450378282153983
('foldout', 'couch') - 1.0
('couch', 'as') - 0.125
('a', 'party') - 0.0008900756564307966
('party', 'of') - 0.09090909090909091
('of', '5') - 0.0028680688336520078
('5', 'did') - 0.02040816326530612
('gym', 'or') - 0.05263157894736842
('or', 'pool') - 0.006134969325153374
('floor', 'but') - 0.021897810218978103
('but', 'looked') - 0.002053388090349076
('looked', 'nice') - 0.029411764705882353
('nice', 'small') - 

('coffee', 'shop') - 0.015873015873015872
('shop', 'the') - 0.3333333333333333
('bell', 'persons') - 0.058823529411764705
('persons', 'and') - 1.0
('hilton', 'honors') - 0.08163265306122448
('honors', 'person') - 0.25
('person', 'at') - 0.10344827586206896
('desk', 'they') - 0.025157232704402517
('even', 'lowered') - 0.006711409395973154
('lowered', 'our') - 1.0
('our', 'price') - 0.004310344827586207
('price', 'to') - 0.025
('to', 'moderately') - 0.0004784688995215311
('moderately', 'high') - 0.5
('high', 'when') - 0.02702702702702703
('we', 'inquired') - 0.0008952551477170994
('inquired', 'a') - 1.0
('few', 'days') - 0.10204081632653061
('days', 'before') - 0.06666666666666667
('before', 'our') - 0.030303030303030304
('stay', 'when') - 0.002457002457002457
('we', 'want') - 0.0017905102954341987
('stay', 'south') - 0.002457002457002457
('south', 'of') - 0.25
('river', 'downtown') - 0.023255813953488372
('back', 'my') - 0.0234375
('my', 'experiences') - 0.0037593984962406013
('experien

('my', '2') - 0.0018796992481203006
('2', 'year') - 0.01904761904761905
('old', 'son') - 0.05263157894736842
('son', 'was') - 0.07692307692307693
('was', 'awaken') - 0.000547645125958379
('awaken', 'by') - 1.0
('by', 'all') - 0.0125
('the', 'loud') - 0.0001886080724254998
('and', 'cursing') - 0.0003856536829926726
('cursing', 'then') - 1.0
('we', 'get') - 0.0017905102954341987
('get', 'up') - 0.027777777777777776
('at', '730') - 0.0013422818791946308
('730', 'to') - 1.0
('ready', 'only') - 0.043478260869565216
('water', 'after') - 0.014492753623188406
('after', 'the') - 0.08333333333333333
('the', 'maintenance') - 0.0005658242172764995
('maintenance', 'man') - 0.2222222222222222
('man', 'piddles') - 0.1111111111111111
('piddles', 'around') - 1.0
('around', 'with') - 0.02127659574468085
('with', 'it') - 0.008333333333333333
('half', 'hour') - 0.125
('time', 'to') - 0.02702702702702703
('to', 'shower') - 0.0009569377990430622
('to', 'ride') - 0.0004784688995215311
('ride', 'the') - 0.142

('by', 'river') - 0.00625
('river', 'departs') - 0.023255813953488372
('departs', 'a') - 1.0
('a', 'dock') - 0.0004450378282153983
('dock', 'that') - 0.25
('fairmont', 'and') - 0.038461538461538464
('are', 'lots') - 0.0032258064516129032
('club', 'room') - 0.043478260869565216
('is', 'worth') - 0.0011947431302270011
('extra', 'expense') - 0.029411764705882353
('expense', 'we') - 0.3333333333333333
('that', 'concentrates') - 0.0014367816091954023
('concentrates', 'on') - 1.0
('on', 'conferences') - 0.0015625
('conferences', 'rather') - 0.3333333333333333
('rather', 'than') - 0.16666666666666666
('normal', 'guest') - 0.14285714285714285
('guest', 'we') - 0.025
('it', 'hard') - 0.0011198208286674132
('that', 'particular') - 0.0014367816091954023
('particular', 'night') - 0.14285714285714285
('paid', 'above') - 0.022727272727272728
('the', 'odds') - 0.0001886080724254998
('odds', 'for') - 1.0
('this', '200') - 0.0016835016835016834
('200', 'for') - 0.23076923076923078
('felt', 'when') - 0.

('than', 'adequate') - 0.010101010101010102
('adequate', 'with') - 0.2
('beds', 'my') - 0.013333333333333334
('my', 'kids') - 0.0018796992481203006
('kids', 'enjoy') - 0.09090909090909091
('s', 'windows') - 0.004629629629629629
('windows', 'that') - 0.14285714285714285
('that', 'overlook') - 0.0014367816091954023
('overlook', 'the') - 1.0
('river', 'stayed') - 0.023255813953488372
('chicago', 'hilton') - 0.018292682926829267
('for', 'three') - 0.001949317738791423
('three', 'nights') - 0.22580645161290322
('and', 'from') - 0.0003856536829926726
('the', 'minute') - 0.0003772161448509996
('minute', 'we') - 0.045454545454545456
('walked', 'through') - 0.04
('door', 'i') - 0.028169014084507043
('impressed', 'checkin') - 0.05
('quick', 'easy') - 0.08333333333333333
('easy', 'and') - 0.08
('very', 'glad') - 0.0020964360587002098
('glad', 'that') - 0.3333333333333333
('in', 'despite') - 0.0007936507936507937
('despite', 'being') - 0.05263157894736842
('being', 'earlier') - 0.01587301587301587

('and', 'cooling') - 0.0003856536829926726
('cooling', 'systems') - 1.0
('systems', 'and') - 1.0
('entrance', 'most') - 0.07692307692307693
('most', 'recently') - 0.02
('recently', 'but') - 0.03571428571428571
('other', 'things') - 0.022900763358778626
('things', 'have') - 0.029411764705882353
('have', 'gone') - 0.0045871559633027525
('gone', 'downhill') - 0.18181818181818182
('downhill', 'for') - 0.5
('last', 'several') - 0.019230769230769232
('several', 'stays') - 0.023255813953488372
('stays', 'i') - 0.1111111111111111
('have', 'experienced') - 0.0022935779816513763
('experienced', 'very') - 0.125
('dirty', 'carpets') - 0.034482758620689655
('carpets', 'and') - 0.16666666666666666
('and', 'slow') - 0.0003856536829926726
('slow', 'service') - 0.07142857142857142
('service', 'rates') - 0.003937007874015748
('rates', 'have') - 0.09090909090909091
('have', 'skyrocketed') - 0.0022935779816513763
('skyrocketed', 'and') - 1.0
('and', 'now') - 0.0007713073659853452
('now', 'they') - 0.02380

('have', 'excellent') - 0.0022935779816513763
('excellent', 'customer') - 0.03278688524590164
('really', 'cared') - 0.00909090909090909
('cared', 'about') - 0.3333333333333333
('guests', 'there') - 0.018867924528301886
('was', 'always') - 0.002190580503833516
('always', 'coffee') - 0.024390243902439025
('tea', 'hot') - 0.125
('hot', 'chocolate') - 0.08333333333333333
('chocolate', 'in') - 0.16666666666666666
('lobby', 'good') - 0.008064516129032258
('good', 'hot') - 0.006211180124223602
('hot', 'breakfast') - 0.08333333333333333
('dinner', 'with') - 0.027777777777777776
('with', 'wine') - 0.0033333333333333335
('and', 'beer') - 0.0007713073659853452
('beer', 'the') - 0.16666666666666666
('and', 'willing') - 0.0007713073659853452
('willing', 'to') - 1.0
('to', 'help') - 0.003827751196172249
('help', 'out') - 0.07142857142857142
('for', 'directions') - 0.0009746588693957114
('the', 'job') - 0.0001886080724254998
('job', 'there') - 0.09090909090909091
('a', 'kitchen') - 0.0004450378282153

('just', 'picture') - 0.005235602094240838
('picture', 'the') - 0.125
('the', 'flying') - 0.0001886080724254998
('flying', 'ferrari') - 0.3333333333333333
('ferrari', 'scene') - 1.0
('scene', 'in') - 0.3333333333333333
('in', 'ferris') - 0.0007936507936507937
('ferris', 'bueller') - 1.0
('bueller', 's') - 1.0
('s', 'day') - 0.018518518518518517
('day', 'off') - 0.017241379310344827
('off', 'i') - 0.0136986301369863
('nt', 'imagine') - 0.005952380952380952
('imagine', 'where') - 0.09090909090909091
('where', 'in') - 0.01818181818181818
('the', 'crowded') - 0.0001886080724254998
('crowded', 'chicago') - 0.25
('chicago', 'traffic') - 0.003048780487804878
('traffic', 'they') - 0.125
('they', 'found') - 0.004081632653061225
('go', 'that') - 0.00909090909090909
('that', 'fast') - 0.0014367816091954023
('fast', 'though') - 0.125
('though', 'it') - 0.12962962962962962
('take', 'long') - 0.014705882352941176
('long', 'to') - 0.05128205128205128
('make', 'that') - 0.024390243902439025
('that', '

('despite', 'having') - 0.10526315789473684
('package', 'and') - 0.09090909090909091
('took', 'quite') - 0.02040816326530612
('a', 'polite') - 0.0004450378282153983
('but', 'seemingly') - 0.002053388090349076
('seemingly', 'less') - 1.0
('than', 'informed') - 0.010101010101010102
('informed', 'front') - 0.125
('to', 'correct') - 0.0014354066985645933
('the', 'problem') - 0.001131648434552999
('problem', 'would') - 0.02631578947368421
('absolutely', 'go') - 0.034482758620689655
('back', 'but') - 0.0078125
('but', 'probably') - 0.002053388090349076
('probably', 'not') - 0.058823529411764705
('special', 'package') - 0.038461538461538464
('package', 'we') - 0.18181818181818182
('we', 'choose') - 0.0008952551477170994
('choose', 'to') - 0.15789473684210525
('hotel', 'due') - 0.0009652509652509653
('high', 'rooftop') - 0.02702702702702703
('rooftop', 'ratings') - 0.25
('ratings', 'at') - 0.3333333333333333
('hotel', 'not') - 0.005791505791505791
('not', 'only') - 0.013029315960912053
('only'

('the', 'fire') - 0.0003772161448509996
('fire', 'alarm') - 0.3333333333333333
('alarm', 'was') - 0.3333333333333333
('wall', 'in') - 0.03333333333333333
('main', 'bedroom') - 0.07142857142857142
('bedroom', 'we') - 0.07692307692307693
('the', '37') - 0.0001886080724254998
('37', 'floor') - 1.0
('room', '3711') - 0.0010775862068965517
('3711', 'considering') - 1.0
('considering', 'what') - 0.125
('what', 'ones') - 0.00819672131147541
('ones', 'pays') - 0.16666666666666666
('pays', 'for') - 1.0
('would', 'think') - 0.012232415902140673
('pool', 'would') - 0.02127659574468085
('be', 'free') - 0.007407407407407408
('every', 'single') - 0.03389830508474576
('single', 'small') - 0.25
('small', 'amenity') - 0.008928571428571428
('amenity', 'i') - 0.3333333333333333
('not', 'plan') - 0.0016286644951140066
('on', 'staying') - 0.003125
('again', 'apart') - 0.006211180124223602
('apart', 'from') - 0.5
('the', 'trip') - 0.0003772161448509996
('great', 'stayed') - 0.0028328611898017
('here', 'as')

('constantly', 'have') - 0.3333333333333333
('to', 'touch') - 0.0004784688995215311
('touch', 'anything') - 0.07692307692307693
('anything', 'on') - 0.029411764705882353
('a', 'toilet') - 0.0004450378282153983
('toilet', 'if') - 0.041666666666666664
('was', 'fully') - 0.001095290251916758
('fully', 'booked') - 0.6666666666666666
('booked', 'with') - 0.011627906976744186
('a', 'teen') - 0.0004450378282153983
('teen', 'convention') - 1.0
('convention', 'fccla') - 0.07692307692307693
('fccla', 'and') - 1.0
('and', 'unfortunately') - 0.0007713073659853452
('unfortunately', 'for') - 0.05
('for', 'anyone') - 0.0009746588693957114
('anyone', 'else') - 0.08
('all', 'suffered') - 0.0035971223021582736
('suffered', 'for') - 1.0
('noise', 'at') - 0.034482758620689655
('night', 'from') - 0.005025125628140704
('from', 'every') - 0.0027397260273972603
('every', 'direction') - 0.01694915254237288
('direction', 'the') - 1.0
('only', 'way') - 0.00625
('to', 'describe') - 0.0004784688995215311
('describ

('and', 'park') - 0.0011569610489780178
('park', 'hyatt') - 0.10526315789473684
('hyatt', 'we') - 0.03225806451612903
('my', '2nd') - 0.0018796992481203006
('2nd', 'wedding') - 0.14285714285714285
('wedding', 'anniversary') - 0.05263157894736842
('at', 'homewood') - 0.0013422818791946308
('homewood', 'my') - 0.2
('impression', 'was') - 0.3333333333333333
('helpful', 'valet') - 0.010309278350515464
('valet', 'employees') - 0.03571428571428571
('employees', 'and') - 0.05263157894736842
('one', 'perk') - 0.0043859649122807015
('perk', 'of') - 1.0
('here', 'otherwise') - 0.006172839506172839
('no', 'problems') - 0.027237354085603113
('problems', 'checking') - 0.043478260869565216
('has', '19') - 0.008
('19', 'floors') - 1.0
('asked', 'that') - 0.041666666666666664
('the', 'highest') - 0.0003772161448509996
('highest', 'room') - 0.5
('room', 'avalible') - 0.0010775862068965517
('avalible', 'due') - 1.0
('husband', 'first') - 0.027777777777777776
('time', 'there') - 0.006756756756756757
('th

('contain', 'street') - 1.0
('street', 'noise') - 0.09836065573770492
('and', 'sidewalk') - 0.0003856536829926726
('sidewalk', 'conversations') - 0.3333333333333333
('conversations', 'were') - 0.3333333333333333
('were', 'heard') - 0.0017301038062283738
('heard', 'throughout') - 0.045454545454545456
('was', 'rather') - 0.000547645125958379
('rather', 'provocative') - 0.08333333333333333
('provocative', 'to') - 1.0
('to', 'listen') - 0.0004784688995215311
('the', 'couple') - 0.0001886080724254998
('couple', 'next') - 0.043478260869565216
('door', 'carousing') - 0.014084507042253521
('carousing', 'but') - 1.0
('but', 'also') - 0.004106776180698152
('also', 'rather') - 0.006944444444444444
('rather', 'tacky') - 0.08333333333333333
('tacky', 'unsettling') - 0.5
('unsettling', 'it') - 1.0
('took', 'more') - 0.02040816326530612
('then', '30') - 0.012987012987012988
('car', 'after') - 0.02564102564102564
('out', 'this') - 0.00510204081632653
('lost', 'it') - 0.0625
('james', 'sysndicate') - 0

('hilton', 'points') - 0.02040816326530612
('points', 'to') - 0.13333333333333333
('but', 'paid') - 0.002053388090349076
('paid', '35night') - 0.022727272727272728
('35night', 'to') - 1.0
('to', 'upgrade') - 0.0014354066985645933
('suite', 'we') - 0.047619047619047616
('room', '1519') - 0.0010775862068965517
('1519', 'i') - 1.0
('would', 'reccomend') - 0.0030581039755351682
('reccomend', 'people') - 0.5
('people', 'ask') - 0.012658227848101266
('an', 'odd') - 0.004464285714285714
('odd', 'numbered') - 0.25
('numbered', 'room') - 1.0
('view', 'looking') - 0.01904761904761905
('looking', 'north') - 0.046511627906976744
('north', 'on') - 0.1111111111111111
('ave', 'our') - 0.02564102564102564
('modern', 'with') - 0.03125
('2', 'large') - 0.009523809523809525
('large', 'hdflat') - 0.014705882352941176
('hdflat', 'panel') - 1.0
('panel', 'tv') - 1.0
('and', 'bose') - 0.0003856536829926726
('bose', '3') - 0.5
('3', 'speaker') - 0.015384615384615385
('speaker', 'sound') - 0.5
('system', '4') 

('my', 'walk') - 0.0018796992481203006
('walk', 'etc') - 0.014705882352941176
('etc', 'this') - 0.025
('is', 'reflective') - 0.0011947431302270011
('reflective', 'of') - 1.0
('staff', 'i') - 0.018656716417910446
('encountered', 'the') - 0.14285714285714285
('desk', 'promptly') - 0.006289308176100629
('promptly', 'checked') - 0.25
('in', 'definitely') - 0.0007936507936507937
('definitely', 'register') - 0.018867924528301886
('register', 'for') - 0.3333333333333333
('their', 'rewards') - 0.00909090909090909
('rewards', 'program') - 1.0
('program', 'before') - 0.1111111111111111
('arrive', 'the') - 0.16666666666666666
('complimentary', 'gym') - 0.045454545454545456
('gym', 'access') - 0.10526315789473684
('access', 'is') - 0.06666666666666667
('excellent', 'plus') - 0.01639344262295082
('plus', 'you') - 0.06666666666666667
('you', 'save') - 0.002242152466367713
('save', 'on') - 0.2
('on', 'internet') - 0.0015625
('access', 'fees') - 0.03333333333333333
('fees', 'which') - 0.16666666666666

('34th', 'floor') - 1.0
('first', 'impressions') - 0.009174311926605505
('impressions', 'were') - 1.0
('were', 'that') - 0.0034602076124567475
('everything', 'appeared') - 0.013888888888888888
('be', 'clean') - 0.007407407407407408
('was', 'undergoing') - 0.000547645125958379
('undergoing', 'renovation') - 0.3333333333333333
('renovation', 'at') - 0.09090909090909091
('entrance', 'but') - 0.07692307692307693
('was', 'otherwise') - 0.000547645125958379
('otherwise', 'ok') - 0.08333333333333333
('ok', 'lots') - 0.047619047619047616
('of', 'conferences') - 0.0009560229445506692
('conferences', 'were') - 0.6666666666666666
('taking', 'place') - 0.1111111111111111
('place', 'so') - 0.011111111111111112
('elevators', 'was') - 0.03125
('was', 'long') - 0.000547645125958379
('long', 'seemed') - 0.02564102564102564
('seemed', 'like') - 0.08333333333333333
('like', 'only') - 0.006944444444444444
('were', 'working') - 0.0017301038062283738
('the', '6') - 0.0005658242172764995
('6', 'that') - 0.05

('is', 'indoors') - 0.0011947431302270011
('indoors', 'but') - 0.5
('but', 'looks') - 0.002053388090349076
('looks', 'much') - 0.1
('much', 'larger') - 0.011764705882352941
('larger', 'on') - 0.15384615384615385
('week', 'the') - 0.02857142857142857
('wonderful', 'location') - 0.04918032786885246
('and', 'prompt') - 0.0007713073659853452
('prompt', 'we') - 0.2
('we', 'highly') - 0.0008952551477170994
('any', 'travel') - 0.013888888888888888
('travel', 'needs') - 0.03125
('needs', 'whether') - 0.09090909090909091
('whether', 'it') - 0.5
('it', 'be') - 0.0011198208286674132
('be', 'for') - 0.003703703703703704
('business', 'or') - 0.041666666666666664
('or', 'pleasure') - 0.012269938650306749
('pleasure', 'took') - 0.125
('trip', 'with') - 0.017543859649122806
('wife', 'got') - 0.02564102564102564
('with', 'valet') - 0.0016666666666666668
('valet', 'included') - 0.03571428571428571
('included', 'paid') - 0.06666666666666667
('paid', 'a') - 0.045454545454545456
('couple', 'extra') - 0.043

('low', 'chairs') - 0.07142857142857142
('chairs', 'at') - 0.125
('corner', 'light') - 0.043478260869565216
('fixtures', 'did') - 0.1
('do', 'much') - 0.018404907975460124
('to', 'brighten') - 0.0004784688995215311
('brighten', 'things') - 1.0
('things', 'up') - 0.029411764705882353
('up', 'since') - 0.00546448087431694
('was', 'here') - 0.001095290251916758
('really', 'wanted') - 0.00909090909090909
('wanted', 'a') - 0.13636363636363635
('bright', 'environment') - 0.14285714285714285
('environment', 'to') - 0.5
('to', 'wake') - 0.0004784688995215311
('wake', 'up') - 0.8
('up', 'early') - 0.00546448087431694
('early', 'and') - 0.034482758620689655
('and', 'start') - 0.0003856536829926726
('start', 'the') - 0.14285714285714285
('the', 'workday') - 0.0001886080724254998
('workday', 'in') - 1.0
('have', 'as') - 0.0022935779816513763
('a', 'cheerful') - 0.0004450378282153983
('cheerful', 'office') - 0.25
('office', 'for') - 0.125
('for', 'midday') - 0.0009746588693957114
('midday', 'calls'

('m', 'finally') - 0.02702702702702703
('finally', 'catching') - 0.02564102564102564
('catching', 'up') - 1.0
('past', 'several') - 0.05263157894736842
('several', 'months') - 0.023255813953488372
('months', 'a') - 0.125
('a', 'dear') - 0.0004450378282153983
('dear', 'friend') - 0.5
('friend', 'and') - 0.26666666666666666
('regency', 'in') - 0.14285714285714285
('late', 'october') - 0.03333333333333333
('october', '2007') - 0.5
('2007', 'for') - 0.3333333333333333
('night', 'while') - 0.005025125628140704
('visiting', 'a') - 0.09090909090909091
('and', 'her') - 0.0007713073659853452
('her', 'husband') - 0.024390243902439025
('husband', 'from') - 0.027777777777777776
('from', 'out') - 0.0027397260273972603
('town', 'this') - 0.08695652173913043
('perfect', 'imo') - 0.02564102564102564
('imo', 'easy') - 1.0
('easy', 'check') - 0.04
('out', 'lovely') - 0.00510204081632653
('lovely', 'clean') - 0.045454545454545456
('comfortable', 'rooms') - 0.008849557522123894
('area', 'pretty') - 0.0142

('years', 'is') - 0.041666666666666664
('is', 'being') - 0.0011947431302270011
('being', 'eliminated') - 0.015873015873015872
('eliminated', 'so') - 0.3333333333333333
('so', 'in') - 0.0035714285714285713
('other', 'words') - 0.007633587786259542
('words', 'anyting') - 0.5
('anyting', 'to') - 1.0
('with', 'service') - 0.0016666666666666668
('gone', 'from') - 0.18181818181818182
('i', 'hear') - 0.0011689070718877848
('hear', 'they') - 0.07142857142857142
('to', 'begin') - 0.0004784688995215311
('begin', 'renovations') - 0.3333333333333333
('in', 'december') - 0.0015873015873015873
('december', 'but') - 0.16666666666666666
('no', 'excuse') - 0.007782101167315175
('excuse', 'to') - 0.5
('let', 'an') - 0.03571428571428571
('old', 'glory') - 0.017543859649122806
('glory', 'go') - 1.0
('to', 'pot') - 0.0004784688995215311
('pot', 'in') - 1.0
('meantime', 'we') - 0.25
('we', 'wo') - 0.0017905102954341987
('july', 'or') - 0.1111111111111111
('or', 'for') - 0.012269938650306749
('thanksgiving',

('has', 'limited') - 0.008
('limited', 'hours') - 0.3333333333333333
('no', 'access') - 0.0038910505836575876
('access', 'outside') - 0.03333333333333333
('of', 'those') - 0.0019120458891013384
('those', 'hours') - 0.07692307692307693
('hours', 'other') - 0.038461538461538464
('other', 'areas') - 0.007633587786259542
('areas', 'similarly') - 0.058823529411764705
('similarly', 'lack') - 0.5
('lack', 'the') - 0.1
('the', 'level') - 0.0001886080724254998
('service', 'that') - 0.011811023622047244
('is', 'expected') - 0.0023894862604540022
('expected', 'for') - 0.09090909090909091
('this', 'type') - 0.003367003367003367
('of', 'hotel') - 0.004780114722753346
('the', 'individuals') - 0.0001886080724254998
('individuals', 'at') - 0.5
('concierge', 'desk') - 0.02
('twice', 'gave') - 0.08333333333333333
('us', 'bad') - 0.004651162790697674
('bad', 'information') - 0.022222222222222223
('about', 'public') - 0.005555555555555556
('transportation', 'and') - 0.1111111111111111
('the', 'hours') - 0

('the', 'problems') - 0.0007544322897019992
('problems', 'he') - 0.043478260869565216
('horribly', 'condescending') - 0.3333333333333333
('and', 'cared') - 0.0003856536829926726
('cared', 'very') - 0.3333333333333333
('very', 'little') - 0.0041928721174004195
('little', 'i') - 0.0196078431372549
('a', 'hampon') - 0.0004450378282153983
('hampon', 'inn') - 1.0
('inn', 'when') - 0.058823529411764705
('they', 'screwed') - 0.0020408163265306124
('up', 'our') - 0.01639344262295082
('vouchers', 'i') - 0.5
('to', 'physically') - 0.0004784688995215311
('physically', 'go') - 0.3333333333333333
('and', 'question') - 0.0003856536829926726
('question', 'it') - 0.14285714285714285
('said', 'we') - 0.04918032786885246
('will', 'send') - 0.006711409395973154
('send', 'them') - 0.08333333333333333
('them', 'right') - 0.014925373134328358
('right', 'up') - 0.015873015873015872
('no', 'that') - 0.0038910505836575876
('that', 'never') - 0.0014367816091954023
('happened', 'absolutely') - 0.0555555555555555

('might', 'encounter') - 0.05
('encounter', 'a') - 0.5
('this', 'policy') - 0.0016835016835016834
('policy', 'of') - 0.08333333333333333
('of', 'theirs') - 0.0009560229445506692
('theirs', 'to') - 1.0
('my', 'knowledge') - 0.0018796992481203006
('knowledge', 'they') - 0.5
('this', 'sort') - 0.0016835016835016834
('of', 'thing') - 0.0019120458891013384
('thing', 'before') - 0.02631578947368421
('use', 'credit') - 0.021739130434782608
('cards', 'its') - 0.25
('its', 'why') - 0.016129032258064516
('why', 'i') - 0.07142857142857142
('have', 'money') - 0.0022935779816513763
('money', 'in') - 0.030303030303030304
('a', 'recession') - 0.0004450378282153983
('recession', '2') - 1.0
('clean', 'but') - 0.015384615384615385
('but', 'their') - 0.002053388090349076
('their', 'in') - 0.00909090909090909
('room', 'accoutrements') - 0.0010775862068965517
('accoutrements', 'and') - 1.0
('and', 'mini') - 0.0003856536829926726
('bar', 'items') - 0.01098901098901099
('items', 'had') - 0.1111111111111111
(

('passport', 'people') - 1.0
('people', 'ca') - 0.012658227848101266
('even', 'figure') - 0.006711409395973154
('figure', 'out') - 0.5
('out', 'how') - 0.00510204081632653
('get', 'you') - 0.011111111111111112
('you', 'points') - 0.002242152466367713
('points', 'in') - 0.06666666666666667
('call', 'corporate') - 0.015873015873015872
('corporate', 'and') - 0.16666666666666666
('my', 'points') - 0.0018796992481203006
('points', 'for') - 0.06666666666666667
('stay', 'not') - 0.004914004914004914
('i', 'plan') - 0.0017533606078316774
('a', 'hyatt') - 0.0008900756564307966
('hyatt', 'property') - 0.03225806451612903
('property', 'again') - 0.03571428571428571
('again', 'by') - 0.006211180124223602
('way', 'apparently') - 0.01818181818181818
('still', 'undergoing') - 0.02040816326530612
('undergoing', 'changes') - 0.3333333333333333
('changes', 'because') - 0.3333333333333333
('my', 'minibar') - 0.0018796992481203006
('minibar', 'was') - 0.06666666666666667
('was', 'thinking') - 0.0005476451

('store', 'when') - 0.1
('finally', 'arrived') - 0.02564102564102564
('at', '2') - 0.004026845637583893
('2', 'am') - 0.01904761904761905
('car', 'so') - 0.02564102564102564
('we', 'looked') - 0.0008952551477170994
('looked', 'for') - 0.029411764705882353
('parking', 'ourselves') - 0.037037037037037035
('ourselves', 'checking') - 0.25
('not', 'greeted') - 0.003257328990228013
('anyone', 'even') - 0.04
('receptionist', 'did') - 0.08333333333333333
('not', 'greet') - 0.0016286644951140066
('greet', 'us') - 1.0
('us', 'no') - 0.004651162790697674
('no', 'welcome') - 0.007782101167315175
('welcome', 'to') - 0.17647058823529413
('knickerbocker', 'nothing') - 0.047619047619047616
('reception', 'was') - 0.16666666666666666
('rude', 'which') - 0.02702702702702703
('me', 'think') - 0.008097165991902834
('think', 'what') - 0.019230769230769232
('what', 'kind') - 0.00819672131147541
('service', 'training') - 0.003937007874015748
('training', 'this') - 0.25
('guy', 'could') - 0.08333333333333333
(

('is', 'best') - 0.0011947431302270011
('hotel', 'bargin') - 0.0009652509652509653
('bargin', 'in') - 1.0
('chicago', 'if') - 0.003048780487804878
('not', 'overly') - 0.0016286644951140066
('overly', 'unhappy') - 0.5
('unhappy', 'with') - 0.3333333333333333
('just', 'redone') - 0.005235602094240838
('redone', 'by') - 1.0
('by', 'kimpton') - 0.00625
('kimpton', 'this') - 0.05263157894736842
('hotel', 'chain') - 0.0009652509652509653
('chain', 'that') - 0.125
('that', 'really') - 0.0028735632183908046
('really', 'provides') - 0.00909090909090909
('a', 'the') - 0.0004450378282153983
('the', 'sense') - 0.0001886080724254998
('sense', 'of') - 0.25
('of', 'escape') - 0.0009560229445506692
('escape', 'and') - 0.5
('and', 'fun') - 0.0007713073659853452
('fun', 'to') - 0.0625
('their', 'hotels') - 0.01818181818181818
('a', 'location') - 0.0004450378282153983
('of', 'lasalle') - 0.0009560229445506692
('lasalle', 'and') - 1.0
('the', 'cadillac') - 0.0001886080724254998
('cadillac', 'theater') - 1

('24', 'for') - 0.14285714285714285
('your', 'entire') - 0.009900990099009901
('even', 'begin') - 0.006711409395973154
('begin', 'on') - 0.3333333333333333
('on', 'what') - 0.0015625
('off', 'that') - 0.0273972602739726
('is', '2') - 0.0011947431302270011
('2', 'as') - 0.009523809523809525
('gold', 'or') - 0.14285714285714285
('or', 'platinum') - 0.006134969325153374
('platinum', 'member') - 1.0
('member', 'you') - 0.05555555555555555
('you', 'always') - 0.002242152466367713
('always', 'get') - 0.024390243902439025
('a', 'complimentary') - 0.0008900756564307966
('complimentary', 'water') - 0.045454545454545456
('water', 'bottle') - 0.014492753623188406
('bottle', 'in') - 0.058823529411764705
('offer', 'that') - 0.043478260869565216
('that', 'option') - 0.0014367816091954023
('option', 'here') - 0.16666666666666666
('fuss', 'about') - 0.5
('before', 'they') - 0.030303030303030304
('they', 'give') - 0.004081632653061225
('give', 'you') - 0.11538461538461539
('you', 'one') - 0.00224215246

('cookie', 'is') - 0.25
('nice', 'free') - 0.0056179775280898875
('wifi', 'which') - 0.037037037037037035
('which', 'worked') - 0.005681818181818182
('worked', 'to') - 0.14285714285714285
('to', 'varying') - 0.0004784688995215311
('varying', 'degrees') - 1.0
('degrees', 'throughout') - 0.125
('throughout', 'stay') - 0.0625
('bad', 'they') - 0.022222222222222223
('a', '45') - 0.0013351134846461949
('45', 'star') - 0.23076923076923078
('which', 'to') - 0.011363636363636364
('me', 'means') - 0.004048582995951417
('means', 'better') - 0.14285714285714285
('better', 'then') - 0.01098901098901099
('star', 'and') - 0.02
('and', 'online') - 0.0003856536829926726
('online', 'with') - 0.06666666666666667
('star', 'without') - 0.02
('pool', 'they') - 0.02127659574468085
('they', 'used') - 0.004081632653061225
('have', 'kiehls') - 0.0022935779816513763
('kiehls', 'toiletries') - 0.5
('toiletries', 'and') - 0.1
('now', 'have') - 0.047619047619047616
('have', 'harmony') - 0.0022935779816513763
('har

('negotiating', 'breakfast') - 0.5
('breakfast', 'we') - 0.01020408163265306
('2', 'children') - 0.009523809523809525
('very', 'inconvenient') - 0.0020964360587002098
('inconvenient', 'instead') - 0.5
('of', 'getting') - 0.0009560229445506692
('getting', 'up') - 0.03571428571428571
('and', 'going') - 0.0003856536829926726
('museum', 'as') - 0.14285714285714285
('had', 'planned') - 0.001851851851851852
('planned', 'we') - 0.25
('up', 'pack') - 0.00546448087431694
('pack', 'up') - 0.3333333333333333
('our', 'suitcases') - 0.0021551724137931034
('suitcases', 'check') - 0.5
('hotel', 'take') - 0.0009652509652509653
('cab', 'check') - 0.043478260869565216
('check', 'back') - 0.012345679012345678
('amalfi', 'only') - 0.038461538461538464
('only', 'then') - 0.00625
('then', 'could') - 0.012987012987012988
('could', 'we') - 0.005649717514124294
('we', 'head') - 0.0008952551477170994
('head', 'to') - 0.18181818181818182
('museum', 'we') - 0.14285714285714285
('s', 'stay') - 0.004629629629629629

('time', 'very') - 0.006756756756756757
('loop', 'directly') - 0.05263157894736842
('directly', 'south') - 0.047619047619047616
('7th', 'day') - 0.25
('day', 'adventist') - 0.008620689655172414
('adventist', 'church') - 1.0
('church', 'my') - 0.5
('upon', 'wacker') - 0.02631578947368421
('wacker', 'drive') - 0.6666666666666666
('drive', 'the') - 0.125
('the', 'aforementioned') - 0.0001886080724254998
('aforementioned', 'church') - 1.0
('church', 'and') - 0.5
('river', 'small') - 0.023255813953488372
('room', 'virtually') - 0.0010775862068965517
('virtually', 'no') - 0.4
('no', 'storage') - 0.0038910505836575876
('space', 'attended') - 0.045454545454545456
('conference', 'with') - 0.02564102564102564
('monaco', 'as') - 0.041666666666666664
('as', 'venue') - 0.003278688524590164
('venue', 'they') - 1.0
('they', 'provided') - 0.0020408163265306124
('provided', 'adequate') - 0.05263157894736842
('adequate', 'service') - 0.2
('problems', 'this') - 0.043478260869565216
('spent', 'with') - 0.

('i', 'visited') - 0.0005844535359438924
('visited', 'this') - 0.25
('magnificent', 'not') - 0.037037037037037035
('only', 'was') - 0.0125
('mini', 'palace') - 0.0625
('palace', 'but') - 1.0
('polite', 'helpful') - 0.09090909090909091
('professional', 'in') - 0.1111111111111111
('email', 'i') - 0.09090909090909091
('received', 'before') - 0.05
('left', 'for') - 0.02127659574468085
('concierge', 'les') - 0.02
('les', 'offered') - 1.0
('us', 'tickets') - 0.004651162790697674
('tickets', 'directions') - 0.25
('directions', 'reservations') - 0.2
('reservations', 'to') - 0.2
('to', 'anything') - 0.0004784688995215311
('anything', 'we') - 0.029411764705882353
('to', 'seedo') - 0.0004784688995215311
('seedo', 'he') - 1.0
('got', 'us') - 0.00819672131147541
('us', 'an') - 0.009302325581395349
('upgrade', 'on') - 0.034482758620689655
('when', 'someone') - 0.003663003663003663
('someone', 'rudely') - 0.037037037037037035
('rudely', 'butted') - 0.3333333333333333
('butted', 'in') - 1.0
('were', '

('sign', 'i') - 0.08333333333333333
('did', 'and') - 0.008097165991902834
('even', 'came') - 0.006711409395973154
('came', 'until') - 0.01818181818181818
('until', 'later') - 0.030303030303030304
('later', 'in') - 0.043478260869565216
('was', 'big') - 0.000547645125958379
('big', 'enough') - 0.023809523809523808
('enough', 'had') - 0.025
('drawers', 'suposedly') - 0.2
('suposedly', 'remodeled') - 1.0
('remodeled', 'but') - 0.3333333333333333
('but', 'carpet') - 0.002053388090349076
('carpet', 'did') - 0.06666666666666667
('nt', 'reach') - 0.002976190476190476
('reach', 'the') - 0.25
('wall', 'toilet') - 0.03333333333333333
('paper', 'holder') - 0.06666666666666667
('holder', 'fell') - 0.5
('off', 'constantly') - 0.0136986301369863
('constantly', 'and') - 0.3333333333333333
('head', 'was') - 0.09090909090909091
('was', 'broken') - 0.002738225629791895
('broken', 'the') - 0.1
('the', 'mirrors') - 0.0003772161448509996
('mirrors', 'were') - 0.5
('water', 'was') - 0.043478260869565216
('ho

('member', 'positives') - 0.05555555555555555
('positives', 'great') - 0.5
('location', 'daytime') - 0.004608294930875576
('daytime', 'staff') - 1.0
('at', 'desk') - 0.0013422818791946308
('two', 'bikes') - 0.0078125
('bikes', 'to') - 1.0
('to', 'borrow') - 0.0004784688995215311
('borrow', 'which') - 1.0
('is', 'handy') - 0.0011947431302270011
('handy', 'views') - 0.2
('views', 'could') - 0.038461538461538464
('higher', 'room') - 0.1
('room', 'checked') - 0.0010775862068965517
('the', 'raddison') - 0.0001886080724254998
('raddison', 'blu') - 1.0
('blu', 'across') - 1.0
('street', 'looked') - 0.01639344262295082
('looked', 'much') - 0.029411764705882353
('much', 'nicer') - 0.023529411764705882
('spacious', 'it') - 0.02631578947368421
('largest', 'flat') - 0.16666666666666666
('tv', 'i') - 0.022727272727272728
('seen', 'in') - 0.2727272727272727
('great', 'excuse') - 0.0028328611898017
('catch', 'up') - 0.3333333333333333
('on', 'some') - 0.0015625
('some', 'movies') - 0.0078125
('movies

('hotel', 'moved') - 0.0009652509652509653
('hotel', '2') - 0.0019305019305019305
('miles', 'away') - 0.3333333333333333
('away', 'in') - 0.07142857142857142
('than', 'desireable') - 0.010101010101010102
('desireable', 'location') - 1.0
('location', 'management') - 0.004608294930875576
('management', 'on') - 0.0625
('duty', 'was') - 0.16666666666666666
('was', 'unfriendly') - 0.000547645125958379
('unfriendly', 'and') - 0.5
('even', 'stated') - 0.006711409395973154
('call', 'security') - 0.015873015873015872
('security', 'if') - 0.05555555555555555
('not', 'leave') - 0.0016286644951140066
('leave', 'they') - 0.05263157894736842
('us', '15') - 0.004651162790697674
('15', 'for') - 0.09090909090909091
('for', 'cab') - 0.0009746588693957114
('fare', 'will') - 0.16666666666666666
('again', 'happened') - 0.006211180124223602
('to', 'numerous') - 0.0004784688995215311
('numerous', 'guests') - 0.25
('same', 'weekend') - 0.02631578947368421
('weekend', 'barely') - 0.015625
('barely', 'average')

('talbot', 'that') - 0.25
('that', 'since') - 0.0014367816091954023
('since', 'they') - 0.05
('small', 'boutique') - 0.026785714285714284
('could', 'provide') - 0.005649717514124294
('provide', 'special') - 0.25
('special', 'personal') - 0.038461538461538464
('personal', 'treatment') - 0.1
('treatment', 'nothing') - 0.3333333333333333
('nothing', 'could') - 0.022222222222222223
('been', 'further') - 0.008928571428571428
('further', 'from') - 0.3333333333333333
('truth', 'they') - 0.25
('they', 'failed') - 0.0020408163265306124
('to', 'honor') - 0.0009569377990430622
('honor', 'two') - 0.125
('two', 'requests') - 0.0078125
('requests', 'to') - 0.09090909090909091
('bar', 'open') - 0.01098901098901099
('open', 'after') - 0.022727272727272728
('our', 'reception') - 0.0021551724137931034
('reception', 'they') - 0.03333333333333333
('they', 'persistently') - 0.0020408163265306124
('persistently', 'confused') - 1.0
('confused', 'reservation') - 0.16666666666666666
('reservation', 'requests')

('big', 'red') - 0.023809523809523808
('red', 'stain') - 0.07692307692307693
('and', 'quicklyhid') - 0.0003856536829926726
('quicklyhid', 'changed') - 1.0
('changed', 'it') - 0.09090909090909091
('we', 'seen') - 0.0008952551477170994
('seen', 'it') - 0.09090909090909091
('he', 'apologized') - 0.011904761904761904
('apologized', 'and') - 0.6666666666666666
('and', 'offered') - 0.0003856536829926726
('bring', 'us') - 0.125
('new', 'ones') - 0.023809523809523808
('ones', 'once') - 0.16666666666666666
('we', 'denied') - 0.0008952551477170994
('denied', 'since') - 0.5
('so', 'late') - 0.0035714285714285713
('late', 'and') - 0.06666666666666667
('just', 'slept') - 0.010471204188481676
('sheets', 'the') - 0.04
('the', 'appeared') - 0.0001886080724254998
('clean', 'then') - 0.007692307692307693
('then', 'next') - 0.012987012987012988
('i', 'eagerly') - 0.0005844535359438924
('eagerly', 'showed') - 1.0
('showed', 'the') - 0.14285714285714285
('the', 'fronk') - 0.0001886080724254998
('fronk', 'd

('basic', 'one') - 0.14285714285714285
('one', 'the') - 0.0043859649122807015
('the', 'softest') - 0.0001886080724254998
('softest', 'sheets') - 1.0
('the', 'technology') - 0.0001886080724254998
('technology', 'in') - 0.5
('great', 'they') - 0.0028328611898017
('nice', 'upscale') - 0.0056179775280898875
('upscale', 'tolietries') - 0.14285714285714285
('tolietries', 'in') - 1.0
('bathrooms', 'for') - 0.05
('modern', 'room') - 0.03125
('still', 'comfortable') - 0.02040816326530612
('and', 'homey') - 0.0003856536829926726
('homey', 'the') - 0.5
('getting', 'wherever') - 0.03571428571428571
('wherever', 'you') - 1.0
('really', 'funky') - 0.00909090909090909
('funky', 'with') - 0.25
('an', 'interesting') - 0.004464285714285714
('interesting', 'martini') - 0.16666666666666666
('martini', 'menu') - 0.25
('menu', 'one') - 0.1111111111111111
('enjoyed', 'it') - 0.06896551724137931
('recommend', 'my') - 0.014492753623188406
('girlfriends', 'and') - 0.4
('stayed', '4') - 0.004310344827586207
('ta

('reservation', 'this') - 0.015625
('was', 'their') - 0.000547645125958379
('mistake', 'they') - 0.07142857142857142
('could', 'sit') - 0.005649717514124294
('hours', 'until') - 0.038461538461538464
('until', 'my') - 0.030303030303030304
('friend', 'arrived') - 0.06666666666666667
('try', 'and') - 0.03125
('and', 'help') - 0.0003856536829926726
('me', 'until') - 0.008097165991902834
('i', 'stood') - 0.0005844535359438924
('stood', 'there') - 1.0
('complained', 'for') - 0.09090909090909091
('for', 'twenty') - 0.0009746588693957114
('minutes', 'i') - 0.020833333333333332
('believe', 'that') - 0.08333333333333333
('manager', 'would') - 0.023809523809523808
('would', 'treat') - 0.0030581039755351682
('treat', 'a') - 0.14285714285714285
('customer', 'this') - 0.02702702702702703
('way', 'there') - 0.01818181818181818
('the', 'aggravation') - 0.0001886080724254998
('aggravation', 'my') - 1.0
('thru', 'their') - 0.1111111111111111
('website', 'when') - 0.037037037037037035
('when', 'called') 

('and', 'tools') - 0.0003856536829926726
('tools', 'we') - 1.0
('are', 'never') - 0.0032258064516129032
('never', 'staying') - 0.009174311926605505
('again', 'other') - 0.006211180124223602
('reviews', 'complain') - 0.02
('major', 'city') - 0.0625
('found', 'our') - 0.031746031746031744
('room', 'quite') - 0.0010775862068965517
('quite', 'comfortable') - 0.0196078431372549
('nice', 'furnishings') - 0.0056179775280898875
('furnishings', 'one') - 0.2
('one', 'feature') - 0.0043859649122807015
('feature', 'we') - 0.16666666666666666
('we', 'liked') - 0.0008952551477170994
('liked', 'is') - 0.1
('the', 'ability') - 0.0001886080724254998
('window', 'the') - 0.04
('the', 'knikerbocker') - 0.0001886080724254998
('knikerbocker', 'is') - 1.0
('hotel', 'right') - 0.0009652509652509653
('avenue', 'in') - 0.037037037037037035
('best', 'area') - 0.014285714285714285
('helpful', 'for') - 0.010309278350515464
('the', '116') - 0.0001886080724254998
('116', 'we') - 1.0
('paid', 'on') - 0.02272727272727

('quite', 'cold') - 0.0196078431372549
('cold', 'and') - 0.1111111111111111
('welcome', 'at') - 0.11764705882352941
('was', 'noisy') - 0.000547645125958379
('noisy', 'thin') - 0.07692307692307693
('hear', 'exactly') - 0.03571428571428571
('next', 'room') - 0.009345794392523364
('is', 'talking') - 0.0011947431302270011
('talking', 'about') - 0.25
('broken', 'too') - 0.05
('a', 'guy') - 0.0004450378282153983
('guy', 'came') - 0.08333333333333333
('took', 'like') - 0.02040816326530612
('anyway', 'given') - 0.2
('given', 'that') - 0.08
('didnt', 'live') - 0.1111111111111111
('expectations', 'after') - 0.05263157894736842
('the', '1st') - 0.0001886080724254998
('1st', 'time') - 0.5
('time', 'at') - 0.006756756756756757
('might', 'give') - 0.05
('try', 'after') - 0.03125
('2nd', 'time') - 0.2857142857142857
('decided', 'i') - 0.043478260869565216
('am', 'really') - 0.014492753623188406
('really', 'not') - 0.00909090909090909
('not', 'coming') - 0.0016286644951140066
('back', 'especially') - 

('space', 'you') - 0.045454545454545456
('upgrade', '2') - 0.034482758620689655
('2', 'since') - 0.009523809523809525
('been', 'recently') - 0.017857142857142856
('recently', 'rehabbed') - 0.03571428571428571
('rehabbed', 'you') - 0.5
('the', 'stuff') - 0.0001886080724254998
('stuff', 'that') - 0.09090909090909091
('the', 'covered') - 0.0001886080724254998
('covered', 'or') - 0.1111111111111111
('or', 'painted') - 0.006134969325153374
('painted', '3') - 0.25
('3', 'there') - 0.015384615384615385
('no', 'views') - 0.0038910505836575876
('hotel', 'now') - 0.0009652509652509653
('the', 'plusses') - 0.0001886080724254998
('plusses', '1') - 1.0
('awesome', 'probably') - 0.09090909090909091
('hotel', 'showers') - 0.0009652509652509653
('showers', 'i') - 0.2
('seen', 'rain') - 0.09090909090909091
('with', 'wand') - 0.0016666666666666668
('wand', 'great') - 0.5
('great', 'pressure') - 0.0028328611898017
('pressure', 'and') - 0.16666666666666666
('the', 'enclosure') - 0.0001886080724254998
('en

('set', 'me') - 0.17647058823529413
('massage', 'when') - 0.2
('reservation', 'at') - 0.046875
('spa', 'i') - 0.1
('requested', 'was') - 0.029411764705882353
('was', 'unavailable') - 0.000547645125958379
('unavailable', 'anthony') - 0.5
('anthony', 'said') - 0.2
('said', 'he') - 0.01639344262295082
('he', 'knew') - 0.023809523809523808
('this', 'spa') - 0.0016835016835016834
('nice', 'he') - 0.0056179775280898875
('he', 'set') - 0.011904761904761904
('and', 'quoted') - 0.0003856536829926726
('quoted', 'me') - 0.5
('of', '125') - 0.0009560229445506692
('125', 'when') - 1.0
('spa', 'which') - 0.1
('was', 'actually') - 0.001095290251916758
('a', 'seedy') - 0.0004450378282153983
('seedy', 'nail') - 1.0
('nail', 'and') - 1.0
('and', 'hair') - 0.0003856536829926726
('hair', 'salon') - 0.14285714285714285
('salon', 'i') - 1.0
('front', 'window') - 0.007936507936507936
('massage', 'for') - 0.2
('was', '100') - 0.000547645125958379
('100', 'with') - 0.08333333333333333
('with', '20') - 0.001666

('whenever', 'we') - 1.0
('return', 'before') - 0.037037037037037035
('did', 'read') - 0.004048582995951417
('was', 'concerned') - 0.000547645125958379
('concerned', 'i') - 0.3333333333333333
('up', 'booking') - 0.00546448087431694
('booking', 'it') - 0.07692307692307693
('it', 'based') - 0.0011198208286674132
('my', 'sisters') - 0.0018796992481203006
('sisters', 'recommendation') - 1.0
('recommendation', 'big') - 1.0
('mistake', 'the') - 0.07142857142857142
('temperature', 'in') - 0.25
('a', 'balmy') - 0.0004450378282153983
('balmy', '99') - 1.0
('99', 'degrees') - 0.2
('degrees', 'and') - 0.125
('temp', 'in') - 0.25
('about', '85') - 0.005555555555555556
('85', 'degrees') - 1.0
('degrees', 'when') - 0.125
('complain', 'at') - 0.07692307692307693
('at', '1100') - 0.0013422818791946308
('1100', 'am') - 1.0
('am', 'they') - 0.014492753623188406
('turn', 'the') - 0.0625
('temp', 'down') - 0.25
('to', '40degrees') - 0.0004784688995215311
('40degrees', 'we') - 1.0
('at', '730pm') - 0.00134

('and', 'iron') - 0.0003856536829926726
('iron', 'the') - 0.14285714285714285
('and', 'anxious') - 0.0003856536829926726
('please', 'i') - 0.14285714285714285
('i', 'recommend') - 0.0011689070718877848
('the', 'strongest') - 0.0001886080724254998
('strongest', 'possible') - 1.0
('possible', 'terms') - 0.125
('terms', 'it') - 0.25
('s', 'expensive') - 0.004629629629629629
('better', 'one') - 0.01098901098901099
('is', 'glossy') - 0.0011947431302270011
('glossy', 'and') - 1.0
('and', 'sexy') - 0.0003856536829926726
('sexy', 'but') - 0.5
('but', 'an') - 0.002053388090349076
('an', 'absolutely') - 0.004464285714285714
('absolutely', 'fantastic') - 0.06896551724137931
('absolutely', 'understand') - 0.034482758620689655
('why', 'tourists') - 0.03571428571428571
('tourists', 'would') - 1.0
('it', 'right') - 0.0033594624860022394
('river', 'cool') - 0.023255813953488372
('cool', 'art') - 0.03333333333333333
('deco', 'building') - 0.5
('street', 'regardless') - 0.01639344262295082
('regardless'

('snobbishness', 'inexcusable') - 1.0
('inexcusable', 'since') - 0.5
('we', 'the') - 0.0008952551477170994
('the', 'customers') - 0.0001886080724254998
('customers', 'are') - 0.14285714285714285
('are', 'paying') - 0.00967741935483871
('paying', 'to') - 0.11764705882352941
('their', 'overpriced') - 0.00909090909090909
('overpriced', 'hotel') - 0.09090909090909091
('hotel', 'perhaps') - 0.0009652509652509653
('most', 'annoying') - 0.02
('annoying', 'aspect') - 0.125
('aspect', 'of') - 1.0
('my', 'latest') - 0.0018796992481203006
('latest', 'stay') - 0.5
('down', '11') - 0.010752688172043012
('11', 'flights') - 0.25
('i', 'waited') - 0.0011689070718877848
('a', 'solid') - 0.0008900756564307966
('solid', '15') - 0.25
('which', 'when') - 0.005681818181818182
('it', 'arrived') - 0.0011198208286674132
('arrived', 'was') - 0.015873015873015872
('was', 'packed') - 0.000547645125958379
('packed', 'to') - 0.3333333333333333
('the', 'brim') - 0.0001886080724254998
('brim', 'i') - 1.0
('is', 'incr

('beat', 'staff') - 0.09090909090909091
('friendly', 'courteous') - 0.009708737864077669
('gym', 'was') - 0.05263157894736842
('was', 'substantial') - 0.000547645125958379
('substantial', 'and') - 1.0
('and', 'though') - 0.0003856536829926726
('nt', 'try') - 0.002976190476190476
('the', 'restaurantsroom') - 0.0001886080724254998
('restaurantsroom', 'service') - 1.0
('the', 'menus') - 0.0001886080724254998
('menus', 'looked') - 0.5
('looked', 'yummy') - 0.029411764705882353
('yummy', 'i') - 0.2
('in', 'chgo') - 0.0007936507936507937
('chgo', 'in') - 1.0
('on', 'avg') - 0.0015625
('avg', '12smo') - 1.0
('12smo', 'over') - 1.0
('past', 'three') - 0.05263157894736842
('three', 'years') - 0.03225806451612903
('is', 'at') - 0.0023894862604540022
('top', 'my') - 0.025
('my', 'list') - 0.0018796992481203006
('list', 'now') - 0.14285714285714285
('recommend', 'i') - 0.014492753623188406
('of', '149') - 0.0009560229445506692
('149', 'i') - 1.0
('was', 'worried') - 0.000547645125958379
('weekend'

('chair', 'in') - 0.06666666666666667
('too', 'low') - 0.014084507042253521
('low', 'for') - 0.07142857142857142
('desk', 'so') - 0.006289308176100629
('an', 'awkward') - 0.004464285714285714
('awkward', 'position') - 1.0
('position', 'while') - 0.5
('while', 'using') - 0.014705882352941176
('using', 'my') - 0.09090909090909091
('computer', 'which') - 0.125
('which', 'gave') - 0.005681818181818182
('back', 'ache') - 0.0078125
('ache', 'later') - 1.0
('later', 'fourth') - 0.043478260869565216
('fourth', 'on') - 0.5
('my', 'key') - 0.0018796992481203006
('key', 'did') - 0.058823529411764705
('one', 'which') - 0.0043859649122807015
('one', 'and') - 0.008771929824561403
('same', 'thing') - 0.02631578947368421
('thing', 'happened') - 0.02631578947368421
('happened', 'thankfully') - 0.05555555555555555
('a', 'security') - 0.0004450378282153983
('meet', 'me') - 0.2
('lobby', 'a') - 0.016129032258064516
('a', 'third') - 0.0004450378282153983
('time', 'then') - 0.006756756756756757
('he', 'came

('hotel', 'yet') - 0.0009652509652509653
('yet', 'but') - 0.058823529411764705
('but', 'am') - 0.002053388090349076
('am', 'already') - 0.014492753623188406
('already', 'disappointed') - 0.05555555555555555
('their', 'customer') - 0.00909090909090909
('intercontinental', 'but') - 0.05
('but', 'yesterday') - 0.002053388090349076
('yesterday', 'i') - 1.0
('read', 'some') - 0.047619047619047616
('some', 'online') - 0.0078125
('online', 'articles') - 0.06666666666666667
('articles', 'advertising') - 0.5
('advertising', 'the') - 1.0
('s', 'cyber') - 0.004629629629629629
('cyber', 'monday') - 1.0
('monday', 'special') - 0.25
('the', 'promo') - 0.0001886080724254998
('promo', 'states') - 1.0
('states', 'that') - 0.5
('you', 'buy') - 0.002242152466367713
('buy', 'a') - 0.3333333333333333
('certificate', 'from') - 0.2
('will', 'match') - 0.006711409395973154
('match', 'that') - 0.5
('that', 'amount') - 0.0014367816091954023
('amount', 'in') - 0.25
('in', 'james') - 0.0007936507936507937
('james

('up', 'call') - 0.00546448087431694
('call', 'but') - 0.015873015873015872
('nobody', 'called') - 0.08333333333333333
('complain', 'the') - 0.07692307692307693
('the', 'told') - 0.0001886080724254998
('phone', 'system') - 0.017543859649122806
('down', 'ordered') - 0.010752688172043012
('wrong', 'order') - 0.08333333333333333
('order', 'the') - 0.07142857142857142
('elevator', 'sounds') - 0.03333333333333333
('sounds', 'like') - 0.3333333333333333
('its', 'last') - 0.016129032258064516
('last', 'breath') - 0.019230769230769232
('breath', 'i') - 1.0
('anyone', 'patrick') - 0.04
('patrick', 'very') - 1.0
('washington', 'just') - 0.2
('business', 'booked') - 0.013888888888888888
('a', 'kingcorner') - 0.0004450378282153983
('kingcorner', 'room') - 1.0
('it', 'largest') - 0.0011198208286674132
('largest', 'room') - 0.16666666666666666
('chicago', 'incredibly') - 0.003048780487804878
('and', 'evening') - 0.0003856536829926726
('evening', 'cocktails') - 0.04
('cocktails', 'were') - 1.0
('nice

('you', 'either') - 0.002242152466367713
('their', 'ostentatious') - 0.00909090909090909
('ostentatious', 'dining') - 1.0
('room', 'or') - 0.0010775862068965517
('or', 'go') - 0.006134969325153374
('go', 'around') - 0.00909090909090909
('corner', 'to') - 0.043478260869565216
('to', 'starbucks') - 0.0004784688995215311
('starbucks', 'or') - 0.125
('or', 'mcdonald') - 0.006134969325153374
('mcdonald', 's') - 1.0
('s', 'internet') - 0.004629629629629629
('free', '129524') - 0.011235955056179775
('129524', 'hours') - 1.0
('shop', 'is') - 0.3333333333333333
('joke', 'tucked') - 0.5
('tucked', 'away') - 0.5
('corner', 'of') - 0.08695652173913043
('the', 'coat') - 0.0001886080724254998
('coat', 'check') - 1.0
('check', 'room') - 0.012345679012345678
('room', 'concierge') - 0.0010775862068965517
('concierge', 'services') - 0.04
('services', 'have') - 0.1
('have', 'limited') - 0.0022935779816513763
('is', 'open') - 0.0023894862604540022
('open', 'only') - 0.022727272727272728
('only', 'during')

('terrific', 'deal') - 0.09090909090909091
('visited', 'for') - 0.25
('my', '40th') - 0.0018796992481203006
('birthday', 'we') - 0.125
('so', 'was') - 0.0035714285714285713
('sure', 'where') - 0.029411764705882353
('stay', 'found') - 0.002457002457002457
('travelzoo', 'great') - 0.2
('rate', 'staff') - 0.019230769230769232
('great', 'enjoyed') - 0.0028328611898017
('still', 'like') - 0.02040816326530612
('like', 'nyc') - 0.006944444444444444
('nyc', 'better') - 0.5
('better', 'great') - 0.01098901098901099
('great', 'great') - 0.0028328611898017
('last', 'march') - 0.019230769230769232
('march', 'it') - 0.2
('pleasant', 'stay') - 0.05
('ok', 'a') - 0.047619047619047616
('a', '27') - 0.0004450378282153983
('27', 'crt') - 0.5
('crt', 'flat') - 1.0
('the', 'coincierge') - 0.0001886080724254998
('coincierge', 'was') - 1.0
('friendly', 'when') - 0.009708737864077669
('very', 'cleaned') - 0.0020964360587002098
('cleaned', 'when') - 0.07692307692307693
('ordered', 'some') - 0.0588235294117647

('fire', 'stairs') - 0.3333333333333333
('stairs', 'up') - 0.125
('the', '10th') - 0.0001886080724254998
('10th', 'floor') - 1.0
('floor', 'me') - 0.0072992700729927005
('me', 'oh') - 0.004048582995951417
('oh', 'wtf') - 0.058823529411764705
('wtf', 'so') - 1.0
('bar', 'on') - 0.01098901098901099
('wait', '15') - 0.034482758620689655
('our', 'fucking') - 0.0021551724137931034
('fucking', 'hotel') - 1.0
('get', 'bent') - 0.005555555555555556
('bent', 'we') - 0.3333333333333333
('here', 'not') - 0.006172839506172839
('just', 'drinking') - 0.005235602094240838
('drinking', 'in') - 0.3333333333333333
('your', 'bar') - 0.009900990099009901
('bar', 'do') - 0.01098901098901099
('you', 'imagine') - 0.002242152466367713
('imagine', 'getting') - 0.09090909090909091
('getting', 'there') - 0.03571428571428571
('and', 'overlooking') - 0.0003856536829926726
('ready', 'with') - 0.043478260869565216
('with', 'used') - 0.0016666666666666668
('used', 'towels') - 0.02857142857142857
('the', 'mouthwash') 

('unusable', 'something') - 1.0
('nt', 'discover') - 0.002976190476190476
('discover', 'until') - 0.16666666666666666
('until', 'caught') - 0.030303030303030304
('an', 'expected') - 0.004464285714285714
('expected', 'downpour') - 0.045454545454545456
('downpour', 'i') - 1.0
('was', 'tempted') - 0.000547645125958379
('tempted', 'to') - 1.0
('to', 'throw') - 0.0004784688995215311
('throw', 'it') - 0.25
('but', 'remembered') - 0.002053388090349076
('it', 'if') - 0.0011198208286674132
('nt', 'return') - 0.002976190476190476
('its', 'total') - 0.016129032258064516
('total', 'lack') - 0.16666666666666666
('of', 'usefulness') - 0.0009560229445506692
('usefulness', 'refrigerator') - 1.0
('refrigerator', 'forget') - 0.25
('forget', 'it') - 0.5
('it', 'shampoo') - 0.0011198208286674132
('shampoo', 'for') - 0.07142857142857142
('a', 'row') - 0.0004450378282153983
('row', 'the') - 1.0
('the', 'housekeeper') - 0.0001886080724254998
('housekeeper', 'left') - 1.0
('left', 'several') - 0.0212765957446

('be', 'available') - 0.003703703703703704
('in', 'she') - 0.0007936507936507937
('she', 'went') - 0.014925373134328358
('far', 'to') - 0.041666666666666664
('to', 'but') - 0.0004784688995215311
('but', 'two') - 0.002053388090349076
('reservation', 'paper') - 0.015625
('paper', 'work') - 0.06666666666666667
('the', 'operator') - 0.0001886080724254998
('operator', 'said') - 0.25
('are', 'is') - 0.0032258064516129032
('one', 'two') - 0.0043859649122807015
('thing', 'we') - 0.02631578947368421
('a', 'fold') - 0.0004450378282153983
('fold', 'up') - 1.0
('up', 'bed') - 0.00546448087431694
('that', 'took') - 0.0014367816091954023
('took', 'away') - 0.02040816326530612
('away', 'half') - 0.017857142857142856
('area', 'now') - 0.014285714285714285
('of', '34900') - 0.0009560229445506692
('34900', 'for') - 1.0
('mess', 'mentioned') - 0.2
('breakfast', 'out') - 0.01020408163265306
('am', 'pretty') - 0.014492753623188406
('pretty', 'sure') - 0.043478260869565216
('include', 'this') - 0.5
('you', 

('0', 'for') - 1.0
('for', 'hotel') - 0.0009746588693957114
('hotel', '1') - 0.0009652509652509653
('the', 'still') - 0.0001886080724254998
('nice', 'service') - 0.0056179775280898875
('service', 'since') - 0.003937007874015748
('fault', 'all') - 0.1111111111111111
('is', 'thank') - 0.0011947431302270011
('goodness', 'my') - 0.3333333333333333
('company', 'won') - 0.0625
('won', 't') - 0.2
('t', 'hold') - 0.3333333333333333
('chicago', 'next') - 0.003048780487804878
('year', 'or') - 0.02702702702702703
('or', 'at') - 0.006134969325153374
('least', 'at') - 0.03571428571428571
('the', 'fitzpatrick') - 0.0001886080724254998
('fitzpatrick', 'in') - 0.5
('early', 'june') - 0.034482758620689655
('june', '2004') - 0.125
('2004', 'for') - 0.3333333333333333
('my', 'birthdaygreat') - 0.0018796992481203006
('birthdaygreat', 'hotel') - 1.0
('location', 'provides') - 0.004608294930875576
('provides', 'easy') - 0.2
('pier', 'marshall') - 0.03333333333333333
('marshall', 'field') - 1.0
('field', 'mi

## Part 4

>  Calculate Unigram total log probability using UNK method by creating Implicit vocabulary with size v and choosing top v words by frequency and replacing rest with UNK. Here size is choosen as 3076 where the bottom tokens have occurence less than frequency of 2.


In [72]:
def calculate_unigram_log_prob_unk_method1(test_data, unigrams):
    total_log_prob = 0
    total_words = sum(unigrams.values())
    unigram_probs = defaultdict(float)
    
    unigrams_sorted = sorted(unigrams, key=unigrams.get, reverse = True)
    
    for i in range(3076):
        word = unigrams_sorted[i]
        unigram_probs[word] = (unigrams.get(word, 0)) / total_words

    unigram_probs['<UNK>'] = (len(unigrams)-3076) / total_words
    

    for word in test_data:
        if word not in unigram_probs:
            word = '<UNK>'
        total_log_prob += math.log(unigram_probs[word])


    return total_log_prob 

> Calculate Unigram total log probability using UNK method as described above and do laplace smoothing on the same.


In [73]:
def calculate_unigram_log_prob_unk_method1_laplace(test_data, unigrams):
    total_log_prob = 0
    total_words = sum(unigrams.values())
    unigram_probs = defaultdict(float)
    vocab_size_new = 3076
    
    unigrams_sorted = sorted(unigrams, key = unigrams.get, reverse = True)

    for i in range(3076):
        word = unigrams_sorted[i]
        unigram_probs[word] = (unigrams.get(word, 0) + 1) / (total_words + vocab_size_new)

    unigram_probs['<UNK>'] = ((len(unigrams) - vocab_size_new) + 1) / (total_words + vocab_size_new)
    
    for word in test_data:
        if word not in unigram_probs:
            word = '<UNK>'
        total_log_prob += math.log(unigram_probs[word])
        
    return total_log_prob

> Calculate Unigram total log probability using UNK method as described above and do add-k smoothing on the same.

In [74]:
def calculate_unigram_log_prob_unk_method1_add_k(test_data, unigrams, k):
    total_log_prob = 0
    total_words = sum(unigrams.values())
    unigram_probs = defaultdict(float)
    vocab_size_new = 3076
     
    unigrams_sorted = sorted(unigrams, key = unigrams.get, reverse=True)

    for i in range(3076):
        word = unigrams_sorted[i]
        unigram_probs[word] = (unigrams.get(word, 0) + k) / (total_words + (k * vocab_size_new))

    unigram_probs['<UNK>'] = ((len(unigrams)-vocab_size_new) + k) / (total_words + ( k * vocab_size_new))
    
    for word in test_data:
        if word not in unigram_probs:
            word = '<UNK>'
        total_log_prob += math.log(unigram_probs[word])
        
    return total_log_prob

> Calculate Unigram total log probability using just laplace or add-k smoothing .

In [75]:
def calculate_unigram_log_prob_with_smoothing(test_data, unigrams, smoothing_method, vocab_size, k):
    total_log_prob = 0

    for word in test_data:
        if smoothing_method == 'laplace':
            prob = (unigrams.get(word, 0) + 1) / (sum(unigrams.values()) + vocab_size)
        elif smoothing_method == 'add-k':
            prob = (unigrams.get(word, 0) + k) / (sum(unigrams.values()) + k * vocab_size)
            
        total_log_prob += math.log(prob)

    return total_log_prob

> Calculate Unigram total log probability using UNK token tagging where we add a UNK tag in unigrams from train dataset where the probabilities are recalculated using laplace smoothing.

In [76]:
def calculate_unigram_log_prob_unk_method2_with_laplace(test_data, unigrams, vocab_size):
    total_log_prob = 0
    total_words = sum(unigrams.values())
    unigram_probs = defaultdict(float)
    
    for word in unigrams:
        unigram_probs[word] = (unigrams.get(word, 0)+ 1) / (total_words + vocab_size)
        
    unigram_probs['<UNK>'] = 1 / (total_words + vocab_size)


    for word in test_data:
        if word not in unigrams:
            word = '<UNK>'
        total_log_prob += math.log(unigram_probs[word])
    
    return total_log_prob

> Calculate Unigram total log probability using UNK token tagging where we add a UNK tag in unigrams from train dataset where the probabilities are recalculated using add-k smoothing.

In [77]:
def calculate_unigram_log_prob_unk_method2_add_k(test_data, unigrams, vocab_size, k):
    total_log_prob = 0
    total_words = sum(unigrams.values())
    total_words = sum(unigrams.values())
    unigram_probs = defaultdict(float)
    
    for word in unigrams:
        unigram_probs[word] = (unigrams.get(word, 0) + k) / (total_words + k * vocab_size)
        
    unigram_probs['<UNK>'] = ( k )/(total_words +  k * vocab_size)


    for word in test_data:
        if word not in unigrams:
            word = '<UNK>'
        total_log_prob += math.log(unigram_probs[word])

    return total_log_prob

> Calculate Bigram total log probability using UNK token tagging method by creating Implicit vocabulary with size v and choosing top v words by frequency and replacing rest with UNK. Here size is choosen as 3076 where the bottom tokens have occurence less than frequency of 2.

In [78]:
def calculate_bigram_log_prob_unk_method1(test_data, bigrams, unigrams):
    total_log_prob = 0
    N = len(test_data)
    unigram_probs = defaultdict(float)
    bigram_probs = defaultdict(float)
    bigrams_new = defaultdict(float)
    vocab_size_new = 3076
    
    unigrams_sorted = sorted(unigrams, key = unigrams.get, reverse = True)
    
    for i in range(3076):
        word = unigrams_sorted[i]
        unigram_probs[word] = (unigrams.get(word, 0)) + 1
        
    unigram_probs['<UNK>'] = (len(unigrams) - vocab_size_new)
    
    for bigram in bigrams:
        prev_word,current_word = bigram
        if prev_word not in unigram_probs:
            prev_word = '<UNK>'
        if current_word not in unigram_probs:
            current_word = '<UNK>'
        bigram = (prev_word, current_word)
        
        bigrams_new[bigram] = bigrams_new.get(bigram, 0) + 1

        
    for i in range(1, N):
        prev_word = test_data[i - 1]
        current_word = test_data[i]
        if prev_word not in unigram_probs:
            prev_word = '<UNK>'
        if current_word not in unigram_probs:
            current_word = '<UNK>'
        
        bigram = (prev_word, current_word)
        
        if bigram not in bigrams_new:
            bigram = ('<UNK>', '<UNK>')
        bigram_probs[bigram] = bigrams_new.get(bigram, 0) / unigram_probs[prev_word]
        total_log_prob += math.log(bigram_probs[bigram])
        
    return total_log_prob

> Using the same above method calculated bigrams total log probability of test_data with laplace smoothing.

In [79]:
def calculate_bigram_log_prob_unk_method1_laplace(test_data, bigrams, unigrams):
    total_log_prob = 0
    N = len(test_data)
    unigram_probs = defaultdict(float)
    bigram_probs = defaultdict(float)
    bigrams_new = defaultdict(float)
    vocab_size_new = 3076
    
    unigrams_sorted = sorted(unigrams, key = unigrams.get, reverse = True)
    
    for i in range(3076):
        word = unigrams_sorted[i]
        unigram_probs[word] = (unigrams.get(word, 0)) + 1

        
    unigram_probs['<UNK>'] = (len(unigrams) - vocab_size_new)
    
    for bigram in bigrams:
        prev_word, current_word = bigram
        if prev_word not in unigram_probs:
            prev_word = '<UNK>'
        if current_word not in unigram_probs:
            current_word = '<UNK>'
        bigram = (prev_word, current_word)
        
        bigrams_new[bigram] = bigrams_new.get(bigram, 0) + 1

        
    for i in range(1, N):
        prev_word = test_data[i - 1]
        current_word = test_data[i]
        if prev_word not in unigram_probs:
            prev_word = '<UNK>'
        if current_word not in unigram_probs:
            current_word = '<UNK>'
            
        bigram = (prev_word, current_word)
        
        if bigram not in bigrams_new:
            bigram = ('<UNK>', '<UNK>')
            
        bigram_probs[bigram] = (bigrams_new.get(bigram, 0) + 1) / (unigram_probs[prev_word] + vocab_size_new)
        total_log_prob += math.log(bigram_probs[bigram])

    return total_log_prob

> Using the same above method calculated bigrams total log probability of test_data with add-k smoothing.

In [80]:
def calculate_bigram_log_prob_unk_method1_add_k(test_data, bigrams, unigrams, k):
    total_log_prob = 0
    N = len(test_data)
    unigram_probs = defaultdict(float)
    bigram_probs = defaultdict(float)
    bigrams_new = defaultdict(float)
    vocab_size_new = 3076
    
    unigrams_sorted = sorted(unigrams, key = unigrams.get, reverse = True)
    
    for i in range(3076):
        word = unigrams_sorted[i]
        unigram_probs[word] = (unigrams.get(word, 0)) + 1

        
    unigram_probs['<UNK>'] = (len(unigrams) - vocab_size_new)

    
    for bigram in bigrams:
        prev_word, current_word = bigram
        if prev_word not in unigram_probs:
            prev_word = '<UNK>'
        if current_word not in unigram_probs:
            current_word = '<UNK>'
        bigram = (prev_word, current_word)
        
        bigrams_new[bigram] = bigrams_new.get(bigram,0) + 1

        
    for i in range(1, N):
        prev_word = test_data[i - 1]
        current_word = test_data[i]
        if prev_word not in unigram_probs:
            prev_word = '<UNK>'
        if current_word not in unigram_probs:
            current_word = '<UNK>'
        
        bigram = (prev_word, current_word)
        
        if bigram not in bigrams_new:
            bigram = ('<UNK>', '<UNK>')
        bigram_probs[bigram] = (bigrams_new.get(bigram) + k) / (unigram_probs[prev_word] + k * vocab_size_new)
        total_log_prob += math.log(bigram_probs[bigram])

    return total_log_prob

> Calculate total log probabilities using just laplace and add-k smoothing

In [81]:
def calculate_bigram_log_prob_smoothing(test_data, unigrams, bigrams, vocab_size, smoothing_method, k):
    total_log_prob = 0
    N = len(test_data)

    for i in range(1, N):
        prev_word = test_data[i - 1]
        current_word = test_data[i]

        if smoothing_method == 'laplace':
            prob = (bigrams.get((prev_word, current_word), 0) + 1) / (unigrams.get(prev_word, 0) + vocab_size)
        elif smoothing_method == 'add-k':
            prob = (bigrams.get((prev_word, current_word), 0) + k) / (unigrams.get(prev_word, 0) + k * vocab_size)

        total_log_prob += math.log(prob)
        
    return total_log_prob

## Part 5

> To calculate perplexity from total log probability and size of test data

In [82]:
def calculate_perplexity(total_log_prob, N):
    
    return math.exp(-total_log_prob / N)
    

> Calculating total log probabilites by invoking the already defined methods for unigrams

In [83]:
log_prob_unigram_unk_method1 = calculate_unigram_log_prob_unk_method1(test_data, unigrams)
log_prob_unigram_unk_method1_laplace = calculate_unigram_log_prob_unk_method1_laplace(test_data, unigrams)
log_prob_unigram_unk_method1_add_k = calculate_unigram_log_prob_unk_method1_add_k(test_data, unigrams, 0.5)
log_prob_unigram_laplace_smoothing = calculate_unigram_log_prob_with_smoothing(test_data, unigrams, 'laplace', vocab_size, 1)
log_prob_unigram_add_k_smoothing = calculate_unigram_log_prob_with_smoothing(test_data, unigrams, 'add-k', vocab_size, 0.5)
log_prob_unigram_unk_method2_laplace = calculate_unigram_log_prob_unk_method2_with_laplace(test_data, unigrams, vocab_size)
log_prob_unigram_unk_method2_add_k = calculate_unigram_log_prob_unk_method2_add_k(test_data, unigrams, vocab_size, 0.5)

> Calculating perplexity by invoking the already defined perplexity method for unigrams

In [84]:
perplexity_unigram_unk_method1 = calculate_perplexity(log_prob_unigram_unk_method1, N)
perplexity_unigram_unk_method1_laplace = calculate_perplexity(log_prob_unigram_unk_method1_laplace, N)
perplexity_unigram_unk_method1_add_k = calculate_perplexity(log_prob_unigram_unk_method1_add_k, N)
perplexity_unigram_laplace_smoothing = calculate_perplexity(log_prob_unigram_laplace_smoothing, N)
perplexity_unigram_add_k_smoothing = calculate_perplexity(log_prob_unigram_add_k_smoothing, N)
perplexity_unigram_unk_method2_laplace = calculate_perplexity(log_prob_unigram_unk_method2_laplace, N)
perplexity_unigram_unk_method2_add_k = calculate_perplexity(log_prob_unigram_unk_method2_add_k, N)

In [85]:
print(f"Unigram Perplexity with unk method 1: {perplexity_unigram_unk_method1}")
print(f"Unigram Perplexity with unk method 1 and laplace: {perplexity_unigram_unk_method1_laplace}")
print(f"Unigram Perplexity with unk method 1 and add-k: {perplexity_unigram_unk_method1_add_k}")
print(f"Unigram Perplexity with laplace smoothing: {perplexity_unigram_laplace_smoothing}")
print(f"Unigram Perplexity with add-k smoothing: {perplexity_unigram_add_k_smoothing}")
print(f"Unigram Perplexity with unk-laplace method 2: {perplexity_unigram_unk_method2_laplace}")
print(f"Unigram Perplexity with unk-add-k method 2: {perplexity_unigram_unk_method2_add_k}")

Unigram Perplexity with unk method 1: 331.65965712657663
Unigram Perplexity with unk method 1 and laplace: 334.572345391625
Unigram Perplexity with unk method 1 and add-k: 332.9410792379095
Unigram Perplexity with laplace smoothing: 550.4715746437497
Unigram Perplexity with add-k smoothing: 555.2862387883032
Unigram Perplexity with unk-laplace method 2: 550.4715746437497
Unigram Perplexity with unk-add-k method 2: 555.2862387883032


> Calculating total log probabilites by invoking the already defined methods for bigrams

In [86]:
log_prob_bigram_unk_method1 = calculate_bigram_log_prob_unk_method1(test_data, bigrams, unigrams)
log_prob_bigram_unk_method1_laplace = calculate_bigram_log_prob_unk_method1_laplace(test_data, bigrams, unigrams)
log_prob_bigram_unk_method1_add_k = calculate_bigram_log_prob_unk_method1_add_k(test_data, bigrams, unigrams, 0.5)
log_prob_bigram_laplace_smoothing = calculate_bigram_log_prob_smoothing(test_data, unigrams, bigrams, vocab_size, 'laplace', 1)
log_prob_bigram_add_k_smoothing1 = calculate_bigram_log_prob_smoothing(test_data, unigrams, bigrams, vocab_size, 'add-k', 0.5)

> Calculating perplexity by invoking the already defined perplexity method for unigrams

In [87]:
perplexity_bigram_unk_method1 = calculate_perplexity(log_prob_bigram_unk_method1, N)
perplexity_bigram_unk_method1_laplace = calculate_perplexity(log_prob_bigram_unk_method1_laplace, N)
perplexity_bigram_unk_method1_add_k = calculate_perplexity(log_prob_bigram_unk_method1_add_k, N)
perplexity_bigram_laplace_smoothing = calculate_perplexity(log_prob_bigram_laplace_smoothing, N)
perplexity_bigram_add_k_smoothing1 = calculate_perplexity(log_prob_bigram_add_k_smoothing1, N)

In [88]:
print(f"Bigram Perplexity with unk method 1: {perplexity_bigram_unk_method1}")
print(f"Bigram Perplexity with unk method 1 and laplace: {perplexity_bigram_unk_method1_laplace}")
print(f"Bigram Perplexity with unk method 1 and add-k: {perplexity_bigram_unk_method1_add_k}")
print(f"Bigram Perplexity with laplace smoothing: {perplexity_bigram_laplace_smoothing}")
print(f"Bigram Perplexity with add-k smoothing 1: {perplexity_bigram_add_k_smoothing1}")

Bigram Perplexity with unk method 1: 44.64114699183954
Bigram Perplexity with unk method 1 and laplace: 447.9366911925982
Bigram Perplexity with unk method 1 and add-k: 316.6566028361528
Bigram Perplexity with laplace smoothing: 1320.2702645843851
Bigram Perplexity with add-k smoothing 1: 975.3721300180292


## Calculating perplexities on train data for uni and bigrams

In [89]:
log_prob_unigram_unk_method1_train = calculate_unigram_log_prob_unk_method1(train_data, unigrams)
log_prob_unigram_unk_method1_laplace_train = calculate_unigram_log_prob_unk_method1_laplace(train_data, unigrams)
log_prob_unigram_unk_method1_add_k_train = calculate_unigram_log_prob_unk_method1_add_k(train_data, unigrams, 0.5)
log_prob_unigram_laplace_smoothing_train = calculate_unigram_log_prob_with_smoothing(train_data, unigrams, 'laplace', vocab_size, 1)
log_prob_unigram_add_k_smoothing_train = calculate_unigram_log_prob_with_smoothing(train_data, unigrams, 'add-k', vocab_size, 0.5)
log_prob_unigram_unk_method2_laplace_train = calculate_unigram_log_prob_unk_method2_with_laplace(train_data, unigrams, vocab_size)
log_prob_unigram_unk_method2_add_k_train = calculate_unigram_log_prob_unk_method2_add_k(train_data, unigrams, vocab_size, 0.5)

In [90]:
perplexity_unigram_unk_method1_train = calculate_perplexity(log_prob_unigram_unk_method1_train, n)
perplexity_unigram_unk_method1_laplace_train = calculate_perplexity(log_prob_unigram_unk_method1_laplace_train, n)
perplexity_unigram_unk_method1_add_k_train = calculate_perplexity(log_prob_unigram_unk_method1_add_k_train, n)
perplexity_unigram_laplace_smoothing_train = calculate_perplexity(log_prob_unigram_laplace_smoothing_train, n)
perplexity_unigram_add_k_smoothing_train = calculate_perplexity(log_prob_unigram_add_k_smoothing_train, n)
perplexity_unigram_unk_method2_laplace_train = calculate_perplexity(log_prob_unigram_unk_method2_laplace_train, n)
perplexity_unigram_unk_method2_add_k_train = calculate_perplexity(log_prob_unigram_unk_method2_add_k_train, n)

In [91]:
print(f"Unigram Perplexity with unk method 1 train: {perplexity_unigram_unk_method1_train}")
print(f"Unigram Perplexity with unk method 1 and laplace train: {perplexity_unigram_unk_method1_laplace_train}")
print(f"Unigram Perplexity with unk method 1 and add-k train: {perplexity_unigram_unk_method1_add_k_train}")
print(f"Unigram Perplexity with laplace smoothing train: {perplexity_unigram_laplace_smoothing_train}")
print(f"Unigram Perplexity with add-k smoothing train: {perplexity_unigram_add_k_smoothing_train}")
print(f"Unigram Perplexity with unk-laplace train method 2: {perplexity_unigram_unk_method2_laplace_train}")
print(f"Unigram Perplexity with unk-add-k train method 2: {perplexity_unigram_unk_method2_add_k_train}")

Unigram Perplexity with unk method 1 train: 381.20140975076384
Unigram Perplexity with unk method 1 and laplace train: 382.4474713719824
Unigram Perplexity with unk method 1 and add-k train: 381.5502932620644
Unigram Perplexity with laplace smoothing train: 529.447063921777
Unigram Perplexity with add-k smoothing train: 524.6629051608547
Unigram Perplexity with unk-laplace train method 2: 529.447063921777
Unigram Perplexity with unk-add-k train method 2: 524.6629051608547


In [92]:
log_prob_bigram_unk_method1_train = calculate_bigram_log_prob_unk_method1(train_data, bigrams, unigrams)
log_prob_bigram_unk_method1_laplace_train = calculate_bigram_log_prob_unk_method1_laplace(train_data, bigrams, unigrams)
log_prob_bigram_unk_method1_add_k_train = calculate_bigram_log_prob_unk_method1_add_k(train_data, bigrams, unigrams, 0.5)
log_prob_bigram_laplace_smoothing_train = calculate_bigram_log_prob_smoothing(train_data, unigrams, bigrams, vocab_size, 'laplace', 1)
log_prob_bigram_add_k_smoothing_train1 = calculate_bigram_log_prob_smoothing(train_data, unigrams, bigrams, vocab_size, 'add-k', 0.5)


In [93]:
perplexity_bigram_unk_method1_train = calculate_perplexity(log_prob_bigram_unk_method1_train, n)
perplexity_bigram_unk_method1_laplace_train = calculate_perplexity(log_prob_bigram_unk_method1_laplace_train, n)
perplexity_bigram_unk_method1_add_k_train = calculate_perplexity(log_prob_bigram_unk_method1_add_k_train, n)
perplexity_bigram_laplace_smoothing_train = calculate_perplexity(log_prob_bigram_laplace_smoothing_train, n)
perplexity_bigram_add_k_smoothing1_train = calculate_perplexity(log_prob_bigram_add_k_smoothing_train1, n)

In [94]:
print(f"Bigram Perplexity with unk method 1 train: {perplexity_bigram_unk_method1_train}")
print(f"Bigram Perplexity with unk method 1 and laplace train: {perplexity_bigram_unk_method1_laplace_train}")
print(f"Bigram Perplexity with unk method 1 and add-k train: {perplexity_bigram_unk_method1_add_k_train}")
print(f"Bigram Perplexity with laplace smoothing train: {perplexity_bigram_laplace_smoothing_train}")
print(f"Bigram Perplexity with add-k smoothing 1 train: {perplexity_bigram_add_k_smoothing1_train}")


Bigram Perplexity with unk method 1 train: 174.3369949437968
Bigram Perplexity with unk method 1 and laplace train: 1621.2261744384784
Bigram Perplexity with unk method 1 and add-k train: 1233.1386552876177
Bigram Perplexity with laplace smoothing train: 981.1156826539259
Bigram Perplexity with add-k smoothing 1 train: 620.2636955844678
