In [1]:
import nltk
import pandas as pd
import re
import time
import sys
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer


In [2]:
nltk.download("stopwords")
nltk.download("wordnet")
nltk.download("punkt")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ADMIN\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ADMIN\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ADMIN\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
df=pd.read_csv('Disaster_Tweet.csv')
df.head()

Unnamed: 0,id,keyword,location,text
0,0,,,Just happened a terrible car crash
1,2,,,"Heard about #earthquake is different cities, s..."
2,3,,,"there is a forest fire at spot pond, geese are..."
3,9,,,Apocalypse lighting. #Spokane #wildfires
4,11,,,Typhoon Soudelor kills 28 in China and Taiwan


In [4]:
pat1 = r'@[A-Za-z0-9]+'
pat2 = r'https?://[A-Za-z0-9./]+'
combined_pat =r'|'.join((pat1,pat2))
pat3 = r'[^a-zA-Z]'
combined_pat2 = r'|'.join((combined_pat,pat3))

In [5]:
res = []

for i in range(0, len(df['text'])):
  
  # remove patterns that follow the regex mentioned
  tweets = re.sub(combined_pat2,' ',df['text'][i])
  
  # split sentence based on spaces(words)
  tweets = tweets.split()

  # remove stop words
  tweets = [word for word in tweets if word not in set(stopwords.words('english'))]
  
  tweets = ' '.join(tweets)
  res.append(tweets)

res[:10]

['Just happened terrible car crash',
 'Heard earthquake different cities stay safe everyone',
 'forest fire spot pond geese fleeing across street I cannot save',
 'Apocalypse lighting Spokane wildfires',
 'Typhoon Soudelor kills China Taiwan',
 'We shaking It earthquake',
 'They probably still show life Arsenal yesterday eh EH',
 'Hey How',
 'What nice hat',
 'Fuck']

In [6]:
tokens = " \n ".join(res)
tokens = word_tokenize(tokens)
tokens[:10]

['Just',
 'happened',
 'terrible',
 'car',
 'crash',
 'Heard',
 'earthquake',
 'different',
 'cities',
 'stay']

In [7]:
inv_ind = {}
for i in range(10):
  check = res[i]
  for item in tokens:
    if item in check:
      if item not in inv_ind.keys():
        inv_ind[item] = set()
      inv_ind[item].add(i)
for i in sorted(inv_ind.keys()):
  print(i, ":", inv_ind[i])

A : {3, 6}
Apocalypse : {3}
Arsenal : {6}
C : {4}
Ch : {4}
China : {4}
E : {6}
EH : {6}
F : {9}
Fuck : {9}
H : {1, 6, 7}
He : {1, 7}
Hear : {1}
Heard : {1}
Hey : {7}
How : {7}
I : {2, 5}
It : {5}
J : {0}
Jus : {0}
Just : {0}
S : {3, 4}
So : {4}
Soudelor : {4}
Spokane : {3}
T : {4, 6}
Ta : {4}
Taiwan : {4}
Th : {6}
The : {6}
They : {6}
Typhoon : {4}
W : {8, 5}
We : {5}
What : {8}
across : {2}
af : {1}
ap : {0}
app : {0}
art : {1, 5}
ash : {0}
b : {0, 6}
c : {0, 1, 2, 3, 8, 9}
can : {2}
cann : {2}
car : {0}
cities : {1}
crash : {0}
cross : {2}
da : {6}
day : {6}
de : {4}
diff : {1}
different : {1}
e : {0, 1, 2, 3, 4, 5, 6, 7, 8}
earth : {1, 5}
earthquake : {1, 5}
ed : {0}
eh : {6}
el : {4}
en : {0, 1, 6}
est : {2, 6}
ever : {1}
every : {1}
everyone : {1}
f : {1, 2, 3, 6}
fe : {1, 6}
fire : {2, 3}
fires : {3}
fl : {2}
fleeing : {2}
fo : {2}
forest : {2}
g : {2, 3, 5}
geese : {2}
h : {0, 1, 3, 4, 5, 6, 8}
happen : {0}
happened : {0}
hat : {8}
hey : {6}
ice : {8}
ie : {1}
ight : {3}
ill : {

In [8]:
casefolded = []
for tweet in res:
  casefolded.append(tweet.casefold())
casefolded[:10] 

['just happened terrible car crash',
 'heard earthquake different cities stay safe everyone',
 'forest fire spot pond geese fleeing across street i cannot save',
 'apocalypse lighting spokane wildfires',
 'typhoon soudelor kills china taiwan',
 'we shaking it earthquake',
 'they probably still show life arsenal yesterday eh eh',
 'hey how',
 'what nice hat',
 'fuck']

In [9]:
lemmatizer = WordNetLemmatizer()
lmntz = [[lemmatizer.lemmatize(word) for word in word_tokenize(item)] for item in casefolded]
lemmatized = []
for item in lmntz:
  s = ""
  for words in item:
    s = s + words + " "
  lemmatized.append(s[:-1])
lemmatized[:10]

['just happened terrible car crash',
 'heard earthquake different city stay safe everyone',
 'forest fire spot pond goose fleeing across street i can not save',
 'apocalypse lighting spokane wildfire',
 'typhoon soudelor kill china taiwan',
 'we shaking it earthquake',
 'they probably still show life arsenal yesterday eh eh',
 'hey how',
 'what nice hat',
 'fuck']

In [10]:
ps = PorterStemmer()
stemmed = []
stm = [[ps.stem(word) for word in word_tokenize(item)] for item in lemmatized]
for item in stm:
  s = ""
  for words in item:
    s = s + words + " "
  stemmed.append(s[:-1])
stemmed[:10]

['just happen terribl car crash',
 'heard earthquak differ citi stay safe everyon',
 'forest fire spot pond goos flee across street i can not save',
 'apocalyps light spokan wildfir',
 'typhoon soudelor kill china taiwan',
 'we shake it earthquak',
 'they probabl still show life arsen yesterday eh eh',
 'hey how',
 'what nice hat',
 'fuck']

In [11]:
inv_ind2 = {}
for i in range(10):
  check = stemmed[i]
  for item in stemmed[i].split():
    if item in check:
      if item not in inv_ind2.keys():
        inv_ind2[item] = set()
      inv_ind2[item].add(i)
for i in sorted(inv_ind2.keys()):
  print(i, ":", inv_ind2[i])

across : {2}
apocalyps : {3}
arsen : {6}
can : {2}
car : {0}
china : {4}
citi : {1}
crash : {0}
differ : {1}
earthquak : {1, 5}
eh : {6}
everyon : {1}
fire : {2}
flee : {2}
forest : {2}
fuck : {9}
goos : {2}
happen : {0}
hat : {8}
heard : {1}
hey : {7}
how : {7}
i : {2}
it : {5}
just : {0}
kill : {4}
life : {6}
light : {3}
nice : {8}
not : {2}
pond : {2}
probabl : {6}
safe : {1}
save : {2}
shake : {5}
show : {6}
soudelor : {4}
spokan : {3}
spot : {2}
stay : {1}
still : {6}
street : {2}
taiwan : {4}
terribl : {0}
they : {6}
typhoon : {4}
we : {5}
what : {8}
wildfir : {3}
yesterday : {6}


In [12]:
pos_list = {}
for i in range(100):
  check = stemmed[i]
  for item in stemmed[i].split():
    if item in check:
      if item not in pos_list.keys():
        pos_list[item] = set()
      pos_list[item].add(i)
for i in sorted(pos_list.keys()):
  print(i, ":", pos_list[i])

a : {34, 39, 43, 23, 62}
abl : {81}
ablaz : {15, 16, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28}
accid : {29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 79}
across : {2}
afford : {51}
aftershock : {44, 45, 46, 47, 48, 49, 51, 52, 53, 54, 57, 58, 59}
age : {64}
ago : {62, 71}
air : {69, 72, 73, 74, 75, 84}
airplan : {64, 65, 66, 67, 68, 70, 71, 72, 73, 74, 61, 62, 63}
airport : {69, 63}
alfon : {26}
all : {99}
ambul : {75, 76, 77, 78, 79, 80, 82, 83, 84, 85, 86}
amp : {97, 91, 46, 20, 57, 27, 61}
an : {76}
and : {81, 41}
anger : {28}
anim : {35}
annihil : {96, 97, 98, 99, 87, 90, 91, 92, 93, 94, 95}
apocalyps : {3}
appropri : {95}
arsen : {6}
aug : {98}
auth : {97}
automat : {77}
aviat : {60}
away : {85, 30}
awesom : {14}
back : {29}
badg : {31}
band : {51}
bargain : {55}
bartend : {87}
basebal : {71}
basement : {55}
bay : {45}
bbc : {15}
be : {81}
becam : {61}
becom : {35, 95}
begin : {72, 73, 74}
belli : {76}
belov :

In [13]:
def AND(l1, l2):
  common = []
  for i in l1:
    if i in l2:
      common.append(i)
  print("AND :", common)

def OR(l1, l2):
  union = []
  for i in l1:
    union.append(i)
  for i in l2:
    if i not in union:
      union.append(i)
  print("OR :", union)

def NOT(l1):
  res = []
  for i in range(1, 101):
    if i not in l1:
      res.append(i)
  print("NOT :", res)

AND(pos_list["week"], pos_list["what"])
OR(pos_list["week"], pos_list["what"])
NOT(pos_list["week"])

AND : []
OR : [40, 92, 63, 8, 13]
NOT : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 93, 94, 95, 96, 97, 98, 99, 100]


In [14]:
pos_list2 = {}
for i in range(100):
  check = stemmed[i]
  for item in stemmed[i].split():
    if item not in pos_list2.keys():
      pos_list2[item] = {}
    if i not in pos_list2[item].keys():
      pos_list2[item][i] = set()
    pos_list2[item][i].add(stemmed[i].index(item))
pos_list_res = {}
for key in sorted(pos_list2.keys()):
  pos_list_res[key] = []
  pos_list_res[key].append(len(pos_list2[key]))
  pos_list_res[key].append(pos_list2[key])
for key, value in pos_list_res.items():
  print(key, ":",  value)

a : [5, {23: {1}, 34: {7}, 39: {29}, 43: {3}, 62: {0}}]
abl : [1, {81: {27}}]
ablaz : [12, {15: {27}, 16: {16}, 17: {68}, 20: {12}, 21: {9}, 22: {65}, 23: {19}, 24: {11}, 25: {29}, 26: {14}, 27: {79}, 28: {9}}]
accid : [31, {29: {0}, 30: {76}, 31: {17}, 32: {68}, 33: {48}, 34: {11}, 35: {49}, 36: {43}, 37: {59}, 38: {17}, 39: {53}, 40: {15}, 41: {0}, 42: {7}, 43: {32}, 60: {25}, 61: {29}, 62: {19}, 63: {52}, 64: {16}, 65: {8}, 66: {0}, 67: {8}, 68: {30}, 69: {14}, 70: {15}, 71: {46}, 72: {72}, 73: {84}, 74: {72}, 79: {37}}]
across : [1, {2: {32}}]
afford : [1, {51: {49}}]
aftershock : [13, {44: {48}, 45: {8}, 46: {10}, 47: {11}, 48: {5}, 49: {11}, 51: {10}, 52: {14}, 53: {11}, 54: {0}, 57: {27}, 58: {16}, 59: {0}}]
age : [1, {64: {101}}]
ago : [2, {62: {38}, 71: {5}}]
air : [6, {69: {24}, 72: {26}, 73: {38}, 74: {26}, 75: {26}, 84: {26}}]
airplan : [13, {61: {35}, 62: {11}, 63: {44}, 64: {8}, 65: {30}, 66: {13}, 67: {30}, 68: {22}, 70: {2}, 71: {38}, 72: {26}, 73: {38}, 74: {26}}]
airp

kira : [1, {43: {56}}]
know : [1, {63: {39}}]
korean : [1, {69: {39}}]
l : [1, {48: {27}}]
la : [1, {45: {19}}]
larg : [1, {32: {82}}]
last : [1, {48: {29}}]
laugh : [1, {78: {50}}]
lead : [1, {60: {14}}]
left : [1, {43: {38}}]
legal : [1, {36: {4}}]
len : [1, {32: {24}}]
leon : [1, {20: {25}}]
let : [1, {27: {10}}]
lew : [1, {34: {24}}]
lez : [1, {77: {48}}]
liber : [1, {97: {54}}]
life : [1, {6: {24}}]
light : [1, {3: {10}}]
like : [5, {10: {5}, 38: {7}, 42: {15}, 44: {6}, 78: {21}}]
line : [1, {63: {17}}]
listen : [1, {51: {34}}]
live : [1, {52: {9}}]
lol : [1, {61: {43}}]
lost : [1, {71: {23}}]
loud : [1, {46: {63}}]
love : [3, {30: {9}, 61: {66}, 95: {18}}]
luggag : [1, {66: {30}}]
m : [2, {39: {3}, 76: {2}}]
makeup : [1, {66: {64}}]
makinwa : [1, {17: {25}}]
man : [4, {21: {5}, 65: {14}, 67: {14}, 87: {31}}]
manga : [1, {35: {72}}]
map : [1, {56: {21}}]
market : [1, {15: {20}}]
marriag : [1, {17: {33}}]
mayor : [1, {27: {48}}]
medic : [2, {36: {10}, 82: {10}}]
met : [1, {40: {11}

In [15]:
phrase = "who say american dream dead"
words = phrase.split()
ind = []
ids = []
pos = []
try:
  for i in pos_list_res[words[0]][1].keys():
    ind.append(i)
  for i in ind:
    ids.append(i)
  test = 0
  for i in ind:
    temp = []
    for j in pos_list_res[words[0]][1].values():
      for t in j:
        temp.append(t)
    for j in temp:
      input = j
      pos = []
      if test == len(ind):
        break
      pos.append(ind[test])
      test += 1
      for k in range(len(words)):
        pos.append(input)
        input += 1 + len(words[k])
      for k in range(1, len(pos)):
        checking = []
        for ele in pos_list_res[words[k - 1]][1].values():
          for enter in ele:
            checking.append(enter)
        if pos[k] not in checking:
          if pos[0] in ids:
            ids.remove(pos[0])
  print("Phrase is present in:", ids)
except:
  print("Phrase has words that aren't present in any of the documents")

Phrase has words that aren't present in any of the documents


In [16]:
newsgroups_train=pd.read_csv('Disaster_Tweet.csv')

In [17]:
print(type(newsgroups_train))
print(list(newsgroups_train.text))

<class 'pandas.core.frame.DataFrame'>


In [18]:
newsgroups_train = pd.read_csv('Disaster_Tweet.csv')
# docs is list of documents of the class sci.space (emails)
docs = newsgroups_train["text"]

In [19]:
for i in newsgroups_train.text[:1]:
  print(i)

Just happened a terrible car crash


In [20]:
for i in newsgroups_train.text[:5]:
  print(word_tokenize(i))

['Just', 'happened', 'a', 'terrible', 'car', 'crash']
['Heard', 'about', '#', 'earthquake', 'is', 'different', 'cities', ',', 'stay', 'safe', 'everyone', '.']
['there', 'is', 'a', 'forest', 'fire', 'at', 'spot', 'pond', ',', 'geese', 'are', 'fleeing', 'across', 'the', 'street', ',', 'I', 'can', 'not', 'save', 'them', 'all']
['Apocalypse', 'lighting', '.', '#', 'Spokane', '#', 'wildfires']
['Typhoon', 'Soudelor', 'kills', '28', 'in', 'China', 'and', 'Taiwan']


In [21]:
pat1 = r'@[A-Za-z0-9]+'
pat2 = r'https?://[A-Za-z0-9./]+'
combined_pat =r'|'.join((pat1,pat2))
pat3 = r'[^a-zA-Z]'
combined_pat2 = r'|'.join((combined_pat,pat3))

result = []

for i in range(0, len(docs)):
  
  # remove patterns that follow the regex mentioned
  emails = re.sub(combined_pat2,' ',docs[i])
  
  # split sentence based on spaces(words)
  emails = emails.split()

  # remove stop words
  emails = [word for word in emails if word not in set(stopwords.words('english'))]
  
  emails = ' '.join(emails)
  result.append(emails)

result[:10]

['Just happened terrible car crash',
 'Heard earthquake different cities stay safe everyone',
 'forest fire spot pond geese fleeing across street I cannot save',
 'Apocalypse lighting Spokane wildfires',
 'Typhoon Soudelor kills China Taiwan',
 'We shaking It earthquake',
 'They probably still show life Arsenal yesterday eh EH',
 'Hey How',
 'What nice hat',
 'Fuck']

In [22]:
tokens1 = " \n ".join(result)
tokens1 = word_tokenize(tokens1)
tokens1[:10]

['Just',
 'happened',
 'terrible',
 'car',
 'crash',
 'Heard',
 'earthquake',
 'different',
 'cities',
 'stay']

In [23]:
casefolded = []
for tweet in result:
  casefolded.append(tweet.casefold())
casefolded[:10]

['just happened terrible car crash',
 'heard earthquake different cities stay safe everyone',
 'forest fire spot pond geese fleeing across street i cannot save',
 'apocalypse lighting spokane wildfires',
 'typhoon soudelor kills china taiwan',
 'we shaking it earthquake',
 'they probably still show life arsenal yesterday eh eh',
 'hey how',
 'what nice hat',
 'fuck']

In [24]:
lemmatizer = WordNetLemmatizer()
lmntz = [[lemmatizer.lemmatize(word) for word in word_tokenize(item)] for item in casefolded]
lemmatized = []
for item in lmntz:
  s = ""
  for words in item:
    s = s + words + " "
  lemmatized.append(s[:-1])
lemmatized[:10]

['just happened terrible car crash',
 'heard earthquake different city stay safe everyone',
 'forest fire spot pond goose fleeing across street i can not save',
 'apocalypse lighting spokane wildfire',
 'typhoon soudelor kill china taiwan',
 'we shaking it earthquake',
 'they probably still show life arsenal yesterday eh eh',
 'hey how',
 'what nice hat',
 'fuck']

In [25]:
ps = PorterStemmer()
stemmed = []
stm = [[ps.stem(word) for word in word_tokenize(item)] for item in lemmatized]
for item in stm:
  s = ""
  for words in item:
    s = s + words + " "
  stemmed.append(s[:-1])
stemmed[:10]

['just happen terribl car crash',
 'heard earthquak differ citi stay safe everyon',
 'forest fire spot pond goos flee across street i can not save',
 'apocalyps light spokan wildfir',
 'typhoon soudelor kill china taiwan',
 'we shake it earthquak',
 'they probabl still show life arsen yesterday eh eh',
 'hey how',
 'what nice hat',
 'fuck']

In [26]:
hash_table = {}
for i in range(len(stemmed)):
  check = stemmed[i]
  for item in stemmed[i].split():
    if item in check:
      if item not in hash_table.keys():
        hash_table[item] = set()
      hash_table[item].add(i)
for i in sorted(hash_table.keys()):
  print(i, ":", hash_table[i])

a : {512, 1538, 2566, 2060, 2061, 527, 2577, 23, 1053, 2081, 34, 39, 1064, 2599, 1066, 43, 2602, 2604, 1072, 1597, 62, 584, 2122, 3152, 2131, 2133, 3168, 1121, 1127, 1645, 2157, 2162, 627, 2674, 3186, 3192, 2691, 2187, 2700, 1169, 1681, 674, 677, 1705, 1706, 172, 1711, 177, 183, 1209, 2750, 1226, 203, 204, 2253, 723, 1748, 2260, 2275, 1764, 2279, 2280, 2282, 235, 747, 1772, 1774, 2283, 2287, 1777, 1781, 1783, 1784, 1794, 1292, 2319, 1809, 2836, 1301, 1302, 791, 2839, 1817, 1818, 284, 1820, 2845, 2851, 1319, 1841, 2365, 2885, 845, 2905, 355, 2923, 1901, 2927, 1392, 2417, 2424, 1401, 1922, 2946, 2962, 917, 408, 924, 1439, 417, 935, 1964, 956, 960, 3019, 1484, 2517, 2518, 2520, 2521, 1498, 2522, 476, 2523, 2525, 2526, 480, 2016, 2527, 483, 2528, 1000, 2538, 1518, 1011, 3069}
aapatwork : {200}
aaron : {1192}
ab : {955}
aba : {2275, 2279, 2280, 2282, 2283, 2287, 2428}
abandon : {1286, 1741, 493, 1743, 1453, 1745, 2905}
abba : {2925}
abbog : {1399}
abbott : {366}
abbswinston : {2872, 985, 28

attack : {1920, 1921, 1883, 3208, 2763, 2775, 3211, 1037, 270, 1038, 2830, 1042, 1046, 2778, 1307, 2078, 2780, 2297, 2087, 168, 424, 2864, 2865, 2866, 3128, 2876, 446, 2750, 2811, 2880, 2882, 1861, 1871, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 604, 1750, 998, 1891, 2289, 2803, 1908, 3189, 2295, 1912, 1754, 2298, 2772, 3196}
attempt : {1249, 2919, 3129, 1213, 3006}
attend : {841, 689, 2098, 2085}
attende : {3256}
attent : {1272, 457, 1611, 3175}
attila : {2629}
attitud : {410, 223}
attorney : {633}
atw : {472}
atx : {982}
au : {611, 2984, 946, 216, 2684}
aubrey : {2506}
auburn : {3195}
auckland : {1439}
aucklnd : {621}
auction : {1520}
audi : {1166}
audio : {1159, 2576, 111}
audiobook : {3152}
auditiontim : {581}
auditori : {642}
aug : {1505, 98, 868, 1095, 2088, 1773, 1902, 1716, 1653, 1622, 2900, 2584, 3124, 475, 1724, 541, 863}
august : {2912, 2882, 1507, 3170, 2629, 2922, 3146, 622, 2

brendan : {1507}
brevoort : {321}
brew : {2674, 1899}
brian : {357, 253, 695}
bribe : {3006}
bricktop : {1636}
bride : {1499, 695}
bridg : {513, 514, 515, 516, 517, 518, 583, 520, 521, 522, 523, 524, 525, 1598, 726, 2276, 1214, 511}
brief : {2256, 2851, 3069}
brigad : {538, 692}
brighton : {1531}
brine : {2365}
bring : {2240, 1029, 456, 393, 2410, 944, 1137, 1680, 307, 2096, 2706, 3126, 1752, 2234, 2843, 382, 2239}
britain : {2776, 2409}
british : {2784, 2409, 2429}
britney : {1736, 150}
briton : {2434}
brittani : {2674}
bro : {2208, 2053, 1904, 1905, 342, 92}
broad : {2461, 2455}
broadcast : {2325}
broadwat : {776}
broadway : {769, 766, 767}
broke : {1443, 2375, 3081, 2960, 1075, 1343}
brokelynati : {1787}
broken : {1441, 580, 1896, 3115, 913, 2942, 3035, 2076, 574}
brook : {2372}
brooklyn : {2598, 2599, 2600, 2602, 2604, 2605, 1906, 1366}
broom : {1431}
brooo : {100}
broth : {1292}
brother : {1570, 1348, 2216, 809, 682, 906, 1292, 1356, 1301, 1302, 856, 1306}
brought : {3200, 1481, 3

cnua : {856}
co : {131, 2341, 615, 2344, 2186, 1599, 3149, 466, 19, 1653, 184, 2717, 1151}
coach : {2944, 2945, 2703, 2932, 2714, 762, 2940, 2941, 2943}
coahuila : {64}
coal : {2725, 1063}
coast : {2169, 1441}
coastal : {3177, 707, 1653}
coastguard : {2424}
coastpowerlinetramtr : {1441}
cod : {348, 2332}
code : {1779, 3012}
coffe : {2951, 637, 2655}
coincid : {33}
coinflip : {2289}
col : {2790}
colbert : {1099}
cold : {41, 10, 1610, 2671, 1846, 919, 2683, 3135}
coldston : {2380}
coldwood : {425}
cole : {353}
coliseum : {1029}
collab : {2637}
collaps : {513, 514, 515, 516, 517, 518, 3075, 520, 521, 522, 523, 524, 525, 1432, 1439, 679, 700, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 3154, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 2154, 1515, 1516, 511}
collect : {2495, 236, 2790, 2103}
colleg : {2299, 782}
collid : {2432, 1419, 1585, 739, 740, 741, 742, 743, 744, 746, 747, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 7

detector : {2342}
determin : {1162, 1538, 244, 292}
deton : {1152, 1153, 1154, 1155, 1156, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 2732, 2733, 2734, 2735, 2736, 2737, 2738, 2739, 2741, 2742, 2743, 2744, 2745, 3255, 2752, 2021}
detona : {2764}
detroit : {1032, 364}
deutsch : {966}
devalu : {1945}
devast : {1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 2229, 1215, 1216, 3042, 3051, 3052}
develop : {549, 853, 1979, 733, 1151}
devic : {81, 1162, 2884, 1335}
devil : {889, 450}
devji : {2870}
devot : {363, 724}
dgm : {3055}
dhhj : {1788}
dhsscitech : {529}
di : {2792, 22, 1757, 1118, 2207}
diabet : {1045}
diablo : {1777}
dial : {1402, 2330}
diamond : {1058, 2098, 2085}
dian : {442, 1869}
dick : {3039, 2573, 1806, 2574}
did : {2233, 2876}
didn : {

estu : {2312}
etc : {2730, 1233, 1234, 2358, 1400, 1244, 2046}
ethic : {751}
ethiopia : {1531}
etsymntt : {1998}
etx : {1253}
eu : {800}
eufaula : {1610}
eugen : {616}
eunha : {2212}
euro : {957}
eurobasket : {1944}
europ : {1528, 1130, 2204}
european : {1564}
eurpo : {2494}
evacu : {2688, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1434, 1435, 1436, 1439, 1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1067, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 3165, 1378}
evansvil : {2597}
evauc : {1437}
even : {1409, 3203, 1671, 1679, 1807, 1937, 148, 1813, 1050, 1690, 3104, 289, 2086, 425, 2473, 1579, 1454, 305, 1333, 2997, 1208, 1977, 316, 957, 2878, 1987, 1223, 331, 459, 1229, 78, 1483, 3154, 1111, 216, 733, 1759, 743, 2537, 1526, 2815}
event : {130, 1026, 2088, 2575, 2162, 1622, 1239, 664, 2902}
eventu : {406}
ever : {1413, 637, 2956, 1426, 3095, 1963, 306, 2870, 1084, 320, 832, 1223, 1229, 3157, 2392, 1497, 2908, 1129, 752, 883, 2557, 127

funni : {224, 3045, 203, 1931, 342, 1979}
funniest : {832}
funtenna : {1860, 1862, 1863, 1847, 1850, 1851, 1852, 1853, 1854}
furi : {241}
furnitur : {736, 1652}
fusionfestiv : {2425}
futur : {2981, 362, 2477, 2802, 3058, 1752, 956}
futurist : {137, 125}
fy : {2553}
g : {288, 2595, 1993, 2613, 599, 1567}
ga : {2257, 2579}
gabon : {2587, 2594, 2579}
gabriel : {2213}
gadget : {773, 774, 949}
gaelit : {1494}
gag : {1199}
gaga : {1686}
gah : {1627}
gain : {2179, 1228, 3173, 1999}
gal : {1250}
galact : {2720, 2723, 2724, 2726, 2728, 2729, 2716, 2717, 2718, 2719}
gallipoli : {249, 255}
gallup : {2249}
gambit : {2768}
gambl : {1075}
game : {2061, 2961, 402, 1555, 405, 22, 1947, 3103, 1954, 419, 675, 2467, 3111, 1079, 2237, 459, 1997, 1502, 375, 2532, 485, 360, 2281, 2803, 1909, 374, 886, 762}
gamechang : {2689}
gameofthron : {832}
gameplay : {3016, 2332}
gamescom : {1137, 1309}
gandhi : {116}
gang : {3196, 2362, 1748, 3189}
ganwilson : {2542}
garag : {1914, 1366}
garbag : {2448, 3250, 3060}
ga

hickori : {355}
hid : {1211}
hidden : {2232}
hide : {746, 2380}
hieroglyph : {3058}
high : {675, 1987, 2631, 1452, 2900, 3028, 1560, 377, 602}
higher : {994, 2117}
highli : {1129}
highlight : {1961, 3086, 2303}
highsmith : {1030}
highway : {1547, 1534}
hihow : {63}
hijack : {1814, 1815, 1816, 1817, 1818, 1819, 1820, 1821, 1822, 1823, 1824, 1825, 1826, 1828, 1829, 1830, 1831, 1832, 1833, 1834, 1836, 1837, 1838, 1839, 1840, 1841, 1842, 1843, 1844, 1845, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858, 1859, 1860, 1861, 1862, 1863}
hijink : {2083}
hike : {706, 3181}
hilari : {832, 2392, 1845, 783}
hill : {786, 2287, 1727}
hillari : {1047}
hillsid : {2281}
hilton : {745, 2695}
him : {1432, 2449, 909}
himalaya : {2434, 2429}
himika : {1815}
hindu : {2755, 2406}
hint : {1486}
hinton : {555, 547}
hiphop : {1788}
hiram : {635}
hire : {1408, 1409, 1188, 1395, 185, 988, 1405, 638}
hiroshima : {2050, 2820, 2828, 2165, 2833, 2840, 2201, 1150, 2842, 668, 2845, 2972, 2043, 311

isl : {580}
isla : {2075}
islam : {2853, 1414, 2758, 1864, 2761, 1866, 2762, 2764, 2860, 1870, 2863, 2577, 1875, 1877, 2778}
island : {3234, 868, 3237, 72, 73, 74, 1160, 3240, 943, 1327, 946, 2675, 3188, 1238, 1785, 860, 2589, 2494}
isn : {796, 2549}
isnt : {3213}
iso : {1821}
isol : {2340}
isra : {225, 2882, 227, 168, 3208, 3212, 1008, 2866, 217, 218, 220, 221}
israel : {1025, 1797, 654, 2192, 146, 2196, 1177, 2202, 1179, 2204, 1183, 171, 172, 183, 2360, 841, 90, 2779, 1887}
israelnew : {1183}
issaquah : {1409}
issit : {403}
issu : {149, 2588, 2084, 1959, 2867, 1716, 2100, 3261, 207, 2900, 2904, 2907, 859, 2910, 2911, 2912, 867, 2788, 2922, 2924, 1776, 1778, 1654}
it : {2051, 5, 3080, 533, 3097, 542, 546, 2082, 2093, 2094, 3120, 2099, 1080, 1083, 2110, 2111, 1088, 2116, 585, 2129, 2139, 1116, 614, 2668, 2163, 2683, 129, 656, 3218, 2196, 149, 3229, 170, 2228, 712, 1736, 212, 1240, 2787, 755, 2298, 1792, 1806, 1300, 794, 287, 2345, 823, 2377, 1358, 337, 2897, 1382, 370, 882, 2422, 1404,

lmk : {813}
lo : {241, 3258, 732}
load : {1809, 2983}
loan : {2107, 2483, 349}
lobbi : {1492}
lobbyist : {2477}
lobster : {2683}
local : {1441, 453, 1608, 1485, 2322, 1043, 2901, 2199, 1081, 1243, 1723, 2750, 767}
localeventcountdown : {869}
localplumb : {1403}
lock : {2649, 420, 1406}
lockdown : {1703}
locksmith : {1406}
lockstep : {724}
locomot : {2658, 2652}
loft : {2561}
lofti : {2666}
log : {3104, 3009}
logic : {1649, 997}
logo : {1628}
loki : {2066}
lol : {778, 1552, 2453, 406, 1562, 799, 2086, 2216, 3117, 3246, 1199, 2994, 1075, 2611, 2358, 571, 61, 1218, 2628, 343, 1123, 2660, 2792, 745, 1899, 2673, 1269, 124}
lolol : {3244}
london : {1415, 1484, 1423, 144, 1789}
lone : {249, 305}
lonepin : {255}
long : {960, 2816, 677, 197, 102, 2795, 2828, 2224, 2675, 2580, 534, 2492, 253}
longer : {256, 257, 258, 260, 261, 262, 263, 265, 266, 1642, 268, 1612, 1198, 2970, 926}
loo : {464}
look : {1799, 1032, 3209, 3084, 783, 272, 2959, 279, 2585, 922, 283, 2713, 3229, 3102, 927, 288, 1316, 42

mirajan : {2203}
misaki : {826}
miseri : {1232}
mishap : {256, 257, 258, 260, 262, 265, 268}
misl : {1888}
misocapnist : {2650}
misogyni : {2472}
miss : {775, 522, 2442, 913, 1939, 1684, 942, 943, 944, 1198, 1200, 947, 1204, 1454, 950, 3253, 952, 3133, 2629, 1609, 212, 2004, 2007, 1758, 3166, 2017, 2277, 127}
mission : {2803, 2412}
mistak : {2051, 467}
misti : {1994}
mix : {1643, 46, 2000, 2418, 439, 732}
mixer : {685}
mixtap : {3092}
mkx : {1552, 1555}
ml : {1331, 236}
mlb : {2260, 836}
mlg : {347}
mm : {1998}
mma : {1453}
mmda : {3000}
mme : {2965}
mmx : {814}
mo : {187, 2902, 927}
mob : {1056}
mobil : {558}
mod : {3016, 139, 141, 126}
mode : {2691, 2685, 70}
model : {2622, 2620, 2084, 2100}
modern : {653}
modesto : {1373}
modi : {1320}
modifi : {1884, 1886}
modiministri : {2461}
moi : {647}
moist : {1274}
molli : {2301}
mollusk : {2217}
molten : {1992, 2187}
mom : {2560, 775, 332, 3148, 2416, 1204, 341, 1812, 2682, 1375}
moment : {1157, 1419, 1420, 1485, 1515, 1516, 2235, 376, 1627,

nyc : {1592, 2603, 188}
nycha : {1668}
nyclass : {667}
nyork : {553}
nyse : {263}
nyt : {1282, 2382, 2259, 1523, 2264, 506}
nyy : {875}
nz : {1640, 395, 1439}
o : {1153, 1154, 1155, 1156, 1159, 1161, 394, 1163, 1164, 409, 2214, 1078, 952, 3138, 965, 1121, 2531, 357, 1127, 2938}
oak : {1449, 2670, 2677, 2326, 1434}
obama : {1184, 3042, 3044, 1189, 1094, 1191, 2056, 1195, 3051, 3052, 1518, 626, 1683, 916, 2130, 2132, 890, 955}
obj : {303}
obliter : {2194, 2195, 2196, 2197, 2198, 2199, 2200, 2201, 2202, 2203, 2204, 2205, 2206, 2207, 2208, 2209, 2210, 2211, 2212, 2213, 2214, 2215, 2216, 2217, 2218, 2219, 2220, 2221, 2222, 2223, 2224, 2225, 2226, 2227, 2228, 2229, 2231, 2232, 2233, 2234, 2235, 2236, 2237, 2238, 2239, 2240, 2241, 2242, 2243, 2244, 2245, 2246, 2247, 2248, 2250, 2251, 2252}
observ : {2591}
obsess : {2640}
obviou : {1049, 2698, 2755}
ocampo : {64}
occas : {1609}
occitan : {438}
occup : {1457}
occupi : {3000, 3002, 3188}
occur : {1121, 1127, 489, 2796, 525, 978, 1688, 1401}
occu

plung : {2944, 2945, 2932, 2940, 2941, 2943}
plutot : {647}
plz : {1363}
pm : {541, 3230, 1311, 3104, 3231, 3232, 3233, 3235, 3236, 3238, 3239, 1965, 944, 1716, 3253, 3126, 951, 1723, 192, 2882, 1735, 1617, 1874, 2901, 2902, 2904, 2906, 859, 3164, 2910, 2911, 2912, 867, 2922, 1654, 2296, 1662}
pmngiqi : {1493}
pnc : {2095}
po : {2633, 637, 1183}
pocket : {376, 2417}
podcast : {1497, 2258}
poetri : {2390, 2497, 982}
point : {842, 2453, 1208, 1241, 2621}
pointless : {1894}
poison : {3067, 886}
pok : {252}
pokemon : {3001}
polaroid : {2646}
pole : {25, 778, 43}
polic : {1920, 1921, 1543, 776, 3211, 2318, 2319, 2320, 2321, 2322, 2324, 2325, 2326, 2327, 2328, 2329, 2330, 2078, 2465, 1570, 2466, 168, 41, 174, 178, 1918, 188, 189, 2620, 2622, 1600, 2624, 2754, 3192, 966, 2764, 1615, 466, 2644, 1621, 214, 3198, 219, 3199, 228, 3187, 1908, 629, 3191, 1912, 2428, 1533, 1534, 2431}
policeman : {1914, 3211}
polici : {3012, 2346, 2348, 2349, 2350, 2351, 2355, 2359}
polit : {2722, 171, 172, 3055, 17

retweet : {2091, 645, 894}
reunion : {72, 73, 74, 942, 943, 947, 950, 952}
reunit : {2835}
reuter : {2369, 1697, 2371, 2373, 2757, 2759, 2376, 2760, 1523, 2749}
rev : {2617}
reveal : {1137, 1549}
revel : {2420}
revenu : {2490, 723, 716, 709}
revers : {2179, 998}
review : {2697, 461, 2582}
reviv : {855}
revolt : {921}
revolut : {2245, 2122, 1509}
revolv : {862}
rey : {150}
reynold : {1845, 325}
rfc : {3244}
rgreen : {102}
rhodeisland : {3186}
rhyme : {569, 371}
ri : {979, 3259}
rice : {1626}
richard : {1933, 1511}
richi : {1734}
richmond : {3192, 3199, 3191}
rick : {2797}
rickard : {672}
ricki : {521, 524}
rico : {659}
ricochet : {1307}
rid : {1652, 596}
ride : {1606, 2696, 1897, 876, 85, 1050, 476}
ridg : {1923, 1460, 1717}
ridicul : {445}
rifl : {3109, 2134}
rig : {3081}
riggd : {1698}
right : {128, 131, 2948, 3077, 3203, 1674, 2065, 407, 413, 3229, 799, 545, 1185, 1059, 1571, 933, 2211, 808, 3114, 2987, 2609, 951, 1849, 60, 3143, 2504, 1997, 1486, 1624, 2649, 606, 993, 2403, 874, 279

serb : {2077}
serbian : {2396}
sergio : {1768}
seri : {933, 2999, 312, 2778, 1979}
serial : {184, 196, 198, 192}
seriou : {1470, 2353, 886, 1946, 1950}
serious : {1953, 3034, 3195, 2876, 1021}
serv : {1610, 1583}
server : {2235, 797}
servic : {1409, 1410, 1400, 1401, 1402, 1403, 1404, 1696, 1697, 1406, 36, 1060, 1062, 1407, 553, 1577, 1067, 1705, 1706, 686, 687, 688, 689, 1070, 691, 1071, 1712, 2360, 3000, 3002, 1399, 1597, 1088, 2761, 1611, 1366, 3168, 106, 109, 110, 2414, 1393, 114, 115, 1394, 117, 118, 1395, 120, 121, 122, 123, 1396, 1397, 1398, 1023}
servicesgold : {1441}
servil : {363}
set : {17, 22, 23, 24, 25, 27, 546, 1573, 550, 1576, 169, 1451, 176, 948, 823, 2106, 188, 1096, 718, 1366, 1370, 1498, 1117, 744, 1009, 2931, 1022}
settl : {1318, 2903}
setxnew : {1247}
seven : {3196, 2740, 2731, 1116}
seventh : {2020}
sever : {535, 293, 1716, 1720, 1723, 2368, 2369, 2371, 2373, 2376, 1741, 1743, 2900, 2901, 2902, 2906, 859, 2907, 2909, 2911, 2914, 867, 2915, 1253, 2278, 2922, 3069}

statu : {2693}
stay : {1, 737, 3077, 1800, 1642, 2730, 1036, 1740, 3210, 3152, 1337, 3007}
steak : {1992, 1274}
steal : {2190, 2319, 784, 2480, 2291, 1237}
steam : {2188, 2189}
steamrol : {491}
stearn : {1165}
steel : {153, 178, 2330, 1589}
steer : {1821}
stellar : {2593}
step : {582, 938, 2580, 2905, 569, 2203, 93, 1503}
stephen : {2122}
stereotyp : {2058}
sterl : {1511}
steve : {216, 2344, 2596, 2341}
stewart : {2427, 1157}
stick : {448, 1427, 2917}
stile : {409}
still : {259, 1283, 1795, 6, 2311, 1801, 1929, 1934, 272, 279, 283, 668, 1053, 2588, 33, 1314, 419, 2081, 2467, 553, 3119, 48, 2224, 2992, 2611, 52, 2995, 2490, 3259, 701, 574, 1215, 1216, 1344, 834, 1600, 1858, 2114, 966, 328, 2505, 1483, 589, 1620, 598, 2659, 2915, 2407, 2164, 3190, 889}
stillnotoverit : {1454}
stir : {3004, 171, 172, 183}
stl : {1440}
stlcard : {3026}
stock : {2498, 2499, 2501, 870, 102, 2502, 2503, 3241, 2489, 3247, 2484, 2485, 2486, 2488, 1945, 2491, 1118, 2105}
stockton : {2321}
stomach : {1472, 1721}


threaten : {547, 1866, 555, 1870, 1872, 1873, 1875, 598, 1882}
three : {2368, 801, 2369, 2371, 2780, 2373, 2882, 2376, 2189, 594, 2866, 1147, 2428, 798, 3167}
threshold : {592}
threw : {2532, 1478, 3183, 1519, 190}
thrift : {678}
thriller : {1317, 1095}
thrive : {1557}
throat : {2715, 2947, 3222, 1263}
throne : {2532}
through : {1729, 2967}
throughout : {1267, 1036, 3142}
throw : {2497, 231, 1034, 3085, 1806, 1999, 2992, 2930, 1268, 2519, 1464, 703}
throwback : {515, 518}
throwbackthursday : {2692}
thru : {170, 2859, 310, 1054}
thrust : {1105}
tht : {2874}
thu : {2080, 1105, 1481, 1489}
thug : {2478, 462}
thunder : {2019, 2211, 2888, 2889, 1738, 2890, 2891, 2892, 2893, 2894, 2895, 2896, 2897, 2898, 2899, 3146}
thunderstorm : {1716, 1720, 1723, 2900, 2901, 2902, 2903, 2904, 2905, 2906, 2907, 2908, 2909, 2910, 2911, 2912, 2913, 2914, 2915, 2916, 2921, 2922}
thur : {637}
thursday : {2592, 3170, 515, 518, 1899, 2764, 508, 1662}
thx : {3021}
ti : {1226, 1069, 591}
tia : {318}
tibetan : {163

valley : {1448, 1922}
valleywx : {733}
valu : {720, 3057}
vamo : {748}
vampir : {433}
van : {3242, 1825, 1826, 1823}
vancouv : {2590}
vandal : {877}
vandalis : {990}
vandalz : {533}
vanessa : {1467}
vapor : {1756}
variat : {58}
varieti : {1368, 1371}
vassal : {1238}
vaulter : {43}
vavazouma : {1636}
vbac : {2665}
ve : {792, 1300}
vega : {1546, 45, 3007}
veget : {1368, 1371}
vegetarian : {188}
veggi : {2671}
vehicl : {32, 77, 2125, 784, 755, 2620, 189, 2622}
veil : {2632, 753}
veld : {1454}
vendor : {1876}
venezuela : {2292, 2958}
vent : {1762, 2411}
veri : {104, 2131, 2668}
vermont : {1543}
verrri : {2542}
vers : {2046}
verseth : {1481, 1489}
version : {1719}
versu : {3057}
vessel : {2592, 1749}
vet : {2414}
veteran : {401, 1611, 2854}
veterinarian : {2419}
vfp : {2854}
vh : {698}
vi : {1563}
via : {1028, 2060, 21, 1047, 542, 2603, 1079, 1085, 1600, 1601, 2624, 2120, 587, 1103, 2130, 88, 89, 1118, 2149, 2151, 106, 2155, 108, 109, 110, 1645, 114, 115, 117, 118, 119, 120, 121, 122, 123, 

wwii : {2123, 2125, 1165, 499, 2807, 2842}
www : {1812, 1565}
wx : {1776}
wxki : {1654}
x : {1664, 2561, 130, 390, 3082, 3092, 152, 159, 160, 161, 1314, 164, 1580, 441, 1756, 2277, 1639, 744, 2156, 1650}
xbox : {1093, 917}
xc : {1762}
xcom : {2795}
xfactor : {2553}
xl : {2237}
xleak : {315, 308}
xmwte : {2236}
xoxoxxxooo : {381}
xrwn : {3258}
xshanemichaelsx : {1601}
xuskak : {2539}
xvii : {241}
y : {1582}
ya : {1701, 585, 1867, 1004, 2667, 1556, 3135}
yaaasss : {2553}
yahistor : {2481}
yahoo : {2370, 3006}
yakub : {2879}
yal : {2792}
yale : {2165}
yall : {1667}
yank : {2225}
yanke : {388, 71}
yaound : {2778}
yarbrough : {2596}
yard : {1736, 3243, 3259, 1055}
yay : {706, 1135}
yazidi : {1889, 1892, 1893, 1895, 2076}
yc : {2712}
ye : {579, 2179, 1190, 2536, 2155, 2539, 2700, 2574, 2319, 1267, 2102, 412}
yea : {2386}
yeah : {291, 2534, 327, 3242, 2933, 567, 344, 347, 1277, 1791}
year : {1536, 1537, 2050, 2176, 2436, 2820, 264, 777, 1544, 1931, 1292, 2953, 1679, 1809, 2835, 1301, 1302, 23

In [27]:
pos_list2 = {}
for i in range(len(stemmed)):
  check = stemmed[i]
  for item in stemmed[i].split():
    if item not in pos_list2.keys():
      pos_list2[item] = {}
    if i not in pos_list2[item].keys():
      pos_list2[item][i] = set()
    pos_list2[item][i].add(stemmed[i].index(item))
pos_list_res = {}
for key in sorted(pos_list2.keys()):
  pos_list_res[key] = []
  pos_list_res[key].append(len(pos_list2[key]))
  pos_list_res[key].append(pos_list2[key])
for key, value in pos_list_res.items():
  print(key, ":",  value)

a : [136, {23: {1}, 34: {7}, 39: {29}, 43: {3}, 62: {0}, 172: {14}, 177: {1}, 183: {14}, 203: {7}, 204: {3}, 235: {0}, 284: {34}, 355: {2}, 408: {0}, 417: {2}, 476: {0}, 480: {17}, 483: {18}, 512: {15}, 527: {15}, 584: {3}, 627: {28}, 674: {1}, 677: {0}, 723: {7}, 747: {4}, 791: {6}, 845: {11}, 917: {38}, 924: {0}, 935: {2}, 956: {0}, 960: {0}, 1000: {0}, 1011: {19}, 1053: {0}, 1064: {11}, 1066: {1}, 1072: {1}, 1121: {2}, 1127: {2}, 1169: {45}, 1209: {16}, 1226: {2}, 1292: {5}, 1301: {5}, 1302: {5}, 1319: {0}, 1392: {13}, 1401: {0}, 1439: {0}, 1484: {20}, 1498: {0}, 1518: {3}, 1538: {16}, 1597: {7}, 1645: {2}, 1681: {19}, 1705: {8}, 1706: {0}, 1711: {17}, 1748: {1}, 1764: {1}, 1772: {13}, 1774: {0}, 1777: {2}, 1781: {6}, 1783: {3}, 1784: {4}, 1794: {2}, 1809: {1}, 1817: {15}, 1818: {12}, 1820: {12}, 1841: {3}, 1901: {30}, 1922: {8}, 1964: {0}, 2016: {18}, 2060: {12}, 2061: {0}, 2081: {1}, 2122: {4}, 2131: {16}, 2133: {2}, 2157: {1}, 2162: {9}, 2187: {8}, 2253: {19}, 2260: {19}, 2275: {

bashir : [1, {2054: {65}}]
basic : [3, {1613: {23}, 1880: {4}, 3097: {47}}]
bass : [1, {323: {10}}]
bat : [4, {521: {38}, 524: {44}, 2035: {83}, 3057: {0}}]
batch : [1, {2513: {32}}]
bateman : [1, {2452: {8}}]
bath : [4, {677: {50}, 1341: {36}, 1487: {8}, 1974: {26}}]
bathroom : [6, {212: {49}, 730: {10}, 994: {15}, 1480: {10}, 2038: {54}, 3249: {33}}]
batman : [1, {1766: {8}}]
batteri : [2, {431: {33}, 436: {33}}]
battl : [15, {249: {51}, 250: {2}, 251: {34}, 252: {52}, 253: {11}, 254: {32}, 956: {21}, 1085: {20}, 1121: {18}, 1127: {18}, 1429: {17}, 1556: {62}, 1949: {25}, 3110: {24}, 3171: {25}}]
baum : [1, {2464: {5}}]
bay : [5, {45: {41}, 192: {29}, 682: {59}, 2440: {0}, 3146: {43}}]
bayelsa : [5, {1814: {76}, 1817: {14}, 1818: {11}, 1820: {11}, 1828: {0}}]
bayonet : [2, {2133: {21}, 3124: {60}}]
bb : [3, {1467: {56}, 2118: {66}, 2195: {65}}]
bbc : [11, {15: {33}, 807: {28}, 942: {0}, 950: {38}, 984: {47}, 1041: {0}, 1282: {36}, 2429: {48}, 2853: {11}, 2860: {11}, 2863: {11}}]
bbcl

burglari : [2, {176: {43}, 177: {31}}]
buri : [4, {2193: {37}, 2482: {17}, 2500: {32}, 2575: {32}}]
burlington : [1, {1973: {11}}]
burn : [75, {21: {0}, 23: {45}, 27: {0}, 30: {14}, 95: {23}, 364: {34}, 526: {57}, 527: {77}, 528: {9}, 529: {41}, 530: {15}, 531: {9}, 532: {27}, 533: {36}, 534: {22}, 535: {20}, 536: {55}, 537: {35}, 538: {31}, 539: {10}, 540: {19}, 558: {48}, 559: {2}, 560: {4}, 561: {47}, 562: {22}, 563: {28}, 564: {5}, 565: {0}, 566: {30}, 567: {50}, 568: {36}, 569: {71}, 570: {14}, 571: {23}, 572: {7}, 573: {54}, 574: {30}, 575: {29}, 576: {31}, 577: {9}, 578: {19}, 579: {31}, 580: {73}, 581: {30}, 582: {31}, 583: {0}, 584: {10}, 585: {32}, 586: {0}, 587: {19}, 588: {0}, 589: {28}, 590: {59}, 591: {16}, 592: {72}, 593: {0}, 594: {17}, 595: {41}, 596: {44}, 597: {65}, 598: {15}, 599: {47}, 600: {19}, 601: {23}, 602: {9}, 603: {41}, 1086: {59}, 1137: {42}, 1445: {11}, 1694: {30}, 1704: {21}, 2629: {34}, 3148: {16}, 3156: {35}}]
burna : [1, {2553: {95}}]
burner : [1, {19

collab : [1, {2637: {11}}]
collaps : [52, {511: {21}, 513: {31}, 514: {83}, 515: {75}, 516: {14}, 517: {27}, 518: {36}, 520: {10}, 521: {42}, 522: {17}, 523: {21}, 524: {48}, 525: {12}, 679: {8}, 700: {50}, 708: {14}, 709: {17}, 710: {13}, 711: {13}, 712: {48}, 713: {14}, 714: {39}, 715: {8}, 716: {30}, 717: {29}, 718: {51}, 719: {8}, 720: {29}, 721: {43}, 723: {17}, 724: {13}, 725: {22}, 726: {25}, 727: {36}, 728: {41}, 729: {14}, 730: {2}, 731: {43}, 732: {44}, 733: {25}, 734: {11}, 735: {8}, 736: {21}, 737: {18}, 738: {38}, 1432: {28}, 1439: {25}, 1515: {12}, 1516: {12}, 2154: {23}, 3075: {11}, 3154: {21}}]
collect : [4, {236: {12}, 2103: {7}, 2495: {0}, 2790: {16}}]
colleg : [2, {782: {33}, 2299: {9}}]
collid : [26, {739: {24}, 740: {38}, 741: {12}, 742: {29}, 743: {15}, 744: {0}, 746: {28}, 747: {68}, 749: {22}, 750: {55}, 751: {37}, 752: {23}, 753: {41}, 754: {8}, 755: {72}, 756: {23}, 757: {78}, 758: {17}, 760: {27}, 761: {63}, 762: {76}, 763: {12}, 764: {21}, 1419: {0}, 1585: {

despair : [1, {1097: {18}}]
desper : [2, {658: {0}, 1295: {10}}]
despit : [1, {1207: {0}}]
destini : [5, {1037: {33}, 1038: {33}, 1042: {33}, 1046: {33}, 1502: {53}}]
destroy : [43, {66: {45}, 81: {45}, 175: {10}, 293: {82}, 908: {32}, 1032: {54}, 1105: {41}, 1106: {39}, 1107: {59}, 1108: {85}, 1109: {6}, 1110: {50}, 1111: {9}, 1112: {35}, 1113: {9}, 1114: {6}, 1115: {30}, 1116: {66}, 1117: {19}, 1118: {17}, 1119: {11}, 1120: {33}, 1121: {61}, 1122: {35}, 1123: {29}, 1124: {25}, 1125: {26}, 1126: {20}, 1127: {61}, 1128: {26}, 1129: {17}, 1130: {41}, 1131: {4}, 1132: {5}, 1133: {20}, 1134: {71}, 1135: {9}, 1214: {20}, 1315: {23}, 1557: {38}, 1726: {11}, 2228: {59}, 2513: {43}}]
destruct : [17, {601: {0}, 1136: {7}, 1137: {67}, 1138: {22}, 1139: {62}, 1140: {48}, 1141: {0}, 1142: {8}, 1143: {10}, 1144: {34}, 1145: {35}, 1146: {14}, 1147: {22}, 1148: {23}, 1149: {48}, 1150: {50}, 1151: {10}}]
detail : [7, {590: {9}, 684: {48}, 790: {38}, 1706: {82}, 2756: {5}, 2878: {38}, 3053: {31}}]
det

epicent : [2, {3091: {35}, 3093: {37}}]
epicentr : [1, {1425: {24}}]
epilepsi : [1, {1293: {42}}]
epilept : [1, {1293: {80}}]
episcop : [1, {1765: {41}}]
episod : [8, {397: {28}, 658: {18}, 826: {7}, 854: {15}, 907: {47}, 1388: {50}, 2089: {0}, 2678: {24}}]
eq : [1, {1321: {4}}]
equip : [1, {1178: {43}}]
equival : [1, {1581: {25}}]
er : [2, {1373: {15}, 2934: {26}}]
era : [1, {1165: {33}}]
eras : [2, {305: {60}, 1122: {5}}]
erasureisnotequ : [1, {2159: {19}}]
erdogan : [1, {2768: {0}}]
erea : [1, {274: {33}}]
erect : [5, {355: {52}, 1304: {14}, 1525: {34}, 1528: {70}, 2779: {18}}]
eric : [2, {1794: {53}, 2953: {0}}]
erod : [1, {483: {62}}]
eros : [1, {707: {52}}]
error : [1, {1538: {5}}]
erupt : [5, {615: {12}, 675: {29}, 1047: {14}, 3084: {21}, 3090: {24}}]
escap : [5, {149: {62}, 746: {0}, 1422: {4}, 1972: {43}, 2054: {78}}]
escuchando : [1, {2633: {17}}]
esp : [2, {1365: {46}, 1378: {37}}]
especi : [4, {537: {4}, 624: {27}, 1360: {56}, 2073: {13}}]
espn : [1, {2703: {70}}]
esport : 

gandhi : [1, {116: {6}}]
gang : [4, {1748: {80}, 2362: {54}, 3189: {58}, 3196: {19}}]
ganwilson : [1, {2542: {0}}]
garag : [2, {1366: {6}, 1914: {49}}]
garbag : [3, {2448: {41}, 3060: {29}, 3250: {6}}]
garcia : [1, {64: {87}}]
garden : [3, {1368: {25}, 1371: {25}, 1598: {40}}]
garfield : [2, {1067: {57}, 1070: {48}}]
gasolin : [1, {2807: {22}}]
gat : [1, {2934: {39}}]
gate : [3, {784: {23}, 1366: {12}, 1974: {36}}]
gatensburi : [1, {786: {89}}]
gateway : [1, {744: {7}}]
gaug : [1, {37: {43}}]
gave : [4, {158: {23}, 377: {34}, 485: {8}, 843: {30}}]
gawker : [1, {2769: {3}}]
gawlowski : [1, {1507: {63}}]
gay : [8, {181: {22}, 182: {30}, 185: {19}, 204: {35}, 389: {4}, 492: {85}, 2409: {46}, 2464: {78}}]
gaymaro : [1, {876: {16}}]
gayuk : [1, {86: {37}}]
gaza : [3, {654: {0}, 1132: {13}, 1910: {17}}]
gazan : [1, {1232: {35}}]
gaze : [1, {2683: {33}}]
gbbo : [4, {2137: {37}, 2139: {94}, 2140: {57}, 2144: {18}}]
ge : [1, {1981: {50}}]
gear : [4, {866: {15}, 1368: {49}, 1371: {49}, 1954: {47

hmm : [3, {1333: {0}, 2402: {0}, 2737: {0}}]
hnlnow : [1, {710: {27}}]
hoax : [1, {1162: {15}}]
hobbi : [1, {1492: {4}}]
hobbit : [4, {1096: {4}, 1098: {4}, 1101: {18}, 1103: {26}}]
hobo : [3, {427: {44}, 437: {44}, 440: {44}}]
hockey : [1, {817: {11}}]
hodgin : [1, {2847: {16}}]
hoffman : [3, {238: {24}, 244: {26}, 2102: {0}}]
hoist : [2, {1515: {80}, 1516: {80}}]
hold : [14, {513: {20}, 517: {16}, 1059: {28}, 1272: {44}, 1683: {49}, 1766: {0}, 1867: {8}, 1874: {22}, 1878: {8}, 1880: {44}, 2298: {7}, 2555: {18}, 2930: {43}, 3225: {20}}]
holi : [6, {1480: {0}, 1573: {0}, 1900: {7}, 2297: {27}, 2771: {61}, 3073: {0}}]
holibob : [1, {2959: {54}}]
holiday : [1, {3144: {14}}]
holist : [1, {2580: {68}}]
holli : [2, {1666: {42}, 2950: {64}}]
hollow : [1, {1698: {74}}]
hollywood : [5, {309: {47}, 2139: {5}, 2950: {15}, 2954: {0}, 2957: {0}}]
holm : [1, {2065: {30}}]
holmdel : [1, {2095: {26}}]
hom : [1, {2780: {89}}]
homag : [1, {451: {30}}]
home : [43, {197: {61}, 300: {32}, 464: {25}, 513: 

junk : [1, {3243: {53}}]
juri : [1, {2065: {59}}]
just : [29, {0: {0}, 30: {0}, 100: {9}, 127: {0}, 301: {0}, 333: {14}, 334: {0}, 403: {0}, 502: {0}, 819: {10}, 831: {8}, 872: {57}, 1187: {4}, 1428: {71}, 1431: {7}, 1434: {0}, 1497: {37}, 1509: {59}, 2065: {0}, 2378: {20}, 2534: {2}, 2549: {41}, 2662: {0}, 2730: {18}, 2787: {3}, 2805: {0}, 2908: {0}, 3097: {0}, 3105: {0}}]
justbitch : [1, {1246: {28}}]
justifi : [2, {490: {42}, 2776: {42}}]
justin : [5, {585: {93}, 743: {56}, 747: {46}, 1122: {10}, 1466: {6}}]
jyb : [1, {685: {44}}]
k : [12, {598: {7}, 667: {29}, 732: {6}, 1180: {6}, 1346: {0}, 1737: {35}, 1755: {51}, 1782: {0}, 2024: {0}, 2208: {22}, 2864: {15}, 3259: {69}}]
ka : [1, {27: {85}}]
kabari : [1, {2761: {74}}]
kadeeja : [1, {1245: {23}}]
kaduna : [3, {908: {40}, 1120: {41}, 1128: {34}}]
kagawa : [1, {1757: {44}}]
kahel : [1, {1008: {74}}]
kai : [1, {826: {26}}]
kaimai : [1, {1784: {3}}]
kaiserjaeg : [1, {98: {4}}]
kalispel : [1, {557: {24}}]
kall : [2, {240: {9}, 242: {31

longer : [15, {256: {6}, 257: {6}, 258: {6}, 260: {6}, 261: {40}, 262: {6}, 263: {63}, 265: {6}, 266: {6}, 268: {6}, 926: {27}, 1198: {7}, 1612: {4}, 1642: {24}, 2970: {36}}]
loo : [1, {464: {71}}]
look : [43, {232: {28}, 238: {11}, 272: {54}, 279: {54}, 283: {54}, 288: {69}, 321: {40}, 325: {0}, 348: {68}, 376: {33}, 423: {24}, 491: {10}, 630: {0}, 707: {0}, 783: {11}, 820: {17}, 922: {3}, 927: {6}, 1032: {49}, 1230: {40}, 1316: {42}, 1361: {22}, 1447: {29}, 1487: {20}, 1526: {32}, 1736: {3}, 1758: {42}, 1799: {30}, 2139: {22}, 2144: {0}, 2145: {0}, 2360: {54}, 2396: {35}, 2585: {49}, 2713: {15}, 2959: {72}, 3084: {11}, 3102: {7}, 3183: {20}, 3209: {14}, 3229: {44}, 3256: {53}, 3259: {66}}]
lookin : [1, {1431: {56}}]
lookout : [1, {2725: {61}}]
loom : [1, {669: {20}}]
looney : [1, {533: {19}}]
loop : [3, {1902: {35}, 1941: {27}, 1942: {21}}]
loos : [3, {1346: {29}, 1977: {48}, 3081: {32}}]
loot : [1, {2475: {8}}]
lord : [7, {447: {5}, 756: {36}, 1037: {47}, 1038: {47}, 1042: {47}, 104

mum : [2, {1087: {52}, 2549: {34}}]
mumbai : [1, {2087: {0}}]
mump : [1, {2269: {0}}]
mundo : [1, {319: {41}}]
municip : [4, {1379: {0}, 1386: {37}, 1392: {32}, 3262: {20}}]
murder : [39, {489: {37}, 499: {20}, 510: {41}, 1087: {62}, 1848: {4}, 1855: {4}, 2037: {43}, 2038: {47}, 2039: {0}, 2040: {24}, 2041: {6}, 2042: {65}, 2043: {41}, 2044: {18}, 2045: {42}, 2046: {44}, 2047: {40}, 2048: {38}, 2049: {13}, 2050: {26}, 2051: {28}, 2052: {23}, 2053: {21}, 2054: {54}, 2055: {15}, 2056: {85}, 2057: {35}, 2058: {28}, 2059: {37}, 2060: {22}, 2061: {75}, 2062: {22}, 2063: {5}, 2064: {26}, 2065: {18}, 2066: {33}, 2067: {75}, 2327: {56}, 2774: {34}}]
museum : [1, {2078: {47}}]
music : [12, {61: {0}, 235: {58}, 240: {0}, 242: {22}, 538: {25}, 1082: {64}, 1161: {43}, 2375: {11}, 2644: {35}, 2684: {13}, 2983: {37}, 3219: {16}}]
musician : [2, {240: {0}, 242: {22}}]
musim : [1, {2045: {49}}]
muslim : [15, {169: {13}, 217: {17}, 218: {17}, 220: {17}, 221: {17}, 225: {17}, 227: {17}, 1025: {0}, 1875:

origin : [5, {46: {21}, 531: {25}, 1178: {36}, 1953: {9}, 2277: {4}}]
orlando : [1, {2835: {0}}]
orlean : [1, {1898: {64}}]
orphan : [1, {2683: {26}}]
ortiz : [1, {1646: {6}}]
osha : [1, {1779: {30}}]
ost : [1, {599: {25}}]
ostens : [1, {983: {31}}]
other : [10, {747: {57}, 1146: {81}, 1566: {33}, 1700: {34}, 1708: {34}, 2131: {5}, 2327: {43}, 2432: {31}, 2539: {47}, 3259: {40}}]
otherwis : [1, {2875: {20}}]
otl : [1, {1218: {60}}]
otrabaltimor : [1, {1202: {50}}]
otrametlif : [1, {2537: {0}}]
otsuka : [1, {1147: {31}}]
otto : [1, {3034: {15}}]
ou : [1, {973: {32}}]
ouch : [1, {667: {0}}]
oun : [1, {2911: {0}}]
ounc : [1, {2656: {27}}]
our : [8, {203: {45}, 467: {0}, 777: {0}, 1366: {34}, 1482: {0}, 1957: {0}, 2990: {60}, 3216: {0}}]
out : [14, {98: {20}, 111: {0}, 663: {41}, 681: {41}, 852: {9}, 915: {18}, 1150: {36}, 1494: {26}, 1586: {26}, 1590: {26}, 1988: {51}, 2424: {59}, 2836: {44}, 2958: {36}}]
outbreak : [11, {2265: {47}, 2266: {47}, 2267: {47}, 2268: {43}, 2269: {5}, 2270: {4

pitcher : [1, {703: {5}}]
pitfal : [1, {1040: {44}}]
piti : [1, {605: {48}}]
pixar : [2, {1988: {23}, 3084: {52}}]
pizza : [2, {341: {24}, 993: {27}}]
pizzeria : [1, {2803: {25}}]
pjnet : [1, {1514: {88}}]
pkadlik : [1, {808: {0}}]
pkk : [16, {2732: {11}, 2733: {23}, 2734: {23}, 2735: {19}, 2736: {11}, 2737: {15}, 2738: {14}, 2739: {11}, 2741: {20}, 2742: {19}, 2743: {11}, 2744: {20}, 2745: {11}, 2767: {4}, 2777: {0}, 3255: {11}}]
pl : [6, {369: {39}, 439: {0}, 1087: {69}, 1839: {21}, 2209: {36}, 3001: {63}}]
place : [12, {584: {34}, 740: {18}, 746: {6}, 1580: {33}, 1859: {56}, 2342: {30}, 2380: {40}, 2530: {29}, 2673: {7}, 2675: {64}, 2962: {36}, 3022: {34}}]
plagu : [1, {1680: {62}}]
plain : [2, {2253: {48}, 2260: {48}}]
plan : [39, {104: {5}, 512: {25}, 656: {40}, 872: {75}, 991: {21}, 1045: {45}, 1063: {35}, 1111: {4}, 1126: {28}, 1165: {68}, 1379: {14}, 1380: {54}, 1381: {49}, 1382: {54}, 1383: {69}, 1384: {28}, 1385: {14}, 1386: {51}, 1387: {21}, 1388: {45}, 1389: {54}, 1390: {54

recount : [6, {2932: {25}, 2940: {35}, 2941: {25}, 2943: {25}, 2944: {25}, 2945: {25}}]
recov : [9, {671: {61}, 775: {10}, 871: {15}, 948: {46}, 1228: {92}, 1839: {38}, 2436: {8}, 2493: {16}, 3214: {13}}]
recoveri : [1, {1048: {48}}]
recreat : [2, {240: {22}, 242: {44}}]
recur : [1, {1129: {79}}]
recycl : [1, {1484: {41}}]
red : [27, {102: {26}, 363: {4}, 394: {32}, 404: {19}, 868: {0}, 917: {27}, 1052: {39}, 1217: {10}, 1546: {17}, 1549: {29}, 1617: {0}, 1996: {21}, 2000: {21}, 2019: {0}, 2079: {56}, 2153: {10}, 2550: {58}, 2629: {39}, 2848: {36}, 2855: {36}, 2858: {36}, 2900: {48}, 2980: {47}, 3082: {17}, 3107: {42}, 3138: {33}, 3250: {26}}]
reddit : [18, {2331: {29}, 2333: {0}, 2334: {0}, 2337: {9}, 2338: {0}, 2339: {0}, 2341: {0}, 2343: {0}, 2344: {0}, 2345: {0}, 2346: {0}, 2348: {0}, 2349: {0}, 2350: {4}, 2351: {9}, 2355: {0}, 2359: {9}, 3012: {47}}]
redruth : [1, {693: {43}}]
redscarebot : [1, {2042: {0}}]
redskin : [1, {1915: {0}}]
reduc : [5, {202: {16}, 617: {21}, 623: {21}, 1

segment : [1, {1822: {11}}]
seismic : [20, {2575: {8}, 2576: {0}, 2577: {28}, 2579: {25}, 2580: {33}, 2581: {14}, 2582: {39}, 2583: {33}, 2584: {48}, 2585: {59}, 2586: {20}, 2587: {12}, 2588: {28}, 2589: {70}, 2590: {35}, 2591: {14}, 2592: {26}, 2593: {17}, 2594: {12}, 2595: {60}}]
select : [4, {261: {69}, 1023: {31}, 2016: {40}, 3153: {31}}]
self : [14, {30: {19}, 823: {21}, 848: {26}, 962: {14}, 1105: {31}, 1412: {15}, 1416: {19}, 1417: {19}, 1424: {15}, 1747: {19}, 1940: {4}, 2277: {22}, 2507: {69}, 3204: {12}}]
selfi : [4, {526: {45}, 1562: {35}, 1584: {18}, 2600: {8}}]
selfish : [1, {2422: {33}}]
sell : [3, {676: {5}, 834: {54}, 3129: {12}}]
sen : [2, {238: {7}, 2596: {42}}]
senat : [3, {244: {0}, 1518: {20}, 1780: {13}}]
send : [21, {61: {61}, 76: {8}, 82: {5}, 378: {9}, 762: {7}, 1651: {12}, 1847: {27}, 1850: {27}, 1851: {27}, 1852: {27}, 1853: {27}, 1854: {23}, 1860: {27}, 1862: {27}, 1863: {27}, 1879: {43}, 1901: {40}, 2299: {0}, 2301: {6}, 2657: {0}, 2959: {39}}]
senior : [1,

snowman : [1, {3024: {35}}]
snowmobil : [1, {2673: {55}}]
snowstorm : [19, {2663: {4}, 2664: {0}, 2665: {15}, 2666: {56}, 2667: {40}, 2668: {29}, 2669: {16}, 2670: {33}, 2671: {60}, 2672: {37}, 2674: {2}, 2675: {32}, 2676: {11}, 2677: {14}, 2678: {9}, 2679: {28}, 2680: {59}, 2682: {40}, 2683: {79}}]
snowstormi : [1, {2673: {18}}]
so : [27, {40: {0}, 617: {45}, 623: {45}, 643: {16}, 655: {26}, 760: {0}, 813: {0}, 822: {0}, 826: {0}, 993: {69}, 1338: {0}, 1354: {11}, 1442: {0}, 1527: {0}, 1709: {53}, 1800: {0}, 1856: {13}, 1911: {0}, 1960: {0}, 2058: {0}, 2059: {0}, 2119: {10}, 2200: {32}, 2365: {22}, 2425: {0}, 2506: {56}, 2547: {14}}]
soar : [1, {605: {0}}]
soccer : [2, {1204: {8}, 3024: {3}}]
social : [16, {336: {4}, 634: {0}, 643: {16}, 647: {73}, 751: {5}, 871: {30}, 1509: {35}, 1539: {23}, 1614: {0}, 1794: {12}, 2040: {7}, 2042: {48}, 2299: {47}, 2468: {64}, 2470: {64}, 2867: {14}}]
societi : [3, {669: {4}, 679: {0}, 1420: {0}}]
sociologist : [1, {1970: {35}}]
socket : [1, {1356: {

texa : [7, {204: {0}, 1247: {23}, 2122: {32}, 3178: {0}, 3180: {11}, 3181: {40}, 3185: {0}}]
texan : [1, {1949: {9}}]
texian : [1, {2122: {0}}]
text : [10, {480: {0}, 843: {35}, 893: {15}, 2082: {57}, 2093: {57}, 2094: {57}, 2099: {57}, 2232: {85}, 2249: {13}, 2841: {3}}]
tf : [2, {759: {0}, 795: {74}}]
tfb : [1, {721: {8}}]
tfd : [1, {1589: {31}}]
tgf : [3, {2004: {59}, 2007: {59}, 2017: {59}}]
th : [29, {350: {36}, 470: {61}, 471: {30}, 483: {36}, 492: {0}, 500: {15}, 501: {11}, 502: {28}, 504: {0}, 505: {11}, 506: {11}, 508: {11}, 509: {11}, 541: {25}, 727: {26}, 855: {0}, 1256: {21}, 1543: {47}, 1698: {43}, 1902: {48}, 2114: {38}, 2301: {26}, 2320: {22}, 2369: {26}, 2373: {26}, 2376: {26}, 2599: {37}, 2900: {20}, 3116: {3}}]
thailand : [1, {2159: {76}}]
than : [3, {672: {51}, 747: {63}, 2001: {41}}]
thane : [1, {718: {39}}]
thank : [18, {228: {32}, 1026: {53}, 1190: {0}, 1289: {41}, 1338: {33}, 1904: {5}, 1934: {4}, 2068: {60}, 2209: {30}, 2292: {51}, 2377: {16}, 2414: {28}, 2616: 

tyme : [1, {3248: {10}}]
type : [4, {394: {50}, 1498: {76}, 1635: {22}, 2809: {21}}]
typewrit : [1, {3122: {47}}]
typhoon : [21, {4: {0}, 859: {30}, 864: {12}, 867: {33}, 1184: {20}, 1189: {20}, 1191: {20}, 1195: {20}, 1684: {0}, 3041: {31}, 3042: {20}, 3043: {4}, 3044: {41}, 3045: {0}, 3046: {37}, 3047: {38}, 3048: {41}, 3049: {18}, 3050: {30}, 3051: {20}, 3052: {24}}]
typo : [1, {2997: {40}}]
tyre : [1, {2139: {60}}]
tyuler : [1, {1936: {3}}]
u : [99, {36: {33}, 106: {0}, 109: {0}, 110: {0}, 114: {0}, 115: {0}, 117: {0}, 118: {0}, 120: {0}, 121: {0}, 122: {0}, 123: {0}, 135: {35}, 149: {1}, 207: {1}, 228: {46}, 270: {29}, 288: {5}, 314: {10}, 376: {20}, 380: {10}, 390: {11}, 473: {69}, 484: {0}, 519: {6}, 546: {14}, 564: {6}, 596: {0}, 605: {13}, 645: {14}, 684: {60}, 699: {2}, 743: {33}, 795: {54}, 847: {10}, 887: {2}, 907: {19}, 937: {73}, 1075: {50}, 1086: {9}, 1208: {32}, 1221: {25}, 1246: {0}, 1252: {0}, 1257: {15}, 1259: {0}, 1326: {17}, 1363: {36}, 1428: {18}, 1436: {25}, 1514

worth : [5, {1705: {0}, 2060: {4}, 2419: {10}, 2799: {53}, 2841: {8}}]
would : [74, {40: {26}, 90: {23}, 91: {72}, 96: {28}, 134: {34}, 234: {11}, 346: {25}, 377: {58}, 438: {2}, 526: {27}, 527: {26}, 544: {5}, 571: {41}, 626: {31}, 705: {15}, 707: {57}, 715: {2}, 725: {30}, 744: {51}, 842: {72}, 876: {24}, 887: {17}, 998: {41}, 1049: {51}, 1082: {26}, 1245: {40}, 1294: {16}, 1335: {23}, 1339: {14}, 1359: {5}, 1365: {1}, 1421: {33}, 1457: {9}, 1464: {36}, 1488: {19}, 1514: {17}, 1530: {2}, 1550: {10}, 1581: {51}, 1614: {16}, 1635: {0}, 1658: {37}, 1681: {31}, 1806: {41}, 1808: {0}, 1810: {32}, 1835: {22}, 1901: {0}, 1936: {46}, 1958: {29}, 2046: {73}, 2051: {5}, 2075: {12}, 2110: {3}, 2179: {38}, 2194: {0}, 2197: {17}, 2207: {34}, 2211: {14}, 2228: {45}, 2380: {2}, 2419: {30}, 2653: {0}, 2799: {47}, 2824: {4}, 2888: {55}, 2933: {0}, 2984: {28}, 3028: {48}, 3031: {13}, 3037: {2}, 3080: {16}, 3205: {33}, 3243: {18}}]
woulda : [1, {410: {54}}]
wound : [34, {326: {28}, 328: {11}, 548: {88}

In [28]:
print(sys.getrecursionlimit())
print(len(hash_table))

3000
7106


In [29]:
class Node:
  left = None
  right = None
  word = None
  doc_list = set()

def insert(root, word, doc):
  if root == None:
    t = Node()
    t.word = word
    t.doc_list = set()
    t.doc_list.add(doc)
    return t
  if word < root.word:
    root.left = insert(root.left, word, doc)
  elif word > root.word:
    root.right = insert(root.right, word, doc)
  elif word == root.word:
    root.doc_list.add(doc)
  return root

def search(root, word):
  if root is None or word == root.word:
    return root.doc_list
  if root.word < word:
    return search(root.right, word)
  return search(root.left, word)

def inorder(root):
  if root:
    inorder(root.left)
    print(root.word, ":", root.doc_list, "->")
    inorder(root.right)

In [30]:
ps = PorterStemmer()
root = insert(None, "n-", 0)
lemmatizer = WordNetLemmatizer()
words = set(stopwords.words("english"))
id = -1
for email in stemmed:
  id += 1
  token = word_tokenize(email.casefold())
  token = map(lemmatizer.lemmatize, token)
  token = list(map(ps.stem, token))
  for item in token:
    if item not in words:
      root = insert(root, item, id)

inorder(root)

aapatwork : {200} ->
aaron : {1192} ->
ab : {955} ->
aba : {2275, 2279, 2280, 2282, 2283, 2287, 2428} ->
abandon : {1286, 1741, 493, 1743, 1453, 1745, 2905} ->
abba : {2925} ->
abbog : {1399} ->
abbott : {366} ->
abbswinston : {2872, 985, 2869} ->
abc : {450, 2149, 3052, 2385, 946, 2387, 2388, 2801, 952, 2393, 2394, 2395} ->
abcnew : {1394} ->
abdul : {466} ->
abe : {3127} ->
abella : {883} ->
abh : {1499} ->
abid : {1779} ->
abil : {1621} ->
abl : {81, 1493} ->
ablaz : {15, 16, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28} ->
abnorm : {1247} ->
abolish : {3127} ->
abomin : {1094} ->
aboooooortiooooonnnnn : {991} ->
aborigin : {2480} ->
abort : {2936, 439} ->
abound : {1145} ->
abrianna : {1289} ->
absol : {2039} ->
absolut : {416, 1634, 1187, 836, 2210, 2278, 1641, 1646, 1201, 2165, 2231, 1880, 2201, 1274} ->
abstorm : {3072, 1745, 1740, 1390} ->
abt : {3249} ->
abu : {1125, 2986, 1140, 1149, 1087} ->
academi : {2016} ->
acapella : {322} ->
accept : {420, 1957} ->
access : {259, 1861, 2119}

approxim : {1724} ->
april : {1953} ->
aqgco : {802} ->
aquariu : {2809} ->
aquat : {1753} ->
ar : {2914, 2907, 2909} ->
arab : {1387, 587} ->
arabia : {2752, 417, 2754} ->
arabian : {2761, 2763, 2758} ->
arami : {812} ->
arc : {756} ->
arcadia : {1720} ->
archipelagowolv : {660} ->
architect : {3096, 486} ->
archiv : {2845, 2165, 2390, 2167} ->
area : {1952, 682, 1419, 1678, 1841, 1460, 181, 182, 599, 1429, 185, 1430, 2012, 3069, 1054} ->
arent : {2667} ->
argentina : {1212} ->
argument : {1649} ->
ari : {2555} ->
arizona : {105, 1926} ->
arm : {1865, 1130, 3211, 467, 3035, 1021, 703} ->
armageddon : {142, 143, 144, 145, 146, 147, 148, 149} ->
armi : {151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 802, 163, 164, 165, 804, 806, 2732, 2733, 2734, 2735, 2736, 2737, 2738, 2739, 2742, 2743, 3255, 2745, 3129, 986, 3188} ->
armori : {682} ->
armstrong : {1203} ->
arnhem : {2465, 2466} ->
arnley : {1176, 1171, 1166} ->
around : {1273, 1165, 786, 147, 1427, 678, 3259, 1597, 1989, 1350,

bitterroot : {1709} ->
biven : {1846} ->
bizinsid : {2305} ->
bjp : {1977, 1690} ->
bk : {3201} ->
bl : {2417} ->
black : {391, 2576, 914, 153, 3226, 35, 2474, 2867, 566, 1847, 1850, 1851, 1852, 1083, 1853, 1854, 3004, 1860, 1862, 1863, 841, 206, 2384, 213, 1628, 2655, 1121, 869, 1638, 1127, 2412} ->
blackberri : {1496, 1495} ->
blackforestgateau : {2140} ->
blackhat : {1861} ->
blacklivesmatt : {2239} ->
blair : {635} ->
blame : {1690, 2292, 3038, 2326} ->
blanket : {2898, 1372, 2039} ->
blast : {3218, 310, 1093, 2762, 1996, 2000, 1889, 1892, 1893, 2022, 1895, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2036, 1910} ->
blaze : {290, 293, 294, 297, 298, 299, 300, 302, 303, 304, 305, 306, 307, 309, 310, 311, 312, 313, 314, 316, 3141, 3145, 1617} ->
blazer : {237} ->
bldi : {2874} ->
blea : {2462} ->
bleach : {1479} ->
bleed : {320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 1479, 317, 318, 319} ->
bleedinglov : {322} ->
bless : {1037, 1038, 1042, 2804, 1046, 3135} ->
blew

calif : {1449} ->
california : {1702, 167, 1449, 1322, 1705, 1706, 1834, 2096, 1841, 180, 312, 316, 199, 2381, 2382, 2383, 2256, 2385, 3149, 2259, 2387, 2388, 2389, 2391, 2264, 2393, 2394, 2395, 3156, 1373, 3159, 3162, 3168, 3173, 2919, 3150, 1258, 3153} ->
call : {2945, 1935, 2451, 2196, 1566, 36, 2854, 1069, 686, 687, 1583, 2609, 691, 573, 2366, 3014, 1225, 2636, 2941, 80, 3025, 338, 2943, 219, 3036, 1375, 228, 488, 617, 872, 492, 1134, 623, 1394, 1652, 1397, 2165, 2932, 632, 1402, 765, 2047} ->
caller : {1739} ->
callofduti : {2332} ->
callofmini : {3109} ->
calm : {1755, 405, 2638} ->
calori : {579, 565} ->
cambridg : {1733} ->
camcord : {32} ->
came : {929, 163, 1732, 1414, 2638, 2927, 2416, 3056, 410, 2365} ->
cameo : {1099} ->
camera : {32, 2215, 429, 431, 436} ->
cameroon : {2400, 2402, 2405, 2408, 2778, 2397} ->
camo : {2121, 429} ->
camouflag : {428, 429} ->
camp : {705, 1934, 3184, 915, 1915, 1950} ->
campaign : {1258, 499, 636, 1047} ->
campbel : {395} ->
campeonato : {1114

collab : {2637} ->
collap : {513, 514, 515, 516, 517, 518, 3075, 520, 521, 522, 523, 524, 525, 1432, 1439, 679, 700, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 3154, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 2154, 1515, 1516, 511} ->
collect : {2495, 236, 2790, 2103} ->
colleg : {2299, 782} ->
colli : {768, 769, 770, 771, 772, 773, 774, 2703, 2714, 765, 766, 767} ->
collid : {2432, 1419, 1585, 739, 740, 741, 742, 743, 744, 746, 747, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 760, 761, 762, 763, 764} ->
collin : {48, 2013, 1936, 2006} ->
collud : {272, 283, 279} ->
color : {2418} ->
colorado : {1608, 1681, 1075, 246, 2074} ->
coloradoav : {244} ->
coloradoavalanch : {238} ->
colorlin : {2071} ->
colosseum : {822} ->
colouj : {976} ->
colour : {964, 965, 967, 969, 970, 972, 973, 977, 979, 2681} ->
colt : {1950} ->
columbia : {3172, 1607} ->
columbu : {1352, 193, 1355} ->
com : {211} ->
combat : {1312, 2133, 1309, 285

dat : {291, 566, 2623} ->
data : {162, 1860, 1852, 1862, 968, 1863, 1847, 1850, 1851, 956, 1853, 1854} ->
databa : {805} ->
date : {2210, 176, 2967, 3154, 1813, 2293, 2710, 153, 2460, 157, 2974} ->
daughter : {372, 2973, 2366, 3063} ->
dave : {40} ->
davi : {2221} ->
david : {1349, 1646, 2483, 55, 1336} ->
dawg : {2988} ->
day : {2818, 2692, 2823, 1801, 2314, 3209, 3086, 25, 2203, 927, 3103, 2081, 2210, 677, 807, 1959, 2983, 1450, 1962, 2476, 1837, 2222, 2991, 3242, 3249, 563, 1206, 2230, 1477, 1607, 2504, 3068, 1747, 980, 87, 2398, 2401, 358, 742, 1126, 1382, 622, 3183, 1521, 754, 2161, 1146, 635, 2556, 1023} ->
dayfix : {565} ->
dc : {2614} ->
dctographi : {2614} ->
dd : {101} ->
de : {1024, 2785, 1765, 1114, 701} ->
deactiv : {1379, 3246} ->
dead : {896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 409, 1064, 1591, 2362, 452, 2763, 79, 1231, 3198, 3199, 1120, 1128, 1131, 3187, 630, 1910, 3192, 1914, 894, 895} ->
deadli : {417, 2268, 3078} ->
d

duplic : {1675} ->
durden : {1788} ->
dure : {3073, 515, 2673, 3128, 3066, 2590} ->
durham : {3137} ->
dust : {1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 2628, 1710, 1307, 1308, 1309, 1310, 1311} ->
duststorm : {1320} ->
dutch : {2843, 1515, 1516} ->
duti : {3025} ->
dvc : {1690} ->
dvd : {1096, 426} ->
dw : {1288, 2762} ->
dwarf : {1116} ->
dwell : {1530} ->
dwight : {55} ->
dwindl : {1661} ->
dy : {777, 1922, 1087, 23} ->
dysfunct : {2631} ->
dysfunctionalredlin : {1050} ->
e : {772, 1414, 73, 1930, 1771, 972, 1932, 3148, 495, 1328, 851, 2584, 2490, 831} ->
ea : {3245} ->
eagl : {1714, 2125} ->
ear : {323, 37, 329, 330, 1360, 1468} ->
earli : {2720, 2723, 1732, 410, 2586, 2107} ->
earlier : {1057, 2377, 1070, 1143, 2427} ->
earn : {2832, 222} ->
earnest : {1846} ->
earphon : {1360} ->
earth : {1508, 2693, 359, 1481, 2796, 2799, 1489, 147, 2840} ->
earthquak : {1, 5, 1548, 2585, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1329, 1330, 3258, 1100, 3021, 3020, 3030, 2160, 2161} 

facilitiesmanag : {1798} ->
fact : {1476, 932, 1894, 2311, 2468, 2470, 2983, 2859, 3111, 749, 755, 2675, 2236, 2974} ->
factor : {3083} ->
factori : {1352, 1355, 693} ->
fade : {1757} ->
fag : {80} ->
fahlo : {1718} ->
fail : {2154, 2284, 3153, 1690, 667} ->
failur : {656, 2716, 2717, 2718, 2719, 2720, 2721, 2722, 2723, 2724, 2725, 678, 2726, 2727, 2728, 542, 1535, 2729, 2730, 2163, 895} ->
fair : {275, 621, 2919} ->
fairfax : {466} ->
fairli : {2374} ->
fairport : {787} ->
faisal : {2870} ->
fake : {497, 683, 2995} ->
faker : {2469} ->
fal : {2851, 1957, 755, 1750, 1754} ->
fall : {2816, 406, 1058, 2596, 44, 694, 696, 697, 698, 699, 700, 1723, 702, 703, 704, 705, 706, 707, 328, 2635, 1647, 1916, 1278} ->
fallaci : {2791} ->
fallout : {261, 1725} ->
fame : {2067} ->
famili : {386, 2690, 393, 1293, 910, 2833, 2839, 798, 670, 3231, 801, 3233, 1444, 3236, 3239, 424, 680, 2347, 558, 944, 3133, 3193, 2631, 718, 1231, 1104, 1878, 2265, 2266, 1243, 219, 2267, 2270, 2271, 2272, 2273, 2274, 253

fun : {3045, 2695, 744, 3111, 1420, 3184, 307, 1492, 789, 379, 2366} ->
funch : {2106} ->
function : {1498} ->
fund : {1690, 1383} ->
fundrai : {1827} ->
funfact : {749} ->
funni : {224, 3045, 203, 1931, 342, 1979} ->
funniest : {832} ->
funtenna : {1860, 1862, 1863, 1847, 1850, 1851, 1852, 1853, 1854} ->
furi : {241} ->
furnitur : {736, 1652} ->
fusionfestiv : {2425} ->
futur : {2981, 362, 2477, 2802, 3058, 1752, 956} ->
futurist : {137, 125} ->
fy : {2553} ->
g : {288, 2595, 1993, 2613, 599, 1567} ->
ga : {2257, 2579} ->
gabon : {2587, 2594, 2579} ->
gabriel : {2213} ->
gadget : {773, 774, 949} ->
gaelit : {1494} ->
gag : {1199} ->
gaga : {1686} ->
gah : {1627} ->
gain : {2179, 1228, 3173, 1999} ->
gal : {1250} ->
galact : {2720, 2723, 2724, 2726, 2728, 2729, 2716, 2717, 2718, 2719} ->
gallipoli : {249, 255} ->
gallup : {2249} ->
gambit : {2768} ->
gambl : {1075} ->
game : {2061, 2961, 402, 1555, 405, 22, 1947, 3103, 1954, 419, 675, 2467, 3111, 1079, 2237, 459, 1997, 1502, 375, 2532,

harrup : {2250} ->
harsh : {1748} ->
harun : {2745, 2739, 3255} ->
harvey : {2512} ->
hasaka : {3257} ->
hashtag : {616, 2209, 92} ->
hat : {1860, 1852, 1862, 8, 1863, 2121, 1847, 1850, 1851, 1628, 1853, 1854} ->
hatchet : {219} ->
hate : {1568, 327, 2893, 2356, 629, 533, 2071, 2650, 3097, 2202, 667, 1020, 1086, 31} ->
hatr : {2245} ->
hattrick : {3251} ->
haunt : {2829, 2838, 2831} ->
hav : {1288} ->
havin : {1134, 958} ->
havoc : {850} ->
hawaii : {1321, 3091, 3093, 3094} ->
hay : {1194, 2790} ->
hayden : {409} ->
hazard : {3261, 2262, 1756, 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, 1767, 1768, 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1783, 1784, 1785, 1786} ->
hazzel : {1765} ->
hbwahl : {1571} ->
hc : {2713} ->
hcxvneoav : {642} ->
hd : {209, 1698} ->
head : {795, 2560, 163, 2917, 3045, 231, 2695, 3162, 3024, 3069, 1490, 981, 2550, 1465, 762, 317, 765, 990} ->
headach : {918} ->
headlin : {352, 1315, 1548, 1926} ->
headlinesapp : {29

incid : {3170, 755, 1941, 1942} ->
incit : {2472} ->
incl : {548, 1887} ->
includ : {2160, 1049, 2756, 806} ->
incompet : {3081} ->
incr : {913, 1261, 1982} ->
increa : {1048, 874} ->
increasingli : {948} ->
incubustour : {2832} ->
ind : {2157} ->
indi : {1219} ->
india : {2880, 1921, 1890, 1891, 1064, 779, 1966, 1039, 2830, 1041, 1689} ->
indian : {3240, 941, 2182, 2879} ->
indiana : {1978, 1603} ->
indiannew : {1069} ->
indic : {1168, 3211} ->
indiff : {2245, 413} ->
indiscrimin : {550} ->
individu : {1602, 2963} ->
indo : {2180, 2182} ->
indoor : {44} ->
industri : {2802, 2725} ->
industryinsight : {202} ->
ine : {1772} ->
ineedcak : {2142} ->
infami : {2081} ->
infect : {936} ->
infinit : {2250} ->
inflict : {3204} ->
influenc : {1970, 1346, 1471} ->
info : {2005, 869, 3012, 645} ->
inform : {674, 1327, 1969, 530, 211, 2323, 1493, 1621, 2293, 2327, 1502} ->
infosec : {1876} ->
infrastructur : {519} ->
infuri : {1025} ->
iniqu : {3200} ->
initi : {3084} ->
inj : {771, 772} ->
injur 

liverpool : {688, 235} ->
livesci : {3058} ->
livesmatt : {1619} ->
livingontheedg : {248} ->
liz : {933} ->
lizard : {371, 1063} ->
lizeth : {3201} ->
lizzi : {750} ->
lj : {1286} ->
lloyd : {2328} ->
lm : {873} ->
lmao : {752, 2073, 2678} ->
lmfao : {497, 813, 333} ->
lmk : {813} ->
lo : {241, 3258, 732} ->
load : {1809, 2983} ->
loan : {2107, 2483, 349} ->
lobbi : {1492} ->
lobbyist : {2477} ->
lobster : {2683} ->
local : {1441, 453, 1608, 1485, 2322, 1043, 2901, 2199, 1081, 1243, 1723, 2750, 767} ->
localeventcountdown : {869} ->
localplumb : {1403} ->
lock : {2649, 420, 1406} ->
lockdown : {1703} ->
locksmith : {1406} ->
lockstep : {724} ->
locomot : {2658, 2652} ->
loft : {2561} ->
lofti : {2666} ->
log : {3104, 3009} ->
logic : {1649, 997} ->
logo : {1628} ->
loki : {2066} ->
lol : {778, 1552, 2453, 406, 1562, 799, 2086, 2216, 3117, 3246, 1199, 2994, 1075, 2611, 2358, 571, 61, 1218, 2628, 343, 1123, 2660, 2792, 745, 1899, 2673, 1269, 124} ->
lolol : {3244} ->
london : {1415, 148

might : {290, 707, 1830, 811, 1360, 145, 2609, 1395, 888, 696, 1340} ->
mightyworld : {1236} ->
migrant : {1280, 1281, 1282, 1284, 2437, 2438, 2439, 1288, 2442, 2443, 2444, 2446, 2447, 2450, 2834, 1304, 2415, 2430, 1279} ->
migrat : {2580} ->
mikejo : {1020} ->
mild : {1935} ->
mile : {1088, 675, 1028, 2424, 2319, 85, 1720, 761, 765} ->
milit : {1920, 1921, 1891, 1908, 2772, 214, 2775, 1912, 1883} ->
militari : {3208, 2120, 2121, 2122, 2123, 2124, 2125, 2126, 2127, 2128, 2129, 2130, 2131, 2132, 2133, 2134, 2772, 1400} ->
milk : {2029} ->
mill : {2248, 452, 2231} ->
miller : {2847} ->
million : {683, 2222, 2935, 2990, 1906, 627, 631} ->
miloko : {3191} ->
miltari : {1887} ->
min : {3170, 1381, 741, 39, 1642, 2954, 2701, 848, 2673, 2711} ->
mind : {2497, 2468, 1510, 2470, 3245, 3150, 2707, 884, 1365, 2963, 344, 2234, 3067} ->
mindkil : {2239} ->
mindset : {2130} ->
mine : {1183, 1058, 1063, 1659, 938, 1009, 1657, 1809, 1655, 1177, 1179, 2940, 287} ->
minecraft : {1093, 137, 139, 1388, 14

nervou : {3226} ->
net : {2428, 1677} ->
netbot : {1677} ->
netflix : {2441, 2630} ->
nether : {1997} ->
netnewsledg : {3146} ->
neutron : {3109} ->
never : {3201, 2946, 905, 785, 148, 1434, 1568, 290, 2989, 2225, 1080, 954, 573, 2493, 2755, 458, 2636, 1361, 978, 1365, 1752, 473, 2392, 91, 2140, 1508, 485, 743, 1129, 234, 876, 2541, 3186, 892, 2813, 1790} ->
new : {513, 3075, 1029, 3078, 2057, 3083, 3090, 3096, 3098, 2089, 44, 46, 2606, 2101, 3127, 1599, 2627, 2630, 2121, 3146, 590, 81, 1624, 1628, 2143, 107, 1643, 3179, 2670, 2161, 1140, 2677, 1149, 2181, 1709, 1727, 2751, 195, 719, 2769, 1238, 1253, 1788, 779, 791, 2839, 2842, 797, 1309, 1312, 1313, 1314, 2337, 2856, 2348, 2349, 1326, 2350, 2351, 2861, 2355, 2359, 2885, 2381, 2383, 2902, 2392, 1384, 1898, 2413, 2417, 371, 1910, 2427, 379, 2435, 390, 395, 400, 2962, 2964, 1946, 1949, 2980, 427, 2479, 434, 435, 2483, 437, 440, 3016, 3020, 3021, 462, 1495, 1496, 1498, 1506, 1009} ->
newauthor : {2968} ->
newbi : {2089} ->
newborn : {203

packag : {263} ->
packard : {1205} ->
packet : {3101} ->
pact : {2180, 2182} ->
pad : {2576} ->
pagasa : {1685} ->
page : {1009, 44, 1471} ->
paid : {2986, 290, 1677} ->
pain : {576, 3213, 3215, 2229, 2041, 2812} ->
paint : {2972} ->
paintbal : {2121} ->
pair : {2576} ->
pak : {2865, 2874, 804} ->
pakistan : {2848, 802, 2852, 2087, 424, 2855, 2858, 588, 116} ->
pakistani : {2865, 75, 84, 1868} ->
paladin : {1222} ->
palestin : {654} ->
palestinian : {1000, 3208, 3212, 1231, 1012, 2869} ->
palin : {2195} ->
pamanian : {450} ->
pamela : {225, 227, 217, 218, 220, 221} ->
panda : {1436} ->
pandaw : {1688} ->
pandem : {983} ->
pandemonium : {2275, 2276, 2277, 2278, 2279, 2280, 2281, 2282, 2283, 2284, 2285, 2287} ->
pander : {874} ->
pandora : {1633} ->
panel : {1762, 382} ->
panic : {2947, 201, 2299, 2288, 2289, 2290, 2291, 212, 2292, 2293, 2294, 2296, 2297, 2298, 2295, 2300, 734} ->
panick : {2304, 2305, 2306, 2307, 2308, 2309, 2310, 2311, 2312, 2313, 2314, 2315, 2316, 2317, 2301, 2302, 23

prepar : {1793, 1378, 649, 1674, 1461, 664, 3001} ->
prepper : {2154, 133} ->
prescient : {2888} ->
preseason : {3250, 1909, 246} ->
present : {2818, 1156, 1164, 242, 1139, 890} ->
preserv : {992, 1211} ->
presid : {664, 2056, 659, 2966} ->
presqu : {3184} ->
press : {2382, 2960, 1298, 1299, 1875, 1589, 2259, 1303, 1144, 506, 2264} ->
presstitut : {2879} ->
pressur : {388} ->
pretend : {35, 1837, 3014} ->
pretti : {1122, 2916, 3014, 1289, 426, 2012, 844, 2064, 657, 789, 2197, 377, 1308} ->
prevail : {1370, 1318} ->
prevent : {530, 2340, 629, 2071} ->
preview : {2089} ->
previou : {958} ->
previouslyondoyintv : {17} ->
price : {1937, 827, 2117, 55} ->
pride : {1115} ->
primarili : {674, 2878} ->
prime : {651} ->
princ : {138} ->
princess : {2418, 487} ->
print : {111, 434, 469, 1498, 382} ->
prior : {2225} ->
prioriti : {2900} ->
prison : {1241, 1882} ->
privat : {200, 533} ->
privati : {1220} ->
privileg : {86} ->
privleg : {614} ->
prize : {1531} ->
prkng : {97} ->
pro : {2155, 805} -

reggi : {3034} ->
regim : {2780, 2862} ->
regiment : {98} ->
region : {2688, 1661, 2921, 2010, 2589} ->
regist : {1409, 532, 1373, 1405, 1407} ->
regul : {673} ->
regular : {392, 842, 2155, 935} ->
regulatori : {2177, 2181} ->
rehear : {2692} ->
reid : {1583} ->
reject : {1325, 647} ->
rejuvenati : {1450} ->
rel : {1970, 1979} ->
relat : {386, 3171, 1043, 2291, 2364, 3223, 380, 1150, 3039} ->
relationship : {816, 3201, 203, 2799} ->
relax : {1934} ->
relea : {2950, 1545, 2954, 2957, 2084, 2732, 2733, 2734, 2735, 2736, 2737, 1458, 2738, 2100, 2739, 2742, 2743, 3255, 2745, 198, 1991, 2256, 1882, 3170, 756} ->
relentless : {413} ->
reli : {1612} ->
reliabl : {1521} ->
relief : {2096, 1969, 1237, 1690, 1661} ->
religi : {3031} ->
religion : {2472, 2204} ->
remad : {408} ->
remain : {1889, 1059, 420, 364, 1392, 147, 1979} ->
remak : {2839} ->
remark : {359} ->
remedyyyy : {2939} ->
rememb : {2824, 147, 407, 1707, 2229, 1207, 2365, 1345, 450, 2774, 1752, 728, 2659, 1129, 1898, 2793, 2934, 11

scfd : {3174} ->
schella : {46} ->
schemat : {2236} ->
scheme : {185, 181, 182} ->
scholar : {2232, 2233} ->
school : {1667, 782, 2964, 790, 1430, 2596, 1834, 1451, 1965, 558, 1840, 1841, 1842, 2120, 1100, 341, 1111, 728, 2778, 1765, 2789, 2033} ->
schoolboy : {46} ->
schoolgirl : {2845, 215} ->
scienc : {3234, 1484, 1325, 81, 2005, 2971} ->
sciencedaili : {2800} ->
scientist : {2119} ->
scissor : {2689, 2418} ->
scone : {3101} ->
score : {2452} ->
scoreless : {2020} ->
scorpion : {854} ->
scorpionpilot : {865} ->
scotland : {605, 2079} ->
scottsdal : {633} ->
scotu : {2242} ->
scratch : {327} ->
scream : {2560, 2561, 2562, 2563, 2564, 2565, 2566, 2567, 2568, 2569, 2570, 2571, 2572, 2573, 2574, 2551, 2500, 2530, 2531, 2148, 2532, 2533, 2534, 2535, 2536, 2537, 2538, 2539, 2540, 2541, 2542, 2543, 2544, 2545, 2546, 2547, 2548, 2549, 2550, 2552, 2553, 2554, 2555, 2556, 2557, 2558, 2559} ->
screen : {1496, 1735, 1495} ->
screenshot : {1312, 1309, 470} ->
screw : {128, 2625, 3114} ->
script 

smucker : {1274} ->
smug : {2052} ->
smuggler : {2986} ->
sn : {2216, 778} ->
snack : {2817, 2038} ->
snag : {1598} ->
snap : {2544, 1628, 333} ->
snapchat : {203} ->
snction : {1887} ->
sneak : {3136} ->
sni : {1118} ->
snicker : {2034} ->
snide : {359} ->
snipe : {2625, 1639} ->
snow : {1721} ->
snowbacksunday : {2679} ->
snowden : {1896} ->
snowi : {376} ->
snowman : {3024} ->
snowmobil : {2673} ->
snowstorm : {2663, 2664, 2665, 2666, 2667, 2668, 2669, 2670, 2671, 2672, 2674, 2675, 2676, 2677, 2678, 2679, 2680, 2682, 2683} ->
snowstormi : {2673} ->
soar : {605} ->
soccer : {3024, 1204} ->
social : {1794, 643, 1539, 1509, 2042, 647, 871, 2468, 2470, 1614, 751, 336, 2867, 2040, 634, 2299} ->
societi : {1420, 669, 679} ->
sociologist : {1970} ->
socket : {1356} ->
soft : {428} ->
soil : {270} ->
sokoto : {1124} ->
soldier : {2882, 3212, 1008, 2866, 2777} ->
solemn : {2336} ->
solid : {1652, 468} ->
solo : {2804} ->
solut : {1491} ->
solv : {168, 940} ->
somalia : {2587, 2594, 2579} ->


surgeon : {3109} ->
surgeri : {392} ->
surnam : {631} ->
surpri : {2353} ->
surrend : {982} ->
surrey : {189} ->
surround : {2963, 1805} ->
surveil : {197} ->
survey : {2577, 2590, 1462, 2583} ->
surviv : {2816, 2817, 2818, 2819, 2820, 2821, 2822, 2823, 2824, 137, 906, 2825, 2826, 2827, 1296, 2840, 2807, 2808, 561, 3064, 1467, 1726, 2813, 1368, 1371, 1372, 2793, 2794, 2795, 2796, 2797, 2158, 2798, 2799, 2800, 2801, 2802, 2803, 2804, 2805, 503, 2806, 2809, 2810, 2811, 2812, 125, 2814, 2815} ->
survivor : {2442, 2829, 2830, 2447, 2831, 2834, 2835, 2836, 2838, 2839, 2841, 2842, 2844, 2846, 2847, 2230, 2156, 1147, 2430} ->
survivorsguidetoearth : {2832} ->
suscept : {351} ->
susfu : {1311} ->
susp : {3192} ->
suspect : {1920, 1921, 2318, 2324, 2978, 167, 180, 184, 192, 196, 199, 207, 3196, 214, 476, 3187, 1908, 3189, 3191, 3192, 2428, 3197, 3198, 3199} ->
suspen : {884} ->
suspici : {1325} ->
suze : {438} ->
sw : {2320} ->
swain : {2002} ->
swallow : {2528, 2598, 585, 2601, 2603, 2604, 251

tone : {442, 1998} ->
tonedidit : {911} ->
tongu : {3040, 3033, 3031} ->
toni : {635, 1605} ->
tonight : {1920, 1921, 2690, 1799, 3219, 2082, 548, 1316, 2093, 2094, 2099, 2108, 1727, 1620, 1622, 2142, 2801, 1908, 2038} ->
tonka : {1589} ->
tonto : {106, 109, 110, 114, 115, 117, 118, 120, 121, 122, 123} ->
took : {2810, 2955, 1868, 2224, 2768, 1238, 2455, 1562, 253} ->
tool : {1628, 2125} ->
toopain : {2147} ->
toothpast : {1753} ->
toothsom : {1553} ->
top : {1546, 3237, 1192, 2351, 950, 2359, 2487, 1721, 964, 965, 967, 2887, 969, 970, 972, 973, 78, 975, 976, 979, 470, 1494, 2398, 2401, 2423, 3071} ->
topix : {355} ->
topstori : {2954} ->
torch : {1372, 189} ->
tore : {1562} ->
torn : {3107} ->
tornado : {2917, 2918, 2919, 2920, 2921, 2924, 2926, 2927, 1680, 2161, 2930} ->
tornadogiveaway : {2929, 2923} ->
toronto : {193, 2590} ->
torpedo : {2784} ->
tortur : {3074, 579} ->
toru : {211, 1981, 1975} ->
toshikazukatayama : {1548} ->
toss : {2593} ->
total : {2240, 1121, 1127, 2252, 622, 

vandali : {990} ->
vandalz : {533} ->
vanessa : {1467} ->
vapor : {1756} ->
variat : {58} ->
varieti : {1368, 1371} ->
vassal : {1238} ->
vaulter : {43} ->
vavazouma : {1636} ->
vbac : {2665} ->
vega : {1546, 45, 3007} ->
veget : {1368, 1371} ->
vegetarian : {188} ->
veggi : {2671} ->
vehicl : {32, 77, 2125, 784, 755, 2620, 189, 2622} ->
veil : {2632, 753} ->
veld : {1454} ->
vendor : {1876} ->
venezuela : {2292, 2958} ->
vent : {1762, 2411} ->
ver : {2046} ->
veri : {104, 2131, 2668} ->
vermont : {1543} ->
verrri : {2542} ->
verseth : {1481, 1489} ->
version : {1719} ->
versu : {3057} ->
vessel : {2592, 1749} ->
vet : {2414} ->
veteran : {401, 1611, 2854} ->
veterinarian : {2419} ->
vfp : {2854} ->
vh : {698} ->
vi : {1563} ->
via : {1028, 2060, 21, 1047, 542, 2603, 1079, 1085, 1600, 1601, 2624, 2120, 587, 1103, 2130, 88, 89, 1118, 2149, 2151, 106, 2155, 108, 109, 110, 1645, 114, 115, 117, 118, 119, 120, 121, 122, 123, 2679, 639, 640, 2176, 3207, 3208, 2697, 1684, 1685, 1179, 3238, 68

winston : {1693} ->
winter : {3081, 2677, 2670} ->
wintergreen : {1464} ->
wip : {3136} ->
wipe : {98} ->
wire : {1348, 2343, 592, 1365, 2331, 1372, 2333, 575} ->
wirelessli : {1821} ->
wisdom : {1962, 997, 1007} ->
wise : {1566} ->
wisenew : {2491, 2484, 2502} ->
wish : {2209, 3055, 50, 1939, 1269, 1334, 1335, 793, 2619, 319} ->
wit : {2112, 614, 2287, 1624, 411, 3103} ->
witch : {482} ->
wither : {139, 141, 126} ->
within : {1208, 1474, 1675} ->
without : {2275, 2308, 2279, 1896, 2280, 2282, 2283, 2317, 3118, 2063, 2287, 3219, 2132, 2487, 3259, 3166, 351} ->
wlb : {719} ->
wmata : {1050, 1060, 1061} ->
wmur : {552} ->
wnba : {2714, 2703} ->
wo : {2738} ->
woke : {2908, 757} ->
wolf : {409, 660} ->
wolfgangjoc : {451} ->
wololo : {2873} ->
wolverin : {3170, 3165} ->
woman : {2946, 2445, 2318, 2195, 2837, 2971, 157, 2974, 3104, 932, 810, 427, 1583, 2992, 435, 437, 566, 440, 1224, 2124, 2126, 1875, 468, 469, 1237, 1750, 1754, 604, 1378, 2275, 2279, 2280, 2282, 2283, 620, 2287, 2809, 294

In [31]:
try:
  start = time.perf_counter()
  print(hash_table["zero"])
  end = time.perf_counter()
  print("Time :", end - start)
except:
  print("Word to be searched does not exist in the hash table")

{618, 1132, 879, 2426, 1887}
Time : 0.0003326999999941904


In [32]:
try:
  start = time.perf_counter()
  print(search(root, "zero"))
  end = time.perf_counter()
  print("Time :", end - start)
except:
  print("Word to be searched does not exist in the binary search tree")

{618, 1132, 879, 2426, 1887}
Time : 0.0003989000000004239


In [33]:
wildcard = "spa*e"
for i in hash_table.keys():
  if re.search(wildcard, i):
    print(hash_table[i])

{1736, 150}
{1920, 1921, 2318, 2324, 2978, 167, 180, 184, 192, 196, 199, 207, 3196, 214, 476, 3187, 1908, 3189, 3191, 3192, 2428, 3197, 3198, 3199}
{193}
{261}
{313, 2435, 303}
{624, 537, 1360, 2073}
{557}
{1514, 1614, 626, 2516, 1207}
{633, 2978}
{658, 1295}
{737}
{884}
{1106, 2997}
{1148}
{3014, 2836, 1206, 1687, 3099, 1759}
{2912, 2756, 3144, 1333, 1884, 1886}
{1476}
{1696, 1697, 2401, 2310, 1822, 2398}
{2041}
{2119}
{2721, 3179, 2189}
{2344, 2341}
{2576, 2625, 2644, 2701}
{2656}
{2888}
{2983}
{3047}


In [34]:
grams = []
k = 3
query = "spa*e"


if "*" not in query[:k-1]:
  grams.append(query[:k-1])
  
for i in range(len(query)-k+1):
  if "*" not in query[i:i+k]:
    grams.append(query[i:i+k])

if "*" not in query[-k+1:]:
  grams.append(query[-k+1:])

print(grams)

['sp', 'spa']


In [35]:
gdict = dict()
for i in grams:
  gdict[i] = set()
  for j in hash_table:
    if i in j:
      gdict[i].add(j)
for i in gdict.keys():
  print(i, ":", gdict[i])

sp : {'spoke', 'dispos', 'sp', 'perspect', 'esp', 'inspctkp', 'inspir', 'aspect', 'spohn', 'respons', 'darkspawn', 'special', 'sprinter', 'spare', 'inthespiritwebuild', 'desper', 'display', 'spam', 'spring', 'suspici', 'spikebot', 'despit', 'spectacular', 'spanish', 'spliff', 'spoil', 'displac', 'spade', 'despair', 'spaceship', 'gospel', 'suspens', 'spark', 'spit', 'sphere', 'split', 'spill', 'spacer', 'disposit', 'gsp', 'spell', 'sprach', 'splash', 'spur', 'auspol', 'raspi', 'offspr', 'spawn', 'speaker', 'disrespect', 'cesspool', 'spray', 'speed', 'spd', 'inspect', 'hospit', 'spear', 'speak', 'spent', 'conspiraci', 'spitter', 'myfitnessp', 'susp', 'transport', 'transplante', 'esport', 'spitti', 'spo', 'spirit', 'espresso', 'sportinggood', 'spot', 'speech', 'sport', 'atmospher', 'sportwatch', 'space', 'specimen', 'sponsor', 'spain', 'respond', 'splattl', 'especi', 'headspac', 'btsprep', 'displeas', 'sprint', 'spectat', 'spong', 'kalispel', 'spend', 'suspect', 'dsp', 'spiritu', 'special

In [36]:
import functools
terms = list(functools.reduce(lambda x,y: x&y, list(gdict.values())))
print(terms)

['spawn', 'spark', 'spam', 'spade', 'spanish', 'headspac', 'spare', 'spacer', 'space', 'despair', 'spain', 'darkspawn', 'spaceship']


In [37]:
res = set()
for i in terms:
  # post filtering
  # if re.search(query, i):
  for j in hash_table[i]:
    res.add(j)
print(res)

{2696, 1677, 1691, 2716, 2717, 1566, 2718, 2724, 2728, 2729, 1578, 1712, 2868, 2488, 1082, 2498, 2884, 2503, 1097, 1997, 980, 2776, 1121, 1639, 3176, 1127, 746, 1387, 360, 2291}


In [38]:
def rotate(str, n):
    return str[n:] + str[:n]

In [39]:
permuterm_index = {}
for key in sorted(pos_list.keys()):
  dkey = key + "$"
  for i in range(len(dkey), 0, -1):
    out = rotate(dkey, i)
    if key not in permuterm_index.keys():
      permuterm_index[key] = set()
    permuterm_index[key].add(out)
for i in sorted(permuterm_index.keys()):
  print(i, ":", permuterm_index[i])

a : {'a$', '$a'}
abl : {'bl$a', 'abl$', '$abl', 'l$ab'}
ablaz : {'blaz$a', 'ablaz$', 'laz$ab', 'z$abla', 'az$abl', '$ablaz'}
accid : {'ccid$a', 'id$acc', '$accid', 'accid$', 'cid$ac', 'd$acci'}
across : {'oss$acr', 's$acros', '$across', 'cross$a', 'ross$ac', 'ss$acro', 'across$'}
afford : {'rd$affo', 'afford$', 'ford$af', 'ord$aff', 'fford$a', '$afford', 'd$affor'}
aftershock : {'rshock$afte', 'k$aftershoc', 'hock$afters', 'ftershock$a', 'ock$aftersh', '$aftershock', 'shock$after', 'ershock$aft', 'ck$aftersho', 'tershock$af', 'aftershock$'}
age : {'age$', 'e$ag', 'ge$a', '$age'}
ago : {'o$ag', '$ago', 'ago$', 'go$a'}
air : {'ir$a', 'air$', '$air', 'r$ai'}
airplan : {'irplan$a', 'plan$air', '$airplan', 'n$airpla', 'rplan$ai', 'an$airpl', 'airplan$', 'lan$airp'}
airport : {'rt$airpo', 'rport$ai', 't$airpor', 'airport$', 'irport$a', 'ort$airp', '$airport', 'port$air'}
alfon : {'on$alf', 'lfon$a', 'alfon$', 'n$alfo', 'fon$al', '$alfon'}
all : {'all$', 'l$al', 'll$a', '$all'}
ambul : {'ul$a

driver : {'r$drive', 'iver$dr', '$driver', 'ver$dri', 'driver$', 'river$d', 'er$driv'}
drodrolagi : {'agi$drodrol', 'gi$drodrola', 'odrolagi$dr', 'rodrolagi$d', '$drodrolagi', 'olagi$drodr', 'rolagi$drod', 'lagi$drodro', 'drodrolagi$', 'drolagi$dro', 'i$drodrolag'}
drunk : {'drunk$', 'runk$d', 'k$drun', 'unk$dr', 'nk$dru', '$drunk'}
dual : {'al$du', 'l$dua', 'dual$', 'ual$d', '$dual'}
dubstep : {'step$dub', 'ep$dubst', '$dubstep', 'tep$dubs', 'dubstep$', 'p$dubste', 'ubstep$d', 'bstep$du'}
due : {'ue$d', 'due$', 'e$du', '$due'}
dwight : {'ight$dw', '$dwight', 'ht$dwig', 'ght$dwi', 't$dwigh', 'wight$d', 'dwight$'}
dy : {'dy$', '$dy', 'y$d'}
e : {'e$', '$e'}
ear : {'r$ea', '$ear', 'ar$e', 'ear$'}
earthquak : {'uak$earthq', 'earthquak$', 'k$earthqua', 'hquak$eart', '$earthquak', 'thquak$ear', 'arthquak$e', 'rthquak$ea', 'ak$earthqu', 'quak$earth'}
east : {'$east', 'east$', 'ast$e', 't$eas', 'st$ea'}
eb : {'b$e', 'eb$', '$eb'}
ebay : {'bay$e', 'ebay$', 'y$eba', '$ebay', 'ay$eb'}
edm : {'ed

jelli : {'$jelli', 'lli$je', 'i$jell', 'jelli$', 'elli$j', 'li$jel'}
jfk : {'k$jf', '$jfk', 'fk$j', 'jfk$'}
joseph : {'h$josep', '$joseph', 'ph$jose', 'seph$jo', 'joseph$', 'eph$jos', 'oseph$j'}
judg : {'judg$', '$judg', 'udg$j', 'dg$ju', 'g$jud'}
junction : {'$junction', 'nction$ju', 'unction$j', 'on$juncti', 'junction$', 'ction$jun', 'n$junctio', 'tion$junc', 'ion$junct'}
just : {'just$', '$just', 'st$ju', 't$jus', 'ust$j'}
ka : {'ka$', '$ka', 'a$k'}
kaiserjaeg : {'aiserjaeg$k', 'g$kaiserjae', 'kaiserjaeg$', 'rjaeg$kaise', 'eg$kaiserja', 'erjaeg$kais', 'jaeg$kaiser', 'iserjaeg$ka', '$kaiserjaeg', 'aeg$kaiserj', 'serjaeg$kai'}
keio : {'$keio', 'io$ke', 'keio$', 'eio$k', 'o$kei'}
kill : {'l$kil', 'kill$', 'll$ki', 'ill$k', '$kill'}
kingston : {'ingston$k', 'ton$kings', 'ngston$ki', 'kingston$', '$kingston', 'n$kingsto', 'gston$kin', 'ston$king', 'on$kingst'}
kira : {'kira$', '$kira', 'ira$k', 'a$kir', 'ra$ki'}
know : {'now$k', 'know$', '$know', 'ow$kn', 'w$kno'}
korean : {'an$kore', 'o

safeti : {'i$safet', 'ti$safe', 'afeti$s', 'safeti$', '$safeti', 'feti$sa', 'eti$saf'}
save : {'save$', '$save', 'e$sav', 'ave$s', 've$sa'}
say : {'ay$s', 'say$', 'y$sa', '$say'}
scar : {'scar$', 'r$sca', '$scar', 'car$s', 'ar$sc'}
schella : {'$schella', 'chella$s', 'lla$sche', 'la$schel', 'a$schell', 'ella$sch', 'schella$', 'hella$sc'}
schoolboy : {'lboy$schoo', 'y$schoolbo', 'oy$schoolb', '$schoolboy', 'oolboy$sch', 'schoolboy$', 'olboy$scho', 'hoolboy$sc', 'boy$school', 'choolboy$s'}
scienc : {'c$scien', 'scienc$', 'nc$scie', 'cienc$s', '$scienc', 'enc$sci', 'ienc$sc'}
se : {'se$', 'e$s', '$se'}
second : {'d$secon', 'econd$s', 'nd$seco', 'cond$se', 'ond$sec', '$second', 'second$'}
secret : {'et$secr', 'secret$', 'cret$se', 't$secre', 'ret$sec', 'ecret$s', '$secret'}
see : {'see$', 'ee$s', 'e$se', '$see'}
seek : {'eek$s', 'seek$', '$seek', 'ek$se', 'k$see'}
self : {'lf$se', 'self$', 'f$sel', 'elf$s', '$self'}
send : {'d$sen', '$send', 'end$s', 'nd$se', 'send$'}
servic : {'$servic', '

In [40]:
keys = hash_table.keys()
def rotate(s,n):
    return s[n:] + s[:n]
permuterm = {}
for key in keys:
    dkey = key + "$"
    for i in range(len(dkey), 0, -1):
        out = rotate(dkey,i)
        permuterm[out] = key

query = "spa*e"
parts = query.split("*")
q = parts[0]

def prefix_match(term, prefix):
    term_list = []
    for tk in term.keys():
        if tk.startswith(prefix):
            term_list.append(term[tk])
    return term_list
def processQuery(q):
    global permuterm, idf
    term_l = prefix_match(permuterm, q)
    print(term_l)
    doc_id = []
    for t in term_l:
        doc_id.append(hash_table[t])
    print(doc_id)
    temp = set()
    for x in doc_id:
        for y in x:
            temp.add(int(y))
    return temp

temp = processQuery(q)

['darkspawn', 'space', 'spam', 'headspac', 'despair', 'spanish', 'spade', 'spark', 'spacer', 'spawn', 'spain', 'spaceship', 'spare']
[{360}, {1121, 746, 2884, 1127}, {980, 1677}, {1082}, {1097}, {1387}, {1566}, {2498, 2503, 3176, 1578, 1712, 2488, 1691}, {1639}, {2291, 1997}, {2696, 2776}, {2724, 2728, 2729, 2716, 2717, 2718}, {2868}]


In [41]:
print("Equality of same wild card query using kgram and permuterm :", res == temp)

Equality of same wild card query using kgram and permuterm : True


In [42]:
'''
mapper_output = []
for i in range(len(stemmed)):
  for token in stemmed[i].split():
    mapper_output.append((token, 1))
for ele in mapper_output:
    print(ele)
'''
mapper_output = []
for key in hash_table.keys():
  for freq in hash_table[key]:
    mapper_output.append((key, 1))
for ele in mapper_output:
  print(ele)

('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('just', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('happen', 1)
('terribl', 1)
('terribl', 1)
('terribl', 1)
('terribl', 1)
('terribl', 1)
('terribl', 1)
('terribl', 1)
('car', 1)
('car', 1)
('car', 1)
('car', 1)
('car', 1)
('

('they', 1)
('they', 1)
('they', 1)
('they', 1)
('probabl', 1)
('probabl', 1)
('probabl', 1)
('probabl', 1)
('probabl', 1)
('probabl', 1)
('probabl', 1)
('probabl', 1)
('probabl', 1)
('probabl', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('still', 1)
('show', 1)
('show', 1)
('show', 1)
('show', 1)
('show', 1)
('show', 1)
('show', 1)
('show', 1)
('show', 1)
('show', 1)
('show', 1)
('show'

('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('amp', 1)
('guap', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)
('burn', 1)


('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('back', 1)
('traffic', 1)
('traffic', 1)
('traffic', 1)
('traffic', 1)
('traffic', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1)
('got', 1

('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('servic', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur', 1)
('injur'

('super', 1)
('super', 1)
('super', 1)
('super', 1)
('super', 1)
('super', 1)
('jelli', 1)
('dave', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('p', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('hit', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('and', 1)
('block', 1)
('bl

('brass', 1)
('copper', 1)
('cataclysm', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('you', 1)
('happi', 1)
('happi', 1)
('happi', 1)
('happi', 1)
('happi', 1)
('happi', 1)
('happi', 1)
('happi', 1)
('happi', 1)
('happi', 1)
('happi', 1)
('happi', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 1)
('use', 

('complet', 1)
('complet', 1)
('complet', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('destroy', 1)
('makeup', 1)
('makeup', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat', 1)
('updat',

('target', 1)
('target', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('blood', 1)
('clot', 1)
('scienc', 1)
('scienc', 1)
('scienc', 1)
('scienc', 1)
('scienc', 1)
('scienc', 1)
('suppli', 1)
('suppli', 1)
('suppli', 1)
('suppli', 1)
('suppli', 1)
('suppli', 1)
('suppli', 1)
('dad', 1)
('dad', 1)
('dad', 1)
('dad', 1)
('dad', 1)
('dad', 1)
('dad', 1)
('held', 1)
('held', 1)
('held', 1)
('held', 1)
('held', 1)
('held', 1)
('held', 1)
('heroin', 1)
('heroin', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('whi', 1)
('ride', 1)
('ride', 1)
('ride', 1)
('ride', 1)
('ride', 1)


('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('red', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('stock', 1)
('hour', 1)
('hour', 1)
('hour', 1)
('hour', 1)
('hour', 1)
('hour', 1)
('hour', 1)
('hour', 1)
('hour', 1)
('hour', 1)
('hour', 1)
('hour', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('sign', 1)
('rt', 1)
('rt', 1)
('rt', 1)
('rt', 1)
('rt', 1)
('rt', 1)
('rt', 1)
('rt', 1)
('rt', 1)
('rt', 

('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('more', 1)
('showcas', 1)
('showcas', 1)
('showcas', 1)
('major', 1)
('major', 1)
('major', 1)
('major', 1)
('major', 1)
('major', 1)
('major', 1)
('major', 1)
('major', 1)
('major', 1)
('reason', 1)
('reason', 1)
('reason', 1)
('reason', 1)
('reason', 1)
('reason', 1)
('reason', 1)
('reason', 1)
('reason', 1)
('reason', 1)
('reason', 1)
('reason', 1)
('reason', 1)
('reason', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('big', 1)
('rr', 1)
('rr', 1)
('miss', 1)
('miss', 1)
('miss', 1)
('miss', 1)
('miss', 1)
('miss', 1)
('miss', 1)
('miss', 1)
('miss', 1)
('miss', 1)
(

('rememb', 1)
('rememb', 1)
('rememb', 1)
('rememb', 1)
('rememb', 1)
('rememb', 1)
('rememb', 1)
('crowd', 1)
('crowd', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('around', 1)
('baggag', 1)
('carousel', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('bag', 1)
('remain', 1)
('remain', 1)
('remain', 1)
('remain', 1)
(

('two', 1)
('two', 1)
('two', 1)
('two', 1)
('two', 1)
('two', 1)
('two', 1)
('two', 1)
('two', 1)
('two', 1)
('two', 1)
('two', 1)
('two', 1)
('two', 1)
('two', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('pay', 1)
('fine', 1)
('fine', 1)
('fine', 1)
('fine', 1)
('fine', 1)
('fine', 1)
('saginaw', 1)
('sketch', 1)
('charlton', 1)
('wineri', 1)
('wehtwtvwloc', 1)
('trial', 1)
('burglari', 1)
('burglari', 1)
('connect', 1)
('connect', 1)
('connect', 1)
('connect', 1)
('shawne', 1)
('olath', 1)
('kan', 1)
('court', 1)
('court', 1)
('court', 1)
('court', 1)
('court', 1)
('court', 1)
('court', 1)
('court', 1)
('wednesday', 1)
('wednesday', 1)
('wednesday', 1)
('wednesday', 1)
('wednesday', 1)
('wednesday', 1)
('accus', 1)
('accus', 1)
('accus', 1)
('accus', 1)
('kzim', 1)
('arrest', 1)
('arrest', 1)
('arrest', 1)
('arrest',

('imagin', 1)
('imagin', 1)
('imagin', 1)
('imagin', 1)
('imagin', 1)
('imagin', 1)
('imagin', 1)
('plenti', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('talk', 1)
('math', 1)
('teacher', 1)
('teacher', 1)
('teacher', 1)
('teacher', 1)
('teacher', 1)
('liverpool', 1)
('liverpool', 1)
('similar', 1)
('similar', 1)
('similar', 1)
('similar', 1)
('rare', 1)
('rare', 1)
('rare', 1)
('rare', 1)
('rare', 1)
('vintag', 1)
('vintag', 1)
('vintag', 1)
('collect', 1)
('collect', 1)
('collect', 1)
('collect', 1)
('windriv', 1)
('edt', 1)
('edt', 1)
('edt', 1)
('edt', 1)
('edt', 1)
('ml', 1)
('ml', 1)
('spray', 1)
('spray', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 1)
('deal', 

('me', 1)
('me', 1)
('me', 1)
('me', 1)
('me', 1)
('dat', 1)
('dat', 1)
('dat', 1)
('creami', 1)
('stuff', 1)
('stuff', 1)
('your', 1)
('your', 1)
('your', 1)
('your', 1)
('your', 1)
('your', 1)
('your', 1)
('your', 1)
('your', 1)
('your', 1)
('your', 1)
('your', 1)
('your', 1)
('your', 1)
('your', 1)
('reaction', 1)
('reaction', 1)
('port', 1)
('port', 1)
('port', 1)
('coquitlam', 1)
('coquitlam', 1)
('crew', 1)
('crew', 1)
('crew', 1)
('crew', 1)
('crew', 1)
('crew', 1)
('crew', 1)
('crew', 1)
('crew', 1)
('crew', 1)
('crew', 1)
('electr', 1)
('electr', 1)
('electr', 1)
('electr', 1)
('electr', 1)
('electr', 1)
('electr', 1)
('towel', 1)
('warmer', 1)
('four', 1)
('four', 1)
('four', 1)
('four', 1)
('four', 1)
('four', 1)
('four', 1)
('alarm', 1)
('alarm', 1)
('alarm', 1)
('alarm', 1)
('alarm', 1)
('alarm', 1)
('sever', 1)
('sever', 1)
('sever', 1)
('sever', 1)
('sever', 1)
('sever', 1)
('sever', 1)
('sever', 1)
('sever', 1)
('sever', 1)
('sever', 1)
('sever', 1)
('sever', 1)
('sever

('cleveland', 1)
('height', 1)
('height', 1)
('height', 1)
('height', 1)
('shaker', 1)
('councilman', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('o', 1)
('neill', 1)
('internet', 1)
('internet', 1)
('internet', 1)
('internet', 1)
('internet', 1)
('loser', 1)
('leav', 1)
('leav', 1)
('leav', 1)
('leav', 1)
('leav', 1)
('leav', 1)
('snide', 1)
('remark', 1)
('upon', 1)
('upon', 1)
('upon', 1)
('upon', 1)
('upon', 1)
('upon', 1)
('import', 1)
('import', 1)
('import', 1)
('import', 1)
('import', 1)
('import', 1)
('import', 1)
('inquisit', 1)
('darkspawn', 1)
('entir', 1)
('entir', 1)
('entir', 1)
('entir', 1)
('entir', 1)
('entir', 1)
('entir', 1)
('member', 1)
('member', 1)
('member', 1)
('member', 1)
('member', 1)
('member', 1)
('member', 1)
('member', 1)
('member', 1)
('member', 1)
('member', 1)
('member', 1)
('member', 1)
('member', 1)
('norwalk', 1)
('ord

('fabric', 1)
('camouflag', 1)
('camouflag', 1)
('outdoor', 1)
('purpl', 1)
('zipper', 1)
('camo', 1)
('camo', 1)
('wallet', 1)
('vuitton', 1)
('vuitton', 1)
('monogram', 1)
('monogram', 1)
('sophi', 1)
('limit', 1)
('limit', 1)
('limit', 1)
('limit', 1)
('limit', 1)
('limit', 1)
('limit', 1)
('limit', 1)
('limit', 1)
('edit', 1)
('edit', 1)
('edit', 1)
('clutch', 1)
('nikon', 1)
('nikon', 1)
('mp', 1)
('mp', 1)
('mp', 1)
('mp', 1)
('mp', 1)
('mp', 1)
('mp', 1)
('mp', 1)
('mp', 1)
('mp', 1)
('digit', 1)
('digit', 1)
('digit', 1)
('digit', 1)
('digit', 1)
('digit', 1)
('slr', 1)
('slr', 1)
('batteri', 1)
('batteri', 1)
('charger', 1)
('charger', 1)
('charger', 1)
('charger', 1)
('charger', 1)
('charger', 1)
('charger', 1)
('brown', 1)
('brown', 1)
('brown', 1)
('brown', 1)
('brown', 1)
('brown', 1)
('brown', 1)
('brown', 1)
('brown', 1)
('brown', 1)
('brown', 1)
('brown', 1)
('brown', 1)
('brown', 1)
('saumur', 1)
('dracula', 1)
('vampir', 1)
('small', 1)
('small', 1)
('small', 1)
('sma

('associ', 1)
('associ', 1)
('associ', 1)
('associ', 1)
('associ', 1)
('associ', 1)
('press', 1)
('press', 1)
('press', 1)
('press', 1)
('press', 1)
('press', 1)
('press', 1)
('press', 1)
('press', 1)
('press', 1)
('press', 1)
('nyt', 1)
('nyt', 1)
('nyt', 1)
('nyt', 1)
('nyt', 1)
('nyt', 1)
('sanitis', 1)
('narr', 1)
('narr', 1)
('bell', 1)
('bell', 1)
('bell', 1)
('bell', 1)
('bell', 1)
('toll', 1)
('toll', 1)
('toll', 1)
('toll', 1)
('toll', 1)
('thursday', 1)
('thursday', 1)
('thursday', 1)
('thursday', 1)
('thursday', 1)
('thursday', 1)
('thursday', 1)
('thursday', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)
('collaps', 1)


('comput', 1)
('comput', 1)
('comput', 1)
('comput', 1)
('comput', 1)
('comput', 1)
('comput', 1)
('comput', 1)
('comput', 1)
('comput', 1)
('comput', 1)
('comput', 1)
('weekend', 1)
('weekend', 1)
('weekend', 1)
('weekend', 1)
('sorri', 1)
('sorri', 1)
('sorri', 1)
('sorri', 1)
('sorri', 1)
('sorri', 1)
('sorri', 1)
('sorri', 1)
('sorri', 1)
('sorri', 1)
('sorri', 1)
('crit', 1)
('matter', 1)
('matter', 1)
('matter', 1)
('matter', 1)
('matter', 1)
('prob', 1)
('prob', 1)
('bibl', 1)
('bibl', 1)
('bibl', 1)
('bibl', 1)
('psalm', 1)
('psalm', 1)
('prodig', 1)
('son', 1)
('son', 1)
('son', 1)
('son', 1)
('son', 1)
('son', 1)
('return', 1)
('return', 1)
('return', 1)
('return', 1)
('return', 1)
('return', 1)
('return', 1)
('return', 1)
('return', 1)
('welp', 1)
('welp', 1)
('dunc', 1)
('accident', 1)
('cigarett', 1)
('cigarett', 1)
('cigarett', 1)
('cigarett', 1)
('pitch', 1)
('pitch', 1)
('inn', 1)
('crazi', 1)
('crazi', 1)
('crazi', 1)
('crazi', 1)
('crazi', 1)
('crazi', 1)
('crazi', 1)

('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('war', 1)
('afghan', 1)
('violenc', 1)
('violenc', 1)
('violenc', 1)
('unfortun', 1)
('unfortun', 1)
('utilis', 1)
('welli', 1)
('aucklnd', 1)
('memori', 1)
('memori', 1)
('memori', 1)
('memori', 1)
('memori', 1)
('memori', 1)
('memori', 1)
('memori', 1)
('memori', 1)
('memori', 1)
('memori', 1)
('memori', 1)
('memori', 1)
('memori', 1)
('wth', 1)
('daniel', 1)
('daniel', 1)
('daniel', 1)
('daniel', 1)
('daniel', 1)
('bunch', 1)
('bunch', 1)
('untrain', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('obama', 1)
('lambast', 1)
('respect', 1)
('respect', 1)
('respect', 1)
('respect', 1)
('respect', 1)
('joke', 1)
('joke', 1)
('joke', 1)
('

('chem', 1)
('explos', 1)
('explos', 1)
('explos', 1)
('explos', 1)
('explos', 1)
('explos', 1)
('explos', 1)
('explos', 1)
('explos', 1)
('explos', 1)
('explos', 1)
('explos', 1)
('explos', 1)
('toxic', 1)
('toxic', 1)
('drill', 1)
('drill', 1)
('drill', 1)
('drill', 1)
('pope', 1)
('pope', 1)
('sept', 1)
('sept', 1)
('sept', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spill', 1)
('spohn', 1)
('shorelin', 1)
('shorelin', 1)
('standard', 1)
('standard', 1)
('behavior', 1)
('mixer', 1)
('purpos', 1)
('purpos', 1)
('jyb', 1)
('bacup', 1)
('bacup', 1)
('bacup', 1)
('smell', 1)
('smell', 1)
('smell', 1)
('smell', 1)
('smell', 1)
('smell', 1)
('smell', 1)
('rossendal', 1)
('hammondvil', 1)
('hammondvil', 1)
('jewel', 1)
('jewel', 1)
('jewel', 1)
('attend', 1)
('attend', 1)
('attend', 1)
('attend', 1)
('rins', 1)

('vamo', 1)
('newel', 1)
('funfact', 1)
('fact', 1)
('fact', 1)
('fact', 1)
('fact', 1)
('fact', 1)
('fact', 1)
('fact', 1)
('fact', 1)
('fact', 1)
('fact', 1)
('fact', 1)
('fact', 1)
('fact', 1)
('fact', 1)
('transgend', 1)
('sam', 1)
('sam', 1)
('sam', 1)
('runaway', 1)
('runaway', 1)
('lizzi', 1)
('explor', 1)
('explor', 1)
('explor', 1)
('explor', 1)
('explor', 1)
('unjust', 1)
('boundari', 1)
('trust', 1)
('trust', 1)
('trust', 1)
('trust', 1)
('trust', 1)
('uel', 1)
('ethic', 1)
('bill', 1)
('bill', 1)
('bill', 1)
('bill', 1)
('bill', 1)
('clinton', 1)
('clinton', 1)
('democrat', 1)
('plant', 1)
('plant', 1)
('plant', 1)
('plant', 1)
('plant', 1)
('plant', 1)
('plant', 1)
('plant', 1)
('plant', 1)
('plant', 1)
('lmao', 1)
('lmao', 1)
('lmao', 1)
('lastword', 1)
('pierc', 1)
('pierc', 1)
('veil', 1)
('veil', 1)
('bracelet', 1)
('bracelet', 1)
('bracelet', 1)
('wristband', 1)
('instead', 1)
('instead', 1)
('instead', 1)
('promot', 1)
('promot', 1)
('fals', 1)
('fals', 1)
('fals', 1

('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('confirm', 1)
('union', 1)
('union', 1)
('aircraft', 1)
('aircraft', 1)
('aircraft', 1)
('aircraft', 1)
('increasingli', 1)
('confid', 1)
('confid', 1)
('analysi', 1)
('analysi', 1)
('conclus', 1)
('conclus', 1)
('conclus', 1)
('conclus', 1)
('conclus', 1)
('conclus', 1)
('conclus', 1)
('conclus', 1)
('conclus', 1)
('conclus', 1)
('rmt', 1)
('befor', 1)
('befor', 1)
('befor', 1)
('befor', 1)
('befor', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('delug', 1)
('

('mum', 1)
('mum', 1)
('cell', 1)
('recal', 1)
('recal', 1)
('recal', 1)
('recal', 1)
('recal', 1)
('recal', 1)
('cp', 1)
('cp', 1)
('corpor', 1)
('corpor', 1)
('wilder', 1)
('kodak', 1)
('graph', 1)
('pvp', 1)
('beacon', 1)
('bombard', 1)
('bombard', 1)
('abomin', 1)
('kindl', 1)
('kindl', 1)
('thriller', 1)
('thriller', 1)
('hobbit', 1)
('hobbit', 1)
('hobbit', 1)
('hobbit', 1)
('smaug', 1)
('smaug', 1)
('smaug', 1)
('smaug', 1)
('smaug', 1)
('disc', 1)
('copi', 1)
('despair', 1)
('cameo', 1)
('colbert', 1)
('amidst', 1)
('aftermath', 1)
('aftermath', 1)
('aftermath', 1)
('aftermath', 1)
('aftermath', 1)
('aftermath', 1)
('olympu', 1)
('interestud', 1)
('lamb', 1)
('sheeran', 1)
('emot', 1)
('emot', 1)
('emot', 1)
('emot', 1)
('emot', 1)
('emot', 1)
('addict', 1)
('addict', 1)
('awar', 1)
('thu', 1)
('thu', 1)
('thu', 1)
('thu', 1)
('thrust', 1)
('enslav', 1)
('enslav', 1)
('spell', 1)
('spell', 1)
('ronda', 1)
('overal', 1)
('english', 1)
('english', 1)
('english', 1)
('english', 1)

('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('dust', 1)
('fidget', 1)
('bullet', 1)
('bullet', 1)
('ricochet', 1)
('chase', 1)
('ghostprotocol', 1)
('mad', 1)
('mad', 1)
('mad', 1)
('mad', 1)
('mad', 1)
('mad', 1)
('mad', 1)
('mad', 1)
('mad', 1)
('mad', 1)
('max', 1)
('max', 1)
('max', 1)
('max', 1)
('max', 1)
('combat', 1)
('combat', 1)
('combat', 1)
('combat', 1)
('pc', 1)
('pc', 1)
('pc', 1)
('pc', 1)
('pc', 1)
('susfu', 1)
('roll', 1)
('roll', 1)
('roll', 1)
('roll', 1)
('cptesco', 1)
('magnum', 1)
('askceep', 1)
('trailer', 1)
('trailer', 1)
('trailer', 1)
('trailer', 1)
('trailer', 1)
('photoset', 1)
('hookier', 1)
('mar', 1)
('mar', 1)
('mar', 1)
('pickathon', 1)
('sadli', 1)
('sadli', 1)
('disappear', 1)
('aussi', 1)
('aussi', 1)
('aussi', 1)
('sacramento', 1)
('settl', 1)
('settl', 1)
('prevail', 1)
('prevail', 1)
('pheonix', 1)
('nasa', 1)
(

('committe', 1)
('approv', 1)
('approv', 1)
('legisl', 1)
('administr', 1)
('administr', 1)
('busta', 1)
('splash', 1)
('splash', 1)
('denmark', 1)
('auction', 1)
('reliabl', 1)
('crematorium', 1)
('crematorium', 1)
('crematorium', 1)
('provok', 1)
('provok', 1)
('provok', 1)
('famin', 1)
('famin', 1)
('famin', 1)
('famin', 1)
('famin', 1)
('famin', 1)
('famin', 1)
('famin', 1)
('famin', 1)
('famin', 1)
('reuter', 1)
('reuter', 1)
('reuter', 1)
('reuter', 1)
('reuter', 1)
('reuter', 1)
('reuter', 1)
('reuter', 1)
('reuter', 1)
('reuter', 1)
('intldevelop', 1)
('perish', 1)
('heroic', 1)
('heroic', 1)
('shame', 1)
('shame', 1)
('tragic', 1)
('tragic', 1)
('kyiv', 1)
('kiev', 1)
('ukrainian', 1)
('ukrainian', 1)
('ua', 1)
('exagger', 1)
('msm', 1)
('msm', 1)
('dwell', 1)
('confin', 1)
('egypt', 1)
('egypt', 1)
('egypt', 1)
('egypt', 1)
('miner', 1)
('miner', 1)
('miner', 1)
('miner', 1)
('botha', 1)
('apartheid', 1)
('sa', 1)
('sa', 1)
('sa', 1)
('sa', 1)
('tutu', 1)
('nobel', 1)
('prize

('pisgah', 1)
('acr', 1)
('acr', 1)
('acr', 1)
('brush', 1)
('krnv', 1)
('reno', 1)
('nv', 1)
('chopper', 1)
('mostli', 1)
('mostli', 1)
('slownewsday', 1)
('globalwarm', 1)
('spend', 1)
('spend', 1)
('spend', 1)
('spend', 1)
('spend', 1)
('spend', 1)
('agen', 1)
('cossack', 1)
('hussar', 1)
('oo', 1)
('riggd', 1)
('rough', 1)
('shelter', 1)
('shelter', 1)
('shelter', 1)
('glade', 1)
('kindlng', 1)
('glowng', 1)
('hollow', 1)
('frnch', 1)
('vineyard', 1)
('tbh', 1)
('lockdown', 1)
('stubborn', 1)
('provinc', 1)
('provinc', 1)
('provinc', 1)
('rz', 1)
('iranelect', 1)
('worth', 1)
('worth', 1)
('worth', 1)
('worth', 1)
('worth', 1)
('sharpli', 1)
('sharpli', 1)
('florida', 1)
('florida', 1)
('florida', 1)
('wildlif', 1)
('wildlif', 1)
('bitterroot', 1)
('mountaintop', 1)
('scent', 1)
('wildflow', 1)
('distant', 1)
('impend', 1)
('fieldworksmel', 1)
('q', 1)
('eleph', 1)
('consum', 1)
('consum', 1)
('consum', 1)
('clash', 1)
('clash', 1)
('clash', 1)
('clash', 1)
('rivercomplex', 1)
('ca

('nonononono', 1)
('blackhat', 1)
('tomislav', 1)
('tomislav', 1)
('salopek', 1)
('salopek', 1)
('most', 1)
('most', 1)
('most', 1)
('most', 1)
('aunft', 1)
('closet', 1)
('intrud', 1)
('ndtv', 1)
('exchang', 1)
('exchang', 1)
('exchang', 1)
('exchang', 1)
('exchang', 1)
('exchang', 1)
('exchang', 1)
('taxi', 1)
('taxi', 1)
('randomactsofrom', 1)
('sinai', 1)
('branch', 1)
('execut', 1)
('execut', 1)
('execut', 1)
('croatian', 1)
('croatian', 1)
('croatian', 1)
('croatian', 1)
('affili', 1)
('affili', 1)
('affili', 1)
('chaco', 1)
('crunchysens', 1)
('footwear', 1)
('discov', 1)
('discov', 1)
('discov', 1)
('antiviru', 1)
('vendor', 1)
('damballa', 1)
('infosec', 1)
('teamnyl', 1)
('shirt', 1)
('shirt', 1)
('shirt', 1)
('tx', 1)
('tx', 1)
('bidtim', 1)
('beta', 1)
('while', 1)
('while', 1)
('tobia', 1)
('hankel', 1)
('femal', 1)
('femal', 1)
('overpow', 1)
('overpow', 1)
('modifi', 1)
('modifi', 1)
('manori', 1)
('gnocchi', 1)
('thine', 1)
('thine', 1)
('rbpk', 1)
('snction', 1)
('milt

('admir', 1)
('nationwid', 1)
('shortchang', 1)
('booni', 1)
('usmc', 1)
('airsoft', 1)
('paintbal', 1)
('woodland', 1)
('texian', 1)
('iliad', 1)
('hardin', 1)
('stephen', 1)
('vol', 1)
('lulgzimbestpict', 1)
('lulgzimbestpict', 1)
('oiler', 1)
('gpw', 1)
('hottest', 1)
('hottest', 1)
('seat', 1)
('mindset', 1)
('negoti', 1)
('ford', 1)
('mutt', 1)
('phrobi', 1)
('iii', 1)
('bayonet', 1)
('bayonet', 1)
('sheath', 1)
('nr', 1)
('canadian', 1)
('rifl', 1)
('rifl', 1)
('mudslid', 1)
('mudslid', 1)
('mudslid', 1)
('mudslid', 1)
('mudslid', 1)
('mudslid', 1)
('mudslid', 1)
('mudslid', 1)
('mudslid', 1)
('mudslid', 1)
('mudslid', 1)
('mudslid', 1)
('mudslid', 1)
('tajikistan', 1)
('tajik', 1)
('bakeofffriend', 1)
('gbbo', 1)
('gbbo', 1)
('gbbo', 1)
('gbbo', 1)
('burrito', 1)
('chew', 1)
('tyre', 1)
('dorrett', 1)
('blackforestgateau', 1)
('greatbritishbakeoff', 1)
('ineedcak', 1)
('toopain', 1)
('scream', 1)
('scream', 1)
('scream', 1)
('scream', 1)
('scream', 1)
('scream', 1)
('scream', 1)

('rescuer', 1)
('rescuer', 1)
('rescuer', 1)
('rescuer', 1)
('rescuer', 1)
('vulcan', 1)
('mediterran', 1)
('mediterran', 1)
('mediterran', 1)
('mediterran', 1)
('netflix', 1)
('netflix', 1)
('med', 1)
('carryi', 1)
('biker', 1)
('theonion', 1)
('garbag', 1)
('garbag', 1)
('garbag', 1)
('into', 1)
('into', 1)
('rsl', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('riot', 1)
('bateman', 1)
('score', 1)
('fran', 1)
('blowmandyup', 1)
('blowmandyup', 1)
('broad', 1)
('broad', 1)
('eight', 1)
('eight', 1)
('root', 1)
('root', 1)
('nanci', 1)
('feminist', 1)
('feminist', 1)
('feminist', 1)
('feminist', 1)
('msg', 1)
('msg', 1)
('pax', 1)
('modiministri', 1)
('role', 1)
('ulzzang', 1)
('hyung', 1)
('seok', 1)
('line

('plung', 1)
('plung', 1)
('plung', 1)
('plung', 1)
('weisberg', 1)
('gat', 1)
('kiddi', 1)
('stalin', 1)
('alley', 1)
('theocraci', 1)
('yoga', 1)
('compass', 1)
('mugger', 1)
('beef', 1)
('idgaf', 1)
('idgaf', 1)
('owe', 1)
('apolog', 1)
('remedyyyy', 1)
('jiwa', 1)
('yezidi', 1)
('slave', 1)
('quell', 1)
('introvert', 1)
('squar', 1)
('nerv', 1)
('westi', 1)
('belief', 1)
('teambrianmundi', 1)
('chile', 1)
('chile', 1)
('chile', 1)
('shore', 1)
('shore', 1)
('topstori', 1)
('santiago', 1)
('billionair', 1)
('motta', 1)
('duli', 1)
('involuntari', 1)
('winc', 1)
('holibob', 1)
('wozni', 1)
('circumst', 1)
('jsett', 1)
('ghost', 1)
('ghost', 1)
('divorc', 1)
('poverti', 1)
('mme', 1)
('austin', 1)
('marijuana', 1)
('marijuana', 1)
('ptsd', 1)
('ptsd', 1)
('ptsd', 1)
('hempoil', 1)
('hempoil', 1)
('cann', 1)
('sickmund', 1)
('ncjfcj', 1)
('byrn', 1)
('summit', 1)
('clevel', 1)
('unaddress', 1)
('therapi', 1)
('therapi', 1)
('newauthor', 1)
('maimonid', 1)
('pediatr', 1)
('onbeingwithkr

In [43]:
reducer_output = {}
for item in mapper_output:
  if item[0] not in reducer_output.keys():
    reducer_output[item[0]] = 0
  reducer_output[item[0]] += 1
for key, value in sorted(reducer_output.items()):
    print(key, ":", value)

a : 136
aapatwork : 1
aaron : 1
ab : 1
aba : 7
abandon : 7
abba : 1
abbog : 1
abbott : 1
abbswinston : 3
abc : 12
abcnew : 1
abdul : 1
abe : 1
abella : 1
abh : 1
abid : 1
abil : 1
abl : 2
ablaz : 12
abnorm : 1
abolish : 1
abomin : 1
aboooooortiooooonnnnn : 1
aborigin : 1
abort : 2
abound : 1
about : 8
abrianna : 1
absol : 1
absolut : 14
abstorm : 4
abt : 1
abus : 5
academi : 1
acapella : 1
accept : 2
access : 3
accid : 36
accident : 1
accomplish : 1
accord : 4
account : 12
accur : 3
accus : 4
acdelco : 1
ace : 1
achiev : 2
achimota : 1
acid : 4
acoust : 1
acquisit : 3
acr : 3
acreativedc : 1
across : 7
act : 9
action : 7
activ : 10
activist : 1
actu : 1
actual : 16
ad : 8
adam : 1
adani : 1
adapt : 1
adariu : 1
add : 3
addict : 2
address : 2
adjust : 2
admin : 2
administ : 1
administr : 2
admir : 2
admit : 4
adult : 4
adultress : 1
advanc : 2
advancedwarfar : 1
advantag : 1
adventur : 1
advic : 2
advisori : 4
aesthet : 1
af : 3
affect : 19
affili : 3
affluent : 1
afford : 4
afghan : 1


brave : 1
bravo : 1
break : 28
breakdown : 1
breaker : 1
breakingnew : 5
breath : 4
breather : 1
breez : 1
breh : 1
brelsford : 1
brendan : 1
brevoort : 1
brew : 2
brian : 3
bribe : 1
bricktop : 1
bride : 2
bridg : 18
brief : 3
brigad : 2
brighton : 1
brine : 1
bring : 17
britain : 2
british : 3
britney : 2
briton : 1
brittani : 1
bro : 6
broad : 2
broadcast : 1
broadwat : 1
broadway : 3
broke : 6
brokelynati : 1
broken : 9
brook : 1
brooklyn : 8
broom : 1
brooo : 1
broth : 1
brother : 12
brought : 5
brown : 14
browni : 1
browser : 6
bruce : 1
bruh : 3
brush : 1
brutal : 3
bsc : 1
bstrd : 1
btm : 1
btsprep : 1
btw : 2
bu : 13
buck : 1
bucket : 1
buckl : 1
bud : 1
buddi : 2
budget : 5
buffer : 1
buffett : 1
bug : 4
buhari : 1
build : 64
builder : 1
buildingmuseum : 1
buildup : 2
built : 4
bull : 1
bullet : 2
bulletin : 2
bulli : 1
bum : 1
bunch : 2
bundl : 1
bunk : 1
bunker : 1
bureaucrat : 1
burger : 1
burglari : 2
buri : 4
burlington : 1
burn : 75
burna : 1
burner : 1
burnin : 1
burnt

diff : 2
differ : 6
dig : 2
digit : 6
dijk : 3
dijon : 1
dilapit : 1
dilemma : 1
dime : 1
din : 1
dinah : 1
dinner : 3
dinosaur : 1
dire : 2
direct : 7
direction : 5
directli : 2
director : 4
dirk : 1
dirt : 1
dirtyli : 1
disabl : 2
disadvantag : 1
disappear : 1
disappoint : 1
disast : 58
disc : 1
disclo : 1
disco : 3
disconnect : 1
discov : 3
discoveri : 1
discu : 3
disea : 6
diseas : 4
disgust : 3
dishonest : 1
dislik : 1
dismay : 2
disney : 5
displac : 15
display : 1
displeas : 1
dispos : 2
disposit : 1
disregard : 1
disrespect : 1
disrupt : 1
distanc : 1
distant : 1
distinct : 3
distress : 1
district : 1
disturb : 2
diva : 2
dive : 7
diversifi : 1
divest : 1
divid : 1
divorc : 1
dixi : 1
dixon : 1
dj : 2
dk : 3
dkarma : 1
dkhqgv : 1
dl : 1
dlc : 1
dll : 1
dm : 1
dnb : 3
dnr : 1
dnt : 1
do : 23
doc : 1
dock : 1
doctor : 2
document : 4
doczon : 1
dodg : 2
doe : 8
doer : 1
doesn : 1
dog : 10
dogbit : 1
dogsarebetterthancat : 1
doin : 1
doinghashtagsright : 1
dolakha : 1
dolc : 1
doll 

gabriel : 1
gadget : 3
gaelit : 1
gag : 1
gaga : 1
gah : 1
gain : 4
gal : 1
galact : 10
gallipoli : 2
gallup : 1
gambit : 1
gambl : 1
game : 28
gamechang : 1
gameofthron : 1
gameplay : 2
gamescom : 2
gandhi : 1
gang : 4
ganwilson : 1
garag : 2
garbag : 3
garcia : 1
garden : 3
garfield : 2
gasolin : 1
gat : 1
gate : 3
gatensburi : 1
gateway : 1
gaug : 1
gave : 4
gawker : 1
gawlowski : 1
gay : 8
gaymaro : 1
gayuk : 1
gaza : 3
gazan : 1
gaze : 1
gbbo : 4
ge : 1
gear : 4
gel : 1
geller : 6
gem : 11
gender : 1
gener : 6
geni : 1
geniu : 1
geno : 2
genocid : 5
gent : 1
genuin : 3
geoengin : 1
geolog : 1
georg : 1
georgegallagh : 1
georgia : 3
georgian : 1
germ : 8
german : 2
germani : 2
get : 141
getaway : 1
gether : 1
getin : 1
getitbeforeitsgon : 1
gg : 1
gh : 1
ghanem : 1
ghee : 1
ghetto : 1
ghost : 2
ghostprotocol : 1
giant : 10
gibraltar : 1
gif : 1
gift : 2
gig : 1
gigatech : 1
gilgit : 1
gim : 2
girl : 20
gist : 1
giuliani : 1
give : 26
giveaway : 4
given : 3
gl : 1
glacier : 1
glad :

jacket : 2
jackson : 5
jacksonvil : 1
jacobin : 1
jah : 1
jail : 3
jakartapost : 1
jake : 1
jakey : 1
jalapeno : 1
jam : 1
jamal : 1
jame : 3
jamesbond : 1
jammu : 1
jan : 1
jane : 1
januzaj : 1
jap : 1
japan : 25
japanes : 5
jar : 2
jaron : 1
jay : 1
jayelectronica : 1
jaysen : 1
jd : 1
jean : 4
jeep : 3
jeezu : 1
jeff : 7
jelli : 1
jennif : 1
jerri : 1
jersey : 2
jerusalem : 2
jerusalempost : 1
jessica : 1
jesu : 3
jet : 2
jew : 1
jewel : 3
jewelri : 1
jfb : 1
jfc : 1
jfk : 1
jgf : 1
jgisavq : 1
jihad : 1
jihadist : 1
jim : 2
jima : 1
jimmyfallon : 1
jiwa : 1
jk : 1
joann : 1
job : 9
jodorowski : 1
joe : 6
joel : 2
joelheyman : 1
john : 7
johnmtaita : 1
johnni : 1
johnson : 1
join : 10
jojowizphilipp : 1
jokal : 1
joke : 3
jokethey : 1
jon : 1
jonathan : 5
jone : 1
jordan : 2
jordyn : 1
joseph : 1
jou : 1
journal : 4
journalist : 3
jp : 2
jpii : 1
jplocalfirst : 1
jpmorgan : 1
jsett : 1
jst : 2
jsyk : 1
ju : 2
judg : 5
juli : 9
jump : 3
junction : 1
june : 2
junk : 1
juri : 1
just : 

mood : 1
moon : 4
moor : 1
mop : 2
more : 34
moreland : 1
morgan : 2
moriya : 1
morn : 12
morocco : 1
mortal : 1
mortalkombat : 1
mortalkombatx : 1
mortem : 1
mortgag : 1
mosqu : 19
mosquito : 2
most : 4
mostli : 2
motel : 1
moth : 1
mother : 9
mothernaturenetwork : 1
motor : 1
motorcycl : 1
motorcyclist : 3
motorway : 1
motta : 1
mouna : 1
mount : 7
mountain : 9
mountaintop : 1
mourn : 4
mous : 1
mouth : 3
move : 16
movement : 2
movi : 14
mp : 10
mpd : 1
mph : 6
mr : 2
mre : 2
mri : 1
msf : 1
msg : 2
msha : 1
msm : 2
msnbc : 1
msv : 1
mt : 2
mtech : 1
mtvhottest : 6
mtvsummerstar : 1
much : 22
muckrock : 1
mud : 1
mudslid : 13
mugger : 1
muh : 1
muhammad : 1
muhammadu : 1
mull : 1
multi : 1
multidim : 1
multidimen : 1
multidimensi : 1
multidimensio : 1
multidimensiona : 1
multimillion : 1
multipl : 8
multiplay : 2
multitudin : 1
mum : 2
mumbai : 1
mump : 1
mundo : 1
municip : 4
murder : 39
museum : 1
music : 12
musician : 2
musim : 1
muslim : 15
must : 17
mustang : 2
muster : 1
mustre

pope : 2
popobawa : 1
popul : 5
popular : 4
porn : 2
port : 3
portion : 3
portland : 5
portmoodi : 1
portug : 1
pose : 1
posit : 3
possibl : 13
possum : 1
post : 43
postcard : 1
poster : 5
pot : 1
potato : 1
potenti : 4
potu : 4
pound : 1
pour : 2
pov : 1
poverti : 1
power : 21
powerlin : 1
pp : 2
ppe : 1
ppl : 8
pqhaxp : 1
pqphistoryweekend : 1
praam : 1
prabhu : 1
practic : 1
prada : 1
pradesh : 5
prairi : 1
prater : 1
pray : 14
prayer : 3
pre : 7
preach : 1
prebreak : 11
preciou : 1
predict : 5
pree : 1
pref : 1
prefer : 3
pregnant : 3
prematur : 3
prepar : 7
prepared : 1
prepper : 2
prescient : 1
preseason : 3
present : 6
preserv : 2
presid : 4
presqu : 1
press : 11
presstitut : 1
pressur : 1
pretend : 3
pretti : 13
prevail : 2
prevent : 4
preview : 1
previou : 1
previouslyondoyintv : 1
price : 4
pride : 1
primarili : 2
prime : 1
princ : 1
princess : 2
print : 5
prior : 1
prioriti : 1
prison : 2
privat : 2
privatis : 1
privileg : 1
privleg : 1
prize : 1
prkng : 1
pro : 2
prob : 2
p

shh : 1
shhhh : 1
shi : 1
shield : 1
shift : 12
shin : 1
shine : 2
shinichi : 1
shinto : 1
ship : 7
shipper : 1
shira : 2
shironu : 1
shirt : 3
shirtless : 1
shit : 24
shitti : 1
shld : 2
shock : 7
shoe : 3
shoot : 15
shooter : 4
shootn : 1
shop : 4
shore : 2
shorelin : 2
short : 9
shortag : 1
shortchang : 1
shortcut : 1
shortest : 1
shot : 11
should : 6
shoulder : 13
shout : 4
show : 37
showcas : 3
shower : 4
showgirldayoff : 1
shown : 1
shred : 2
shreveport : 1
shrnnclbautista : 1
shtfplan : 1
shudder : 1
shuffl : 3
shush : 1
shut : 4
shuttl : 1
si : 1
sicili : 1
sick : 4
sicklif : 1
sickmund : 1
side : 12
sidearm : 1
sidelin : 3
sidewalk : 1
sieg : 1
sierra : 1
sigalert : 1
sight : 3
sign : 19
signific : 1
sikh : 1
silenc : 2
silent : 4
silli : 1
silver : 6
silverado : 1
similar : 4
simpl : 2
simpli : 3
simul : 2
sin : 4
sinai : 1
sinc : 18
sincereti : 1
sing : 7
singapor : 1
singer : 1
singl : 9
sinjar : 4
sink : 12
sinkhol : 11
sir : 5
siren : 27
sisk : 1
sismo : 3
sissi : 1
siste

tisa : 1
titan : 2
titl : 1
tix : 2
tksg : 1
tl : 1
tlot : 1
tltltltltlttlt : 1
tmh : 1
to : 67
tobacco : 1
tobia : 1
today : 47
toddler : 4
toe : 1
togeth : 6
toke : 1
tokyo : 2
told : 9
toler : 1
toll : 5
tom : 1
tomb : 1
tomislav : 2
tomlandri : 1
tomlinson : 1
tommygshow : 1
tomorrow : 11
ton : 2
tone : 2
tonedidit : 1
tongu : 3
toni : 2
tonight : 19
tonka : 1
tonto : 11
too : 6
took : 9
tool : 2
toopain : 1
toothpast : 1
toothsom : 1
top : 26
topix : 1
topstori : 1
torch : 2
tore : 1
tori : 3
torn : 1
tornado : 11
tornadogiveaway : 2
toronto : 2
torpedo : 1
tortur : 2
toshikazukatayama : 1
toss : 1
total : 12
tote : 5
touch : 3
tough : 2
tour : 1
tourniquet : 1
toward : 4
towel : 1
tower : 2
towlin : 1
town : 7
townhous : 1
township : 1
toxic : 2
toy : 3
tpp : 1
tr : 2
trace : 3
track : 8
traction : 2
tractor : 3
trade : 2
tradeinservicesagr : 1
traderi : 1
tradit : 2
traffic : 5
traffick : 1
tragedi : 17
tragic : 2
trail : 1
trailer : 5
train : 45
traine : 1
traintuesday : 1
trak

ypg : 1
yr : 21
yup : 1
yyc : 6
yychail : 1
yycstorm : 3
yyctraff : 1
yycweath : 1
z : 3
zack : 1
zayn : 2
zealand : 2
zero : 5
zeroedg : 1
zerohedg : 1
zion : 1
zionist : 3
zipper : 1
zirngast : 1
zix : 1
zombi : 5
zone : 14
zoui : 1
zouma : 3
zuma : 1
zx : 1


In [44]:
'''
word_count = {}
for i in range(len(stemmed)):
  for token in stemmed[i].split():
    if token not in word_count.keys():
      word_count[token] = 0
    word_count[token] += 1
for key, value in sorted(word_count.items()):
    print(key, ":", value)
'''
word_count = {}
for key in hash_table.keys():
  word_count[key] = len(hash_table[key])
for key, value in sorted(word_count.items()):
    print(key, ":", value)

a : 136
aapatwork : 1
aaron : 1
ab : 1
aba : 7
abandon : 7
abba : 1
abbog : 1
abbott : 1
abbswinston : 3
abc : 12
abcnew : 1
abdul : 1
abe : 1
abella : 1
abh : 1
abid : 1
abil : 1
abl : 2
ablaz : 12
abnorm : 1
abolish : 1
abomin : 1
aboooooortiooooonnnnn : 1
aborigin : 1
abort : 2
abound : 1
about : 8
abrianna : 1
absol : 1
absolut : 14
abstorm : 4
abt : 1
abus : 5
academi : 1
acapella : 1
accept : 2
access : 3
accid : 36
accident : 1
accomplish : 1
accord : 4
account : 12
accur : 3
accus : 4
acdelco : 1
ace : 1
achiev : 2
achimota : 1
acid : 4
acoust : 1
acquisit : 3
acr : 3
acreativedc : 1
across : 7
act : 9
action : 7
activ : 10
activist : 1
actu : 1
actual : 16
ad : 8
adam : 1
adani : 1
adapt : 1
adariu : 1
add : 3
addict : 2
address : 2
adjust : 2
admin : 2
administ : 1
administr : 2
admir : 2
admit : 4
adult : 4
adultress : 1
advanc : 2
advancedwarfar : 1
advantag : 1
adventur : 1
advic : 2
advisori : 4
aesthet : 1
af : 3
affect : 19
affili : 3
affluent : 1
afford : 4
afghan : 1


bowel : 1
bowl : 3
bowman : 1
box : 1
boy : 25
boyfriend : 3
boyxboy : 1
bp : 1
bracelet : 3
bracket : 1
bradley : 1
brain : 9
braininjuri : 2
brake : 6
branch : 1
brand : 1
brandac : 1
brass : 1
brassier : 1
brat : 1
brave : 1
bravo : 1
break : 28
breakdown : 1
breaker : 1
breakingnew : 5
breath : 4
breather : 1
breez : 1
breh : 1
brelsford : 1
brendan : 1
brevoort : 1
brew : 2
brian : 3
bribe : 1
bricktop : 1
bride : 2
bridg : 18
brief : 3
brigad : 2
brighton : 1
brine : 1
bring : 17
britain : 2
british : 3
britney : 2
briton : 1
brittani : 1
bro : 6
broad : 2
broadcast : 1
broadwat : 1
broadway : 3
broke : 6
brokelynati : 1
broken : 9
brook : 1
brooklyn : 8
broom : 1
brooo : 1
broth : 1
brother : 12
brought : 5
brown : 14
browni : 1
browser : 6
bruce : 1
bruh : 3
brush : 1
brutal : 3
bsc : 1
bstrd : 1
btm : 1
btsprep : 1
btw : 2
bu : 13
buck : 1
bucket : 1
buckl : 1
bud : 1
buddi : 2
budget : 5
buffer : 1
buffett : 1
bug : 4
buhari : 1
build : 64
builder : 1
buildingmuseum : 1
build

destini : 5
destroy : 43
destruct : 17
detail : 7
detect : 1
detectado : 1
detector : 1
determin : 4
deton : 47
detona : 1
detroit : 2
deutsch : 1
devalu : 1
devast : 36
develop : 5
devic : 4
devil : 2
devji : 1
devot : 2
dgm : 1
dhhj : 1
dhsscitech : 1
di : 5
diabet : 1
diablo : 1
dial : 2
diamond : 3
dian : 2
dick : 4
did : 2
didn : 3
didnt : 4
die : 32
diebold : 1
diesel : 1
diet : 1
dieva : 1
diff : 2
differ : 6
dig : 2
digit : 6
dijk : 3
dijon : 1
dilapit : 1
dilemma : 1
dime : 1
din : 1
dinah : 1
dinner : 3
dinosaur : 1
dire : 2
direct : 7
direction : 5
directli : 2
director : 4
dirk : 1
dirt : 1
dirtyli : 1
disabl : 2
disadvantag : 1
disappear : 1
disappoint : 1
disast : 58
disc : 1
disclo : 1
disco : 3
disconnect : 1
discov : 3
discoveri : 1
discu : 3
disea : 6
diseas : 4
disgust : 3
dishonest : 1
dislik : 1
dismay : 2
disney : 5
displac : 15
display : 1
displeas : 1
dispos : 2
disposit : 1
disregard : 1
disrespect : 1
disrupt : 1
distanc : 1
distant : 1
distinct : 3
distress :

friend : 10
friendli : 1
friendship : 2
fring : 1
frnch : 1
from : 21
fromthedesk : 1
fromthefield : 1
front : 8
frontlin : 3
froze : 1
frozen : 1
frugal : 1
fruit : 3
ft : 7
fte : 2
fu : 1
fucboi : 1
fuck : 35
fuckin : 4
fuckkkkkk : 1
fuel : 5
fugazzi : 1
fukushima : 9
fulfil : 2
full : 49
fume : 1
fun : 11
funch : 1
function : 1
fund : 2
fundrais : 1
funfact : 1
funni : 6
funniest : 1
funtenna : 9
furi : 1
furnitur : 2
fusionfestiv : 1
futur : 7
futurist : 2
fy : 1
g : 6
ga : 2
gabon : 3
gabriel : 1
gadget : 3
gaelit : 1
gag : 1
gaga : 1
gah : 1
gain : 4
gal : 1
galact : 10
gallipoli : 2
gallup : 1
gambit : 1
gambl : 1
game : 28
gamechang : 1
gameofthron : 1
gameplay : 2
gamescom : 2
gandhi : 1
gang : 4
ganwilson : 1
garag : 2
garbag : 3
garcia : 1
garden : 3
garfield : 2
gasolin : 1
gat : 1
gate : 3
gatensburi : 1
gateway : 1
gaug : 1
gave : 4
gawker : 1
gawlowski : 1
gay : 8
gaymaro : 1
gayuk : 1
gaza : 3
gazan : 1
gaze : 1
gbbo : 4
ge : 1
gear : 4
gel : 1
geller : 6
gem : 11
gende

involuntari : 1
involv : 8
ion : 1
ipad : 2
iphon : 1
iplay : 1
ipok : 1
ir : 4
ira : 1
iran : 12
irand : 2
iranelect : 1
iranian : 1
iraq : 2
iraqi : 2
iri : 3
irin : 2
iron : 2
irongi : 1
irrelev : 2
irretriev : 1
irvin : 1
irwin : 1
is : 55
isa : 1
isabel : 1
isi : 10
isil : 2
isl : 1
isla : 1
islam : 15
island : 18
isn : 2
isnt : 1
iso : 1
isol : 1
isra : 12
israel : 19
israelnew : 1
issaquah : 1
issit : 1
issu : 23
it : 77
itali : 3
italian : 6
ite : 1
item : 4
ithat : 1
itinerari : 1
iturasya : 1
iu : 1
iverson : 1
ivoic : 1
iwo : 1
iwouldntgetelectedbecaus : 1
j : 6
ja : 2
jack : 1
jacket : 2
jackson : 5
jacksonvil : 1
jacobin : 1
jah : 1
jail : 3
jakartapost : 1
jake : 1
jakey : 1
jalapeno : 1
jam : 1
jamal : 1
jame : 3
jamesbond : 1
jammu : 1
jan : 1
jane : 1
januzaj : 1
jap : 1
japan : 25
japanes : 5
jar : 2
jaron : 1
jay : 1
jayelectronica : 1
jaysen : 1
jd : 1
jean : 4
jeep : 3
jeezu : 1
jeff : 7
jelli : 1
jennif : 1
jerri : 1
jersey : 2
jerusalem : 2
jerusalempost : 1
jess

molten : 2
mom : 10
moment : 10
momentsathil : 1
momentum : 1
mommi : 2
momtherbot : 1
mon : 1
mona : 1
monday : 1
money : 11
monitor : 2
monkey : 1
monogram : 2
monsoon : 2
monster : 4
mont : 1
montani : 1
montano : 1
monterrey : 1
month : 12
monument : 4
mood : 1
moon : 4
moor : 1
mop : 2
more : 34
moreland : 1
morgan : 2
moriya : 1
morn : 12
morocco : 1
mortal : 1
mortalkombat : 1
mortalkombatx : 1
mortem : 1
mortgag : 1
mosqu : 19
mosquito : 2
most : 4
mostli : 2
motel : 1
moth : 1
mother : 9
mothernaturenetwork : 1
motor : 1
motorcycl : 1
motorcyclist : 3
motorway : 1
motta : 1
mouna : 1
mount : 7
mountain : 9
mountaintop : 1
mourn : 4
mous : 1
mouth : 3
move : 16
movement : 2
movi : 14
mp : 10
mpd : 1
mph : 6
mr : 2
mre : 2
mri : 1
msf : 1
msg : 2
msha : 1
msm : 2
msnbc : 1
msv : 1
mt : 2
mtech : 1
mtvhottest : 6
mtvsummerstar : 1
much : 22
muckrock : 1
mud : 1
mudslid : 13
mugger : 1
muh : 1
muhammad : 1
muhammadu : 1
mull : 1
multi : 1
multidim : 1
multidimen : 1
multidimensi :

podcast : 2
poetri : 3
point : 5
pointless : 1
poison : 2
pok : 1
pokemon : 1
polaroid : 1
pole : 3
polic : 54
policeman : 2
polici : 8
polit : 8
politician : 4
politifiact : 1
poll : 2
pollut : 1
pompey : 1
pond : 4
pont : 2
pool : 7
pooper : 1
poor : 3
pop : 5
popcorn : 1
pope : 2
popobawa : 1
popul : 5
popular : 4
porn : 2
port : 3
portion : 3
portland : 5
portmoodi : 1
portug : 1
pose : 1
posit : 3
possibl : 13
possum : 1
post : 43
postcard : 1
poster : 5
pot : 1
potato : 1
potenti : 4
potu : 4
pound : 1
pour : 2
pov : 1
poverti : 1
power : 21
powerlin : 1
pp : 2
ppe : 1
ppl : 8
pqhaxp : 1
pqphistoryweekend : 1
praam : 1
prabhu : 1
practic : 1
prada : 1
pradesh : 5
prairi : 1
prater : 1
pray : 14
prayer : 3
pre : 7
preach : 1
prebreak : 11
preciou : 1
predict : 5
pree : 1
pref : 1
prefer : 3
pregnant : 3
prematur : 3
prepar : 7
prepared : 1
prepper : 2
prescient : 1
preseason : 3
present : 6
preserv : 2
presid : 4
presqu : 1
press : 11
presstitut : 1
pressur : 1
pretend : 3
pretti 

shannon : 1
shape : 12
share : 9
sharecrop : 1
shark : 4
sharpli : 2
shastadam : 1
shawn : 1
shawne : 1
she : 10
sheath : 1
shed : 3
sheer : 1
sheeran : 1
sheet : 1
shelbi : 1
shell : 6
shelli : 3
shellno : 1
shelter : 3
shh : 1
shhhh : 1
shi : 1
shield : 1
shift : 12
shin : 1
shine : 2
shinichi : 1
shinto : 1
ship : 7
shipper : 1
shira : 2
shironu : 1
shirt : 3
shirtless : 1
shit : 24
shitti : 1
shld : 2
shock : 7
shoe : 3
shoot : 15
shooter : 4
shootn : 1
shop : 4
shore : 2
shorelin : 2
short : 9
shortag : 1
shortchang : 1
shortcut : 1
shortest : 1
shot : 11
should : 6
shoulder : 13
shout : 4
show : 37
showcas : 3
shower : 4
showgirldayoff : 1
shown : 1
shred : 2
shreveport : 1
shrnnclbautista : 1
shtfplan : 1
shudder : 1
shuffl : 3
shush : 1
shut : 4
shuttl : 1
si : 1
sicili : 1
sick : 4
sicklif : 1
sickmund : 1
side : 12
sidearm : 1
sidelin : 3
sidewalk : 1
sieg : 1
sierra : 1
sigalert : 1
sight : 3
sign : 19
signific : 1
sikh : 1
silenc : 2
silent : 4
silli : 1
silver : 6
silverad

time : 71
timedo : 1
timelaps : 1
timelin : 1
timesnewsdesk : 1
timzak : 1
tinder : 1
tini : 3
tink : 1
tip : 4
tire : 5
tisa : 1
titan : 2
titl : 1
tix : 2
tksg : 1
tl : 1
tlot : 1
tltltltltlttlt : 1
tmh : 1
to : 67
tobacco : 1
tobia : 1
today : 47
toddler : 4
toe : 1
togeth : 6
toke : 1
tokyo : 2
told : 9
toler : 1
toll : 5
tom : 1
tomb : 1
tomislav : 2
tomlandri : 1
tomlinson : 1
tommygshow : 1
tomorrow : 11
ton : 2
tone : 2
tonedidit : 1
tongu : 3
toni : 2
tonight : 19
tonka : 1
tonto : 11
too : 6
took : 9
tool : 2
toopain : 1
toothpast : 1
toothsom : 1
top : 26
topix : 1
topstori : 1
torch : 2
tore : 1
tori : 3
torn : 1
tornado : 11
tornadogiveaway : 2
toronto : 2
torpedo : 1
tortur : 2
toshikazukatayama : 1
toss : 1
total : 12
tote : 5
touch : 3
tough : 2
tour : 1
tourniquet : 1
toward : 4
towel : 1
tower : 2
towlin : 1
town : 7
townhous : 1
township : 1
toxic : 2
toy : 3
tpp : 1
tr : 2
trace : 3
track : 8
traction : 2
tractor : 3
trade : 2
tradeinservicesagr : 1
traderi : 1
trad

yourself : 1
youth : 3
youtub : 5
ypg : 1
yr : 21
yup : 1
yyc : 6
yychail : 1
yycstorm : 3
yyctraff : 1
yycweath : 1
z : 3
zack : 1
zayn : 2
zealand : 2
zero : 5
zeroedg : 1
zerohedg : 1
zion : 1
zionist : 3
zipper : 1
zirngast : 1
zix : 1
zombi : 5
zone : 14
zoui : 1
zouma : 3
zuma : 1
zx : 1
