In [1]:
import sys

# uncomment any library you need to install and run the cell

#!{sys.executable} -m pip install numpy
#!{sys.executable} -m pip install matplotlib
#!{sys.executable} -m pip install sklearn
#!{sys.executable} -m pip install emoji
#!{sys.executable} -m pip install spacy
#!{sys.executable} -m pip install gensim

In [2]:
import os, re, gzip, json, pickle, shutil, random, joblib, csv, random

import spacy

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation, TruncatedSVD
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

import numpy as np
import matplotlib.pyplot as plt

from emoji import UNICODE_EMOJI

%matplotlib inline

In [3]:
#!{sys.executable} -m spacy download en_core_web_sm

In [None]:
#
# Test module based off dev work
#

In [4]:
sys.path.append('../app')
from config import Config
from preprocessing import Preprocessing

In [5]:
config = Config()

In [6]:
preprocessor = Preprocessing(config)

In [7]:
preprocessor.extract_type_and_text()

Extracting raw troll tweets...
Extracted 2116866 troll tweets (badtabs: 0, badlines: 1, skipped nonenglish: 829340)
Extracting raw user tweets...
0
1000000
2000000
3000000
4000000
5000000
0
1000000
2000000
3000000
bad tabs: 217985, bad lines: 223
Extracted 8783464 normal user tweets (badtabs: 217985, badlines: 223)


In [7]:
preprocessor.clean_tweets()

Saving 2116866 cleaned troll tweets to ../data/troll_tweets_clean.pkl.gz
0
1000000
2000000
3000000
4000000
5000000
6000000
7000000
8000000
Saving 8783464 cleaned user tweets to ../data/user_tweets_clean.pkl.gz


In [8]:
with gzip.open(config.troll_tweet_clean_path, 'rb') as fz:
    troll_tweets = pickle.load(fz)
    
with gzip.open(config.user_tweet_clean_path, 'rb') as fz:
    user_tweets = pickle.load(fz)

In [10]:
#truncate and store smaller set
random.shuffle(troll_tweets)
config.troll_tweet_clean_path = '../data/troll_tweets_clean_small.pkl.gz'
with gzip.open(config.troll_tweet_clean_path, 'wb') as oz:
    pickle.dump(troll_tweets[:100000], oz)
        
random.shuffle(user_tweets)
config.user_tweet_clean_path = '../data/user_tweets_clean_small.pkl.gz'
with gzip.open(config.user_tweet_clean_path, 'wb') as oz:
    pickle.dump(user_tweets[:100000], oz)

In [11]:
preprocessor = Preprocessing(config)

In [12]:
preprocessor.tokenize_tweets()

Storing troll tweets tagged so far: 0 commercial	<LINK> So, did you crush your workout? <USER> @_kaskp <USER> <USER> <USER> <USER> <USER>	<LINK> So , did you crush your workout ? <USER> @_kaskp <USER> <USER> <USER> <USER> <USER>	<LINK> so , do you crush your workout ? <USER> @_kaskp <USER> <USER> <USER> <USER> <USER>	TAG RB , VBD PRP VB PRP$ NN . TAG NNP TAG TAG TAG TAG TAG	your_workout	
Storing complete 100000 tagged troll tweets
Storing user tweets tagged so far: 0 NormalUser	Congrats to Toby Warrior Shaye G. of New York, NY, the winner of the Toby Keith Norway Flyaway Sweepstakes! - <LINK>	Congrats to Toby Warrior Shaye G. of New York , NY , the winner of the Toby Keith Norway Flyaway Sweepstakes ! - <LINK>	congrat to Toby Warrior Shaye G. of New York , NY , the winner of the Toby Keith Norway Flyaway sweepstake ! - <LINK>	NNS IN NNP NNP NNP NNP IN NNP NNP , NNP , DT NN IN DT NNP NNP NNP NNP NNS . : TAG	Toby_Warrior_Shaye_G. New_York the_winner the_Toby_Keith_Norway_Flyaway_Sweepsta

In [None]:
#
# Dev work
#

In [18]:
data_path = 'data'
troll_tweet_path = '%s/russian-troll-tweets' % data_path
norm_tweet_path = '%s/twitter_cikm_2010' % data_path

#extracted text
troll_tweet_texts_path = '%s/troll_tweets.pkl.gz' % data_path
user_tweet_texts_path = '%s/user_tweets.pkl.gz' % data_path

spacy_nlp = spacy.load('en_core_web_sm')

In [35]:
def get_tweets(troll_tweet_path, norm_tweet_path):
    troll_tweets = get_troll_tweets(troll_tweet_path)
    
    #training_set_tweets.txt, training_set_users.txt, test_set_tweets.txt, test_set_users.txt
    #15861944	5766883977	@techiepalar Please prove to me that you are a real human by following me based on this tweet? I quit TrueTwitValidation & won't use it.	2009-11-16 09:04:01
    norm_tweets = get_norm_tweets('%s/training_set_tweets.txt' % norm_tweet_path)
    print('%d norm tweets from training file' % len(norm_tweets))
    norm_tweets.extend(get_norm_tweets('%s/test_set_tweets.txt' % norm_tweet_path))
    print('%d total norm tweets' % len(norm_tweets))
    
    return troll_tweets, norm_tweets


def get_troll_tweets(troll_tweet_path):
    troll_tweets = []
    badtab=0
    badline=0
    nonenglish=0
    for fn in os.listdir(troll_tweet_path):
        if not fn.endswith('.csv'):
            continue
        with open('%s/%s' % (troll_tweet_path, fn), 'r', encoding='utf-8', newline='\n') as csvfile:
            reader = csv.reader(csvfile, delimiter=',', quotechar='"')
            header = next(reader)
            #external_author_id, author, content, region, language, publish_date, arvested_date, following,
            #followers, updates, post_type, account_type, retweet, account_category, new_june_2018, alt_external_id,
            # tweet_id, article_url, tco1_step1, tco2_step1, tco3_step1
            for fields in reader:
                if not len(fields)==21:
                    badtab+=1
                    continue
                    
                if not fields[4].lower().strip()=='english':
                    nonenglish+=1
                    continue
                
                text = fields[2].strip()
                if not text:
                    badline+=1
                    continue
                    
                troll_type = fields[13].strip()
                troll_tweets.append('%s\t%s' % (troll_type, text))
                
        print(fn, len(troll_tweets), badtab, badline, nonenglish)
    return troll_tweets

            
def get_norm_tweets(path):
    norm_tweets = []
    badtab=0
    badline=0
    
    with open(path, 'r', encoding='utf-8') as f:
        for i,line in enumerate(f):
            if i%250000==0:
                print(i)
            if not line.count('\t')==3:
                badtab+=1
                continue
            line = line.replace('\n','').split('\t')[2].strip()
            if not line:
                badline+=1
                continue
            norm_tweets.append('NormalUser\t%s' % line)
            
    print('bad tabs: %d, bad lines: %d' % (badtab, badline))
    
    return norm_tweets

In [36]:
if not os.path.exists(troll_tweet_texts_path):
    troll_tweets, norm_tweets = get_tweets(troll_tweet_path, norm_tweet_path)
    
    with gzip.open('%s/troll_tweets.pkl.gz' % data_path, 'wb') as oz:
        pickle.dump(troll_tweets, oz)
        
    with gzip.open('%s/normal_tweets.pkl.gz' % data_path, 'wb') as oz:
        pickle.dump(norm_tweets, oz)
else:
    with gzip.open('%s/troll_tweets.pkl.gz' % data_path, 'rb') as fz:
        troll_tweets = pickle.load(fz)
        
    with gzip.open('%s/normal_tweets.pkl.gz' % data_path, 'rb') as fz:
        norm_tweets = pickle.load(fz)
        
len(troll_tweets), len(norm_tweets)

IRAhandle_tweets_1.csv 243891 0 0 53639
IRAhandle_tweets_10.csv 501002 0 0 136284
IRAhandle_tweets_11.csv 751623 0 0 192906
IRAhandle_tweets_12.csv 990973 0 0 256290
IRAhandle_tweets_13.csv 1011867 0 0 265581
IRAhandle_tweets_2.csv 1262387 0 0 319645
IRAhandle_tweets_3.csv 1505289 0 0 348110
IRAhandle_tweets_4.csv 1752083 0 0 403696
IRAhandle_tweets_5.csv 1969306 0 0 460357
IRAhandle_tweets_6.csv 2227356 0 1 507917
IRAhandle_tweets_7.csv 2469708 0 1 602085
IRAhandle_tweets_8.csv 2712666 0 1 721318
IRAhandle_tweets_9.csv 2946206 0 1 829340
0
250000
500000
750000
1000000
1250000
1500000
1750000
2000000
2250000
2500000
2750000
3000000
3250000
3500000
3750000
bad tabs: 170147, bad lines: 176
3675302 norm tweets from training file
0
250000
500000
750000
1000000
1250000
1500000
1750000
2000000
2250000
2500000
2750000
3000000
3250000
3500000
3750000
4000000
4250000
4500000
4750000
5000000
bad tabs: 47838, bad lines: 47
8783464 total norm tweets


(2946206, 8783464)

In [41]:
# preproc cleaning 
# - replace links with <LINK>, replace usertags with <USER>
# - normalize spaces, quotes, double quotes, etc
def clean_tweets(troll_tweets, norm_tweets):
    clean_troll_tweets = []
    for i,troll_tweet in enumerate(troll_tweets):
        clean_troll_tweets.append(clean_tweet(troll_tweet))
        
    clean_norm_tweets = []
    for i,norm_tweet in enumerate(norm_tweets):
        clean_norm_tweets.append(clean_tweet(norm_tweet))
    
    return clean_troll_tweets, clean_norm_tweets


def clean_tweet(tweet, repl={'‘':"'", '’':"'", '“':'"', '”':'"'}):
    #normalize spaces, quotes, double quotes, etc ‘’“”
    for f,r in repl.items():
        tweet = tweet.replace(f,r)
        
    #norm spaces
    tweet = re.sub('  +', ' ', tweet)
        
    #replace links with <LINK>, replace usertags with <USER>, leave hashtags
    tweet = re.sub('https?:[^ ]+', '<LINK>', tweet)
    
    tweet = re.sub('@[a-zA-Z][^ ]+', '<USER>', tweet)
    
    clean = []
    for char in tweet:
        if char in UNICODE_EMOJI['en']:
            clean.append('<EMOJI>')
        else:
            clean.append(char)
            
    return ''.join(clean)

In [42]:
clean_troll_tweets, clean_norm_tweets = clean_tweets(troll_tweets, norm_tweets)

In [43]:
with gzip.open('%s/troll_tweets_clean.pkl.gz' % data_path, 'wb') as oz:
    pickle.dump(clean_troll_tweets, oz)

with gzip.open('%s/normal_tweets_clean.pkl.gz' % data_path, 'wb') as oz:
    pickle.dump(clean_norm_tweets, oz)

In [40]:
for i,ctt in enumerate(clean_norm_tweets):
    if i>100:
        break
    print(ctt)

NormalUser	<USER> coo. thanks. just dropped you a line.
NormalUser	<USER> shit it ain't lettin me DM you back, what's your email?
NormalUser	<USER> hey cody, quick question...can you dm me?
NormalUser	<USER> dang. you need anything? I got some left over meds!
NormalUser	maybe i'm late in the game on this one, but this lowender vst is making my apt rumble!
NormalUser	i really hope A.I. makes the most of this second chance in philly. i'm glad he's goin home.
NormalUser	<USER> danny boy! wanna check out d-nice at the afex 1 year tonight? we could pre-game at mine and walk over.
NormalUser	...and if you have ppl that you care about, make sure to let them know. life is too short to lose friends over bullshit. peace and love.
NormalUser	...that shit weighs heavy on me. take responsibility for your life. I don't blame anyone for where I am in this world...
NormalUser	...including his last failed relationship. and while I know that none of it is grounded in reality, and I actually am worried a

In [26]:
special_tags = set(['<USER>', '<LINK>', '<EMOJI>'])

In [74]:
def tokenize_tweets(troll_tweets, user_tweets,
                    get_lemmas=False, get_pos=False, get_phrases=False, get_ents=False):
    spacy_nlp.tokenizer.add_special_case(f"<USER>", [{spacy.attrs.ORTH: f"<USER>"}])
    spacy_nlp.tokenizer.add_special_case(f"<LINK>", [{spacy.attrs.ORTH: f"<LINK>"}])
    spacy_nlp.tokenizer.add_special_case(f"<EMOJI>", [{spacy.attrs.ORTH: f"<EMOJI>"}])

    tagged_troll_tweets = []
    for i,troll_tweet in enumerate(troll_tweets):
        tweet_type, tweet_text = troll_tweet.split('\t')
        toks, lemmas, pos, phrases, ents = tokenize_text(tweet_text, get_lemmas, get_pos, get_phrases, get_ents)
        tagged_troll_tweets.append('%s\t%s\t%s\t%s\t%s\t%s\t%s' % (tweet_type, tweet_text, toks, lemmas, pos, phrases, ents))
        
        if i%100000==0:
            print(i, tagged_troll_tweets[-1])
            with gzip.open('%s/troll_tweets_tagged.pkl.gz' % data_path, 'wb') as oz:
                pickle.dump(tagged_troll_tweets, oz)
        
    tagged_user_tweets = []
    for i,user_tweet in enumerate(user_tweets):
        tweet_type, tweet_text = user_tweet.split('\t')
        toks, lemmas, pos, phrases, ents = tokenize_text(tweet_text, get_lemmas, get_pos, get_phrases, get_ents)
        tagged_user_tweets.append('%s\t%s\t%s\t%s\t%s\t%s\t%s' % (tweet_type, tweet_text, toks, lemmas, pos, phrases, ents))
        
        if i%100000==0:
            print(i, tagged_user_tweets[-1])
            with gzip.open('%s/user_tweets_tagged.pkl.gz' % data_path, 'wb') as oz:
                pickle.dump(tagged_user_tweets, oz)
    
    return tagged_troll_tweets, tagged_user_tweets


def tokenize_text(text, get_lemmas=True, get_pos=True, get_phrases=True, get_ents=True):
    toks = []
    lemmas = []
    pos = []
    phrases = []
    ents = []
    
    doc = spacy_nlp(text)
    if get_phrases:
        for chunk in doc.noun_chunks:
            if not ' ' in chunk.text:
                continue
            phrases.append(chunk.text.replace(' ', '_'))
        
    ent_type=''
    ent=[]
    for tok in doc:
        toks.append(tok.text)
        
        if tok.text in special_tags:
            if get_lemmas:
                lemmas.append(tok.text)
            if get_pos:
                pos.append('TAG')
            continue
        
        if get_ents:
            if tok.ent_iob == 3: #start
                ent_type = tok.ent_type_
                ent.append(tok.text)
            elif tok.ent_iob == 1: #continue
                ent.append(tok.text)
            else:
                if ent:
                    ents.append('%s:%s' % ('_'.join(ent), ent_type))
                    ent=[]
                    ent_type=''
                    
        if get_lemmas:
            lemmas.append(tok.lemma_)
            
        if get_pos:
            pos.append(tok.tag_)
        
    toks = ' '.join(toks)
    lemmas = ' '.join(lemmas)
    pos = ' '.join(pos)
    phrases = ' '.join(phrases)
    ents = ' '.join(ents)
    
    return toks, lemmas, pos, phrases, ents

In [7]:
with gzip.open('%s/troll_tweets_clean.pkl.gz' % data_path, 'rb') as fz:
    clean_troll_tweets = pickle.load(fz)

with gzip.open('%s/user_tweets_clean.pkl.gz' % data_path, 'rb') as fz:
    clean_user_tweets = pickle.load(fz)

In [None]:
get_lemmas=True
get_pos=True
get_phrases=True
get_ents=True

tagged_troll_tweets, tagged_user_tweets = tokenize_tweets(clean_troll_tweets, clean_user_tweets,
                                                          get_lemmas, get_pos, get_phrases, get_ents)

0 RightTroll	"We have a sitting Democrat US Senator on trial for corruption and you've barely heard a peep from the mainstream media." ~ <USER> <LINK>	" We have a sitting Democrat US Senator on trial for corruption and you 've barely heard a peep from the mainstream media . " ~ <USER> <LINK>	" we have a sit Democrat US Senator on trial for corruption and you 've barely hear a peep from the mainstream medium . " ~ <USER> <LINK>	`` PRP VBP DT VBG NNP NNP NNP IN NN IN NN CC PRP VBP RB VBN DT NN IN DT NN NNS . '' NFP TAG TAG	a_sitting_Democrat_US_Senator a_peep the_mainstream_media	Democrat_US:GPE
100000 RightTroll	1%ers defrauded People of their President in Primary. If wise they'll give We The People something if a #12thAmendmentElection doesn't occur <LINK>	1%ers defrauded People of their President in Primary . If wise they 'll give We The People something if a # 12thAmendmentElection does n't occur <LINK>	1%er defraud People of their President in Primary . if wise they 'll give we the 

In [None]:
with gzip.open('%s/troll_tweets_tagged.pkl.gz' % data_path, 'wb') as oz:
    pickle.dump(tagged_troll_tweets, oz)
    
with gzip.open('%s/user_tweets_tagged.pkl.gz' % data_path, 'wb') as oz:
    pickle.dump(tagged_user_tweets, oz)

In [None]:
#
# Data Analysis
#

In [15]:
# compare histograms
# - lens of troll vs normal tweets
# - user mentions, link mentions
# - given a dict, percent of words in tweet not in the dict

In [3]:
punct = set(["'", '"', '.', ',', '~', '!', '@', '#', '$', '%', '^', '&', '*', '|',
             '(', ')', '-', '_', '+', '=', '{','}','[',']',';', ':', '<', '>', '?', '/'])

In [4]:
from nltk.corpus import words
#nltk.download('words')

In [5]:
from nltk.corpus import wordnet 
#nltk.download('wordnet')

In [6]:
vocab = set(words.words()) | set(wordnet.words())
type(vocab), len(vocab) #235892  331327

(set, 331327)

In [21]:
with gzip.open('../data/troll_tweets_tagged.pkl.gz', 'rb') as fz:
    troll_tweets = pickle.load(fz)

with gzip.open('../data/user_tweets_tagged.pkl.gz', 'rb') as fz:
    user_tweets = pickle.load(fz)
len(troll_tweets), len(user_tweets)

In [7]:
def get_features(tweets, vocab, punct):
    feats = []
    for i,tweet in enumerate(tweets):
        if i and i%100000==0:
            print(i, feats[-1])
            
        tp,txt,toks,lems,pos,phrs,ents = tweet.replace('\xa0','').split('\t')
        
        if tp=='NonEnglish':
            continue
        
        num_toks = toks.count(' ')+1
        emoji_ratio, link_ratio, user_ratio = 0,0,0
        emoji_ratio = txt.count('<EMOJI>')/num_toks
        link_ratio = txt.count('<LINK>')/num_toks
        user_ratio = txt.count('<USER>')/num_toks
        
        toks = toks.replace('< ', '<').replace(' >', '>').replace('# ','#')
        
        clean_ents = set()
        ent_types = set()
        ent_toks = set()
        for ent in ents.split(' '):
            if ent=='#:CARDINAL':
                continue
            items = ent.split(':')
            for t in items[0].split('_'):
                ent_toks.add(t)
            typ = items[-1]
            if not typ:
                continue
            if typ[0]=='#':
                continue
            if '\xa0' in typ:
                continue
            clean_ents.add(ent)
            ent_types.add(typ)
        clean_ents = list(clean_ents)
        ent_types = list(ent_types)
        ent_toks = list(ent_toks)
        
        num_ents = ents.count(' ')+1
                
        lems = lems.lower()
        lems = re.sub('<[^>]+>', '', lems)
        lems = lems.replace('# ','#')
        
        voc=[]
        novoc=[]
        for lem in lems.split(' '):
            if not lem or lem in punct or lem[0]=='#' or lem in ent_toks:
                continue
            if lem in vocab:
                voc.append(lem)
            else:
                novoc.append(lem)
                
        ratio = 0
        if voc or novoc:
            ratio = len(novoc)/(len(voc)+len(novoc))
            #print('%s\nvoc: %s\nno voc: %s\nratio: %.6f\n' % (txt, voc, novoc, ratio))
            
        tags = re.findall('#[^ ]+', txt)
            
        feats.append({
            'type':tp,
            'text':txt,
            'tokens':toks,
            'lemmas':lems,
            'pos':pos,
            'phrases':phrs,
            'entities':clean_ents,
            'ent_types':ent_types,
            'hashtags':tags,
            'oov_words':' '.join(novoc),
            'emoji_ratio':emoji_ratio, 
            'link_ratio':link_ratio, 
            'user_ratio':user_ratio,
            'oov_ratio':ratio
        })
        
    return feats

In [119]:
troll_feats = get_features(troll_tweets, vocab, punct)

100000 {'type': 'RightTroll', 'text': 'California Bakersfield #breaking #VoteTrump #MakeAmericaGreatAgain #MAGA3X <LINK>', 'tokens': 'California Bakersfield #breaking #VoteTrump #MakeAmericaGreatAgain #MAGA3X <LINK>', 'lemmas': 'california bakersfield #break #votetrump #makeamericagreatagain #maga3x ', 'pos': 'NNP NNP $ VBG $ NNP NN NNP NN FW TAG', 'phrases': 'California_Bakersfield', 'entities': ['#_breaking_#:MONEY', 'California:GPE'], 'ent_types': ['GPE', 'MONEY'], 'hashtags': ['#breaking', '#VoteTrump', '#MakeAmericaGreatAgain', '#MAGA3X'], 'oov_words': '', 'emoji_ratio': 0.0, 'link_ratio': 0.09090909090909091, 'user_ratio': 0.0, 'oov_ratio': 0.0}
200000 {'type': 'RightTroll', 'text': 'RT PoliticalShort: "Kim Jong-un, the world always thought he wasn\'t a responsible leader...He\'s acting more respon… <LINK>', 'tokens': 'RT PoliticalShort : " Kim Jong - un , the world always thought he was n\'t a responsible leader ... He \'s acting more respon … <LINK>', 'lemmas': 'rt politicalsho

1500000 {'type': 'NewsFeed', 'text': '5.2 magnitude earthquake hits Borrego Springs #local #SanDiego', 'tokens': '5.2 magnitude earthquake hits Borrego Springs #local #SanDiego', 'lemmas': '5.2 magnitude earthquake hit borrego springs #local #sandiego', 'pos': 'CD NN NN VBZ NNP NNP $ JJ $ NNP', 'phrases': '5.2_magnitude_earthquake Borrego_Springs #local_#SanDiego', 'entities': ['5.2:CARDINAL', 'Borrego_Springs:FAC'], 'ent_types': ['FAC', 'CARDINAL'], 'hashtags': ['#local', '#SanDiego'], 'oov_words': 'borrego springs', 'emoji_ratio': 0.0, 'link_ratio': 0.0, 'user_ratio': 0.0, 'oov_ratio': 0.4}
1600000 {'type': 'RightTroll', 'text': 'RT magabitenews: #BREAKING: Driver of the car in #Charlottesville was a member of the left. Read More: … <LINK>', 'tokens': 'RT magabitenews : #BREAKING : Driver of the car in #Charlottesville was a member of the left . Read More : … <LINK>', 'lemmas': 'rt magabitenew : #breaking : driver of the car in #charlottesville be a member of the left . read more : …

2900000 {'type': 'NewsFeed', 'text': 'Over 23,000 Customers Without Power Due To PECO Substation Fire <LINK> <LINK>', 'tokens': 'Over 23,000 Customers Without Power Due To PECO Substation Fire <LINK> <LINK>', 'lemmas': 'over 23,000 customer without power due to peco substation fire  ', 'pos': 'IN CD NNS IN NNP IN IN NNP NNP NNP TAG TAG', 'phrases': 'Over_23,000_Customers PECO_Substation_Fire', 'entities': [], 'ent_types': [], 'hashtags': [], 'oov_words': '23,000 peco', 'emoji_ratio': 0.0, 'link_ratio': 0.16666666666666666, 'user_ratio': 0.0, 'oov_ratio': 0.2}


In [120]:
with gzip.open('../data/troll_features.pkl.gz', 'wb') as oz:
    pickle.dump(troll_feats, oz)

In [8]:
with gzip.open('../data/user_tweets_tagged.pkl.gz', 'rb') as fz:
    user_tweets = pickle.load(fz)
len(user_tweets)

2500001

In [9]:
user_feats = get_features(user_tweets[:2500000], vocab, punct)

100000 {'type': 'NormalUser', 'text': '<USER> it was part of her "new music generator" feature. Her only knowledge was Marr being in them, probably thought he founded them', 'tokens': '<USER> it was part of her " new music generator " feature . Her only knowledge was Marr being in them , probably thought he founded them', 'lemmas': ' it be part of her " new music generator " feature . her only knowledge be marr be in they , probably think he found they', 'pos': "XX ADD XX PRP VBD NN IN PRP$ `` JJ NN NN '' NN . PRP$ JJ NN VBD NNP VBG IN PRP , RB VBD PRP VBD PRP", 'phrases': 'her_"new_music_generator"_feature Her_only_knowledge', 'entities': ['Marr:PERSON'], 'ent_types': ['PERSON'], 'hashtags': [], 'oov_words': 'marr', 'emoji_ratio': 0.0, 'link_ratio': 0.0, 'user_ratio': 0.034482758620689655, 'oov_ratio': 0.045454545454545456}
200000 {'type': 'NormalUser', 'text': '<USER> oh perdon, puedes intentar en facebook si quieres?', 'tokens': '<USER> oh perdon , puedes intentar en facebook si qui

1500000 {'type': 'NormalUser', 'text': '<USER> The good news is, BOTH names were acceptable in the broadleaf ID test I probably just flunked.', 'tokens': '<USER> The good news is , BOTH names were acceptable in the broadleaf ID test I probably just flunked .', 'lemmas': ' the good news be , both name be acceptable in the broadleaf id test i probably just flunk .', 'pos': 'XX ADD XX DT JJ NN VBZ , DT NNS VBD JJ IN DT NNP NNP NN PRP RB RB VBN .', 'phrases': 'The_good_news BOTH_names the_broadleaf_ID_test', 'entities': [], 'ent_types': [], 'hashtags': [], 'oov_words': '', 'emoji_ratio': 0.0, 'link_ratio': 0.0, 'user_ratio': 0.045454545454545456, 'oov_ratio': 0.0}
1600000 {'type': 'NormalUser', 'text': 'Monsignor Murphys on #Yelp: Friends and I were sent here for drinks while waiting for a table at the oh so popular C... <LINK>', 'tokens': 'Monsignor Murphys on #Yelp : Friends and I were sent here for drinks while waiting for a table at the oh so popular C ... <LINK>', 'lemmas': 'monsignor

In [13]:
with gzip.open('../data/user_features.pkl.gz', 'wb') as oz:
    pickle.dump(user_feats, oz)

In [None]:
#
# test some sklearn models
#

In [3]:
with gzip.open('../data/troll_features.pkl.gz', 'rb') as fz:
    troll_feats = pickle.load(fz)
troll_feats = troll_feats[:500000]
len(troll_feats)

500000

In [4]:
with gzip.open('../data/user_features.pkl.gz', 'rb') as fz:
    user_feats = pickle.load(fz)
user_feats = user_feats[:len(troll_feats)]
len(user_feats)

500000

In [5]:
random.shuffle(troll_feats)
random.shuffle(user_feats)

In [6]:
feats = troll_feats
while user_feats:
    feats.append(user_feats.pop())
random.shuffle(feats)
len(feats)

1000000

In [7]:
y = [1 if f['type']=='NormalUser' else 0 for f in feats]

In [8]:
with gzip.open('../data/1000000_features_x.pkl.gz', 'wb') as oz:
    pickle.dump(feats, oz)

In [9]:
with gzip.open('../data/1000000_features_y.pkl', 'wb') as o:
    pickle.dump(y, o)

In [10]:
texts = [f['lemmas'].strip() for f in feats]

In [11]:
feats=troll_feats=user_feats=None

In [13]:
tfidf_vectorizer = TfidfVectorizer(max_features=50000, stop_words="english")
tfidf_vectorizer.fit(texts)

In [14]:
tfidf = tfidf_vectorizer.fit_transform(texts)

In [15]:
joblib.dump(tfidf, '1000000_tfidf_lemmas.joblib') 

['1000000_tfidf_lemmas.joblib']

In [18]:
svd = TruncatedSVD(256)
lsa = svd.fit_transform(tfidf)
sum(svd.explained_variance_ratio_)

0.20754998021218518

In [23]:
tfidf.shape, lsa.shape

((1000000, 50000), (1000000, 256))

In [19]:
joblib.dump(lsa, '1000000_lsa256_lemmas.joblib') 

['1000000_lsa256_lemmas.joblib']

In [24]:
x_lsa_train = lsa[:-10000,:]
y_lsa_train = np.array(y[:-10000])
x_lsa_test = lsa[-10000:,:]
y_lsa_test = np.array(y[-10000:])

x_lsa_train.shape

(990000, 256)

In [25]:
svm = LinearSVC()

In [26]:
svm.fit(x_lsa_train, y_lsa_train)
svm.score(x_lsa_test, y_lsa_test)

0.7964

In [27]:
x_tfidf_train = tfidf[:-10000,:]
y_tfidf_train = np.array(y[:-10000])
x_tfidf_test = tfidf[-10000:,:]
y_tfidf_test = np.array(y[-10000:])
x_tfidf_train.shape

(990000, 50000)

In [28]:
svm.fit(x_tfidf_train, y_tfidf_train)
svm.score(x_tfidf_test, y_tfidf_test)

0.8943

In [32]:
coefargs = np.argsort(svm.coef_)#.tolist()[::-1]
coefargs.shape

(1, 50000)

In [34]:
svm.coef_[0,coefargs[-10:]]

array([[-6.65339554, -6.36759972, -5.98454767, ...,  3.66902463,
         3.73444047,  4.86132776]])

In [39]:
most_important_tokens = coefargs[-100:][0].tolist()[::-1]

In [48]:
voc = tfidf_vectorizer.get_feature_names()
for i, word_idx in enumerate(most_important_tokens):
    print(i, voc[word_idx])

0 com
1 10news
2 nctime
3 www
4 h1n1
5 youmightbealiberal
6 hcr
7 cofc
8 stupak
9 awsr
10 proverbs28
11 proverbs29
12 2009
13 acorn
14 39
15 worldcupdraw
16 tbot
17 dawgfootball
18 youraudiofix
19 2010
20 menino
21 dbj
22 kt
23 clench
24 upi
25 olemiss
26 cejapa
27 bettor
28 cagop
29 8203
30 parkville
31 corzine
32 shjob
33 fthood
34 hanley
35 dawgfan
36 k4c
37 prisonindustrialcomplex
38 music4games
39 payitforwardfun
40 scozzafava
41 amdt
42 09
43 drifter51
44 odetose
45 dpost
46 worldsthinnestbook
47 foreclosure
48 ga09
49 mobsterworld
50 ksm
51 cadebate
52 noh8
53 forthood
54 iranelection
55 wwiimuseum
56 blog
57 newsvine
58 buzzflash
59 efcg
60 washooting
61 swine
62 psfk
63 rvtip
64 cnen
65 ps3
66 chs
67 ocra
68 opportuniy
69 businessonline
70 apos
71 openwebaward
72 washoote
73 johann3216
74 latism
75 thepatriotjournal
76 nasatweetup
77 horry
78 allvolunteerforce
79 indymedia
80 n9ne
81 infos
82 ny23
83 appt
84 palin
85 hasan
86 berlinwall
87 sayfie
88 docluv
89 140conf
90 lastfm

865 weis
866 wheretheydothatat
867 motocross
868 karina
869 radric
870 ogilvy
871 goe
872 flashforward
873 aspca
874 turnon
875 toolbar
876 ick
877 roo
878 ocho
879 motha
880 tokio
881 firms
882 rockingham
883 edem
884 mp3
885 txte
886 extraordinaire
887 m6
888 bif5
889 diego
890 cmj
891 o_o
892 printable
893 proprietary
894 tinychat
895 cowfilm
896 nidal
897 leno
898 gretchen
899 titans
900 corcoran
901 beck
902 umaga
903 secgen
904 strumpet
905 caitlin
906 ahmadinejad
907 intuit
908 odb
909 dysfamwk
910 arouse
911 devout
912 mezvinsky
913 jm
914 musicskins
915 weakass
916 hooyah
917 frankel
918 clunker
919 patersoninbrooklyn
920 moca
921 perspectives
922 razorfish
923 skatepark
924 telco
925 carm
926 neu
927 rocawear
928 pac10
929 whatchu
930 gc
931 xiii
932 dlc
933 fillmore
934 319
935 procrastinate
936 chosen
937 gases
938 benign
939 unh
940 catalina
941 nkh
942 chillen
943 40pm
944 appleton
945 torchwood
946 dugard
947 yogananda
948 fifty100
949 kiddin
950 hokies
951 muah
952 lmmf

1594 wok
1595 leander
1596 dungy
1597 hahaa
1598 kronos
1599 wmg
1600 bestfeele
1601 rodan
1602 thekiller
1603 pdc09
1604 vtpk
1605 affiliate
1606 ibrahimovic
1607 timeshare
1608 leotard
1609 impressionist
1610 verne
1611 brenner
1612 w00
1613 waitley
1614 mcnair
1615 techcrunch
1616 edufair
1617 xoxox
1618 greasemonkey
1619 tallahassee
1620 lyricist
1621 hatin
1622 bulletin
1623 scrobble
1624 dmca
1625 arealwife
1626 994
1627 basset
1628 refreshmiami
1629 initiatives
1630 potrero
1631 tp
1632 nbs
1633 mex
1634 shrm
1635 rachelle
1636 dyer
1637 uve
1638 floridaspl
1639 ponzi
1640 thingsilearnedfromjamesbond
1641 e2
1642 kqed
1643 steny
1644 impend
1645 hdr
1646 bostonisbetter
1647 networking
1648 evar
1649 improves
1650 trenches
1651 ohh
1652 collier
1653 rpm
1654 slr
1655 peed
1656 mutha
1657 swarovski
1658 dobbs
1659 zoe
1660 foramilliondollar
1661 vod
1662 estimates
1663 scarily
1664 sojourn
1665 provid
1666 ddb
1667 agile
1668 adweek
1669 koss
1670 ahh
1671 sidewiki
1672 mbp
1673 c

2382 convicted
2383 corvette
2384 complimentary
2385 aia
2386 digium
2387 2ba
2388 whitacre
2389 lettertomyex
2390 atlantis
2391 saveballoonboy
2392 bowden
2393 earmark
2394 betting
2395 pdf
2396 lymelife
2397 leaner
2398 stossel
2399 git
2400 wec
2401 katz
2402 bein
2403 embryonic
2404 detyme
2405 slips
2406 twoddler
2407 interactive
2408 livemocha
2409 _nemesis
2410 shih
2411 flagstaff
2412 allll
2413 dx_f
2414 slumdog
2415 opel
2416 greenbuild
2417 mlpa
2418 qik
2419 agilence
2420 sebastopol
2421 fees
2422 reuse
2423 gfoc09
2424 residual
2425 thankz
2426 ennio
2427 standin
2428 payitforward
2429 asthmatic
2430 distributed
2431 glasow
2432 lk
2433 ily
2434 cyberspace
2435 customize
2436 ghosts
2437 gophers
2438 ats
2439 ashoka
2440 calswap
2441 carbs
2442 udoka
2443 karr
2444 cal
2445 stache
2446 ronson
2447 idealab
2448 arrowhead
2449 ale
2450 monique
2451 ftw
2452 abilities
2453 comreal
2454 videos
2455 myths
2456 usin
2457 armin
2458 ths
2459 podcamp
2460 discounts
2461 xd
2462 he

3212 seas
3213 overnite
3214 cliq
3215 indo
3216 amawmi
3217 vernacular
3218 mimasummit
3219 fondo
3220 jone
3221 harem
3222 hoh
3223 playwatchvilla
3224 266
3225 rates
3226 pauls
3227 perriello
3228 ghandi
3229 api
3230 okstate
3231 liberator
3232 eeoc
3233 gwap
3234 bigchampagne
3235 cocktails
3236 pps
3237 currently
3238 strips
3239 haasdesign
3240 tumbling
3241 matisyahu
3242 mmmmm
3243 deals
3244 watering
3245 halvarez
3246 nonetheless
3247 casablancas
3248 trinket
3249 pwd
3250 lincecum
3251 penetration
3252 wc
3253 ac2
3254 kingsolver
3255 homeschool
3256 avalon
3257 carchat
3258 lello
3259 windows7
3260 edina
3261 333autismnew
3262 gregor
3263 exclusives
3264 settings
3265 cheerio
3266 ie6
3267 gamefly
3268 carbonite
3269 cct2009
3270 roadtrip
3271 sauteed
3272 carville
3273 adapter
3274 goin
3275 unfamiliar
3276 cowper
3277 louboutin
3278 12seconds
3279 deanna
3280 anthropology
3281 twitterberry
3282 yeahhhhh
3283 architects
3284 larger
3285 leighton
3286 trotter
3287 youfailb

4100 whomever
4101 puck
4102 caribou
4103 batteries
4104 whyyy
4105 dragonage
4106 gators
4107 workload
4108 cask
4109 snapname
4110 wipes
4111 typepad
4112 luckily
4113 nobelpeaceprize
4114 runyon
4115 516
4116 springfield
4117 waaaay
4118 zomg
4119 proctor
4120 copywriting
4121 131
4122 meatloaf
4123 carb
4124 359
4125 vodpod
4126 showcases
4127 betcha
4128 fone
4129 peripheral
4130 blogomator
4131 panache
4132 huff
4133 uwe
4134 _nickt
4135 lawful
4136 2400
4137 8d
4138 hehe
4139 runyan
4140 brodeur
4141 senor
4142 attitudes
4143 coisa
4144 wher
4145 crowne
4146 bricktown
4147 relational
4148 rhinestones
4149 rocc
4150 miseducation
4151 isagenix
4152 kristen
4153 casket
4154 zellers
4155 images
4156 tincture
4157 krystal
4158 inexpensive
4159 snd
4160 kalish
4161 jimmie
4162 letz
4163 civicrm
4164 seesmic
4165 ethernet
4166 mywesttexas
4167 optics
4168 beaner
4169 netproductcenter
4170 madmen
4171 electrification
4172 omen
4173 recs
4174 mealbalance
4175 samba
4176 oprah
4177 gettn


4978 peale
4979 pssst
4980 wiper
4981 cowboys
4982 satin
4983 garnish
4984 stampington
4985 minuten
4986 finance
4987 finishing
4988 entrust
4989 picking
4990 coka
4991 davy
4992 shinn
4993 droids
4994 lorrie
4995 426
4996 smcswfl
4997 circ
4998 nomadic
4999 drawings
5000 loops
5001 quart
5002 converting
5003 salon
5004 kpbs
5005 tko
5006 blu
5007 rufus
5008 tomm
5009 aries
5010 fixed
5011 hom
5012 transactions
5013 ddd
5014 _iammclovin
5015 lomond
5016 agian
5017 fillin
5018 lube
5019 exp
5020 marisa
5021 worlds
5022 skipping
5023 tmnt
5024 meriweather
5025 chicagoonlinemarketingmeetup
5026 wei
5027 twig
5028 koin
5029 coldfusion
5030 endlich
5031 tweet4lit
5032 enlightened
5033 sharrie
5034 porto
5035 foryourentertainment
5036 pomona
5037 wetlands
5038 thankfull
5039 mma
5040 devildriver
5041 cs3
5042 sustainable
5043 rework
5044 etrbc
5045 mac
5046 paloma
5047 typeface
5048 demo
5049 vaca
5050 hoss
5051 nyer
5052 downturn
5053 5793
5054 utilities
5055 feehan
5056 pairs
5057 embroide

5767 megadeth
5768 fishnet
5769 clausen
5770 flog
5771 faq
5772 doxo
5773 collabo
5774 mice
5775 whatz
5776 gurl
5777 callback
5778 daytrotter
5779 guitars
5780 onions
5781 yeaa
5782 heh
5783 gosta
5784 adresse
5785 band
5786 patio
5787 sputnik
5788 lounge
5789 mathematic
5790 cpk
5791 vladimir
5792 djbooth
5793 waaaaaay
5794 alain
5795 hawthorne
5796 yummm
5797 hrs
5798 imsohigh
5799 gibt
5800 dmb
5801 woo
5802 mae
5803 pwnage
5804 modification
5805 innovation
5806 tyra
5807 486
5808 reconstruct
5809 microstock
5810 hecho
5811 nighty
5812 furlough
5813 gente
5814 octubre
5815 burton
5816 login
5817 mayne
5818 conferences
5819 tc
5820 yello
5821 coems
5822 unconference
5823 benitez
5824 lol
5825 toughens
5826 mga
5827 dennis
5828 lettin
5829 pick247
5830 0fficialdubz
5831 teletubby
5832 booksellers
5833 nightwatchman
5834 cuchat
5835 ableton
5836 contracting
5837 sev
5838 salvia
5839 skool
5840 turismo
5841 sbdc
5842 lá
5843 shaumburg
5844 chic
5845 perfume
5846 kingdom
5847 hra
5848 r

6691 collie
6692 skelanimals
6693 communication
6694 gracefully
6695 clojure
6696 mpc
6697 sprinkles
6698 complement
6699 whad
6700 anchorage
6701 ping
6702 dogster
6703 312
6704 reduced
6705 prune
6706 newletter
6707 adam
6708 entrepreneur
6709 installation
6710 lovee
6711 elle
6712 jou
6713 borrow
6714 recommendation
6715 itis
6716 ster
6717 ri
6718 headhunter
6719 machines
6720 kindle
6721 mead
6722 irregardless
6723 chatroom
6724 glittery
6725 variable
6726 belvedere
6727 nietzsche
6728 grandaughter
6729 faceoff
6730 rubik
6731 tile
6732 rackspace
6733 maddhatta
6734 yorktown
6735 niles
6736 mgrs
6737 tenenbaum
6738 tvn
6739 earthfootwear
6740 motorsports
6741 yfrog
6742 muff
6743 mallet
6744 burleson
6745 cwb
6746 pattie
6747 electro
6748 builders
6749 mediadailynews
6750 teenaged
6751 saas
6752 raffle
6753 9000
6754 brewers
6755 rubinbrown
6756 n97
6757 birdland
6758 ecstatic
6759 cavitation
6760 tkt
6761 sayin
6762 waz
6763 brickfish
6764 smokers
6765 bicep
6766 diana
6767 b2
67

7451 suite
7452 previews
7453 onlyndahood
7454 ladybug
7455 sliver
7456 adrenaline
7457 sites
7458 playgirl
7459 nita
7460 naples
7461 technorati
7462 knowdat
7463 nbaking
7464 ewoks
7465 bizbookaward
7466 psyche
7467 booklet
7468 otw
7469 lair
7470 smirnoff
7471 humpty
7472 strauss
7473 snuggly
7474 famil
7475 mcclintock
7476 layout
7477 dayumm
7478 steez
7479 sm2day
7480 bratwurst
7481 tron
7482 merlot
7483 guitar
7484 gotomeeting
7485 heyday
7486 imapmyride
7487 eclair
7488 tenacity
7489 msa
7490 wits
7491 recharge
7492 ohhhhhh
7493 msdn
7494 domaine
7495 misspelling
7496 fbc
7497 relaxin
7498 sounds
7499 finalists
7500 booooo
7501 lucie
7502 superslow
7503 chung
7504 chilin
7505 effects
7506 amanda
7507 chevrolet
7508 90210
7509 complacency
7510 incur
7511 karmaloop
7512 sweeeet
7513 danko
7514 wrek
7515 leroy
7516 brownies
7517 ze
7518 jacksonville
7519 mariana
7520 napolean
7521 whaaaaat
7522 mcginley
7523 goldfrapp
7524 elvis
7525 gamblers
7526 h3
7527 channels
7528 knw
7529 noc

8281 _missmanda
8282 16th
8283 nom
8284 xango
8285 mascara
8286 007
8287 chili
8288 compatible
8289 gpl
8290 engagingtime
8291 francine
8292 reggaegoldsf
8293 bien
8294 jahvid
8295 dodgeball
8296 girlie
8297 caramel
8298 ha
8299 niche
8300 tijden
8301 katt
8302 zoekt
8303 harmonious
8304 ruthie
8305 movies
8306 twitfam
8307 hypocrit
8308 mi
8309 bxtch
8310 54am
8311 xdance
8312 couchdb
8313 redenvelope
8314 servin
8315 kayla
8316 designchat
8317 wanta
8318 yow
8319 2mrw
8320 pouty
8321 tiarathedopegrl
8322 spirits
8323 wanting
8324 nxt
8325 likewise
8326 gudd
8327 preferences
8328 gnd
8329 manufactured
8330 gramps
8331 earnings
8332 rediculous
8333 ohlendorf
8334 passin
8335 trials
8336 florafriday
8337 shootaround
8338 pushes
8339 maryanne
8340 digestion
8341 moxsie
8342 oktoberfest
8343 resonant
8344 atom
8345 teacup
8346 ihre
8347 gigabyte
8348 uptempo
8349 499
8350 weloveuhex
8351 4sho
8352 recieve
8353 telephony
8354 paintball
8355 wearin
8356 pancrea
8357 sephoraclaus
8358 youkno

9118 q6
9119 ladie
9120 smx
9121 rubios
9122 kept
9123 oe
9124 sebelius
9125 cheapie
9126 mij
9127 celery
9128 commin
9129 ascent
9130 brentwood
9131 conception
9132 jars
9133 dialog
9134 bobo
9135 carolyn
9136 growl
9137 rainin
9138 nyquil
9139 cual
9140 offspring
9141 hour
9142 messages
9143 specialists
9144 waterboarding
9145 sparrow
9146 b1
9147 babymama
9148 7fm
9149 awesome
9150 itune
9151 poken
9152 sm
9153 compress
9154 gamestop
9155 jetzt
9156 ams
9157 babylegs
9158 704
9159 mamba
9160 lydia
9161 eh
9162 alec
9163 missy
9164 mag
9165 tinsel
9166 ruiz
9167 tipo
9168 wht
9169 theakronhammer
9170 zetia
9171 ssa09
9172 faite
9173 fettuccine
9174 costarica
9175 blastin
9176 saddle
9177 thank
9178 duece
9179 paula
9180 bctampa
9181 kmb
9182 icantstand
9183 murad
9184 eff
9185 volcom
9186 worldly
9187 aloha
9188 any1
9189 whatev
9190 sergey
9191 peeps
9192 wordpress
9193 scion
9194 intercom
9195 biggup
9196 extravaganza
9197 peo
9198 imitation
9199 yh
9200 chasm
9201 brill
9202 covet

9986 haveuever
9987 entertained
9988 called
9989 phishe
9990 enthusia
9991 thet
9992 keyanna
9993 hundreds
9994 efficiency
9995 alienation
9996 leche
9997 tastefully
9998 fishin
9999 smoking
10000 streamy
10001 iono
10002 impossibility
10003 accion
10004 coraline
10005 frou
10006 accent
10007 woah
10008 okayy
10009 coltrane
10010 spec
10011 auch
10012 brib
10013 acquaintance
10014 cmo
10015 betwn
10016 chihuahua
10017 plaxico
10018 sl
10019 cheng
10020 mfinley210
10021 geologist
10022 macaroon
10023 herr
10024 setting
10025 gina
10026 costs
10027 zin
10028 hiking
10029 dunno
10030 execs
10031 ave
10032 breasts
10033 bushwick
10034 dbd
10035 cullen
10036 targeted
10037 tisk
10038 lmfaoooooo
10039 4adfan
10040 hiss
10041 stockmarket
10042 chauncey
10043 casper
10044 stuff
10045 musikfest
10046 sexte
10047 gwave
10048 comforting
10049 falalalaebay
10050 cilantro
10051 yooo
10052 yrd
10053 trickretweet
10054 ishow
10055 bayonetta
10056 bose
10057 gsm
10058 boredtodeath
10059 boudoir
10060 

10923 invisibleshield
10924 durty
10925 arroz
10926 jacques
10927 mgm
10928 noisecreep
10929 ee
10930 monger
10931 entourage
10932 hodi
10933 brannan
10934 madden
10935 sophocle
10936 systen
10937 talkie
10938 naturals
10939 thurs
10940 wakeboard
10941 unification
10942 relatively
10943 himss
10944 830
10945 gobierno
10946 oi
10947 nationalize
10948 oncology
10949 moodle
10950 hugely
10951 tsp
10952 acad
10953 dammit
10954 tnite
10955 announcer
10956 chemo
10957 metric
10958 hubby
10959 tailwind
10960 pismo
10961 items
10962 bedside
10963 confucius
10964 sorta
10965 dogma
10966 iguard
10967 vegge
10968 dooo
10969 alcatraz
10970 grat
10971 ther
10972 slacker
10973 cca
10974 thur
10975 chaminade
10976 godiva
10977 linda
10978 leisure
10979 hustlin
10980 homeschoole
10981 gamecock
10982 glide
10983 mime
10984 faulk
10985 pmt
10986 pleasee
10987 dependable
10988 ginuwine
10989 routing
10990 alanis
10991 housewives
10992 moderne
10993 vows
10994 bootie
10995 morrison
10996 omgosh
10997 mave

11976 decapitation
11977 kbco
11978 vittana
11979 alimony
11980 vader
11981 scotia
11982 shall
11983 catalog
11984 backdoor
11985 shanedawson
11986 inevitable
11987 unstrapp
11988 dinners
11989 momentarily
11990 upload
11991 scorer
11992 tastic
11993 rosenberg
11994 motorhead
11995 horribly
11996 gutsy
11997 paramus
11998 saturation
11999 15am
12000 grapefruit
12001 retractable
12002 spreadable
12003 sata
12004 orig
12005 worldmate
12006 hrw
12007 tinyurl
12008 decemberists
12009 coldwell
12010 vinden
12011 clothier
12012 slowness
12013 streaker
12014 csa
12015 inhalation
12016 arthur
12017 dundee
12018 industry
12019 hang
12020 citifield
12021 changers
12022 tar
12023 musicindustry
12024 spandex
12025 careen
12026 infoworld
12027 dido
12028 baskin
12029 muhammed
12030 cartman
12031 highschool
12032 leading
12033 spün
12034 verde
12035 trey
12036 sarasota
12037 sleeep
12038 concise
12039 gatos
12040 sunlight
12041 oooooh
12042 turnabout
12043 lending
12044 yamaha
12045 jhun
12046 andal

12623 maradona
12624 pouco
12625 317
12626 _clevername
12627 dissin
12628 hoo
12629 fyi
12630 hapless
12631 leopold
12632 lizzy
12633 superstore
12634 salvation
12635 teapot
12636 andá
12637 jermaine
12638 5tevenw
12639 9mmninaross
12640 coveted
12641 reiterate
12642 diligence
12643 littlest
12644 socio
12645 demokratiezweinull
12646 cinematography
12647 viceroy
12648 estos
12649 belate
12650 behringer
12651 fol
12652 w00tstock
12653 wit
12654 ahn
12655 333
12656 fanfarlo
12657 megadroid
12658 ogle
12659 hl
12660 richardson
12661 flt
12662 blacklight
12663 kevlar
12664 investment
12665 crux
12666 commish
12667 awe
12668 dreamin
12669 watched
12670 octomom
12671 sweepstake
12672 catchphrase
12673 mondo
12674 geraldine
12675 fischer
12676 alexandre
12677 trus
12678 desalination
12679 wire
12680 letscreate
12681 bogo
12682 15pm
12683 aggy
12684 newhouse
12685 scotch
12686 cruisin
12687 itaipu
12688 recycler
12689 perkin
12690 natives
12691 einstein
12692 examiners
12693 treating
12694 hyd

13712 iedereen
13713 intangible
13714 denton
13715 harmonica
13716 _webstarr
13717 cuando
13718 beaded
13719 modify
13720 deplete
13721 intreste
13722 photographer
13723 tisch
13724 wiltern
13725 statutory
13726 potts
13727 sol
13728 groucho
13729 itty
13730 instrument
13731 dum
13732 ezra
13733 infused
13734 inspired
13735 iwish
13736 thicken
13737 dcs
13738 hummus
13739 marke
13740 packs
13741 elicit
13742 ejm
13743 twivert
13744 email
13745 girltalk
13746 fl
13747 yacht
13748 sightglass
13749 serg
13750 sangria
13751 interviewing
13752 kuiper
13753 sketching
13754 reo
13755 cookbook
13756 repentance
13757 butter
13758 kernel
13759 hugs
13760 bdp
13761 awwnaw
13762 render
13763 dan
13764 spirituality
13765 madlem
13766 expressions
13767 gette
13768 sitemap
13769 niiiice
13770 thuram
13771 economics
13772 lumix
13773 shyne
13774 palladium
13775 burrito
13776 marginally
13777 porno
13778 ding
13779 sanitizer
13780 vac
13781 poppy
13782 djembe
13783 tekgroup
13784 pointe
13785 roh
13786

14585 lfnt
14586 lehman
14587 543
14588 amd
14589 tizzy
14590 signup
14591 cuzzo
14592 corbin
14593 nz
14594 xyz
14595 roadhouse
14596 chai
14597 yonder
14598 tru
14599 meatball
14600 appraisers
14601 babez
14602 assassin
14603 companion
14604 got
14605 photoshoot
14606 josiah
14607 chick
14608 defecate
14609 sienna
14610 blazers
14611 gage
14612 vids
14613 inpatient
14614 seifert
14615 sunny
14616 hmmmmm
14617 sweetie
14618 pix
14619 primal
14620 rim
14621 gloucester
14622 half
14623 blowup
14624 invite
14625 barbara
14626 kijk
14627 skaffers
14628 crazy
14629 dejar
14630 sama
14631 shuld
14632 5dmkii
14633 prenup
14634 bedfellow
14635 pays
14636 changs
14637 extended
14638 freshly
14639 ahha
14640 strangest
14641 metals
14642 dominos
14643 tia
14644 msg
14645 kills
14646 maye
14647 financing
14648 tane
14649 prevention
14650 reference
14651 bikini
14652 kaleb
14653 download
14654 power106
14655 blah
14656 abo
14657 el
14658 mispelle
14659 exhausted
14660 immaturity
14661 peaks
14662 

15748 falar
15749 venue
15750 doritos
15751 discouraging
15752 luenell
15753 tdk
15754 suitable
15755 dw
15756 mav
15757 domino
15758 orlando
15759 fox
15760 travelers
15761 hush
15762 turnberry
15763 bgca
15764 gandhi
15765 abuela
15766 kumasi
15767 assistance
15768 nickname
15769 pl
15770 earn
15771 database
15772 ldrs
15773 mani
15774 riversidegop
15775 means
15776 edison
15777 clovis
15778 covert
15779 amicus
15780 duplex
15781 believable
15782 yahoo
15783 itll
15784 detainer
15785 logistical
15786 omggg
15787 rib
15788 sworn
15789 mtt
15790 mobileme
15791 vacancy
15792 birder
15793 gd
15794 sunridge
15795 remix
15796 locator
15797 memphis
15798 zac
15799 pete
15800 throu
15801 beh
15802 workout
15803 anatole
15804 mintue
15805 105
15806 diggy
15807 soiree
15808 smokehouse
15809 harold
15810 snakebite
15811 lanterns
15812 iei
15813 dazzlewhitescam
15814 cypher
15815 pleads
15816 pre
15817 crashed
15818 moolah
15819 dph
15820 machinist
15821 expanding
15822 agai
15823 fascinating
15

16581 blooming
16582 contaminant
16583 sleepwalker
16584 rendition
16585 webinars
16586 durable
16587 forensic
16588 marks
16589 saturday
16590 eatin
16591 neglect
16592 lucrative
16593 okayyy
16594 aaahhh
16595 cong
16596 findable
16597 iquit
16598 vernon
16599 supercomputer
16600 boomer
16601 chemistry
16602 socialnomic
16603 squash
16604 pathetically
16605 mk
16606 writer
16607 ldn
16608 bridal
16609 ousted
16610 evidently
16611 scent
16612 obsession
16613 electrifying
16614 drinker
16615 bummy
16616 northwestern
16617 horseradish
16618 slingshot
16619 preview
16620 ehow
16621 elsinore
16622 mckinsey
16623 yard
16624 yolk
16625 infinity
16626 vierne
16627 adept
16628 craigslist
16629 ceg
16630 sample
16631 cube
16632 chris
16633 lorenzo
16634 carnival
16635 sourdough
16636 eis
16637 cba
16638 breakaway
16639 incontinence
16640 lotus
16641 yeahhhh
16642 quinto
16643 noc
16644 scr
16645 tnt
16646 illest
16647 ellis
16648 disasters
16649 uhm
16650 apprecate
16651 bran
16652 hot
16653 p

17413 ksc
17414 65
17415 rbc
17416 tools
17417 respiratory
17418 trickery
17419 row
17420 fg
17421 303
17422 crap
17423 anne
17424 uncontrollably
17425 communities
17426 beethoven
17427 timewarner
17428 teams
17429 achieve
17430 round
17431 alliance
17432 44th
17433 decor
17434 pickled
17435 shower
17436 quite
17437 scottsdale
17438 micheal
17439 boba
17440 jeannie
17441 ide
17442 barclay
17443 vectors
17444 lowcountry
17445 follo
17446 singers
17447 shaper
17448 pas
17449 ook
17450 rawr
17451 kombat
17452 koby
17453 gpu
17454 tumor
17455 duplication
17456 barracuda
17457 unassisted
17458 steele
17459 schawbel
17460 maersk
17461 gn
17462 whoohoo
17463 megan
17464 mercury
17465 33
17466 fallacy
17467 d700x
17468 hungry
17469 headley
17470 liebe
17471 soulja
17472 exchanges
17473 307
17474 pancake
17475 seg
17476 sallie
17477 conversational
17478 aweber
17479 tweethearts
17480 strangegibberish
17481 wellington
17482 madagascar
17483 advantages
17484 sober
17485 seneca
17486 lobbyists
174

18580 miyazaki
18581 cassidyhaley
18582 consolidate
18583 jeff
18584 meridian
18585 staircase
18586 thunder
18587 postponement
18588 toothpaste
18589 haters
18590 jetblue
18591 razzle
18592 steve
18593 spanish
18594 appropriately
18595 competitive
18596 bearing
18597 coalesce
18598 landscape
18599 audubon
18600 vegetable
18601 completely
18602 anoche
18603 sooooon
18604 faces
18605 4000
18606 prenatal
18607 babes
18608 escalade
18609 hurley
18610 scary
18611 smartrend
18612 refer
18613 askin
18614 volvoshine
18615 fart
18616 distractions
18617 distressed
18618 albums
18619 sheila
18620 vodka
18621 zillow
18622 biddy
18623 bossmack
18624 wordnik
18625 pwn
18626 electrify
18627 burroughs
18628 dungeoneers
18629 manifesto
18630 25th
18631 greengiftsfair09
18632 agencies
18633 gestapo
18634 perot
18635 commodity
18636 accommodate
18637 albert
18638 stitcher
18639 sg
18640 nominating
18641 cuisinart
18642 jalapeño
18643 haden
18644 universal
18645 bia
18646 e2conf
18647 infectious
18648 vis

19435 4hrs
19436 lesbian
19437 gsa
19438 layaway
19439 damnit
19440 gameplay
19441 ccsf
19442 spinach
19443 dcist
19444 betts
19445 seemingly
19446 dispatcher
19447 essay
19448 mona
19449 overkill
19450 725
19451 dazs
19452 foundationforyoutharts
19453 canine
19454 llama
19455 sedaris
19456 gregg
19457 akoustic
19458 digsby
19459 midst
19460 60th
19461 loves
19462 jud
19463 120
19464 directing
19465 bjs
19466 bbw
19467 passive
19468 handshake
19469 planting
19470 bes
19471 666
19472 handbags
19473 dori
19474 experience
19475 seatbelt
19476 posit
19477 easily
19478 luc
19479 ponder
19480 preventative
19481 razorbacks
19482 corn
19483 elam
19484 trouncing
19485 dweet
19486 trot
19487 macht
19488 audiobook
19489 pam
19490 komen
19491 3641
19492 miraculous
19493 categories
19494 follw
19495 covenant
19496 scooby
19497 294
19498 publications
19499 ese
19500 startupday
19501 311
19502 terrestrial
19503 syncage
19504 barometer
19505 booksmith
19506 40
19507 bel
19508 mont
19509 intercontinent

20075 nccu
20076 clicquot
20077 suggested
20078 dele
20079 marquez
20080 utd
20081 fondling
20082 tram
20083 insomnia
20084 ruffled
20085 halfway
20086 monde
20087 minimal
20088 selah
20089 swingers
20090 debit
20091 waltz
20092 suzi
20093 slimy
20094 chap
20095 raincheck
20096 giant
20097 kevin
20098 miserables
20099 aclc
20100 copyright
20101 membership
20102 9727
20103 leftover
20104 node
20105 blackfriday
20106 lego
20107 airborne
20108 applicatie
20109 silliness
20110 compliment
20111 callous
20112 tracey
20113 odd
20114 proactive
20115 rplac
20116 buffett
20117 larsson
20118 posted
20119 disconnected
20120 igottacrushon
20121 punishable
20122 p1
20123 angels
20124 curiosity
20125 masturbate
20126 baer
20127 neeeed
20128 jager
20129 bottoms
20130 thoughts
20131 video
20132 haus
20133 ni
20134 youngin
20135 connection
20136 exclaim
20137 tyrese
20138 goodtimes
20139 calculus
20140 alternatives
20141 hears
20142 anew
20143 wale
20144 roasters
20145 learning
20146 graphics
20147 tick

20716 muddle
20717 onl
20718 9b
20719 370z
20720 participation
20721 ahi
20722 phantom
20723 vip
20724 restaurant
20725 ripa
20726 matching
20727 length
20728 littleton
20729 cycling
20730 babble
20731 caught
20732 amir
20733 realtalk
20734 tanya
20735 jiffy
20736 lickin
20737 trim
20738 orpheum
20739 dates
20740 gpb
20741 structural
20742 yan
20743 alonzo
20744 cashcrate
20745 drab
20746 tabletop
20747 cycle
20748 su
20749 accomplished
20750 gratitude
20751 breakfast
20752 invalidate
20753 econo
20754 chimney
20755 stole
20756 hume
20757 newbie
20758 imo
20759 titles
20760 ficar
20761 perils
20762 pricing
20763 egan
20764 impromptu
20765 déjà
20766 amar
20767 nuptial
20768 congregation
20769 symptom
20770 track
20771 ahhhhhhhh
20772 lapse
20773 homecome
20774 diabetics
20775 artofficial
20776 worldaidsday
20777 dependency
20778 sullivan
20779 bower
20780 attendee
20781 cassette
20782 contrast
20783 grapevine
20784 highly
20785 calf
20786 fan
20787 bludgeon
20788 bogey
20789 ughhhh
207

21575 di
21576 coin
21577 applause
21578 meet
21579 dyk
21580 sistah
21581 fuels
21582 cyndi
21583 tangle
21584 hai
21585 olympics
21586 ski
21587 starch
21588 snatcher
21589 shoots
21590 charms
21591 1887
21592 wanda
21593 interviewed
21594 emurse
21595 spinning
21596 kin
21597 lease
21598 word
21599 myclubhop
21600 confirmed
21601 jackie
21602 lutheran
21603 bangladeshi
21604 mon
21605 sharply
21606 unconcerned
21607 mommy
21608 rabbis
21609 cottage
21610 christopher
21611 theatre
21612 saporta
21613 uplay
21614 inflame
21615 olson
21616 barack
21617 whitesox
21618 twee
21619 hayne
21620 lobbying
21621 breve
21622 intern
21623 menu
21624 mitigate
21625 screw
21626 buckingham
21627 pending
21628 jesse
21629 breeders
21630 righ
21631 diligently
21632 sht
21633 automotive
21634 1501
21635 bleeding
21636 cure
21637 twain
21638 rting
21639 liste
21640 regina
21641 oft
21642 liz
21643 bender
21644 recipe
21645 zzzzzz
21646 penske
21647 validation
21648 realtime
21649 bollock
21650 antifree

22658 pacquaio
22659 fatcow
22660 improvise
22661 empathetic
22662 bambino
22663 looooong
22664 inflation
22665 ziploc
22666 straighten
22667 perennial
22668 cosas
22669 grimy
22670 udall
22671 note
22672 sluggish
22673 whitehall
22674 verona
22675 castor
22676 eng
22677 mama
22678 salts
22679 owls
22680 92101
22681 initiative
22682 yarnell
22683 stor
22684 sodium
22685 elections
22686 weirdness
22687 bruised
22688 102
22689 anthro
22690 parka
22691 felt
22692 rupaul
22693 homestead
22694 a320
22695 topp
22696 belated
22697 lori
22698 barista
22699 verdict
22700 prn
22701 iwo
22702 chromosome
22703 aurora
22704 2003
22705 insiders
22706 divergent
22707 camera
22708 seven
22709 tiene
22710 ought
22711 match
22712 londzell
22713 parque
22714 lsu
22715 therapy
22716 wallace
22717 neeson
22718 supports
22719 apart
22720 pasta
22721 scooter
22722 pong
22723 eclectic
22724 lifechurch
22725 salem
22726 adaptation
22727 backlot
22728 fir
22729 caricature
22730 accra
22731 busey
22732 heroes
22

23584 eighteen
23585 arch
23586 collaboratemd
23587 tuesday
23588 abject
23589 rave
23590 vaccinated
23591 cereal
23592 entice
23593 lve
23594 unbeaten
23595 x3
23596 giuseppe
23597 33rd
23598 doowop
23599 loving
23600 sprint
23601 738
23602 in2
23603 cornfield
23604 idd
23605 adaptable
23606 lds
23607 cre8
23608 centimeter
23609 cath
23610 mandalay
23611 couldn
23612 edition
23613 garrard
23614 sub
23615 appreciative
23616 privy
23617 fucking
23618 kenny
23619 favs
23620 cialis
23621 engineering
23622 dahl
23623 spectacular
23624 enterprising
23625 francis
23626 256
23627 routinely
23628 doors
23629 thisss
23630 nutriiveda
23631 comentario
23632 glassy
23633 swizz
23634 amos
23635 owl
23636 heal
23637 tt
23638 address
23639 dartmouth
23640 gordon
23641 brick
23642 exemplify
23643 saludo
23644 gemma
23645 evolve
23646 fulfill
23647 counselor
23648 supervise
23649 pant
23650 cheerful
23651 librarian
23652 backward
23653 cock
23654 borne
23655 wsh
23656 putting
23657 colleges
23658 telek

24536 ferro
24537 aiken
24538 slackin
24539 strew
24540 etf
24541 masterpiece
24542 paradise
24543 inspection
24544 demoralize
24545 emerging
24546 scots
24547 ceolive
24548 crater
24549 whirl
24550 performance
24551 colorful
24552 consumption
24553 508
24554 blitz
24555 attire
24556 restaurateur
24557 alcoholism
24558 mainland
24559 maps
24560 safran
24561 mothers
24562 curl
24563 transforming
24564 hhhmagazine
24565 eyed
24566 strife
24567 akin
24568 jellybean
24569 activities
24570 sfr
24571 atcha
24572 clothe
24573 mosh
24574 satellitedirect
24575 flow
24576 franz
24577 frustration
24578 bipartisan
24579 23k
24580 yam
24581 foolproof
24582 exhume
24583 acordar
24584 trop
24585 foreplay
24586 semifinalist
24587 vans
24588 695
24589 totaly
24590 hype
24591 flirta
24592 aria
24593 wrangler
24594 ble
24595 banish
24596 pt
24597 road
24598 gros
24599 levee
24600 looted
24601 delhi
24602 lick
24603 hand
24604 hemisphere
24605 timetable
24606 lecturer
24607 conclusion
24608 uniform
24609 

25507 barrett
25508 nobodys
25509 hong
25510 cet
25511 boots
25512 pensacola
25513 presse
25514 wiaa
25515 haywood
25516 ola
25517 roaming
25518 emage
25519 august
25520 waterford
25521 scamme
25522 chez
25523 significant
25524 ender
25525 jacinto
25526 jumpstart
25527 impeccable
25528 stinker
25529 stiffness
25530 armstrong
25531 600th
25532 rosey
25533 hofer
25534 kush
25535 seismic
25536 advisor
25537 selfish
25538 biff
25539 golan
25540 thaksin
25541 handcraft
25542 forgot
25543 66
25544 stupid
25545 infor
25546 hrt
25547 handbag
25548 bergman
25549 unravel
25550 adventure
25551 uncivilized
25552 royce
25553 64th
25554 cook
25555 misleading
25556 eileen
25557 pac
25558 heaters
25559 weller
25560 blackwell
25561 convo
25562 daring
25563 blasts
25564 illustrated
25565 orders
25566 pleaser
25567 exceptional
25568 artbook
25569 homepros
25570 floods
25571 6th
25572 madrid
25573 pellet
25574 matey
25575 jobmob
25576 lloyd
25577 cheer
25578 centralize
25579 soak
25580 babel
25581 joker
2

26555 joyfulbathco
26556 mti
26557 dreambighustlehard
26558 mcarolbabe
26559 demboislandscape
26560 mayorgimenez
26561 operationdegeneres
26562 mugging
26563 927kkuu
26564 delinquencie
26565 wallop
26566 dabeatminerz
26567 losttt
26568 sympathies
26569 beyonslay
26570 delporableme
26571 7500sf
26572 openborders
26573 saabsquad
26574 nemtsov
26575 kag
26576 letsgetrollingpromotion
26577 benqmusic
26578 cold187um
26579 nutritionmarket
26580 represe
26581 dodt2003
26582 kruufm
26583 bossmackstreetwear
26584 kiddinaroundusa
26585 senatedebate
26586 virtualbeginnings
26587 technosweat
26588 semtex
26589 trumpstaxes
26590 kidrocksenator
26591 nyczombiecrawl
26592 kpsells
26593 nyjah
26594 actualrealitynotvirtual
26595 mooseutilities
26596 _imcnasty
26597 cssmusic
26598 votehandmade
26599 blackxmas2
26600 votedjt1
26601 disavowing
26602 offox
26603 officialnewmooncountdown
26604 lahjikmusic
26605 nswpol
26606 ntvnyr173
26607 nubiacreativeimages
26608 trueislam
26609 truemaga
26610 colgatewisp

27586 burning
27587 brinks
27588 kremlincohorts
27589 contract
27590 gentler
27591 changer
27592 remeber
27593 glennbeck
27594 simplistic
27595 scorpion
27596 searcher
27597 redbone
27598 greet
27599 crab
27600 brenda
27601 mpp
27602 old
27603 snider
27604 torae
27605 flav
27606 restaurants
27607 lionsgate
27608 40am
27609 12000
27610 lambaste
27611 tier
27612 musician
27613 curren
27614 viol
27615 qualifie
27616 lugar
27617 gendarmerie
27618 emerges
27619 picky
27620 clintonbodycount
27621 rank
27622 topsy
27623 pushin
27624 fantasyland
27625 induction
27626 dpp
27627 replica
27628 badly
27629 enhancer
27630 manheim
27631 unter
27632 roth
27633 skip
27634 ducky
27635 shedd
27636 electricity
27637 cypress
27638 208
27639 condone
27640 sitter
27641 revis
27642 eligible
27643 vandy
27644 fireside
27645 sasha
27646 funday
27647 mom
27648 organize
27649 lahore
27650 iditarod
27651 presidio
27652 motogp
27653 mayfield
27654 autocomplete
27655 exaggeration
27656 scheming
27657 jasmine
27658 

28218 commentator
28219 jakarta
28220 erupts
28221 domani
28222 busby
28223 intrusion
28224 jogging
28225 amman
28226 carolinas
28227 rhinestone
28228 sayyaf
28229 springsteen
28230 meteorite
28231 fellow
28232 hoarder
28233 purposefully
28234 walk
28235 daybreak
28236 skb_sara
28237 outskirt
28238 brescia
28239 exude
28240 plumber
28241 cristo
28242 enlighten
28243 maximum
28244 matchbox
28245 nopcbs
28246 hunt
28247 tank
28248 incapable
28249 selection
28250 unknown
28251 163
28252 18
28253 knives
28254 tie
28255 awesomely
28256 subscription
28257 marina
28258 050
28259 bongo
28260 ice
28261 reimburse
28262 cow
28263 celebration
28264 studying
28265 makeitstop
28266 atomic
28267 seedy
28268 arid
28269 pusher
28270 contraction
28271 remarkable
28272 peg
28273 dusty
28274 gametime
28275 panel
28276 31
28277 wolves
28278 scottish
28279 grip
28280 bonk
28281 absolutely
28282 flyers
28283 boe
28284 inhumane
28285 threesome
28286 solo
28287 skepticism
28288 einem
28289 attempting
28290 bru

29289 lear
29290 ibarguen
29291 quota
29292 playmate
29293 rothenberg
29294 spruce
29295 ustede
29296 zzzzzzzz
29297 lyfe
29298 meddle
29299 familyprotection
29300 reed
29301 pajamas
29302 acrobat
29303 terence
29304 tiananmen
29305 corrupthillary
29306 belgian
29307 stater
29308 baylor
29309 ailing
29310 excessive
29311 cotton
29312 calculation
29313 identity
29314 cld
29315 crocodile
29316 gameday
29317 parkland
29318 choi
29319 lifter
29320 rodent
29321 kagan
29322 4the
29323 slide
29324 mayday
29325 duo
29326 immediately
29327 beam
29328 doozy
29329 valero
29330 pensioner
29331 ashtray
29332 ahhhhhhhhh
29333 imports
29334 imaginable
29335 branch
29336 aired
29337 packers
29338 augustus
29339 fognini
29340 352
29341 pnw
29342 overpriced
29343 oas
29344 neuroscience
29345 carmaker
29346 knowledge
29347 pictures
29348 flames
29349 smoke
29350 funtime
29351 scotsman
29352 reuters
29353 epiphany
29354 expectation
29355 thai
29356 snow
29357 insect
29358 dispatch
29359 dour
29360 claus
2

30266 predictable
30267 gooding
30268 socialization
30269 belief
30270 member
30271 allo
30272 missourigop
30273 intellectual
30274 wick
30275 nosharia
30276 beautifully
30277 rickey
30278 4day
30279 pakistani
30280 weary
30281 survive
30282 rascals
30283 12k
30284 jessa
30285 bod
30286 wives
30287 rearrange
30288 ricardo
30289 earphone
30290 rockabilly
30291 formulate
30292 competent
30293 berto
30294 fantasizes
30295 losangeles
30296 72
30297 andover
30298 swedenincident
30299 wembley
30300 documents
30301 campaigns
30302 stabbed
30303 temblor
30304 aspirin
30305 sequoia
30306 hardwired
30307 relaunch
30308 formula
30309 feckless
30310 sar
30311 porn
30312 wizards
30313 advisors
30314 praxis
30315 cheerleader
30316 capito
30317 realized
30318 qldaah
30319 dino
30320 pivot
30321 jedi
30322 perverted
30323 psychopathic
30324 bin
30325 cdq
30326 moissycramayel
30327 14h
30328 jacquelinesauvage
30329 ernst
30330 kart
30331 ago
30332 debates
30333 jet
30334 mango
30335 dyke
30336 supervis

31236 context
31237 profession
31238 abba
31239 school
31240 hoe
31241 ramirez
31242 998
31243 refined
31244 bafta
31245 pooch
31246 173
31247 pulmonary
31248 brangelina
31249 neurology
31250 obliterate
31251 thyme
31252 blue
31253 dyslexic
31254 analyze
31255 loki
31256 shelve
31257 patent
31258 pour
31259 bizarro
31260 sharktank
31261 disapproval
31262 hurdles
31263 deleon
31264 desmond
31265 fetish
31266 precursor
31267 platinum
31268 victorian
31269 trumka
31270 jenn
31271 thc
31272 hailey
31273 etat
31274 dumoulin
31275 resound
31276 gloria
31277 petey
31278 striking
31279 sel
31280 influence
31281 tammie
31282 tuskegee
31283 hosted
31284 gillespie
31285 cognitive
31286 banksy
31287 2ot
31288 vez
31289 cozy
31290 philippines
31291 illumination
31292 redistributethepain
31293 populated
31294 pedal
31295 nasheed
31296 hotdog
31297 patrio
31298 trails
31299 barricade
31300 ritchie
31301 dk
31302 blister
31303 mound
31304 milf
31305 xl
31306 auditor
31307 jace
31308 oneness
31309 naus

32148 abdomen
32149 kegel
32150 vigilante
32151 dga
32152 roasting
32153 jaheim
32154 saboteur
32155 taqiyya
32156 buddhist
32157 thrust
32158 vuelta
32159 unionized
32160 jpb_53
32161 kisner
32162 audacious
32163 fumes
32164 importa
32165 agut
32166 globally
32167 unimaginable
32168 reiter
32169 compound
32170 maverick
32171 derangement
32172 accurately
32173 concession
32174 peer
32175 vital
32176 nevada
32177 flooring
32178 diamond
32179 bacall
32180 bakery
32181 boos
32182 voyage
32183 digable
32184 cheapest
32185 knifeman
32186 unqualified
32187 robotics
32188 publish
32189 dncdebate
32190 dry
32191 dermatologist
32192 croft
32193 wuhan
32194 grammy620
32195 authentic
32196 std
32197 chimamanda
32198 waterway
32199 taiz
32200 originalpoetry
32201 protestpoetry
32202 lemonade
32203 replicate
32204 alveda
32205 podestaemails31
32206 effectiveness
32207 woeful
32208 explode
32209 westside
32210 latino
32211 snoring
32212 vece
32213 pepsi
32214 sério
32215 plump
32216 435
32217 turkst

33078 votre
33079 getabuzz
33080 proudly
33081 uneven
33082 plantain
33083 555
33084 arboretum
33085 courageous
33086 tourney
33087 musiq
33088 eyebrow
33089 apartment
33090 reusable
33091 chairlift
33092 antiquity
33093 distinction
33094 feline
33095 dwellers
33096 clementine
33097 exist
33098 hobbit
33099 potency
33100 society
33101 kimdotcom
33102 vídeo
33103 quarry
33104 bot
33105 lionize
33106 caliber
33107 patrick
33108 olga
33109 beat
33110 addresses
33111 complaint
33112 jan
33113 admiral
33114 cinematic
33115 deceptively
33116 finding
33117 butthis
33118 superb
33119 rocket
33120 villain
33121 happ
33122 raise
33123 preparing
33124 footballer
33125 jbro_1776
33126 dotson
33127 detector
33128 2s
33129 slogan
33130 82
33131 fuca
33132 carver
33133 meal
33134 archer
33135 25yr
33136 bernadino
33137 67th
33138 firm
33139 enrico
33140 campground
33141 torrents
33142 disappointment
33143 irreversible
33144 broaden
33145 empire
33146 rottweiler
33147 isiah
33148 conscience
33149 tran

34060 criticizing
34061 inhabitant
34062 liberate
34063 aesthetically
34064 1110
34065 inclusive
34066 films
34067 infantry
34068 enchantment
34069 interpersonal
34070 proceed
34071 nunn
34072 foxnewsworld
34073 rob
34074 afrika
34075 incessantly
34076 peppa
34077 boutta
34078 temper
34079 springs
34080 shortstop
34081 zell
34082 gpusa
34083 target
34084 baton
34085 militarize
34086 mukilteo
34087 arms
34088 kindred
34089 pookie
34090 voluntary
34091 slick
34092 scrapper
34093 licenses
34094 lambast
34095 rumours
34096 fke
34097 hearing
34098 formosa
34099 originate
34100 lob
34101 wager
34102 anderson
34103 curry
34104 guilty
34105 grovel
34106 opportunities
34107 beim
34108 stabilize
34109 changed
34110 falmouth
34111 50cent
34112 yucatan
34113 infidel
34114 vermont
34115 workers
34116 mounted
34117 bashir
34118 dawg_lb
34119 735
34120 207
34121 windfall
34122 ms804
34123 maybach
34124 ramp
34125 arrestobama
34126 assassinated
34127 montana
34128 maire
34129 quicken
34130 withdrawal


34985 warden
34986 whine
34987 kyrgyzstan
34988 takeover
34989 eleto
34990 transit
34991 demagogue
34992 kimchi
34993 postpone
34994 oosthuizen
34995 boldly
34996 sno
34997 kelsey
34998 hanging
34999 syndicate
35000 nincompoop
35001 deterrent
35002 lakhvi
35003 depression
35004 texa
35005 boogie
35006 incompetence
35007 perfection
35008 deadline
35009 yelling
35010 abate
35011 opens
35012 propane
35013 eel
35014 folk
35015 unlocking
35016 powerful
35017 politico
35018 bansharialaw
35019 devastate
35020 kippur
35021 freely
35022 aero
35023 dedicate
35024 colluded
35025 tessa
35026 prettier
35027 believing
35028 raindance
35029 stigma
35030 drs
35031 face
35032 prime
35033 polluted
35034 centerfold
35035 harbor
35036 year
35037 rookie
35038 weh
35039 asked
35040 khartoum
35041 10k
35042 mekong
35043 benning
35044 departing
35045 german
35046 correctional
35047 alene
35048 glorify
35049 boulevard
35050 parent
35051 mitsubishi
35052 rowdy
35053 hide
35054 frankly
35055 nota
35056 fuego
350

35811 scuffle
35812 six6sixty6xx
35813 sixth
35814 shopkeeper
35815 irrational
35816 auntie
35817 voluntarily
35818 supergroup
35819 magnussen
35820 brace
35821 warlord
35822 timid
35823 está
35824 percocet
35825 hainan
35826 robmitchellmp
35827 bayonne
35828 juventus
35829 kokanee
35830 flare
35831 bleep
35832 livid
35833 degenerative
35834 semperfi
35835 stop
35836 intervene
35837 scorn
35838 stoic
35839 engle
35840 cpt
35841 pools
35842 bolivia
35843 brady_pta
35844 alert
35845 erode
35846 ponytail
35847 terrified
35848 unearth
35849 cacao
35850 189
35851 drove
35852 298
35853 holocaustremembranceday
35854 directorial
35855 vibe
35856 satirical
35857 91st
35858 correspond
35859 guardians
35860 justify
35861 gallipoli
35862 falkland
35863 347
35864 discovering
35865 calum
35866 179
35867 stan
35868 lox
35869 necessity
35870 politicians
35871 autre
35872 heartbreaking
35873 mixes
35874 divide
35875 nearly
35876 bridge
35877 workmanalice
35878 bestiality
35879 bahamas
35880 ambassador


36955 courthouse
36956 muerto
36957 insufferable
36958 lane
36959 bento
36960 gasol
36961 distinctive
36962 gtf
36963 q5
36964 moin
36965 gas
36966 plow
36967 bartende
36968 unicorn
36969 methane
36970 sanjay
36971 precision
36972 hind
36973 worshiper
36974 jest
36975 bearded
36976 molest
36977 neill
36978 illinois
36979 rico
36980 euthanize
36981 vida
36982 seek
36983 222
36984 principal
36985 whitesupremacist
36986 hiring
36987 masters
36988 seamstress
36989 appalachia
36990 celestial
36991 rabbit
36992 preordere
36993 holmes
36994 seeking
36995 orbital
36996 alleges
36997 imwithchuck
36998 schmid
36999 persuasion
37000 zhao
37001 dimwit
37002 mason
37003 duncan
37004 patchy
37005 dardenne
37006 ww1
37007 cannon
37008 tailor
37009 concern
37010 corleone
37011 netneutrality
37012 707
37013 fainting
37014 groceries
37015 cub
37016 nostra
37017 gigi
37018 heterosexual
37019 309
37020 wilt
37021 gazillion
37022 shutitdown
37023 24hrs
37024 clintoncrimefoundation
37025 wants
37026 explain

37958 councilwoman
37959 koster
37960 barrage
37961 ashghebranious
37962 875
37963 elway
37964 noam
37965 yolo
37966 authorized
37967 pronouns
37968 fruitful
37969 havana
37970 walkway
37971 mypresident
37972 intentionally
37973 involuntary
37974 trumpthehill
37975 defundun
37976 zien
37977 marie
37978 demented
37979 rangers
37980 corrections
37981 circulate
37982 nightclubs
37983 anotherfollowtrain
37984 29th
37985 reporter
37986 dumbledore
37987 emancipation
37988 titanic
37989 artificially
37990 acapulco
37991 surpasse
37992 285
37993 vote2016
37994 spray
37995 lorraineallison
37996 constitutionally
37997 signee
37998 opposition
37999 trusted
38000 subsidies
38001 codeine
38002 plaster
38003 portlandstabbing
38004 hills
38005 hubris
38006 congestion
38007 humpback
38008 anagram
38009 supersonic
38010 sisterhood
38011 heston
38012 mcfarlane
38013 marvell
38014 folly
38015 phenomdigital
38016 simons
38017 rundown
38018 bush
38019 peanuts
38020 cutty
38021 heartwarming
38022 quo
38023 

38587 skew
38588 onus
38589 iglesias
38590 yap
38591 lithium
38592 steen
38593 gerber
38594 djibouti
38595 inward
38596 cicero
38597 people_of_tony
38598 comparison
38599 poll
38600 antif
38601 murders
38602 miranda
38603 ck
38604 nuclearoption
38605 capture
38606 annapurna
38607 blackstreet
38608 divisiveness
38609 inflammation
38610 papal
38611 sigmund
38612 genx
38613 operative
38614 gant
38615 fraudster
38616 arizona
38617 clif
38618 ethno
38619 stagnation
38620 innuendo
38621 prayforosu
38622 admirable
38623 nycin3word
38624 hanks
38625 inexcusable
38626 geegju
38627 kaymer
38628 pastors
38629 xenophon
38630 elude
38631 katiecouric
38632 cosmonaut
38633 davidsweat
38634 catches
38635 thedailyledger
38636 bookcase
38637 sarin
38638 thedemocrats
38639 undisclose
38640 billionaire
38641 marijuana
38642 messed
38643 polycom
38644 intellectually
38645 carpooling
38646 identify
38647 loove
38648 mass
38649 876
38650 renewable
38651 unpopular
38652 cahill
38653 berkley
38654 plush
38655 

39608 concussion
39609 yakima
39610 superpac
39611 army
39612 podestaemails15
39613 decriminalization
39614 reemerge
39615 affected
39616 tornadoes
39617 reveal
39618 torino
39619 bana
39620 motherfucker
39621 locos
39622 fraught
39623 snake
39624 juggernaut
39625 undergoes
39626 expansion
39627 shailene
39628 trump2016fan
39629 135
39630 frauds
39631 tropical
39632 litmus
39633 wavy
39634 fallon
39635 accountability
39636 sdc0914
39637 tolstoy
39638 odious
39639 grosse
39640 142
39641 2keep
39642 hhl
39643 floppy
39644 rant
39645 controlled
39646 miri
39647 storm
39648 federal
39649 painkiller
39650 kanyewest
39651 wnd
39652 hiroshima
39653 crucify
39654 flyby
39655 rescue
39656 guccimane
39657 elbert
39658 followtrick
39659 inedible
39660 tariff
39661 gta
39662 javelin
39663 circular
39664 appeal
39665 heritage
39666 misspelled
39667 breezy
39668 larvae
39669 collaborator
39670 roxanne
39671 hutton
39672 varied
39673 porzingis
39674 1984
39675 sunni
39676 coastguard
39677 dumbest
396

40592 northcarolina
40593 pederson
40594 prohibit
40595 repatriation
40596 rescued
40597 stepson
40598 taxis
40599 lightly
40600 timenout
40601 licensing
40602 enticement
40603 conceal
40604 incoherent
40605 wimbledon
40606 ao
40607 bicker
40608 mysterious
40609 illegitimate
40610 snatch
40611 unstable
40612 kaczynski
40613 balkans
40614 dije
40615 10c
40616 ex
40617 cascade
40618 security
40619 jeanie
40620 familia
40621 bc
40622 ryancare
40623 fargo
40624 gab
40625 caliphate
40626 du
40627 cordial
40628 ventures
40629 defect
40630 bs
40631 penney
40632 services4sale
40633 demar
40634 blacklist
40635 chrgdup1973
40636 gleefully
40637 alt_fedemployee
40638 millions
40639 stonewall
40640 musty
40641 waterboard
40642 ariya
40643 camouflage
40644 gatekeeper
40645 fistfight
40646 1920
40647 nutmeg
40648 vest
40649 staged
40650 mercenaries
40651 voteoutgop
40652 skipper
40653 godbless
40654 33k
40655 votes
40656 bonney
40657 pearlharbor
40658 cleric
40659 pals
40660 expose
40661 pratt
40662

41264 psd
41265 whyiresist
41266 busyness
41267 turku
41268 jannawilkinso69
41269 syriastrike
41270 greenwood
41271 halve
41272 beachykate69
41273 indefensible
41274 sugarcoat
41275 dts
41276 iamwithher
41277 psychiatric
41278 amoral
41279 prayfororlando
41280 noroviru
41281 scarborough
41282 historic
41283 remembranceproject
41284 tribal
41285 warn
41286 prayforgermany
41287 kristol
41288 fillon
41289 ostensibly
41290 paris
41291 pamela
41292 implants
41293 tcf
41294 imams
41295 whywemustimpeachtrumpin7word
41296 pantsuit
41297 tur
41298 mumford
41299 takedown
41300 sioa
41301 atv
41302 cui
41303 rifle
41304 renounce
41305 leasing
41306 carlson
41307 coroner
41308 120k
41309 binks
41310 feds
41311 84b
41312 aetna
41313 illogical
41314 shepardsmith
41315 testifies
41316 preet
41317 grownup
41318 oui
41319 barbies
41320 santos
41321 cvilleaug12
41322 reentry
41323 hesitation
41324 hypervisibility
41325 edmonds
41326 deflated
41327 kidney
41328 madie
41329 gunbattle
41330 gertrude
41331 

41856 foxwood
41857 horseman
41858 mexicans
41859 gyal
41860 viet
41861 exhilarate
41862 unresponsive
41863 hupp
41864 starboard
41865 termanology
41866 profanity
41867 garrison
41868 lackluster
41869 walshfreedom
41870 degrom
41871 bongino
41872 unconditionally
41873 suthenboy1
41874 cristinalaila1
41875 julien_assange
41876 nutcracker
41877 rainfall
41878 insecure
41879 paulheymanprincess
41880 vernia
41881 tightness
41882 describes
41883 reviving
41884 chelseaclinton
41885 ugliness
41886 sartre
41887 j1
41888 mattingly
41889 jihadwatchrs
41890 1972
41891 liking
41892 1936
41893 failures
41894 totalitarian
41895 investigate
41896 stokes
41897 regan
41898 bw
41899 chauffeur
41900 cancellation
41901 mull
41902 drug
41903 bosh
41904 pilots
41905 rapport
41906 gettysburg
41907 canberra
41908 sont
41909 imprison
41910 vatican
41911 costolo
41912 furious
41913 ceasefire
41914 ilk
41915 inquirer
41916 vicinity
41917 norfolk
41918 kale
41919 baidu
41920 surefire
41921 caddell
41922 fishery
4

42855 malo
42856 purposely
42857 trustworthy
42858 jumping
42859 porte
42860 opresse
42861 americanpravda
42862 harbinger
42863 quincy
42864 clintonswalk
42865 rewatch
42866 crackdown
42867 ʷʰʸ
42868 europeans
42869 heartless
42870 noamnesty
42871 uva
42872 onu
42873 stockholm
42874 stopgap
42875 dacula
42876 boudreau
42877 genetically
42878 launder
42879 disruption
42880 fatality
42881 javier
42882 sickout
42883 sabotage
42884 merciless
42885 seatac
42886 neverhillaryortrump
42887 tuscon
42888 realtrumpmafia
42889 sexism
42890 lagarde
42891 impotent
42892 hillarybecause
42893 noose
42894 muhammad
42895 usrc
42896 plata
42897 france
42898 _fisher
42899 idris
42900 hitmaker
42901 inda
42902 convention
42903 psy
42904 chrisrock
42905 phthalate
42906 iis
42907 phylicia
42908 vandal
42909 angler
42910 barcelona
42911 flooded
42912 presidentialdebate
42913 traditionally
42914 subban
42915 hashtagge
42916 hr676
42917 lynx
42918 electrolux
42919 alpine
42920 sainte
42921 keepgoing
42922 elwha

43790 americandream
43791 tussauds
43792 hacks
43793 firefighter
43794 capitulation
43795 tornado
43796 breakingnews
43797 arabmtr
43798 sinai
43799 2048
43800 nationaldogday
43801 latly
43802 spacesuit
43803 imwithjill
43804 saban
43805 407
43806 shepersisted
43807 eritrean
43808 ucberkeley
43809 mois
43810 raucous
43811 palestinians
43812 militarized
43813 durst
43814 biv
43815 hein
43816 munich
43817 kuan
43818 harrypotter
43819 subsidiary
43820 idiotic
43821 sassygayrepub
43822 highlighting
43823 stabs
43824 malik
43825 barbra
43826 spill
43827 4x100
43828 bellevue
43829 wolfblitzer
43830 prayforbrussels
43831 drastic
43832 saline
43833 youngblood
43834 scorch
43835 bargains
43836 7777
43837 unspecified
43838 truestory
43839 50nsexy2014
43840 lied
43841 heartbreake
43842 newjimcrow
43843 tbilisi
43844 mcing
43845 abuser
43846 pink_about_it
43847 nickle
43848 idealist
43849 francois
43850 covefefecreed
43851 subpoenas
43852 shep
43853 justiceforjuanita
43854 ga06
43855 prayformunich

44692 islamicstate
44693 kneel
44694 dakar
44695 michaelkoziol
44696 americanism
44697 tipster
44698 happybirthday
44699 wahhabism
44700 veryfakenew
44701 blinken
44702 pamela_moore13
44703 vaquita
44704 raines
44705 pennyhicks13
44706 forecaster
44707 carjacking
44708 swastikas
44709 astronaut
44710 disband
44711 including
44712 twp
44713 subvert
44714 rose4austin2018
44715 tuesdaythought
44716 devilish
44717 airasia
44718 eke
44719 whitesupremacists
44720 destroy
44721 flushings
44722 misconduct
44723 kermit
44724 amass
44725 splc
44726 parliamentarian
44727 arguments
44728 syd
44729 womenwhovotetrump
44730 pastormarkburn
44731 mccain
44732 higgins
44733 freeform
44734 halt
44735 heytammybruce
44736 1905
44737 hashtagger
44738 mouse458
44739 deceive
44740 therealdonaldtrump
44741 waaah
44742 teamyoutube
44743 protectourcare
44744 obliterates
44745 harassment
44746 alsen
44747 biggovsuck
44748 microcephaly
44749 charolettesville
44750 mindfulness
44751 bestusatoday
44752 devoid
44753 

45229 caress
45230 centene
45231 abi
45232 dalton
45233 phobia
45234 recall
45235 1d
45236 indieadvancement
45237 embroil
45238 overturn
45239 honoree
45240 tfb
45241 ledecky
45242 intoxicate
45243 lauer
45244 cherokee
45245 infraction
45246 10kindredspirit
45247 coffe
45248 manger
45249 dems
45250 liamateniallspant
45251 meekmill
45252 grazie
45253 ringling
45254 1890
45255 mainstreammedia
45256 hefner
45257 gemmar333
45258 blackfish
45259 seguin
45260 turbulence
45261 bioengineere
45262 gendere
45263 nyprisonbreak
45264 gard
45265 ballwin
45266 lewandowski
45267 recount
45268 forgettin
45269 qui
45270 bonanza
45271 frenchelection
45272 adblock
45273 nationalsuperheroday
45274 muslimwomensday
45275 berkeleyprotest
45276 indoctrination
45277 keithlamontscott
45278 adriss
45279 privilege
45280 istandwithahme
45281 ejhirschberger
45282 valeant
45283 bodycamera
45284 gender
45285 blackwomendidthat
45286 previewing
45287 hanover
45288 correa
45289 abiss
45290 fogle
45291 shakeup
45292 inte

46141 chicagomusic
46142 hillarysmigrant
46143 353
46144 redmond
46145 peoplearetiredof
46146 isabelle
46147 justice4jamar
46148 larrynation
46149 aldi
46150 extensively
46151 handmaid
46152 medicaid
46153 notmysuperbowlchamps
46154 bolling
46155 paulding
46156 fuckislam
46157 2give
46158 stammer
46159 thread
46160 marksimoneny
46161 malfunction
46162 songsthataretooweak
46163 agendaofevil
46164 supremecourt
46165 kurds
46166 polution
46167 arrest
46168 miketokes
46169 syriahoax
46170 hydrangea
46171 parliament
46172 boogs
46173 obstruction
46174 bfg
46175 check1showmixsession
46176 djshinner
46177 staffer
46178 warriors
46179 rowling
46180 clintonscandals
46181 muzikkzone
46182 visualhaikus
46183 jonbenet
46184 venditte
46185 murica
46186 luvvie
46187 blackfolk
46188 gosnell
46189 karabo
46190 burglars
46191 horowitz39
46192 zootopia
46193 feminismiscancer
46194 presstv
46195 djokovic
46196 upbringing
46197 hijabs
46198 unesco
46199 scum
46200 ernestine
46201 uw
46202 spitter
46203 pr

46991 lynnepatton
46992 steinbaraka
46993 comermd
46994 machado
46995 deniers
46996 lent
46997 mosul
46998 hetero
46999 makeamovielessromantic
47000 blackhawks
47001 overwatch
47002 inconsistency
47003 thetrumpbureau
47004 savagery
47005 blot
47006 hahn
47007 salling
47008 pusha
47009 whytrumpcanceledrallie
47010 cheryl_kernot
47011 lennaleprena
47012 lillard
47013 tonyabbottmhr
47014 newsalert
47015 neutralize
47016 geils
47017 redress
47018 renton
47019 repcohen
47020 demagoguery
47021 howtoconfuseamillennial
47022 mateen
47023 saveaca
47024 sharknado
47025 brookhaven
47026 spon
47027 vigil
47028 parkinson
47029 obamaday
47030 ajamubaraka
47031 rower
47032 rand
47033 protector
47034 feminism
47035 fatally
47036 worldmentalhealthday
47037 grieving
47038 jbwredsox
47039 _proud_american
47040 hrtablaze
47041 faithgoldy
47042 tulsigabbard
47043 colerain
47044 foodpoisone
47045 whatihatein5words
47046 basta
47047 baltimoreriot
47048 doj
47049 truthfeednews
47050 vimpelcom
47051 winburn
47

47924 pewdiepie
47925 trumpinpicture
47926 stacylstile
47927 stooge
47928 asinine
47929 signsyoureaberniesupporter
47930 trumpster
47931 rhoa
47932 millennial
47933 iwaswinninguntil
47934 bankroll
47935 ruinawestern
47936 droit
47937 fracke
47938 tuckercarlson
47939 musingsfrombarbiesdiary
47940 vfl2013
47941 prisons
47942 daywithoutimmigrant
47943 yordano
47944 starboy
47945 apd
47946 weinstein
47947 lyles
47948 godisgreat
47949 goold
47950 hyperloop
47951 politicize
47952 addabandtoatvshow
47953 ancestry
47954 getongab
47955 emmett
47956 whitneychirps
47957 ojsimpsonparole
47958 opec
47959 gamerlifein4word
47960 badjudgmentin5word
47961 happyindependenceday
47962 skyrider4438
47963 famouscreatures
47964 debates2016
47965 jailer
47966 mangrove
47967 carti
47968 fitzhunter
47969 moab
47970 racheldolezal
47971 westboro
47972 bertha
47973 detroiters
47974 mama_byll
47975 science
47976 beholden
47977 ivanka
47978 nigel_farage
47979 samdubose
47980 derp
47981 umoja
47982 zuckerberg
47983 s

48568 fellon
48569 letitplay
48570 thelastrefuge2
48571 snowflake
48572 walterscott
48573 tremz
48574 idolssa
48575 dab
48576 hollande
48577 acapa
48578 vape
48579 kraemer
48580 frfr
48581 thexfile
48582 fgm
48583 6s
48584 lamesupergroup
48585 _babyblue13
48586 deray
48587 wishiknewhowtoquit
48588 lvsoul
48589 anisiss
48590 gdot
48591 thanksobama
48592 whyitweetin5word
48593 amike4761
48594 unm
48595 insectshowsormovie
48596 tvshow
48597 parisdennard
48598 ifs
48599 listeria
48600 flintwatercrisis
48601 goducks
48602 barksdale
48603 jarreau
48604 theperfectfood
48605 0hour1
48606 windows10
48607 angrygranny1
48608 heartedness
48609 theolderweget
48610 ambientuxr
48611 takeasongfishe
48612 andris
48613 drjillstein
48614 vaccineswork
48615 1966magazine
48616 fencesmovie
48617 wethepeople
48618 my2wordnightmare
48619 lepage
48620 nationalfreedomday
48621 oscarformuhammad
48622 mccarron
48623 medicare4all
48624 cavuto
48625 9thwonder
48626 trish_regan
48627 bakedalaska
48628 llike
48629 a3

49189 bigdata
49190 getitlive
49191 andthatshowiwentviral
49192 genres
49193 showuswhatyougot
49194 cuomo
49195 _youhadonejob
49196 hastert
49197 islamic
49198 barnaby
49199 spieth
49200 thingsiwontbelieve
49201 clinton
49202 turnbullmalcolm
49203 hipstercrime
49204 basedmonitored
49205 undergroundwgn
49206 blackart
49207 hillarysoold
49208 islam
49209 foxnews
49210 renzi
49211 tooteedup
49212 blewuplikeceelosphone
49213 buzzfeed
49214 cruz
49215 hillaryforprison2016
49216 trumper
49217 christichat
49218 w_terrence
49219 weedtv
49220 impeachtrump
49221 epochtech
49222 americanhotlips
49223 makemeabenzac
49224 thisweeksgonnarockbecause
49225 henrico
49226 dtmag
49227 indiedev
49228 tpp
49229 shotsfired
49230 dejong
49231 rtifyouare
49232 bogardthat
49233 gorka
49234 chapo
49235 muhammadali
49236 johntdolan
49237 för
49238 trafficasong
49239 waynesboro
49240 quoteoftheday
49241 cruzcrew
49242 hijab
49243 xavier
49244 thursdaythought
49245 trumptrain45pac
49246 snowden
49247 deplorables
4

49804 spicer
49805 superherowedeserve
49806 aspresidentiwill
49807 wasteamillionin3word
49808 iwouldprefertoforget
49809 mikecarlton01
49810 2a
49811 mustbebanne
49812 valentinesdayin3word
49813 nativeamerican
49814 theresamechele
49815 potus
49816 transgender
49817 theresistance
49818 dumbgeniewishe
49819 uniteblue
49820 myolympicsportwouldbe
49821 thefirst100daystodolist
49822 stonedcomicbooks
49823 addpotinamovie
49824 mentalillnessicebreaker
49825 beingblackis
49826 3rdeyeplug
49827 spinrilla
49828 hipsterschoolsupplieslist
49829 mitchellvii
49830 maketvshowsaustralian
49831 japanamovie
49832 listenlive
49833 1000network
49834 itstimetopanicwhen
49835 hurricanematthew
49836 policebrutality
49837 icantevennameone
49838 12news
49839 mychildhoodhome
49840 writeapanel
49841 environment
49842 ifihadabodydouble
49843 thingspeopleontwitterlike
49844 listen2
49845 moderndeadlysin
49846 addamovieruinamovie
49847 bgn
49848 blackhistorymonth
49849 ilove__butihate
49850 improtestingbecause
498

In [43]:
voc = list(tfidf_vectorizer.vocabulary_)

8416

In [101]:
#person, gpe, org, product
et={}
for t in troll_tweets:
    ents = t.split('\t')[-1]
    if not ents:
        continue
    for ent in ents.split(' '):
        if ent=='#:CARDINAL':
            continue
        typ = ent.split(':')[-1]
        if not typ:
            continue
        if typ[0]=='#':
            continue
        if '\xa0' in typ:
            continue
        if typ not in et:
            et[typ]=0
        et[typ]+=1
et

{'CARDINAL': 303138,
 'DATE': 207540,
 'EVENT': 12242,
 'FAC': 25103,
 'GPE': 554369,
 'LANGUAGE': 1481,
 'LAW': 5641,
 'LOC': 28401,
 'MONEY': 204241,
 'NORP': 185740,
 'ORDINAL': 28766,
 'ORG': 988345,
 'PERCENT': 19105,
 'PERSON': 901969,
 'PRODUCT': 60588,
 'QUANTITY': 7449,
 'TIME': 29112,
 'WORK_OF_ART': 85598,
 'Wher_\x85': 1}

In [76]:
tp,txt,toks,lems,pos,phrs,ents = troll_tweets[3].split('\t')
toks, lems

('JUST IN : President Trump dedicates Presidents Cup golf tournament trophy to the people of Florida , Texas and Puerto Rico . <LINK>',
 'just in : President Trump dedicate Presidents Cup golf tournament trophy to the people of Florida , Texas and Puerto Rico . <LINK>')

In [80]:
def get_in_voc_ratio(lemma, voc):
    lems = []
    lemma = re.sub('< ?emoji ?>', '', lemma)
    lemma = lemma.replace('# ','#').replace('><','> <')
    for l in lemma.split(' '):
        if not l.strip():
            continue
        if l in special_tags:
            continue
        if l in punct:
            continue
        l=l.lower()
        if l[0]=='#' or l[0]=="'":
            continue
        lems.append(l)
    return [l for l in lems if l not in voc]

In [77]:
toks, lemmas, pos, phrases, ents = tokenize_text(txt, get_lemmas=True, get_pos=True, get_phrases=True, get_ents=True)
lemmas

'just in : President Trump dedicate president Cup golf tournament trophy to the people of Florida , Texas and Puerto Rico . < LINK >'

In [81]:
for i in range(30):
    typ,txt,toks,lemma,pos,phrs,ents = troll_tweets[i].split('\t')
    print(lemma, '\n', get_in_voc_ratio(lemma, voc))

" we have a sit Democrat US Senator on trial for corruption and you 've barely hear a peep from the mainstream medium . " ~ <USER> <LINK> 
 []
Marshawn Lynch arrive to game in anti - trump shirt . judge by his sag pant the shirt should say Lynch vs. belt <LINK> 
 ['marshawn', 'vs.']
Daughter of fall Navy Sailor deliver powerful monologue on anthem protest , burn her NFL packer gear . # boycottnfl <LINK> 
 ['nfl']
just in : President Trump dedicate Presidents Cup golf tournament trophy to the people of Florida , Texas and Puerto Rico . <LINK> 
 ['presidents', 'puerto']
19,000 respect our National Anthem ! # standforouranthem 🇺 🇸 <LINK> 
 ['19,000', '🇺', '🇸']
Dan Bongino : " nobody troll liberal well than Donald Trump . " exactly ! <LINK> 
 ['bongino', 'donald']
< emoji><emoji><emoji > <LINK> 
 []
' <USER> <USER> do not matter that CNN do not report on your crime . this will not change the fact that you be go down . ' 
 ['cnn']
as much as I hate promote CNN article , here they be admit e

In [16]:
# do ngram frequency analysis

In [20]:
#emojis used:
#❤️💙💜♥️⌚️🎉💥🔥♀️💯🚨⚡️🌲🙀👹🍄🌸🌻🌺🌼🌹❄️⛄️🐝🐟🐀🦌🦅🐘🦄🗽
#🙏👊👉👍👌☝️✌👋👇👏💁🏼🤷🏼🙋🚶
#😎😱😨😞☹😳😭😮😡😢😊😂🤣😅🤔🙄😬😁😖🤕😤😜
#👀⇩🎥🎣☠️💀🎶🚂👂3️⃣⏪✅➡️🔁⬇️
#✔️✖️⭕🇺🇸⚔️💣📜🎯✨☕️⚓️�⏱📖📚✪✦🎙