In [1]:
!pip install elasticsearch



In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import re
import string
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm
nlp = en_core_web_sm.load()
import os
from subprocess import Popen, PIPE, STDOUT
from elasticsearch import Elasticsearch
import elasticsearch.helpers

from wordcloud import WordCloud
import matplotlib.pyplot as plt
from transformers import pipeline

In [3]:
#Setting up ElasticSearch

!wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.0.0-linux-x86_64.tar.gz -q
!tar -xzf elasticsearch-7.0.0-linux-x86_64.tar.gz
!chown -R daemon:daemon elasticsearch-7.0.0
es_server = Popen(['elasticsearch-7.0.0/bin/elasticsearch'], 
                  stdout=PIPE, stderr=STDOUT,
                  preexec_fn=lambda: os.setuid(1)  # as daemon
                 )
!curl -X GET "localhost:9200/"
es = Elasticsearch()
es.ping()

{
  "name" : "4366206183fe",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "J6CD9NBiRaSfUy4zT-FDbA",
  "version" : {
    "number" : "7.0.0",
    "build_flavor" : "default",
    "build_type" : "tar",
    "build_hash" : "b7e28a7",
    "build_date" : "2019-04-05T22:55:32.697037Z",
    "build_snapshot" : false,
    "lucene_version" : "8.0.0",
    "minimum_wire_compatibility_version" : "6.7.0",
    "minimum_index_compatibility_version" : "6.0.0-beta1"
  },
  "tagline" : "You Know, for Search"
}


True

In [4]:
def read_data(source_file):
    
    data = pd.read_csv(source_file,index_col=0)
    data = data.fillna(0)
    
    return data

In [5]:
def insert_data(data, es, this_index):

    for row_id, row in data.iterrows():

        doc = row.to_dict()
        es.index(index= this_index, id= row_id, body= doc)
    

In [12]:
def scan_full_database(es, this_index):
    
    documents = elasticsearch.helpers.scan(es,
        index= this_index,
        preserve_order=True,
        query={"query": {"match_all": {}}},
    )
    
    tweets = []
    
    for document in documents:
        cleaned_tweet = clean_text(document["_source"]["tweet"])
        tweets.append(cleaned_tweet)
    
    return tweets


In [5]:
def clean_text(text):
    
    text = re.sub('https?://\S+|www\.\S+', '', text) #Remove URL
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text) #Remove punctuation
    
    return text

In [6]:
def NER_model(text, ner):
    
    #ner = pipeline('ner', grouped_entities=True)
    output = ner(text)
    
    entities = []
    print(output)
    for instance in output:
        entity_tuple = (instance['entity_group'], instance['word'])
        entities.append(entity_tuple)
    
    print(entities)
    return entities
    #doc = nlp(text)
    #return doc.ents

In [16]:

def get_entities (tweets, ner):
    
    entity_dictionary = {}
    
    for tweet in tweets:
        
        entities = NER_model(tweet, ner)
        
        for entity in entities:
            
            if (entity[0] not in entity_dictionary):
                entity_dictionary[entity[0]] = {}
                
            if (entity[1] not in entity_dictionary[entity[0]]):
                entity_dictionary[entity[0]][entity[1]] = 0
                
            entity_dictionary[entity[0]][entity[1]] += 1
            
    return entity_dictionary
        

In [20]:
def show_wordcloud(dictionary):
    wordcloud = WordCloud(max_words=50,normalize_plurals=False,background_color='white').generate_from_frequencies(dictionary)

    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()

In [12]:
data = read_data("/kaggle/input/all-trumps-twitter-insults-20152021/trump_insult_tweets_2014_to_2021.csv")
insert_data(data, es, "trump_tweets")


In [13]:
tweets = scan_full_database(es, "trump_tweets")

In [22]:
ner = pipeline('ner', grouped_entities=True)
entity_dictionary = get_entities(tweets[:1000], ner)

[{'entity_group': 'PER', 'score': 0.9986373037099838, 'word': 'Thomas Frieden', 'start': 29, 'end': 43}, {'entity_group': 'ORG', 'score': 0.9980486333370209, 'word': 'CDC', 'start': 47, 'end': 50}, {'entity_group': 'LOC', 'score': 0.971662312746048, 'word': 'West Africa', 'start': 118, 'end': 129}]
[('PER', 'Thomas Frieden'), ('ORG', 'CDC'), ('LOC', 'West Africa')]
[{'entity_group': 'PER', 'score': 0.9986373037099838, 'word': 'Thomas Frieden', 'start': 29, 'end': 43}, {'entity_group': 'ORG', 'score': 0.9980486333370209, 'word': 'CDC', 'start': 47, 'end': 50}, {'entity_group': 'LOC', 'score': 0.971662312746048, 'word': 'West Africa', 'start': 118, 'end': 129}]
[('PER', 'Thomas Frieden'), ('ORG', 'CDC'), ('LOC', 'West Africa')]
[{'entity_group': 'LOC', 'score': 0.9991544485092163, 'word': 'US', 'start': 12, 'end': 14}, {'entity_group': 'MISC', 'score': 0.9249923676252365, 'word': 'AMERICA', 'start': 27, 'end': 34}]
[('LOC', 'US'), ('MISC', 'AMERICA')]
[{'entity_group': 'PER', 'score': 0.

[{'entity_group': 'ORG', 'score': 0.9219020009040833, 'word': 'NYD', 'start': 0, 'end': 3}, {'entity_group': 'ORG', 'score': 0.7011495232582092, 'word': '##yN', 'start': 6, 'end': 8}, {'entity_group': 'PER', 'score': 0.9908110976219178, 'word': 'Mort Zuckerman', 'start': 51, 'end': 65}]
[('ORG', 'NYD'), ('ORG', '##yN'), ('PER', 'Mort Zuckerman')]
[{'entity_group': 'LOC', 'score': 0.9997247159481049, 'word': 'Saudi Arabia', 'start': 0, 'end': 12}, {'entity_group': 'LOC', 'score': 0.999638706445694, 'word': 'United States', 'start': 34, 'end': 47}, {'entity_group': 'ORG', 'score': 0.8380170265833536, 'word': 'AlWale', 'start': 113, 'end': 119}, {'entity_group': 'ORG', 'score': 0.5404005646705627, 'word': '##bin', 'start': 121, 'end': 124}]
[('LOC', 'Saudi Arabia'), ('LOC', 'United States'), ('ORG', 'AlWale'), ('ORG', '##bin')]
[{'entity_group': 'MISC', 'score': 0.9987146258354187, 'word': 'Mexican', 'start': 11, 'end': 18}, {'entity_group': 'LOC', 'score': 0.9997982978820801, 'word': 'Me

[{'entity_group': 'ORG', 'score': 0.8876114785671234, 'word': 'Macys', 'start': 47, 'end': 52}]
[('ORG', 'Macys')]
[{'entity_group': 'ORG', 'score': 0.9981266260147095, 'word': 'Univision', 'start': 34, 'end': 43}]
[('ORG', 'Univision')]
[{'entity_group': 'ORG', 'score': 0.9981266260147095, 'word': 'Univision', 'start': 34, 'end': 43}]
[('ORG', 'Univision')]
[{'entity_group': 'ORG', 'score': 0.9981266260147095, 'word': 'Univision', 'start': 34, 'end': 43}]
[('ORG', 'Univision')]
[{'entity_group': 'LOC', 'score': 0.7186957597732544, 'word': 'Mexico', 'start': 0, 'end': 6}, {'entity_group': 'LOC', 'score': 0.9995744824409485, 'word': 'USA', 'start': 72, 'end': 75}]
[('LOC', 'Mexico'), ('LOC', 'USA')]
[{'entity_group': 'PER', 'score': 0.5581575036048889, 'word': 'Chuck', 'start': 11, 'end': 16}, {'entity_group': 'ORG', 'score': 0.4356880486011505, 'word': '##T', 'start': 16, 'end': 17}, {'entity_group': 'ORG', 'score': 0.9460375110308329, 'word': 'Meet The Press', 'start': 32, 'end': 46}]

[{'entity_group': 'PER', 'score': 0.8798784464597702, 'word': 'Lawrence O ’ Donnell', 'start': 6, 'end': 24}, {'entity_group': 'MISC', 'score': 0.8015155792236328, 'word': 'A', 'start': 85, 'end': 86}, {'entity_group': 'MISC', 'score': 0.9540492296218872, 'word': '##ice', 'start': 92, 'end': 95}]
[('PER', 'Lawrence O ’ Donnell'), ('MISC', 'A'), ('MISC', '##ice')]
[{'entity_group': 'LOC', 'score': 0.9995904266834259, 'word': 'Las Vegas', 'start': 18, 'end': 27}, {'entity_group': 'ORG', 'score': 0.8546401560306549, 'word': 'Hokey', 'start': 58, 'end': 63}, {'entity_group': 'LOC', 'score': 0.9820648729801178, 'word': 'New York', 'start': 72, 'end': 80}]
[('LOC', 'Las Vegas'), ('ORG', 'Hokey'), ('LOC', 'New York')]
[{'entity_group': 'LOC', 'score': 0.9995904266834259, 'word': 'Las Vegas', 'start': 18, 'end': 27}, {'entity_group': 'ORG', 'score': 0.8546401560306549, 'word': 'Hokey', 'start': 58, 'end': 63}, {'entity_group': 'LOC', 'score': 0.9820648729801178, 'word': 'New York', 'start': 72

[{'entity_group': 'ORG', 'score': 0.924489608832768, 'word': 'BreitbartNews', 'start': 0, 'end': 13}, {'entity_group': 'PER', 'score': 0.9937971035639445, 'word': 'Steve Bannon', 'start': 72, 'end': 84}]
[('ORG', 'BreitbartNews'), ('PER', 'Steve Bannon')]
[{'entity_group': 'ORG', 'score': 0.9995632767677307, 'word': 'Des Moines Register', 'start': 27, 'end': 46}, {'entity_group': 'LOC', 'score': 0.9993723034858704, 'word': 'Iowa', 'start': 92, 'end': 96}]
[('ORG', 'Des Moines Register'), ('LOC', 'Iowa')]
[{'entity_group': 'ORG', 'score': 0.9995632767677307, 'word': 'Des Moines Register', 'start': 27, 'end': 46}, {'entity_group': 'LOC', 'score': 0.9993723034858704, 'word': 'Iowa', 'start': 92, 'end': 96}]
[('ORG', 'Des Moines Register'), ('LOC', 'Iowa')]
[{'entity_group': 'ORG', 'score': 0.9995632767677307, 'word': 'Des Moines Register', 'start': 27, 'end': 46}, {'entity_group': 'LOC', 'score': 0.9993723034858704, 'word': 'Iowa', 'start': 92, 'end': 96}]
[('ORG', 'Des Moines Register'),

[{'entity_group': 'ORG', 'score': 0.9316488981246949, 'word': 'CNN CNNPolitics', 'start': 0, 'end': 16}, {'entity_group': 'PER', 'score': 0.9990625381469727, 'word': 'Beck', 'start': 41, 'end': 45}]
[('ORG', 'CNN CNNPolitics'), ('PER', 'Beck')]
[{'entity_group': 'PER', 'score': 0.9995920956134796, 'word': 'Elizabeth Beck', 'start': 7, 'end': 21}]
[('PER', 'Elizabeth Beck')]
[{'entity_group': 'ORG', 'score': 0.8953560789426168, 'word': 'CNN CNNP', 'start': 0, 'end': 9}, {'entity_group': 'PER', 'score': 0.9993744790554047, 'word': 'Elizabeth Beck', 'start': 24, 'end': 38}]
[('ORG', 'CNN CNNP'), ('PER', 'Elizabeth Beck')]
[{'entity_group': 'ORG', 'score': 0.8953560789426168, 'word': 'CNN CNNP', 'start': 0, 'end': 9}, {'entity_group': 'PER', 'score': 0.9993744790554047, 'word': 'Elizabeth Beck', 'start': 24, 'end': 38}]
[('ORG', 'CNN CNNP'), ('PER', 'Elizabeth Beck')]
[{'entity_group': 'ORG', 'score': 0.9963788390159607, 'word': 'CNN', 'start': 0, 'end': 3}, {'entity_group': 'PER', 'score'

[{'entity_group': 'ORG', 'score': 0.5534701943397522, 'word': 'RedState', 'start': 16, 'end': 24}, {'entity_group': 'LOC', 'score': 0.9985686540603638, 'word': 'Atlanta', 'start': 34, 'end': 41}, {'entity_group': 'PER', 'score': 0.9909874081611634, 'word': 'EWErickson', 'start': 68, 'end': 78}]
[('ORG', 'RedState'), ('LOC', 'Atlanta'), ('PER', 'EWErickson')]
[{'entity_group': 'PER', 'score': 0.990967407822609, 'word': 'Carly Fiorina', 'start': 38, 'end': 51}]
[('PER', 'Carly Fiorina')]
[{'entity_group': 'PER', 'score': 0.990967407822609, 'word': 'Carly Fiorina', 'start': 38, 'end': 51}]
[('PER', 'Carly Fiorina')]
[{'entity_group': 'ORG', 'score': 0.991699238618215, 'word': 'FoxNews', 'start': 70, 'end': 77}]
[('ORG', 'FoxNews')]
[]
[]
[]
[]
[]
[]
[{'entity_group': 'PER', 'score': 0.9996204376220703, 'word': 'Rand Paul', 'start': 20, 'end': 29}, {'entity_group': 'LOC', 'score': 0.9997451901435852, 'word': 'Kentucky', 'start': 33, 'end': 41}, {'entity_group': 'MISC', 'score': 0.547936260

[{'entity_group': 'MISC', 'score': 0.7252095937728882, 'word': 'Poll', 'start': 6, 'end': 10}, {'entity_group': 'PER', 'score': 0.9962659080823263, 'word': 'Karl Rove', 'start': 52, 'end': 61}, {'entity_group': 'PER', 'score': 0.9907602071762085, 'word': 'Trump', 'start': 64, 'end': 69}, {'entity_group': 'ORG', 'score': 0.9286031524340311, 'word': 'FoxNews', 'start': 125, 'end': 132}]
[('MISC', 'Poll'), ('PER', 'Karl Rove'), ('PER', 'Trump'), ('ORG', 'FoxNews')]
[{'entity_group': 'LOC', 'score': 0.9998031854629517, 'word': 'Iran', 'start': 21, 'end': 25}]
[('LOC', 'Iran')]
[{'entity_group': 'LOC', 'score': 0.9998294115066528, 'word': 'Iran', 'start': 14, 'end': 18}, {'entity_group': 'LOC', 'score': 0.9991642832756042, 'word': 'US', 'start': 77, 'end': 79}]
[('LOC', 'Iran'), ('LOC', 'US')]
[{'entity_group': 'PER', 'score': 0.9942223230997721, 'word': 'MeghanMcCain', 'start': 0, 'end': 12}, {'entity_group': 'ORG', 'score': 0.8156647483507792, 'word': 'TheFive', 'start': 29, 'end': 36}, {

[{'entity_group': 'PER', 'score': 0.9985437989234924, 'word': 'George Pataki', 'start': 20, 'end': 33}, {'entity_group': 'LOC', 'score': 0.9847272038459778, 'word': 'NY', 'start': 72, 'end': 74}, {'entity_group': 'ORG', 'score': 0.9769161740938822, 'word': 'ZERO', 'start': 89, 'end': 93}]
[('PER', 'George Pataki'), ('LOC', 'NY'), ('ORG', 'ZERO')]
[{'entity_group': 'PER', 'score': 0.9985437989234924, 'word': 'George Pataki', 'start': 20, 'end': 33}, {'entity_group': 'LOC', 'score': 0.9847272038459778, 'word': 'NY', 'start': 72, 'end': 74}, {'entity_group': 'ORG', 'score': 0.9769161740938822, 'word': 'ZERO', 'start': 89, 'end': 93}]
[('PER', 'George Pataki'), ('LOC', 'NY'), ('ORG', 'ZERO')]
[{'entity_group': 'ORG', 'score': 0.9994430343310038, 'word': 'Club For Growth', 'start': 17, 'end': 32}]
[('ORG', 'Club For Growth')]
[{'entity_group': 'ORG', 'score': 0.6140052378177643, 'word': '##4gro', 'start': 66, 'end': 70}]
[('ORG', '##4gro')]
[{'entity_group': 'ORG', 'score': 0.67549202839533

[{'entity_group': 'PER', 'score': 0.9979563057422638, 'word': 'Marco Rubio', 'start': 20, 'end': 31}]
[('PER', 'Marco Rubio')]
[{'entity_group': 'PER', 'score': 0.9969336092472076, 'word': 'Marco Rubio', 'start': 0, 'end': 11}, {'entity_group': 'ORG', 'score': 0.9972898562749227, 'word': 'Gang Of Eight', 'start': 31, 'end': 44}]
[('PER', 'Marco Rubio'), ('ORG', 'Gang Of Eight')]
[{'entity_group': 'PER', 'score': 0.9983814756075541, 'word': 'Jeb Bush', 'start': 13, 'end': 21}, {'entity_group': 'ORG', 'score': 0.7008781433105469, 'word': 'Florida State', 'start': 30, 'end': 43}]
[('PER', 'Jeb Bush'), ('ORG', 'Florida State')]
[{'entity_group': 'PER', 'score': 0.9978453814983368, 'word': 'Marco Rubio', 'start': 30, 'end': 41}]
[('PER', 'Marco Rubio')]
[{'entity_group': 'PER', 'score': 0.9806448022524515, 'word': 'Rubio', 'start': 0, 'end': 5}, {'entity_group': 'ORG', 'score': 0.6519541442394257, 'word': 'Senate Lazy', 'start': 116, 'end': 127}]
[('PER', 'Rubio'), ('ORG', 'Senate Lazy')]
[

[{'entity_group': 'PER', 'score': 0.9295819997787476, 'word': 'OMalley', 'start': 0, 'end': 7}, {'entity_group': 'LOC', 'score': 0.9621219038963318, 'word': 'Baltimore', 'start': 27, 'end': 36}]
[('PER', 'OMalley'), ('LOC', 'Baltimore')]
[{'entity_group': 'ORG', 'score': 0.6873825788497925, 'word': 'STAR', 'start': 18, 'end': 22}]
[('ORG', 'STAR')]
[{'entity_group': 'PER', 'score': 0.9981380701065063, 'word': 'Hillary', 'start': 16, 'end': 23}]
[('PER', 'Hillary')]
[{'entity_group': 'ORG', 'score': 0.8817958474159241, 'word': 'DemDebate', 'start': 0, 'end': 9}]
[('ORG', 'DemDebate')]
[{'entity_group': 'MISC', 'score': 0.5918017029762268, 'word': 'GO', 'start': 4, 'end': 6}, {'entity_group': 'ORG', 'score': 0.8848487138748169, 'word': '##P', 'start': 6, 'end': 7}, {'entity_group': 'ORG', 'score': 0.9983523488044739, 'word': 'CNBC', 'start': 61, 'end': 65}]
[('MISC', 'GO'), ('ORG', '##P'), ('ORG', 'CNBC')]
[{'entity_group': 'ORG', 'score': 0.9990984797477722, 'word': 'CNBC', 'start': 0, 

[{'entity_group': 'PER', 'score': 0.9958383043607076, 'word': 'Joe Biden', 'start': 8, 'end': 17}, {'entity_group': 'PER', 'score': 0.9980543255805969, 'word': 'Hillary', 'start': 98, 'end': 105}]
[('PER', 'Joe Biden'), ('PER', 'Hillary')]
[{'entity_group': 'PER', 'score': 0.9857868254184723, 'word': 'JebBush', 'start': 0, 'end': 7}, {'entity_group': 'LOC', 'score': 0.939028799533844, 'word': 'Americas', 'start': 118, 'end': 126}]
[('PER', 'JebBush'), ('LOC', 'Americas')]
[{'entity_group': 'PER', 'score': 0.8203157186508179, 'word': 'JebB', 'start': 0, 'end': 4}, {'entity_group': 'ORG', 'score': 0.6021618843078613, 'word': '##ush', 'start': 4, 'end': 7}, {'entity_group': 'PER', 'score': 0.9983617961406708, 'word': 'Tim Scott', 'start': 47, 'end': 56}]
[('PER', 'JebB'), ('ORG', '##ush'), ('PER', 'Tim Scott')]
[{'entity_group': 'PER', 'score': 0.8430923819541931, 'word': 'JebBush', 'start': 0, 'end': 7}]
[('PER', 'JebBush')]
[{'entity_group': 'PER', 'score': 0.9989612897237142, 'word': '

[{'entity_group': 'PER', 'score': 0.9983249306678772, 'word': 'Marco Rubio', 'start': 0, 'end': 11}]
[('PER', 'Marco Rubio')]
[{'entity_group': 'PER', 'score': 0.9987497528394064, 'word': 'Jeb Bush', 'start': 3, 'end': 11}, {'entity_group': 'PER', 'score': 0.9991602301597595, 'word': 'Marco', 'start': 73, 'end': 78}, {'entity_group': 'PER', 'score': 0.9994261264801025, 'word': 'Marco', 'start': 107, 'end': 112}]
[('PER', 'Jeb Bush'), ('PER', 'Marco'), ('PER', 'Marco')]
[{'entity_group': 'LOC', 'score': 0.9997565746307373, 'word': 'Iran', 'start': 9, 'end': 13}, {'entity_group': 'LOC', 'score': 0.9995324015617371, 'word': 'US', 'start': 134, 'end': 136}]
[('LOC', 'Iran'), ('LOC', 'US')]
[{'entity_group': 'ORG', 'score': 0.6671736041704813, 'word': 'MorningJoe', 'start': 0, 'end': 10}, {'entity_group': 'PER', 'score': 0.9179537892341614, 'word': 'Marco', 'start': 11, 'end': 16}, {'entity_group': 'ORG', 'score': 0.9978092908859253, 'word': 'CNBC', 'start': 105, 'end': 109}, {'entity_group

[{'entity_group': 'ORG', 'score': 0.9982844789822897, 'word': 'WSJ', 'start': 16, 'end': 19}]
[('ORG', 'WSJ')]
[{'entity_group': 'PER', 'score': 0.9335300922393799, 'word': 'KarlRove', 'start': 0, 'end': 8}, {'entity_group': 'LOC', 'score': 0.9996979236602783, 'word': 'China', 'start': 56, 'end': 61}, {'entity_group': 'ORG', 'score': 0.9988406598567963, 'word': 'TPP', 'start': 66, 'end': 69}]
[('PER', 'KarlRove'), ('LOC', 'China'), ('ORG', 'TPP')]
[{'entity_group': 'PER', 'score': 0.9335300922393799, 'word': 'KarlRove', 'start': 0, 'end': 8}, {'entity_group': 'LOC', 'score': 0.9996979236602783, 'word': 'China', 'start': 56, 'end': 61}, {'entity_group': 'ORG', 'score': 0.9988406598567963, 'word': 'TPP', 'start': 66, 'end': 69}]
[('PER', 'KarlRove'), ('LOC', 'China'), ('ORG', 'TPP')]
[{'entity_group': 'ORG', 'score': 0.9988752007484436, 'word': 'WSJ', 'start': 21, 'end': 24}, {'entity_group': 'PER', 'score': 0.9886171221733093, 'word': 'Karl', 'start': 72, 'end': 76}, {'entity_group': 'O

[{'entity_group': 'PER', 'score': 0.9877448201179504, 'word': 'RondaRousey', 'start': 17, 'end': 28}]
[('PER', 'RondaRousey')]
[{'entity_group': 'PER', 'score': 0.4223669469356537, 'word': 'John', 'start': 0, 'end': 4}, {'entity_group': 'ORG', 'score': 0.5484541058540344, 'word': '##L', 'start': 4, 'end': 5}, {'entity_group': 'ORG', 'score': 0.5778780281543732, 'word': '##re TM', 'start': 8, 'end': 13}, {'entity_group': 'ORG', 'score': 0.6662691831588745, 'word': '##ile', 'start': 15, 'end': 18}, {'entity_group': 'PER', 'score': 0.8577436804771423, 'word': 'John', 'start': 19, 'end': 23}]
[('PER', 'John'), ('ORG', '##L'), ('ORG', '##re TM'), ('ORG', '##ile'), ('PER', 'John')]
[{'entity_group': 'PER', 'score': 0.4223669469356537, 'word': 'John', 'start': 0, 'end': 4}, {'entity_group': 'ORG', 'score': 0.5484541058540344, 'word': '##L', 'start': 4, 'end': 5}, {'entity_group': 'ORG', 'score': 0.5778780281543732, 'word': '##re TM', 'start': 8, 'end': 13}, {'entity_group': 'ORG', 'score': 0.

[{'entity_group': 'PER', 'score': 0.9917967915534973, 'word': 'BilldeBlasio', 'start': 0, 'end': 12}, {'entity_group': 'LOC', 'score': 0.9560423493385315, 'word': 'NYC', 'start': 37, 'end': 40}]
[('PER', 'BilldeBlasio'), ('LOC', 'NYC')]
[{'entity_group': 'PER', 'score': 0.9911332279443741, 'word': 'HillaryClinton', 'start': 0, 'end': 14}]
[('PER', 'HillaryClinton')]
[{'entity_group': 'PER', 'score': 0.9911332279443741, 'word': 'HillaryClinton', 'start': 0, 'end': 14}]
[('PER', 'HillaryClinton')]
[{'entity_group': 'LOC', 'score': 0.9983012676239014, 'word': 'United States', 'start': 39, 'end': 52}, {'entity_group': 'LOC', 'score': 0.9979079961776733, 'word': 'New York City', 'start': 65, 'end': 78}]
[('LOC', 'United States'), ('LOC', 'New York City')]
[{'entity_group': 'PER', 'score': 0.9990752339363098, 'word': 'Bobby Jindal', 'start': 32, 'end': 44}, {'entity_group': 'LOC', 'score': 0.9996399581432343, 'word': 'New Hampshire', 'start': 71, 'end': 84}]
[('PER', 'Bobby Jindal'), ('LOC',

[{'entity_group': 'PER', 'score': 0.9918591380119324, 'word': 'KarlRove', 'start': 0, 'end': 8}, {'entity_group': 'ORG', 'score': 0.5404210686683655, 'word': '##J', 'start': 39, 'end': 40}, {'entity_group': 'ORG', 'score': 0.9283623099327087, 'word': 'FoxNews', 'start': 43, 'end': 50}, {'entity_group': 'ORG', 'score': 0.9168403148651123, 'word': 'Fox', 'start': 111, 'end': 114}, {'entity_group': 'PER', 'score': 0.9758601784706116, 'word': 'Rove', 'start': 127, 'end': 131}]
[('PER', 'KarlRove'), ('ORG', '##J'), ('ORG', 'FoxNews'), ('ORG', 'Fox'), ('PER', 'Rove')]
[{'entity_group': 'PER', 'score': 0.9940863251686096, 'word': 'Donald Trump', 'start': 120, 'end': 132}]
[('PER', 'Donald Trump')]
[{'entity_group': 'PER', 'score': 0.9940863251686096, 'word': 'Donald Trump', 'start': 120, 'end': 132}]
[('PER', 'Donald Trump')]
[]
[]
[{'entity_group': 'ORG', 'score': 0.997346431016922, 'word': 'Boston Globe', 'start': 36, 'end': 48}]
[('ORG', 'Boston Globe')]
[]
[]
[]
[]
[{'entity_group': 'ORG'

[{'entity_group': 'MISC', 'score': 0.9966243505477905, 'word': 'Republican', 'start': 26, 'end': 36}, {'entity_group': 'PER', 'score': 0.9976932406425476, 'word': 'Cheri Jacobus', 'start': 76, 'end': 89}]
[('MISC', 'Republican'), ('PER', 'Cheri Jacobus')]
[{'entity_group': 'MISC', 'score': 0.9966243505477905, 'word': 'Republican', 'start': 26, 'end': 36}, {'entity_group': 'PER', 'score': 0.9976932406425476, 'word': 'Cheri Jacobus', 'start': 76, 'end': 89}]
[('MISC', 'Republican'), ('PER', 'Cheri Jacobus')]
[{'entity_group': 'MISC', 'score': 0.9966243505477905, 'word': 'Republican', 'start': 26, 'end': 36}, {'entity_group': 'PER', 'score': 0.9976932406425476, 'word': 'Cheri Jacobus', 'start': 76, 'end': 89}]
[('MISC', 'Republican'), ('PER', 'Cheri Jacobus')]
[{'entity_group': 'ORG', 'score': 0.9601701100667318, 'word': 'JRubinBlogger', 'start': 0, 'end': 13}, {'entity_group': 'PER', 'score': 0.9952955842018127, 'word': 'Marco Rubio', 'start': 122, 'end': 133}]
[('ORG', 'JRubinBlogger'),

[{'entity_group': 'LOC', 'score': 0.9974201321601868, 'word': 'NYC', 'start': 0, 'end': 3}, {'entity_group': 'LOC', 'score': 0.9972587823867798, 'word': 'United States', 'start': 31, 'end': 44}]
[('LOC', 'NYC'), ('LOC', 'United States')]
[{'entity_group': 'PER', 'score': 0.9524058699607849, 'word': 'JeffBezos', 'start': 53, 'end': 62}, {'entity_group': 'ORG', 'score': 0.6201647520065308, 'word': '##az', 'start': 125, 'end': 127}]
[('PER', 'JeffBezos'), ('ORG', '##az')]
[{'entity_group': 'PER', 'score': 0.9524058699607849, 'word': 'JeffBezos', 'start': 53, 'end': 62}, {'entity_group': 'ORG', 'score': 0.6201647520065308, 'word': '##az', 'start': 125, 'end': 127}]
[('PER', 'JeffBezos'), ('ORG', '##az')]
[{'entity_group': 'PER', 'score': 0.6975359320640564, 'word': 'JeffB', 'start': 59, 'end': 64}, {'entity_group': 'ORG', 'score': 0.7644479870796204, 'word': '##ezos', 'start': 64, 'end': 68}, {'entity_group': 'MISC', 'score': 0.6037890315055847, 'word': 'Amazon Big', 'start': 110, 'end': 1

[{'entity_group': 'ORG', 'score': 0.7295608719189962, 'word': 'MorningJoe', 'start': 0, 'end': 10}, {'entity_group': 'LOC', 'score': 0.9997265934944153, 'word': 'Iowa', 'start': 24, 'end': 28}, {'entity_group': 'ORG', 'score': 0.9656860828399658, 'word': 'CNN', 'start': 59, 'end': 62}, {'entity_group': 'LOC', 'score': 0.999534547328949, 'word': 'Iowa', 'start': 79, 'end': 83}, {'entity_group': 'PER', 'score': 0.9987623691558838, 'word': 'Hillary', 'start': 96, 'end': 103}]
[('ORG', 'MorningJoe'), ('LOC', 'Iowa'), ('ORG', 'CNN'), ('LOC', 'Iowa'), ('PER', 'Hillary')]
[{'entity_group': 'PER', 'score': 0.5134913921356201, 'word': '##c', 'start': 14, 'end': 15}, {'entity_group': 'ORG', 'score': 0.33408409357070923, 'word': '##ru', 'start': 15, 'end': 17}]
[('PER', '##c'), ('ORG', '##ru')]
[{'entity_group': 'PER', 'score': 0.782337948679924, 'word': 'tedcru', 'start': 0, 'end': 6}]
[('PER', 'tedcru')]
[{'entity_group': 'PER', 'score': 0.9909678101539612, 'word': 'Hillary', 'start': 47, 'end'

[{'entity_group': 'PER', 'score': 0.4192323386669159, 'word': '##nk', 'start': 16, 'end': 18}]
[('PER', '##nk')]
[{'entity_group': 'LOC', 'score': 0.9996609687805176, 'word': 'Iraq', 'start': 8, 'end': 12}, {'entity_group': 'PER', 'score': 0.9082020123799642, 'word': 'KarlRove', 'start': 42, 'end': 50}, {'entity_group': 'PER', 'score': 0.8828257322311401, 'word': 'Georgez', 'start': 59, 'end': 66}, {'entity_group': 'ORG', 'score': 0.5709148049354553, 'word': '##Will', 'start': 66, 'end': 70}, {'entity_group': 'ORG', 'score': 0.9127370913823446, 'word': 'FoxNews', 'start': 112, 'end': 119}]
[('LOC', 'Iraq'), ('PER', 'KarlRove'), ('PER', 'Georgez'), ('ORG', '##Will'), ('ORG', 'FoxNews')]
[{'entity_group': 'LOC', 'score': 0.9996609687805176, 'word': 'Iraq', 'start': 8, 'end': 12}, {'entity_group': 'PER', 'score': 0.9082020123799642, 'word': 'KarlRove', 'start': 42, 'end': 50}, {'entity_group': 'PER', 'score': 0.8828257322311401, 'word': 'Georgez', 'start': 59, 'end': 66}, {'entity_group':

[{'entity_group': 'PER', 'score': 0.9254940748214722, 'word': 'JebBush', 'start': 0, 'end': 7}]
[('PER', 'JebBush')]
[{'entity_group': 'PER', 'score': 0.9254940748214722, 'word': 'JebBush', 'start': 0, 'end': 7}]
[('PER', 'JebBush')]
[{'entity_group': 'MISC', 'score': 0.9925856590270996, 'word': 'Democrats', 'start': 8, 'end': 17}, {'entity_group': 'ORG', 'score': 0.9635599851608276, 'word': 'ISIS', 'start': 73, 'end': 77}]
[('MISC', 'Democrats'), ('ORG', 'ISIS')]
[{'entity_group': 'MISC', 'score': 0.9940250515937805, 'word': 'US', 'start': 33, 'end': 35}, {'entity_group': 'LOC', 'score': 0.9996699094772339, 'word': 'Afghanistan', 'start': 46, 'end': 57}]
[('MISC', 'US'), ('LOC', 'Afghanistan')]
[{'entity_group': 'PER', 'score': 0.9379774928092957, 'word': 'JebBush', 'start': 20, 'end': 27}]
[('PER', 'JebBush')]
[{'entity_group': 'PER', 'score': 0.9379774928092957, 'word': 'JebBush', 'start': 20, 'end': 27}]
[('PER', 'JebBush')]
[{'entity_group': 'PER', 'score': 0.9379774928092957, 'wo

[{'entity_group': 'PER', 'score': 0.9812110066413879, 'word': 'Amy Chozick', 'start': 21, 'end': 32}, {'entity_group': 'PER', 'score': 0.9995054801305135, 'word': 'Maggie Haberman', 'start': 37, 'end': 52}, {'entity_group': 'ORG', 'score': 0.5959740281105042, 'word': '##yt', 'start': 69, 'end': 71}, {'entity_group': 'PER', 'score': 0.9926121234893799, 'word': 'Hillary', 'start': 95, 'end': 102}, {'entity_group': 'MISC', 'score': 0.5010547041893005, 'word': 'Bill', 'start': 130, 'end': 134}]
[('PER', 'Amy Chozick'), ('PER', 'Maggie Haberman'), ('ORG', '##yt'), ('PER', 'Hillary'), ('MISC', 'Bill')]
[{'entity_group': 'PER', 'score': 0.9992276430130005, 'word': 'Hillary Clinton', 'start': 71, 'end': 86}]
[('PER', 'Hillary Clinton')]
[{'entity_group': 'MISC', 'score': 0.9876737892627716, 'word': 'CNN Poll', 'start': 65, 'end': 73}, {'entity_group': 'PER', 'score': 0.998128354549408, 'word': 'Cruz', 'start': 112, 'end': 116}, {'entity_group': 'PER', 'score': 0.9832907915115356, 'word': 'Chuc

[{'entity_group': 'ORG', 'score': 0.9441358208656311, 'word': 'TGowdySC', 'start': 7, 'end': 15}, {'entity_group': 'PER', 'score': 0.98509148756663, 'word': 'Rubio', 'start': 32, 'end': 37}, {'entity_group': 'LOC', 'score': 0.9897982676823934, 'word': 'Benghazi', 'start': 57, 'end': 65}, {'entity_group': 'MISC', 'score': 0.9964145421981812, 'word': 'Republicans', 'start': 107, 'end': 118}, {'entity_group': 'LOC', 'score': 0.9255962371826172, 'word': 'America', 'start': 120, 'end': 127}]
[('ORG', 'TGowdySC'), ('PER', 'Rubio'), ('LOC', 'Benghazi'), ('MISC', 'Republicans'), ('LOC', 'America')]
[{'entity_group': 'PER', 'score': 0.9050237337748209, 'word': 'Joe McQuaid', 'start': 15, 'end': 26}, {'entity_group': 'ORG', 'score': 0.8239558339118958, 'word': 'Union', 'start': 50, 'end': 55}]
[('PER', 'Joe McQuaid'), ('ORG', 'Union')]
[{'entity_group': 'PER', 'score': 0.9219582279523214, 'word': 'Joe McQuaid', 'start': 0, 'end': 11}, {'entity_group': 'ORG', 'score': 0.9975357353687286, 'word': 

[{'entity_group': 'ORG', 'score': 0.9977199733257294, 'word': 'Macys', 'start': 0, 'end': 5}, {'entity_group': 'ORG', 'score': 0.9740954041481018, 'word': 'SP', 'start': 52, 'end': 54}, {'entity_group': 'MISC', 'score': 0.9676913221677145, 'word': 'Trump Boycott', 'start': 115, 'end': 128}]
[('ORG', 'Macys'), ('ORG', 'SP'), ('MISC', 'Trump Boycott')]
[{'entity_group': 'ORG', 'score': 0.9977199733257294, 'word': 'Macys', 'start': 0, 'end': 5}, {'entity_group': 'ORG', 'score': 0.9740954041481018, 'word': 'SP', 'start': 52, 'end': 54}, {'entity_group': 'MISC', 'score': 0.9676913221677145, 'word': 'Trump Boycott', 'start': 115, 'end': 128}]
[('ORG', 'Macys'), ('ORG', 'SP'), ('MISC', 'Trump Boycott')]
[{'entity_group': 'LOC', 'score': 0.9997637867927551, 'word': 'Paris', 'start': 16, 'end': 21}, {'entity_group': 'LOC', 'score': 0.9997327923774719, 'word': 'Germany', 'start': 91, 'end': 98}]
[('LOC', 'Paris'), ('LOC', 'Germany')]
[{'entity_group': 'PER', 'score': 0.9743831306695938, 'word': 

[{'entity_group': 'ORG', 'score': 0.903467079003652, 'word': 'SOTU', 'start': 4, 'end': 8}]
[('ORG', 'SOTU')]
[{'entity_group': 'ORG', 'score': 0.6478356719017029, 'word': 'tedcruz', 'start': 20, 'end': 27}, {'entity_group': 'LOC', 'score': 0.9994951486587524, 'word': 'Iowa', 'start': 53, 'end': 57}]
[('ORG', 'tedcruz'), ('LOC', 'Iowa')]
[{'entity_group': 'MISC', 'score': 0.9976471960544586, 'word': 'State Of The Union', 'start': 4, 'end': 22}]
[('MISC', 'State Of The Union')]
[{'entity_group': 'ORG', 'score': 0.9990383386611938, 'word': 'CNN', 'start': 9, 'end': 12}, {'entity_group': 'PER', 'score': 0.9996033608913422, 'word': 'Stuart Stevens', 'start': 81, 'end': 95}, {'entity_group': 'PER', 'score': 0.9990755915641785, 'word': 'Romney', 'start': 114, 'end': 120}]
[('ORG', 'CNN'), ('PER', 'Stuart Stevens'), ('PER', 'Romney')]
[{'entity_group': 'PER', 'score': 0.9996368885040283, 'word': 'Stuart Stevens', 'start': 0, 'end': 14}, {'entity_group': 'PER', 'score': 0.991601824760437, 'wor

[{'entity_group': 'ORG', 'score': 0.9395189881324768, 'word': 'SarahPalinUSA', 'start': 39, 'end': 52}, {'entity_group': 'PER', 'score': 0.9969736337661743, 'word': 'Cruz', 'start': 70, 'end': 74}, {'entity_group': 'PER', 'score': 0.9917673468589783, 'word': 'Glenn', 'start': 97, 'end': 102}]
[('ORG', 'SarahPalinUSA'), ('PER', 'Cruz'), ('PER', 'Glenn')]
[{'entity_group': 'ORG', 'score': 0.9395189881324768, 'word': 'SarahPalinUSA', 'start': 39, 'end': 52}, {'entity_group': 'PER', 'score': 0.9969736337661743, 'word': 'Cruz', 'start': 70, 'end': 74}, {'entity_group': 'PER', 'score': 0.9917673468589783, 'word': 'Glenn', 'start': 97, 'end': 102}]
[('ORG', 'SarahPalinUSA'), ('PER', 'Cruz'), ('PER', 'Glenn')]
[{'entity_group': 'ORG', 'score': 0.9395189881324768, 'word': 'SarahPalinUSA', 'start': 39, 'end': 52}, {'entity_group': 'PER', 'score': 0.9969736337661743, 'word': 'Cruz', 'start': 70, 'end': 74}, {'entity_group': 'PER', 'score': 0.9917673468589783, 'word': 'Glenn', 'start': 97, 'end': 

[{'entity_group': 'PER', 'score': 0.9976552724838257, 'word': 'Cruz', 'start': 0, 'end': 4}, {'entity_group': 'ORG', 'score': 0.9687726696332296, 'word': 'TARP', 'start': 22, 'end': 26}, {'entity_group': 'ORG', 'score': 0.9992007911205292, 'word': 'Goldman Sachs', 'start': 52, 'end': 65}]
[('PER', 'Cruz'), ('ORG', 'TARP'), ('ORG', 'Goldman Sachs')]
[{'entity_group': 'PER', 'score': 0.9976478815078735, 'word': 'Cruz', 'start': 3, 'end': 7}, {'entity_group': 'LOC', 'score': 0.850296139717102, 'word': 'Wall St', 'start': 30, 'end': 37}, {'entity_group': 'ORG', 'score': 0.913582369685173, 'word': 'Goldman SachsCiti', 'start': 52, 'end': 69}]
[('PER', 'Cruz'), ('LOC', 'Wall St'), ('ORG', 'Goldman SachsCiti')]
[{'entity_group': 'LOC', 'score': 0.9997877478599548, 'word': 'Iowa', 'start': 8, 'end': 12}]
[('LOC', 'Iowa')]
[{'entity_group': 'LOC', 'score': 0.9997877478599548, 'word': 'Iowa', 'start': 8, 'end': 12}]
[('LOC', 'Iowa')]
[{'entity_group': 'PER', 'score': 0.8969218333562216, 'word': 

[{'entity_group': 'ORG', 'score': 0.5111605525016785, 'word': '##ob', 'start': 1, 'end': 3}, {'entity_group': 'PER', 'score': 0.7760142683982849, 'word': '##derp', 'start': 6, 'end': 10}, {'entity_group': 'ORG', 'score': 0.4460253119468689, 'word': '##la', 'start': 10, 'end': 12}, {'entity_group': 'PER', 'score': 0.9951380491256714, 'word': 'Cruz', 'start': 127, 'end': 131}]
[('ORG', '##ob'), ('PER', '##derp'), ('ORG', '##la'), ('PER', 'Cruz')]
[{'entity_group': 'ORG', 'score': 0.5111605525016785, 'word': '##ob', 'start': 1, 'end': 3}, {'entity_group': 'PER', 'score': 0.7760142683982849, 'word': '##derp', 'start': 6, 'end': 10}, {'entity_group': 'ORG', 'score': 0.4460253119468689, 'word': '##la', 'start': 10, 'end': 12}, {'entity_group': 'PER', 'score': 0.9951380491256714, 'word': 'Cruz', 'start': 127, 'end': 131}]
[('ORG', '##ob'), ('PER', '##derp'), ('ORG', '##la'), ('PER', 'Cruz')]
[{'entity_group': 'ORG', 'score': 0.5111605525016785, 'word': '##ob', 'start': 1, 'end': 3}, {'entity_

[{'entity_group': 'PER', 'score': 0.955711019039154, 'word': 'Mort Zuckerman', 'start': 6, 'end': 20}, {'entity_group': 'ORG', 'score': 0.9952357610066732, 'word': 'NYDailyNews', 'start': 44, 'end': 55}, {'entity_group': 'ORG', 'score': 0.9823870658874512, 'word': 'Paper', 'start': 88, 'end': 93}]
[('PER', 'Mort Zuckerman'), ('ORG', 'NYDailyNews'), ('ORG', 'Paper')]
[{'entity_group': 'PER', 'score': 0.955711019039154, 'word': 'Mort Zuckerman', 'start': 6, 'end': 20}, {'entity_group': 'ORG', 'score': 0.9952357610066732, 'word': 'NYDailyNews', 'start': 44, 'end': 55}, {'entity_group': 'ORG', 'score': 0.9823870658874512, 'word': 'Paper', 'start': 88, 'end': 93}]
[('PER', 'Mort Zuckerman'), ('ORG', 'NYDailyNews'), ('ORG', 'Paper')]
[{'entity_group': 'ORG', 'score': 0.9522542258103689, 'word': 'NYDailyNews', 'start': 10, 'end': 21}, {'entity_group': 'PER', 'score': 0.9492356657981873, 'word': 'Mort Zuckerman', 'start': 34, 'end': 48}]
[('ORG', 'NYDailyNews'), ('PER', 'Mort Zuckerman')]
[{'e

[{'entity_group': 'PER', 'score': 0.9994674324989319, 'word': 'Ted Cruz', 'start': 0, 'end': 8}, {'entity_group': 'MISC', 'score': 0.8580148220062256, 'word': 'Bible', 'start': 35, 'end': 40}]
[('PER', 'Ted Cruz'), ('MISC', 'Bible')]
[{'entity_group': 'PER', 'score': 0.9803362488746643, 'word': 'JebBush', 'start': 8, 'end': 15}, {'entity_group': 'PER', 'score': 0.9992272555828094, 'word': 'Hillary Clinton', 'start': 21, 'end': 36}, {'entity_group': 'MISC', 'score': 0.3770204782485962, 'word': 'GO', 'start': 72, 'end': 74}, {'entity_group': 'ORG', 'score': 0.7736040552457174, 'word': '##PDebate', 'start': 74, 'end': 81}]
[('PER', 'JebBush'), ('PER', 'Hillary Clinton'), ('MISC', 'GO'), ('ORG', '##PDebate')]
[{'entity_group': 'PER', 'score': 0.9940544366836548, 'word': 'Ted', 'start': 38, 'end': 41}, {'entity_group': 'ORG', 'score': 0.8438664674758911, 'word': 'Blaze', 'start': 54, 'end': 59}]
[('PER', 'Ted'), ('ORG', 'Blaze')]
[{'entity_group': 'ORG', 'score': 0.7959526181221008, 'word':

In [25]:
for key in entity_dictionary:
    
    print(key)
    show_wordcloud(entity_dictionary[key])
    
    