In [11]:
from TwitterAPI import TwitterAPI
from tqdm import tqdm
import pandas as pd
import numpy as np
import os
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import folium
from tqdm import tqdm
import json

In [2]:
# Twitter credentials are taken from environment variables so no secret is
# stored in the notebook; a missing variable raises KeyError right here.
keys = {
    'consumer_key': os.environ['TWITTER_KEY'],
    'consumer_secret': os.environ['TWITTER_SECRET'],
    'access_token_key': os.environ['TWITTER_ACCESS_TOKEN_KEY'],
    'access_token_secret': os.environ['TWITTER_ACCESS_TOKEN_SECRET'],
}

# Authenticated client reused for every request below.
api = TwitterAPI(**keys)

In [3]:
# Table of Twitter list IDs; its 'State' and 'ID' columns drive the
# per-state status download.
ids = pd.read_csv('../data/list_ids.csv', index_col=0)

# Congress social-media table with the 'state' column normalised to lower case.
info_with_twitter = pd.read_csv(
    '../data/congress_social_media.csv', index_col=0
).assign(state=lambda frame: frame['state'].str.lower())

In [4]:
# Value sent as the `count` parameter of every `lists/statuses` request.
count = 300

In [5]:
# Download the latest statuses of each state's Twitter list and flatten them
# into one record per tweet, tagged with the state the list belongs to.
records = []
for state in tqdm(ids['State'].unique()):
    # First list ID registered for this state.
    list_id = ids.query('State == @state')['ID'].values[0]
    resp = api.request('lists/statuses', {'list_id': list_id, 'count': count})
    # Keep the decoded payload under its own name: binding it to `json` would
    # shadow the `json` module that later cells need for json.load/json.dumps
    # and break a fresh "Restart & Run All".
    statuses = resp.json()
    for status in statuses:
        records.append({'date': status['created_at'],
                        'text': status['text'],
                        'user': status['user']['screen_name'],
                        'id': status['id'],
                        'state': state})

# Explicit columns keep the schema stable even when nothing was fetched.
data = pd.DataFrame(records, columns=['date', 'text', 'user', 'id', 'state'])

100%|██████████| 50/50 [00:24<00:00,  2.47it/s]


In [6]:
# Score every tweet with VADER; each row holds the analyzer's polarity scores
# plus the tweet 'id' so the scores can be joined back onto the tweets.
analyzer = SentimentIntensityAnalyzer()
sentiments = pd.DataFrame([
    dict(analyzer.polarity_scores(tweet_text), id=tweet_id)
    for tweet_id, tweet_text in data[['id', 'text']].values
])

In [7]:
# Keywords to look for (case-insensitive substring match).
# A broader alternative would be ['trump', 'donald', 'president'].
words = ['trump']

# One boolean per tweet: True when at least one keyword occurs in its text.
query = []
for tweet_text in data['text']:
    lowered = tweet_text.lower()
    query.append(any(word in lowered for word in words))

# Tweets that mention any of the keywords.
data_qu = data[query]

In [13]:
# GeoJSON file with the state outlines used for the map.
path_json = '../data/states.json'

# Parse it into a dict; its 'features' list carries one entry per state,
# with the state name under properties -> NAME.
with open(path_json) as geojson_file:
    maps = json.load(geojson_file)

In [14]:
# Colour-scale break points for the choropleth and the sentiment column to plot.
c_scale = np.array([-.5, -.3, 0, .3, .5])
column = 'compound'

# Every state name present in the GeoJSON, as a one-column frame for merging.
all_states = pd.DataFrame(
    [feature['properties']['NAME'] for feature in maps['features']],
    columns=['state'])

# Attach the sentiment scores to the keyword-matching tweets.
df = pd.merge(data_qu, sentiments, on='id', how='inner')

# Average per state. numeric_only=True is required because `df` also carries
# string columns (date, text, user): pandas >= 2.0 raises TypeError on them
# instead of silently dropping them as older versions did.
viz = df.groupby('state').mean(numeric_only=True).reset_index()

# Capitalise each space-separated word so names can match the GeoJSON NAME field.
viz['state'] = viz['state'].map(
    lambda name: ' '.join(part.capitalize() for part in name.split(' ')))
viz = pd.merge(viz, all_states, on='state', how='outer')

# Clip just inside the outermost break points -- presumably so every value
# falls strictly within the colour scale (TODO confirm).
viz[column] = viz[column].clip(lower=c_scale.min() + .01,
                               upper=c_scale.max() - .01)
viz.loc[pd.isnull(viz[column]), column] = 0
# NOTE(review): rows that came only from `all_states` still have NaN in the
# other averaged columns, so dropna() removes them despite the fill above;
# the net effect is that states without matching tweets are dropped.
viz = viz.dropna()

In [15]:
# Restrict the GeoJSON to the states that have a row in `viz`, then serialise
# the trimmed structure to a string for the choropleth layer.
states_with_data = viz['state'].values
maps['features'] = [feature for feature in maps['features']
                    if feature['properties']['NAME'] in states_with_data]
maps_string = json.dumps(maps)

In [22]:
# NOTE(review): Map.choropleth with geo_path/geo_str looks like the legacy
# folium API; newer releases may require folium.Choropleth(geo_data=...) --
# confirm against the installed folium version before upgrading.
map_1 = folium.Map(location=[48, -102], zoom_start=3)

# Base layer: all state outlines from the original file, filled in white.
map_1.choropleth(
    geo_path=path_json,
    fill_color='White',
    fill_opacity=0.8,
    line_opacity=0.3,
)

# Sentiment layer: states with data, coloured by the averaged `column` value.
legend_text = 'Sentiment for: {}   |   Negative <--- ---> Positive'.format(
    ','.join(words))
sentiment_layer = dict(
    geo_str=maps_string,
    data=viz,
    columns=['state', column],
    fill_color='RdBu',
    key_on='feature.properties.NAME',
    fill_opacity=0.8,
    line_opacity=0.3,
    threshold_scale=list(c_scale),
    legend_name=legend_text,
)
map_1.choropleth(**sentiment_layer)

# Write a standalone HTML copy, then show the map as the cell output.
map_1.save('map.html')
map_1