In [1]:
import tweepy
import dill

keys = dill.load(open('.secrets/api-keys.pkd', 'rb'))

auth = tweepy.OAuthHandler(keys['API'], keys['API secret'])

api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

In [2]:
yang_followers = dill.load(open('../scrapped_data/twitter-data/yang_followers.pkd', 'rb'))

In [3]:
len(yang_followers)

36000

In [14]:
import pandas as pd

df = pd.DataFrame([follower._json for follower in yang_followers])[['name', 
                                                                   'screen_name', 
                                                                   'created_at', 
                                                                   'description',
                                                                    'location',
                                                                   'followers_count',
                                                                   'favourites_count']]

In [24]:
from collections import defaultdict

states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", 
          "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", 
          "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", 
          "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", 
          "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]

states_full = ["Alabama","Alaska","Arizona","Arkansas","California","Colorado",
               "Connecticut","Delaware","Florida","Georgia","Hawaii","Idaho","Illinois",
            "Indiana","Iowa","Kansas","Kentucky","Louisiana","Maine","Maryland",
            "Massachusetts","Michigan","Minnesota","Mississippi","Missouri","Montana",
            "Nebraska","Nevada","New Hampshire","New Jersey","New Mexico","New York",
            "North Carolina","North Dakota","Ohio","Oklahoma","Oregon","Pennsylvania",
            "Rhode Island","South Carolina","South Dakota","Tennessee","Texas","Utah",
            "Vermont","Virginia","Washington","West Virginia","Wisconsin","Wyoming"]

state_counts = defaultdict(int)

for location in df.location.values:
    for state in states:
        if state in location:
            state_counts[state] += 1
    for i, state in enumerate(states_full):
        if state in location:
            state_counts[states[i]] += 1

ks, vs = [], []
for k, v in state_counts.items():
    ks.append(k)
    vs.append(v)
    
df_state_counts = pd.DataFrame.from_dict({'State': ks, 'Followers': vs})

In [25]:
df_state_counts

Unnamed: 0,State,Followers
0,GA,252
1,MA,398
2,SC,82
3,VA,557
4,WA,339
5,TX,729
6,DC,335
7,NY,966
8,CA,1771
9,VT,121


In [27]:
import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import pandas as pd
import numpy as np

scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]


data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = df_state_counts.State,
        z = df_state_counts.Followers,
        locationmode = 'USA-states',
        text = df_state_counts.State,
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            )
        ),
        colorbar = dict(
            title = "Millions USD"
        )
    ) ]

layout = dict(
        title = 'Number of Followers by State',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)',
        ),
    )

fig = dict( data=data, layout=layout )
init_notebook_mode(connected=True)
iplot(fig)