In [4]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import json
from twython import Twython
import time

Code below has scraped *all* of Mike Gravel's followers for June 21, 2019

In [5]:
# Code below adapated from: https://stackabuse.com/accessing-the-twitter-api-with-python/
# Load Twitter API credentials from json file
with open("twitter_credentials.json", "r") as file:  
    creds = json.load(file)

# Instantiate an object and provide Twitter credentials
twitter = Twython(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET_KEY'], creds['ACCESS_TOKEN'], creds['ACCESS_SECRET_TOKEN'])

In [6]:
def get_followers_json(username):
    # create empty dictionary to accept data
    dict_fol = {'screen_name': [], 'verified': [], 'location': [], 'followers_count': [], 'tweet_text': [], 'retweet_count': [], 'favorite_count': []}  

    # we don't want to dump directly to the graph because we'll get a giant hairball
    # we will want to filter for users with more followers before moving to depth=2 of the social network
    #create loop to page through API results without hitting rate limit
    next_cursor = -1
    timeout = time.time() + 60*60*12 #creates a hard end time for the script to run (12 hrs)
    while(next_cursor):
        if time.time() > timeout: 
            break #ends loop after timeout period
        get_followers = twitter.get_followers_list(screen_name='mikegravel', count = 200, cursor=next_cursor)
        for user in get_followers['users']:  
            dict_fol['screen_name'].append(user['screen_name'])
            dict_fol['verified'].append(user['verified'])
            dict_fol['followers_count'].append(user['followers_count'])
            
            try:
                dict_fol['location'].append(user['location'])
            except:
                dict_fol['location'].append(None) #many users leave this blank
            
            try:
                dict_fol['tweet_text'].append(user['status']['text'])
            except:
                dict_fol['tweet_text'].append(None) #a good number of users have no tweets!
            
            try:
                dict_fol['retweet_count'].append(user['status']['retweet_count'])
            except:
                dict_fol['retweet_count'].append(None)
            
            try:
                dict_fol['favorite_count'].append(user['status']['favorite_count']) 
            except:
                dict_fol['favorite_count'].append(None)
                
        try: 
            time.sleep(65)
            next_cursor = get_followers["next_cursor"]
            print("Getting more of %s's followers' tweets" % username)
            print (time.asctime())
        except:
            break
  

    filename=username + str(time.time()) + "_followers.json"
    
    with open(filename, "w") as file:
        json.dump(dict_fol, file)
        
    print("Saving %s's followers to JSON file" % username)
    print (time.asctime())
    return get_followers

In [7]:
get_followers = get_followers_json('mikegravel')

Getting more of mikegravel's followers' tweets
Fri Jul  5 21:43:51 2019
Saving mikegravel's followers to JSON file
Fri Jul  5 21:43:51 2019


In [8]:
with open('mikegravel1561157054.0137448_followers.json') as file:
    mgfol2 = json.load(file)

In [9]:
df = pd.DataFrame.from_dict(mgfol2)
df = df.sort_values(by='followers_count', ascending=False)

In [11]:
mgfol2.keys()

dict_keys(['screen_name', 'verified', 'location', 'followers_count', 'tweet_text', 'retweet_count', 'favorite_count'])

In [12]:
len(df)

98266

In [13]:
followers_to_get = df['screen_name'][0:19]

In [14]:
print(followers_to_get)

95977             maddow
95338          wikileaks
9849                 AOC
86051      NateSilver538
22595        HEELZiggler
66225           verified
23606      marwilliamson
95126         jaketapper
16955      AlaattinCAGIL
22259         johncusack
58768         robdelaney
47585       YourAnonNews
97936        HuffPostPol
845       Funnyoneliners
69178          shaunking
24154            IlhanMN
96435    sherylunderwood
82938         tweetsauce
6393        NOH8Campaign
Name: screen_name, dtype: object
