# Pulling data from Twitter using the API with tweepy

Before running this notebook, generate a JSON file containing your Twitter API keys using the notebook `store_twitter_credentials_as_json.ipynb`.

In [3]:
!pip3 install tweepy==3.9.0

Collecting tweepy==3.9.0
  Downloading tweepy-3.9.0-py2.py3-none-any.whl (30 kB)
Collecting requests-oauthlib>=0.7.0
  Downloading requests_oauthlib-1.3.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: requests-oauthlib, tweepy
Successfully installed requests-oauthlib-1.3.0 tweepy-3.9.0


In [4]:
import os
import time
import json
import pandas

import tweepy

In [5]:
# Path to the JSON credentials file that is passed to load_api below;
# relative, so it is resolved against the notebook's working directory.
api_keys_fp = '../twitter_credentials.json'

In [6]:
def load_api(keys_file):
    '''
    Build an authenticated tweepy API client from a JSON credentials file.

    Parameters
    ----------
    keys_file : str or path-like
        Path to a JSON file containing the keys 'CONSUMER_KEY',
        'CONSUMER_SECRET', 'ACCESS_TOKEN' and 'ACCESS_SECRET'.

    Returns
    -------
    api : tweepy API object
        Client created with wait_on_rate_limit=True.

    Raises
    ------
    KeyError
        If one or more of the four required keys is missing from the file.
    '''
    required_keys = ('CONSUMER_KEY', 'CONSUMER_SECRET',
                     'ACCESS_TOKEN', 'ACCESS_SECRET')

    # Load twitter credentials. Read explicitly as UTF-8 instead of relying
    # on the platform's default text encoding.
    with open(keys_file, 'r', encoding='utf-8') as file:
        creds = json.load(file)

    # Report every missing credential in one message instead of failing on
    # the first absent key with no context.
    missing = [key for key in required_keys if key not in creds]
    if missing:
        raise KeyError('Missing credential(s) in ' + str(keys_file) + ': '
                       + ', '.join(missing))

    # Use credentials to set up API access authorisation
    auth = tweepy.OAuthHandler(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'])
    auth.set_access_token(creds['ACCESS_TOKEN'], creds['ACCESS_SECRET'])
    api = tweepy.API(auth, wait_on_rate_limit=True)

    return api

In [7]:
# Create the authenticated API client from the credentials file path set above.
tw_api = load_api(api_keys_fp)

In [8]:
def get_followers_by_id(api, username, autowait=False):
    '''
    Function to scrape a list of followers of a specific user.

    Looks up the user's follower count first. If the count is larger than
    can be fetched within a single rate-limit window, the caller is asked
    to confirm before the (slow) download starts, unless `autowait` is set.

    Parameters
    ----------
    api : tweepy API object
        Client used for the user lookup and the follower ID requests.
    username : str
        the twitter handle of the user whose followers to pull.
    autowait : bool
        If true, overrides requirement for user input at start

    Returns
    -------
    followers : list of ints, or None
        Follower IDs collected from every page of results. None is
        returned when the confirmation prompt is answered with anything
        other than 'y' (case-insensitive, surrounding whitespace ignored).
    '''
    ids_per_request = 5000      # IDs fetched per followers/ids request
    requests_per_window = 15    # requests allowed per rate-limit window
    window_minutes = 15         # length of one rate-limit window

    # First run a quick check at user follows
    user = api.get_user(username)
    n_follows = user.followers_count
    n_rate = ids_per_request * requests_per_window

    if not autowait and n_follows > n_rate:
        # The first n_rate IDs arrive without waiting; every further block
        # of up to n_rate IDs costs one more window. Integer division gives
        # the number of windows to wait (the original used modulo, which
        # produced a meaningless figure).
        duration = ((n_follows - 1) // n_rate) * window_minutes
        decision = input('Request will take approx. '+str(duration)+' minutes.  Proceed? (y/n)')
        if decision.strip().lower() != 'y':
            return None

    ids = []

    # Walk every page of follower IDs and flatten them into one list.
    for page in tweepy.Cursor(api.followers_ids, screen_name=username).pages():
        ids.extend(page)

    return ids

In [9]:
# Pull the follower IDs for the example account. A confirmation prompt only
# appears when the follower count exceeds one rate-limit window (5000*15).
followers = get_followers_by_id(tw_api, 'bobthephysicist')

In [10]:
# Number of follower IDs that were returned.
print(len(followers))

147


In [11]:
# Fetch the same account's profile so its reported follower count can be
# compared with the number of IDs collected above.
user = tw_api.get_user('bobthephysicist')

In [12]:
# Follower count as reported on the profile object.
user.followers_count

147

In [14]:
# Same pagination as get_followers_by_id, run inline without the up-front
# follower-count check. No manual sleep between pages: the client returned by
# load_api is created with wait_on_rate_limit=True.
ids = []
for page in tweepy.Cursor(
    tw_api.followers_ids,
    screen_name='bobthephysicist',
).pages():
    ids += page

In [15]:
# Display the collected follower IDs. This shows the whole list; for large
# accounts prefer len(ids) or a slice such as ids[:10].
ids

[2494308547,
 1291311246248009728,
 918945949610889218,
 1056563882557431814,
 1280993379837304832,
 1149332088635777024,
 1014057864799780865,
 1352413740,
 431580871,
 3406988183,
 189773769,
 1565252448,
 2369130127,
 545338250,
 839744052,
 227255222,
 712390112936595456,
 872076043838009345,
 18118081,
 927189231784603648,
 20500058,
 714382760,
 1213107465627193349,
 1533204464,
 2730824087,
 1161306259603251200,
 975725305280323586,
 557346262,
 1031508832428519424,
 1121410304913616896,
 940655344556142592,
 1191395875194580997,
 1187649899225063424,
 2451484729,
 756391909606715392,
 1195971857314279424,
 913127149,
 3296513241,
 146076566,
 1109414543229227008,
 2488752192,
 909383230293504000,
 31514876,
 152684914,
 1129636697329864705,
 1130410589124268032,
 152939273,
 527427182,
 3081638848,
 867340418333192192,
 498817833,
 215361607,
 1060135641797836800,
 397896165,
 2347051676,
 857280418986561536,
 921822661663252480,
 4000407796,
 458724793,
 887718267137249281,
 3