In [8]:
# credentials is a dict with the following keys:
# {'CONSUMER_KEY': 'xxxxx',
# 'CONSUMER_SECRET': 'xxxxx',
# 'OAUTH_TOKEN': 'xxxxx',
# 'OAUTH_TOKEN_SECRET': 'xxxxx'}

import json
# Read the OAuth credentials from disk. JSON text is UTF-8, so the encoding is
# pinned instead of relying on the platform's locale default.
with open('credentials.json', 'r', encoding='utf-8') as f:
    credentials = json.load(f)

# Fail fast, naming every absent key, rather than surfacing a bare KeyError
# later when the individual credentials are read.
_missing_keys = [key for key in ('CONSUMER_KEY', 'CONSUMER_SECRET',
                                 'OAUTH_TOKEN', 'OAUTH_TOKEN_SECRET')
                 if key not in credentials]
if _missing_keys:
    raise KeyError('credentials.json is missing: {}'.format(', '.join(_missing_keys)))

In [11]:
import twitter
# Build the OAuth handler. The credentials are passed positionally in the
# order: token, token secret, consumer key, consumer secret.
oauth_fields = ('OAUTH_TOKEN', 'OAUTH_TOKEN_SECRET', 'CONSUMER_KEY', 'CONSUMER_SECRET')
auth = twitter.oauth.OAuth(*[credentials[field] for field in oauth_fields])

# Authenticated API client used by the later cells of this notebook.
twitter_api = twitter.Twitter(auth=auth)

In [16]:
import sys
import time
from urllib.error import URLError
from http.client import BadStatusLine
import json
import twitter

def make_twitter_request(twitter_api_func, max_errors=10, *args, **kw):
    """Call a Twitter API function, retrying on transient failures.

    Parameters
    ----------
    twitter_api_func : callable
        The API call to perform; invoked as ``twitter_api_func(*args, **kw)``.
    max_errors : int, optional
        How many consecutive URLError/BadStatusLine failures are tolerated.
        Once the count exceeds this value the last error is re-raised.
        NOTE: this parameter sits before ``*args``, so the first extra
        positional argument binds to ``max_errors`` rather than being
        forwarded. Pass API parameters by keyword.
    *args, **kw
        Forwarded unchanged to ``twitter_api_func``.

    Returns
    -------
    The value returned by ``twitter_api_func``, or ``None`` when the API
    answered 401 or 404 (the caller has to handle that case).

    Raises
    ------
    twitter.api.TwitterHTTPError
        For any status code not handled below, or for a 5xx response once the
        back-off delay has grown beyond 3600 seconds.
    URLError, BadStatusLine
        After more than ``max_errors`` consecutive occurrences.
    """

    # A nested helper function that handles common HTTPErrors. Returns an
    # updated value for wait_period if the problem is a 500 level error. Blocks
    # until the rate limit is reset if it's a rate limiting issue (429 error).
    # Returns None for 401 and 404 errors, which require special handling by
    # the caller.
    def handle_twitter_http_error(e, wait_period=2, sleep_when_rate_limited=True):

        if wait_period > 3600:  # Seconds
            print('Too many retries. Quitting.', file=sys.stderr)
            raise e

        # See https://dev.twitter.com/docs/error-codes-responses for common codes

        if e.e.code == 401:
            print('Encountered 401 Error (Not Authorized)', file=sys.stderr)
            return None
        elif e.e.code == 404:
            print('Encountered 404 Error (Not Found)', file=sys.stderr)
            return None
        elif e.e.code == 429:
            print('Encountered 429 Error (Rate Limit Exceeded)', file=sys.stderr)
            if sleep_when_rate_limited:
                print("Retrying in 15 minutes...ZzZ...", file=sys.stderr)
                sys.stderr.flush()
                time.sleep(60*15 + 5)
                print('...ZzZ...Awake now and trying again.', file=sys.stderr)
                # Restart the back-off at its initial value after the long sleep.
                return 2
            else:
                raise e  # Caller must handle the rate limiting issue
        elif e.e.code in (500, 502, 503, 504):
            print('Encountered {:} Error. Retrying in {:} seconds'.format(
                e.e.code, wait_period), file=sys.stderr)
            time.sleep(wait_period)
            wait_period *= 1.5
            return wait_period
        else:
            raise e

    # End of nested helper function

    wait_period = 2
    error_count = 0

    while True:
        try:
            return twitter_api_func(*args, **kw)
        except twitter.api.TwitterHTTPError as e:
            # An HTTP response (even an error) proves the connection works, so
            # the consecutive connection-error counter starts over.
            error_count = 0
            wait_period = handle_twitter_http_error(e, wait_period)
            if wait_period is None:
                return None
        except (URLError, BadStatusLine) as e:
            error_count += 1
            # Check the budget before sleeping: there is no point in waiting
            # (or announcing "Continuing") right before giving up.
            if error_count > max_errors:
                print("Too many consecutive errors...bailing out.", file=sys.stderr)
                raise
            error_name = "URLError" if isinstance(e, URLError) else "BadStatusLine"
            print("{} encountered. Continuing.".format(error_name), file=sys.stderr)
            time.sleep(wait_period)
            wait_period *= 1.5

In [17]:
from functools import partial
from sys import maxsize

def get_followers_ids(twitter_api, screen_name=None, user_id=None, followers_limit=maxsize):
    """Collect follower ids for one account by walking the cursored endpoint.

    Parameters
    ----------
    twitter_api
        API client exposing ``followers.ids``.
    screen_name, user_id
        Exactly one of the two must be given (not None) to identify the account.
    followers_limit : int, optional
        Stop once at least this many ids were gathered; the result is
        truncated to this length. ``0`` returns an empty list without issuing
        any request.

    Returns
    -------
    list
        The ids gathered so far. If a request comes back as ``None`` from
        ``make_twitter_request``, paging stops and the partial list is returned.

    Raises
    ------
    AssertionError
        If both or neither of ``screen_name`` / ``user_id`` are supplied.
    """
    # Must have either screen_name or user_id (logical xor)
    assert (screen_name is not None) != (user_id is not None), \
        "Must have screen_name or user_id, but not both"

    # See https://dev.twitter.com/docs/api/1.1/get/followers/ids for details
    # on API parameters
    fetch_page = partial(make_twitter_request, twitter_api.followers.ids,
                         count=5000)

    # Choose the identifying keyword with an explicit None test so that a falsy
    # but supplied value (e.g. an empty screen name) is not silently replaced
    # by user_id=None.
    if screen_name is not None:
        account_kw = {'screen_name': screen_name}
        account_label = screen_name
    else:
        account_kw = {'user_id': user_id}
        account_label = user_id

    followers_ids = []
    if followers_limit == 0:
        return followers_ids

    cursor = -1
    while cursor != 0:
        response = fetch_page(cursor=cursor, **account_kw)
        if response is None:
            # Nothing was fetched, so do not report progress for this round.
            break

        followers_ids += response['ids']
        cursor = response['next_cursor']
        print('Fetched {0} total {1} ids for {2}'.format(len(followers_ids),
                                                         'followers', account_label))
        if len(followers_ids) >= followers_limit:
            break

    # Do something useful with the IDs, like store them to disk...
    return followers_ids[:followers_limit]

In [18]:
# Download the full follower-id list of each account. As the output below
# shows, these calls can block for several 15-minute rate-limit windows.
Shopify_followers_ids = get_followers_ids(twitter_api=twitter_api,
                                          screen_name="Shopify")
Shopify_Picks_followers_ids = get_followers_ids(twitter_api=twitter_api,
                                                screen_name="ShopifyPicks")

Encountered 429 Error (Rate Limit Exceeded)
Retrying in 15 minutes...ZzZ...


Fetched 5000 total followers ids for Shopify
Fetched 10000 total followers ids for Shopify
Fetched 15000 total followers ids for Shopify
Fetched 20000 total followers ids for Shopify
Fetched 25000 total followers ids for Shopify
Fetched 30000 total followers ids for Shopify
Fetched 35000 total followers ids for Shopify
Fetched 40000 total followers ids for Shopify
Fetched 45000 total followers ids for Shopify
Fetched 50000 total followers ids for Shopify
Fetched 55000 total followers ids for Shopify
Fetched 60000 total followers ids for Shopify
Fetched 65000 total followers ids for Shopify
Fetched 70000 total followers ids for Shopify
Fetched 75000 total followers ids for Shopify

...ZzZ...Awake now and trying again.
Encountered 429 Error (Rate Limit Exceeded)
Retrying in 15 minutes...ZzZ...



Fetched 80000 total followers ids for Shopify
Fetched 85000 total followers ids for Shopify
Fetched 90000 total followers ids for Shopify
Fetched 95000 total followers ids for Shopify
Fetched 100000 total followers ids for Shopify
Fetched 105000 total followers ids for Shopify
Fetched 110000 total followers ids for Shopify
Fetched 115000 total followers ids for Shopify
Fetched 120000 total followers ids for Shopify
Fetched 125000 total followers ids for Shopify
Fetched 130000 total followers ids for Shopify
Fetched 135000 total followers ids for Shopify
Fetched 140000 total followers ids for Shopify
Fetched 145000 total followers ids for Shopify
Fetched 149870 total followers ids for Shopify

...ZzZ...Awake now and trying again.
Encountered 429 Error (Rate Limit Exceeded)
Retrying in 15 minutes...ZzZ...



Fetched 5000 total followers ids for ShopifyPicks
Fetched 10000 total followers ids for ShopifyPicks
Fetched 15000 total followers ids for ShopifyPicks
Fetched 20000 total followers ids for ShopifyPicks
Fetched 25000 total followers ids for ShopifyPicks
Fetched 25378 total followers ids for ShopifyPicks


...ZzZ...Awake now and trying again.


In [20]:
import numpy
# The two id lists have different lengths (see the fetch output above), so
# handing numpy the nested list asks it to build a ragged array, which recent
# NumPy releases refuse with a ValueError. Build the 1-D object array
# explicitly instead: one slot per account, each holding that account's list.
followers_by_account = numpy.empty(2, dtype=object)
followers_by_account[0] = Shopify_followers_ids
followers_by_account[1] = Shopify_Picks_followers_ids

# Object arrays are stored pickled; reload with numpy.load(..., allow_pickle=True).
numpy.save('twitter-followers.npy', followers_by_account)

In [43]:
# Accounts that appear in both follower lists.
shopify_follower_set = set(Shopify_followers_ids)
shopify_picks_follower_set = set(Shopify_Picks_followers_ids)
common_followers = shopify_follower_set & shopify_picks_follower_set

In [46]:
# Turn the intersection into a list so it can be sliced by position, then
# report its size and show the first 50 ids.
common_followers = list(common_followers)
n_common = len(common_followers)
print('{:} followers of both @Shopify and @ShopifyPicks'.format(n_common))
preview_ids = common_followers[:50]
print('{:} ...'.format(preview_ids))

5772 followers of both @Shopify and @ShopifyPicks
[1246265346, 2818670599, 815595528, 345571340, 342294547, 2424504342, 760676378, 2428928034, 2807627811, 114655268, 2894725161, 14221354, 169148462, 1877049392, 1231126578, 29229107, 2450260038, 257491017, 329285710, 2450227281, 443383904, 788234341, 15663217, 2391408757, 2647588982, 38273147, 2365259913, 393478300, 2882338972, 326041756, 611418271, 5767342, 355106994, 1484914878, 2182971594, 268796110, 216596688, 972325075, 2311946452, 111214812, 15663331, 251199717, 385941741, 334266608, 602177779, 2664005880, 2750644473, 85852410, 1446445322, 2540503308] ...


In [38]:
# Look up the first 100 common followers in a single request. The result `A`
# is not referenced by any other cell visible in this part of the notebook.
first_100_ids = list(common_followers)[:100]
A = twitter_api.users.lookup(user_id=','.join(map(str, first_100_ids)))

In [57]:
# Resolve the common follower ids to profiles, sending 100 ids per request.
# NOTE(review): names are appended in the order the API returns them; if the
# endpoint omits or reorders accounts, common_follower_names will not line up
# index-for-index with common_followers -- confirm before pairing them.
print('Checking names of {:} common followers'.format(len(common_followers)))
common_follower_profiles = []
common_follower_names = []
# Integer ceiling division; avoids the float round-trip through numpy.ceil.
n_batches = -(-len(common_followers) // 100)
for i in range(n_batches):
    if i % 5 == 0:
        print('Batch {:}:{:}, Total {:.1f}%'.format(i * 100, (i + 1) * 100, i / n_batches * 100))
    user_id_ = ','.join([str(a) for a in common_followers[i * 100 : (i + 1) * 100]])
    # Go through make_twitter_request, like the follower download does, so rate
    # limits and transient network errors are retried instead of aborting the
    # whole loop part-way through.
    U = make_twitter_request(twitter_api.users.lookup, user_id=user_id_)
    if U is None:
        # make_twitter_request returns None for 401/404 responses.
        print('Lookup failed for batch {:}:{:}; skipping.'.format(i * 100, (i + 1) * 100),
              file=sys.stderr)
        continue

    common_follower_names.extend([u['screen_name'] for u in U])
    common_follower_profiles.append(U)

Checking names of 5772 common followers
Batch 0:100, Total 0.0%
Batch 500:600, Total 8.6%
Batch 1000:1100, Total 17.2%
Batch 1500:1600, Total 25.9%
Batch 2000:2100, Total 34.5%
Batch 2500:2600, Total 43.1%
Batch 3000:3100, Total 51.7%
Batch 3500:3600, Total 60.3%
Batch 4000:4100, Total 69.0%
Batch 4500:4600, Total 77.6%
Batch 5000:5100, Total 86.2%
Batch 5500:5600, Total 94.8%


In [58]:
# Persist the common follower ids together with the screen names collected for
# them.
# NOTE(review): numpy.save converts the nested list to an array first. With two
# equal-length lists that presumably produces one 2-D array with a single
# dtype (ids would be stored as text); with unequal lengths the input is
# ragged. Confirm the stored shape/dtype is what readers of this file expect.
numpy.save('common-followers.npy', [common_followers, common_follower_names])

In [59]:
# Summary: the overlap size, then the first 50 ids and the first 50 names.
n_common = len(common_followers)
print('{:} followers of both @Shopify and @ShopifyPicks'.format(n_common))
for preview in (common_followers[:50], common_follower_names[:50]):
    print('{:} ...'.format(preview))

5772 followers of both @Shopify and @ShopifyPicks
[1246265346, 2818670599, 815595528, 345571340, 342294547, 2424504342, 760676378, 2428928034, 2807627811, 114655268, 2894725161, 14221354, 169148462, 1877049392, 1231126578, 29229107, 2450260038, 257491017, 329285710, 2450227281, 443383904, 788234341, 15663217, 2391408757, 2647588982, 38273147, 2365259913, 393478300, 2882338972, 326041756, 611418271, 5767342, 355106994, 1484914878, 2182971594, 268796110, 216596688, 972325075, 2311946452, 111214812, 15663331, 251199717, 385941741, 334266608, 602177779, 2664005880, 2750644473, 85852410, 1446445322, 2540503308] ...
['khanalsantosh66', 'Lynnag87', 'blakkybb', 'Mymummyspennies', 'CestBonChef', 'rocketaryinc', 'PlumeChocolat', 'Youngblood580', 'Paul_J_Myers', 'GreyLeeDesigns', 'taribanben', 'shonagilbert', 'AuroraImporting', 'MaxOstrov1', 'lotusbelletents', 'moopshop', 'ResponseLogic', 'fruitstudios', 'modernwebinfo', 'DelpaMusa', 'jillbuiter', 'eTail_Asia', 'aaronholmes', 'rahulyadav18000', '