## Twitter Followers

In [256]:
import os

from dotenv import load_dotenv
load_dotenv()

# Twitter API credentials, read from the process environment after
# load_dotenv() has run. A missing variable raises KeyError immediately.
CONSUMER_KEY, CONSUMER_SECRET = os.environ['CONSUMER_KEY'], os.environ['CONSUMER_SECRET']
TWITTER_TOKEN, TWITTER_SECRET = os.environ['TWITTER_TOKEN'], os.environ['TWITTER_SECRET']

import twitter

In [257]:
# FIXME: caching, memoization

# API client used by every cell below. OAuth receives the user token pair
# first, then the consumer (application) pair.
twitter_auth = twitter.OAuth(TWITTER_TOKEN, TWITTER_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
t = twitter.Twitter(auth=twitter_auth)

# t.application.rate_limit_status()

```
# First page of friends/list, then follow next_cursor until it comes back falsy.
res = t.friends.list(count=200)
followers = res['users']
while True:
    cursor = res['next_cursor']
    if not cursor:
        break
    res = t.friends.list(count=200, cursor=cursor)
    followers.extend(res['users'])
```

In [100]:
import pickle  # needed here: the only other import sits in the final (save) cell


def load_cache(path):
    """Load a pickled cache written by this notebook.

    Returns the unpickled object, or an empty dict when ``path`` does not exist
    yet (first run, before the save cell has ever been executed).
    """
    try:
        with open(path, 'rb') as fhandle:
            # NOTE: pickle.load can execute arbitrary code; only point this at
            # cache files produced by this notebook's own save cell.
            return pickle.load(fhandle)
    except FileNotFoundError:
        return {}


user_details = load_cache('user_details.pkl')
ffriends = load_cache('ffriends.pkl')
friend_tweets = load_cache('friend_tweets.pkl')

In [None]:
from ratelimiter import RateLimiter
from tqdm import tqdm_notebook
import functools as ft

WINDOW_SECONDS = 15*60  # 15 minutes
# FIXME: get this information from quota endpoint
# Requests allowed per WINDOW_SECONDS, keyed by endpoint. The client in this
# notebook is built with twitter.OAuth (user-context auth), whose quotas differ
# from app-only auth, so each entry is the lower of the two documented limits
# and stays valid whichever auth mode is used.
RATE_LIMITS = {
    'friends/ids': 15,
    'search/tweets': 180,           # user auth: 180 (450 is app-only)
    'statuses/lookup': 300,
    'statuses/user_timeline': 900,  # user auth: 900 (1500 is app-only)
    'users/lookup': 300,  # *200
    'users/show': 900
}

@ft.lru_cache(maxsize=128)
def rate_limiter(endpoint):
    """Return the RateLimiter for ``endpoint``, building it on first use.

    Because of the lru_cache, repeated calls with the same endpoint name hand
    back the same limiter object, so every call site shares one budget.
    Raises KeyError when ``endpoint`` has no entry in RATE_LIMITS.
    """
    return RateLimiter(period=WINDOW_SECONDS, max_calls=RATE_LIMITS[endpoint])

        
def chunks(l, n):
    """Lazily split ``l`` into consecutive slices of at most ``n`` items.

    The last slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    starts = range(0, len(l), n)
    yield from (l[start:start + n] for start in starts)

In [209]:
# Ids of the accounts the authenticated user follows.
with rate_limiter('friends/ids'):
    my_friends = t.friends.ids()['ids']

# Friends-of-friends, one friends/ids call per friend.
# this has very low quota of 1 / minute / account (1,440/ day)
# likes, replies, retweets have higher quotas and probably imply "following"
#
# https://github.com/twintproject/twint but issue re: twitter terms of service
for friend_id in tqdm_notebook(my_friends):
    # Friends already present in the cache cost no API call.
    if friend_id not in ffriends:
        with rate_limiter('friends/ids'):
            ffriends[friend_id] = t.friends.ids(user_id=friend_id)['ids']

HBox(children=(IntProgress(value=0, max=831), HTML(value='')))

In [None]:
def user_subset(user):
    """Return a new dict holding only the whitelisted profile fields of ``user``.

    Keys outside the whitelist are dropped; whitelisted keys that ``user`` lacks
    are simply absent from the result. Key order follows ``user``.
    """
    wanted = (
        'id', 'name', 'screen_name', 'location', 'description', 'protected',
        'followers_count', 'friends_count', 'favourites_count', 'created_at',
        'statuses_count', 'lang',
    )
    trimmed = {}
    for key, value in user.items():
        if key in wanted:
            trimmed[key] = value
    return trimmed

def tweet_subset(tweet):
    """Tweet counterpart of user_subset; currently a no-op that returns ``tweet`` itself."""
    return tweet

In [210]:
import logging

# looks like users.show is mostly identical to lookup
# t.users.lookup(screen_name='johannes_cork')

# Every friend-of-friend id that has no cached profile yet. set().union(*lists)
# builds the set in a single pass, whereas sum(lists, []) re-copies the growing
# list on every addition (quadratic with hundreds of long id lists).
targets = list(set().union(*ffriends.values()) - set(user_details))

# Ids are looked up in batches of 100, sent as one comma-separated string.
# The batches are materialised so the progress bar knows its total.
batches = list(chunks(targets, n=100))

for chunk in tqdm_notebook(batches):
    with rate_limiter('users/lookup'):
        user_ids = ','.join(str(c) for c in chunk)
        try:
            res = t.users.lookup(user_id=user_ids)
        # FIXME: find a better way to catch this
        except twitter.TwitterHTTPError as e:
            if 'No user matches for specified terms.' not in str(e):
                raise  # bare raise keeps the original traceback
            # all of the users are invalid
            logging.warning('users/lookup - all user ids were invalid')
            continue
        for r in res:
            user_details[r['id']] = user_subset(r)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

In [262]:
# friend_tweets = {}

# Only direct friends are crawled here; the friends-of-friends id list that
# used to be computed at the top of this cell was never read by the loop.
MAX_TIMELINE_PAGES = 2  # requests per friend, up to 200 tweets each

# we can get user mentions from this, much faster than friends/ids
for friend_id in tqdm_notebook(my_friends):
    if friend_id in friend_tweets:
        continue

    max_id = None
    timeline = []
    for _ in range(MAX_TIMELINE_PAGES):
        opts = dict(
            user_id=friend_id,
            count=200,
            trim_user=True,
            exclude_replies=False,
            include_rts=True,
            tweet_mode="extended"
        )
        if max_id is not None:
            opts['max_id'] = max_id

        with rate_limiter('statuses/user_timeline'):
            page = t.statuses.user_timeline(**opts)
        if not page:
            break
        timeline += page
        # max_id is inclusive on the API side: subtract 1 so the next page
        # starts strictly below the oldest tweet seen, instead of returning
        # that tweet a second time.
        max_id = min(tweet['id'] for tweet in page) - 1
    friend_tweets[friend_id] = timeline

HBox(children=(IntProgress(value=0, max=831), HTML(value='')))

KeyboardInterrupt: 

In [200]:
# Re-apply the user_subset whitelist to every cached profile
# (presumably to trim entries stored before the whitelist was in place).
user_details = {
    user_id: user_subset(profile)
    for user_id, profile in user_details.items()
}

In [None]:
# Size of each cache. The counts avoid sum(lists, []), which copies the
# growing list on every step; the printed numbers are unchanged.
print('my_friends', len(my_friends))
# distinct friend-of-friend ids across all friends
print('ffriends', len(set().union(*ffriends.values())))
print('user_details', len(user_details))
# total number of cached tweets across all friends
print('friend_tweets', sum(len(tweets) for tweets in friend_tweets.values()))

In [249]:
from collections import Counter

def get_relations(tweets):
    """Yield the id of every user that the given tweets mention or reply to.

    For each tweet, in order: the ``id`` of every entry in
    ``entities['user_mentions']``, then ``in_reply_to_user_id`` when it is
    truthy. Ids are repeated as often as they occur, so the stream can be fed
    straight into a Counter.
    """
    # The loop variable is deliberately not called `t`, which is the name of
    # the notebook-level Twitter client.
    for tweet in tweets:
        mentions = tweet['entities']['user_mentions']
        reply_to = tweet['in_reply_to_user_id']
        yield from (mention['id'] for mention in mentions)
        if reply_to:
            yield reply_to
        
# Spot check on one arbitrarily chosen friend: print the cached profile, then
# the 20 accounts that friend mentions or replies to most often.
a_friend = list(friend_tweets)[19]
print(user_details[a_friend]) # ['screen_name'])
# friend_tweets[a_friend]
relation_counts = Counter(get_relations(friend_tweets[a_friend]))
for related_id, count in relation_counts.most_common(20):
    # Accounts without a cached profile print as None.
    print(count, user_details.get(related_id, {}).get('screen_name'))

{'id': 2264882629, 'name': 'Startup Cork', 'screen_name': 'Startup_Cork', 'location': 'Cork, Ireland', 'description': 'New management coming soon for this account', 'protected': False, 'followers_count': 1653, 'friends_count': 1962, 'created_at': 'Fri Dec 27 20:23:05 +0000 2013', 'favourites_count': 329, 'statuses_count': 2854, 'lang': None}
13 CorkHour
11 UCC
10 StartupIreland
7 SiobhanMFinn
7 Corkinnovates
6 uccgateway
6 CorkBIC
6 IrishStartUpTV
6 None
5 Irish_TechNews
5 bankofireland
5 CorkChamber
5 Corkcoco
4 LEOCorkCity
4 EntrepShip
4 None
4 None
3 TyndallInstitut
3 Startup_Cork
3 RiverleeHotel


In [214]:
import pickle

# Write the three in-memory caches to pickle files in the working directory,
# under the same file names the loading cell reads.
caches = (
    ('user_details.pkl', user_details),
    ('ffriends.pkl', ffriends),
    ('friend_tweets.pkl', friend_tweets),
)
for filename, payload in caches:
    with open(filename, 'wb') as fhandle:
        pickle.dump(payload, fhandle)

## Ideas
- push everything into Neo4j or similar
- get recommendations for hashtags, Twitter accounts, etc.