## Twitter Followers

In [49]:
import os
import pickle

from dotenv import load_dotenv
load_dotenv()

# Twitter API credentials, read from the process environment after the
# load_dotenv() call above (presumably populated from a local .env file —
# confirm). Indexing os.environ directly means a missing variable raises
# KeyError in this cell rather than failing later on the first API call.
CONSUMER_KEY = os.environ['CONSUMER_KEY']
CONSUMER_SECRET = os.environ['CONSUMER_SECRET']
TWITTER_TOKEN = os.environ['TWITTER_TOKEN']
TWITTER_SECRET = os.environ['TWITTER_SECRET']

import twitter
%matplotlib inline

In [2]:
# FIXME: caching, memoization

# API client shared by every request cell in this notebook.
# The credentials are passed to twitter.OAuth positionally, in the order
# token, token secret, consumer key, consumer secret.
# NOTE(review): the name `t` is short and easy to shadow inside helper
# functions — consider a more descriptive name.
t = twitter.Twitter(
    auth=twitter.OAuth(TWITTER_TOKEN,
                       TWITTER_SECRET,
                       CONSUMER_KEY,
                       CONSUMER_SECRET))

# Uncomment to inspect the remaining quota per endpoint:
# t.application.rate_limit_status()

```
res = t.friends.list(count=200)
followers = res['users']
while res['next_cursor']:
    res = t.friends.list(count=200, cursor=res['next_cursor'])
    followers += res['users']
```

In [10]:
def load_cache(path):
    """Load a pickled cache dict from ``path``, or start empty if it is missing.

    The fetch cells in this notebook only request what is not already in these
    dicts, so an empty dict is a valid starting point on a fresh checkout.

    NOTE: ``pickle.load`` can execute arbitrary code. Only point this at cache
    files written by this notebook itself, never at files from elsewhere.

    :param path: path of the pickle file to read
    :returns: the unpickled object, or ``{}`` when the file does not exist
    """
    try:
        with open(path, 'rb') as fhandle:
            return pickle.load(fhandle)
    except FileNotFoundError:
        print('cache file not found, starting with an empty cache:', path)
        return {}


# user id -> trimmed profile dict
user_details = load_cache('user_details.pkl')

# friend id -> list of ids that friend follows
ffriends = load_cache('ffriends.pkl')

# friend id -> list of that friend's recent tweets
friend_tweets = load_cache('friend_tweets.pkl')

In [4]:
from ratelimiter import RateLimiter
from tqdm import tqdm_notebook
import functools as ft

# Length of one rate-limit window, in seconds.
WINDOW_SECONDS = 15*60  # 15 minutes
# FIXME: get this information from quota endpoint
#
# Maximum number of requests per window, keyed by endpoint.
# The client in this notebook signs requests with user-context OAuth, and the
# user-auth quota for some endpoints is lower than the app-only quota. Each
# entry is therefore a value that is safe for user auth, so the limiter never
# allows more calls than the API will accept.
RATE_LIMITS = {
    'friends/ids': 15,
    'search/tweets': 180,            # user auth; 450 is the app-only quota
    'statuses/lookup': 300,
    'statuses/user_timeline': 900,   # user auth; 1500 is the app-only quota
    'users/lookup': 300,  # each request looks up a whole batch of user ids
    'users/show': 900
}

@ft.lru_cache(maxsize=128)
def rate_limiter(endpoint):
    """Return the RateLimiter for ``endpoint``, creating it on first use.

    The lru_cache wrapper hands back the same limiter object on every later
    call with the same endpoint, so all cells using an endpoint share one
    budget of RATE_LIMITS[endpoint] calls per WINDOW_SECONDS.

    Raises KeyError when ``endpoint`` has no entry in RATE_LIMITS.
    """
    return RateLimiter(max_calls=RATE_LIMITS[endpoint], period=WINDOW_SECONDS)

        
def chunks(l, n):
    """Lazily split ``l`` into consecutive slices of at most ``n`` items.

    ``l`` must support ``len()`` and slicing. The final slice is shorter when
    ``len(l)`` is not a multiple of ``n``.
    """
    yield from (l[start:start + n] for start in range(0, len(l), n))

In [8]:
# Ids of the accounts returned by friends/ids for the authenticated user.
# The call goes through the shared 'friends/ids' limiter, so it counts
# against the same budget as the per-friend lookups.
# NOTE(review): only the 'ids' of a single response are kept; if the endpoint
# paginates, later pages are never requested — confirm this is acceptable.
with rate_limiter('friends/ids'):
    my_friends = t.friends.ids()['ids']


In [None]:

# friends/ids has a very low quota: 1 call / minute / account (1,440 / day).
# Likes, replies and retweets have higher quotas and probably imply "following".
#
# See https://github.com/twintproject/twint, but there is an issue
# re: Twitter terms of service.
for friend_id in tqdm_notebook(my_friends):
    # A friend whose list is already cached costs no API call.
    if friend_id not in ffriends:
        with rate_limiter('friends/ids'):
            ffriends[friend_id] = t.friends.ids(user_id=friend_id)['ids']

In [5]:
def user_subset(user):
    """Return a new dict holding only the profile fields this notebook keeps.

    Keys of ``user`` outside the whitelist are dropped. Whitelisted keys that
    ``user`` does not have are simply absent from the result. The result keeps
    the key order of ``user``.
    """
    wanted = frozenset((
        'id', 'name', 'screen_name', 'location', 'description', 'protected',
        'followers_count', 'friends_count', 'favourites_count', 'created_at',
        'statuses_count', 'lang',
    ))
    trimmed = {}
    for key, value in user.items():
        if key in wanted:
            trimmed[key] = value
    return trimmed

def tweet_subset(tweet):
    """Return ``tweet`` unchanged.

    Counterpart of user_subset for tweets; currently keeps every field and
    hands back the very same object.
    """
    return tweet

In [6]:
import logging

# looks like users.show is mostly identical to lookup
# t.users.lookup(screen_name='johannes_cork')

# Ids of friends-of-friends whose profile is not cached yet.
# set().union(*lists) flattens in a single pass; sum(lists, []) re-copies the
# growing list once per friend, which is quadratic in the total number of ids.
targets = list(set().union(*ffriends.values()) - set(user_details.keys()))

# Materialise the batches so the progress bar knows the total; a bare
# generator has no len() and renders as an indeterminate bar.
batches = list(chunks(targets, n=100))

for chunk in tqdm_notebook(batches):
    with rate_limiter('users/lookup'):
        user_ids = ','.join(str(c) for c in chunk)
        try:
            res = t.users.lookup(user_id=user_ids)
        # FIXME: find a better way to catch this
        except twitter.TwitterHTTPError as e:
            if 'No user matches for specified terms.' not in str(e):
                # Anything else is unexpected; bare raise keeps the traceback.
                raise
            # all of the users are invalid
            logging.warning('users/lookup - all user ids were invalid')
            continue
        # Store only the whitelisted profile fields, keyed by user id.
        for r in res:
            user_details[r['id']] = user_subset(r)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [None]:
# friend_tweets = {}

# Up to 200 tweets per response; the request rate is governed by the
# 'statuses/user_timeline' rate limiter.
# we can get user mentions from this, much faster than friends/ids
for friend_id in tqdm_notebook(my_friends):
    # Friends whose timeline is already cached cost no API call.
    if friend_id in friend_tweets:
        continue

    max_id = None
    res = []
    # At most two pages (2 * 200 tweets) per friend.
    for _ in range(2):
        opts = dict(
            user_id=friend_id,
            count=200,
            trim_user=True,
            exclude_replies=False,
            include_rts=True,
            tweet_mode="extended"
        )
        if max_id is not None:
            opts['max_id'] = max_id

        with rate_limiter('statuses/user_timeline'):
            tl = t.statuses.user_timeline(**opts)
        if not tl:
            break
        res += tl
        # max_id is inclusive: asking for the oldest id seen so far would
        # return that tweet again, so the next page starts one id below it.
        max_id = min(r['id'] for r in tl) - 1
    friend_tweets[friend_id] = res

In [11]:
# Re-trim every cached profile through user_subset so each record holds only
# the whitelisted fields. Running this cell again gives the same result.
user_details = {
    user_id: user_subset(profile)
    for user_id, profile in user_details.items()
}

In [12]:
# Size of each dataset. The counts are computed without building one big
# concatenated list: sum(lists, []) copies the accumulated list on every step.
print('my_friends', len(my_friends))
# Number of distinct ids followed by at least one friend.
print('ffriends', len(set().union(*ffriends.values())))
print('user_details', len(user_details))
# Total number of cached tweets across all friends.
print('friend_tweets', sum(len(tweets) for tweets in friend_tweets.values()))

my_friends 777
ffriends 516831
user_details 516803
friend_tweets 94057


In [15]:
from collections import Counter

def get_relations(tweets):
    """Yield the id of every user that the given tweets mention or reply to.

    For each tweet, in order, this yields the ``id`` of every entry in
    ``tweet['entities']['user_mentions']`` and then
    ``tweet['in_reply_to_user_id']`` when that value is truthy. Ids are not
    de-duplicated, so the same id can be yielded several times; callers can
    feed the stream to a Counter.

    :param tweets: iterable of tweet dicts with ``entities`` and
        ``in_reply_to_user_id`` keys
    :returns: generator of user ids
    :raises KeyError: when a tweet lacks one of the keys read above
    """
    # The loop variable is named `tweet` so it does not shadow the
    # module-level API client `t`.
    for tweet in tweets:
        for mention in tweet['entities']['user_mentions']:
            yield mention['id']
        reply_to = tweet['in_reply_to_user_id']
        if reply_to:
            yield reply_to
        
# Spot check: take the second friend with cached tweets, show the stored
# profile, then list the 20 accounts they mention or reply to most often.
a_friend = list(friend_tweets)[1]
print(user_details[a_friend]) # ['screen_name'])
# friend_tweets[a_friend]
relation_counts = Counter(get_relations(friend_tweets[a_friend]))
for user_id, count in relation_counts.most_common(20):
    # The screen name prints as None when the related user has no cached profile.
    print(count, user_details.get(user_id, {}).get('screen_name'))

{'id': 711729626482081792, 'name': 'Peggy Rayzis 👩🏼\u200d💻', 'screen_name': 'peggyrayzis', 'location': 'New York, NY', 'description': 'Exploring the world through code, travel, and music ✨ Developer Experience @apollographql 🚀 she/her', 'protected': False, 'followers_count': 18891, 'friends_count': 443, 'created_at': 'Mon Mar 21 01:42:21 +0000 2016', 'favourites_count': 7027, 'statuses_count': 3778, 'lang': None}
23 apollographql
23 graphqlsummit
13 peggyrayzis
12 seldo
10 MoonTahoe
8 jevakallio
7 mjackson
6 eveporcello
5 dan_abramov
5 hughwillson
4 stemmlerjs
4 gatsbyjs
4 pkellner
4 trevorblades
4 jbaxleyiii
4 rakshesha
4 NikkitaFTW
4 None
4 kentcdodds
4 None


In [55]:
from pyvis import network as net
import networkx as nx

# Limit the graph to the first 300 friends that have cached tweets.
keys = set(list(friend_tweets.keys())[:300])

# id -> screen_name for the sampled friends that have a usable cached profile.
# Nodes and edges are both drawn from this one mapping, so an edge can never
# point at a user without a profile (which used to show up as a `None` node).
screen_names = {k: user_details[k]['screen_name']
                for k in keys
                if user_details.get(k, {}).get('screen_name') is not None}

# screen_name -> lazy stream of user ids that friend mentions or replies to.
friends = {name: get_relations(friend_tweets[k])
           for k, name in screen_names.items()}

g = net.Network(notebook=True)
nxg = nx.Graph()
nxg.add_nodes_from(friends)
for screen_name, rels in friends.items():
    # Keep relations that stay inside the sampled group; skip self-references.
    rs = [(screen_name, screen_names[rel])
          for rel in rels
          if rel in screen_names and screen_names[rel] != screen_name]
    nxg.add_edges_from(rs)

g.from_nx(nxg)
g.show("example.html")

In [214]:
import pickle

# Writing the caches back to disk is switched off by default so that a
# Run All never overwrites them by accident; change the condition to True
# to save the three dicts next to the notebook.
if False:
    for filename, payload in (('user_details.pkl', user_details),
                              ('ffriends.pkl', ffriends),
                              ('friend_tweets.pkl', friend_tweets)):
        with open(filename, 'wb') as fhandle:
            pickle.dump(payload, fhandle)

## Ideas
- Push everything into Neo4j or a similar graph database.
- Generate recommendations for hashtags, Twitter accounts, etc.