In [207]:
%reset -f
import tweepy
import pandas as pd
import json
import os
import numpy as np
import warnings

# Silence every FutureWarning for the rest of the kernel session.
# NOTE(review): this is a global filter, so it also hides deprecation notices
# raised by the libraries used below (e.g. pandas API deprecations) — consider
# narrowing it to the specific warning that motivated it.
warnings.simplefilter(action='ignore', category=FutureWarning)


In [208]:
# Load Twitter credentials
# The key file is read inside a context manager so the handle is closed right
# away instead of staying open until it happens to be garbage-collected.
with open('twitter-api-keys.local.json', encoding='utf-8') as keys_file:
    api_keys = json.load(keys_file)
bearer_token = api_keys["bearer_token"]

# Bearer-token client; wait_on_rate_limit makes tweepy sleep until the rate
# limit window resets instead of raising.
client = tweepy.Client(
    bearer_token=bearer_token, wait_on_rate_limit=True)

# OAuth 1.0a user-context handle; `api` is what the cells below call
# (get_user, get_follower_ids, get_friend_ids, lookup_users).
auth = tweepy.OAuthHandler(
    consumer_key=api_keys['api_key'],
    consumer_secret=api_keys['api_key_secret'],
    access_token=api_keys['access_token'],
    access_token_secret=api_keys['access_token_secret'])
api = tweepy.API(auth, wait_on_rate_limit=True)

# Screen name of the account the crawl starts from.
me = 'fcx_xm'


In [209]:
# Account table: one row per Twitter user discovered so far.
# (Re-loading a previously saved 'twitter.csv' is currently disabled, so the
# table always starts out empty.)
df = pd.DataFrame(
    columns=[
        'id',               # Twitter user id
        'screen_name',      # handle without the leading '@'
        'followers_count',  # follower count reported by the API
        'following_count',  # friends_count reported by the API
        'verified',         # verified flag reported by the API
        'followers',        # JSON {id: screen_name}, '' until fetched
        'friends',          # JSON {id: screen_name}, '' until fetched
        'mutuals',          # JSON {id: screen_name} of followers who are also friends
    ]
)


In [210]:
# Get my info and fill in the dataframe
me_info = api.get_user(screen_name=me)

# DataFrame.append was removed in pandas 2.0, so the row is added with
# pd.concat instead. The membership guard keeps this cell idempotent:
# running it again does not add a second row for the same account.
# 'followers' / 'friends' start as '' which the fetch helpers treat as
# "not fetched yet".
if me_info.id not in df['id'].values:
    df = pd.concat(
        [df, pd.DataFrame([{
            'id': me_info.id,
            'screen_name': me_info.screen_name,
            'followers_count': me_info.followers_count,
            'following_count': me_info.friends_count,
            'verified': me_info.verified,
            'followers': '',
            'friends': ''}])],
        ignore_index=True)


In [211]:
def get_followers(screen_name):
    """Fetch the followers of ``screen_name`` and cache them in the global ``df``.

    Follower ids are paged through ``api.get_follower_ids`` and resolved to
    user objects in batches with ``api.lookup_users``. Every follower that
    has no row in ``df`` yet gets one (with empty ``followers``/``friends``
    cells), and the ``followers`` cell of ``screen_name`` is set to a JSON
    object mapping follower id to screen name.

    If the ``followers`` cell of ``screen_name`` is already non-empty, the
    function only prints a notice and returns.

    Args:
        screen_name: Screen name of a user that already has a row in ``df``.

    Raises:
        IndexError: If ``df`` has no row for ``screen_name``.
    """
    global df
    if df.loc[df['screen_name'] == screen_name, 'followers'].values[0] != '':
        print('Already have followers for ' + screen_name)
        return

    ids = list(tweepy.Cursor(
        api.get_follower_ids, screen_name=screen_name, count=5000).items())

    # Resolve ids to user objects, 100 ids per lookup call.
    info = []
    for start in range(0, len(ids), 100):
        try:
            info.extend(api.lookup_users(user_id=ids[start:start + 100]))
        except Exception:
            # Best effort: report the failed batch and carry on with the next.
            # `Exception` (not a bare except) so that interrupting the kernel
            # actually stops the crawl.
            import traceback
            traceback.print_exc()
            print('Something went wrong, skipping...')

    # Collect the unseen users first and add them with a single concat:
    # DataFrame.append no longer exists in pandas 2.x, and growing the frame
    # one row at a time is quadratic.
    followers_list = {}
    known_ids = set(df['id'].values)
    new_rows = []
    for user in info:
        followers_list[user.id] = user.screen_name
        if user.id not in known_ids:
            known_ids.add(user.id)
            new_rows.append({
                'id': user.id,
                'screen_name': user.screen_name,
                'followers_count': user.followers_count,
                'following_count': user.friends_count,
                'verified': user.verified,
                'followers': '',
                'friends': ''})
    if new_rows:
        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

    df.loc[df['screen_name'] == screen_name,
           'followers'] = json.dumps(followers_list)
           


In [212]:
def get_friends(screen_name):
    """Fetch the friends of ``screen_name`` and cache them in the global ``df``.

    Friend ids are paged through ``api.get_friend_ids`` and resolved to user
    objects in batches with ``api.lookup_users``. Every friend that has no
    row in ``df`` yet gets one (with empty ``followers``/``friends`` cells),
    and the ``friends`` cell of ``screen_name`` is set to a JSON object
    mapping friend id to screen name. Finally rows with a repeated ``id`` are
    dropped, keeping the first occurrence.

    If the ``friends`` cell of ``screen_name`` is already non-empty, the
    function only prints a notice and returns.

    Args:
        screen_name: Screen name of a user that already has a row in ``df``.

    Raises:
        IndexError: If ``df`` has no row for ``screen_name``.
    """
    global df

    if df.loc[df['screen_name'] == screen_name, 'friends'].values[0] != '':
        print('Already have friends for ' + screen_name)
        return

    ids = list(tweepy.Cursor(
        api.get_friend_ids, screen_name=screen_name, count=5000).items())

    # Resolve ids to user objects, 100 ids per lookup call.
    info = []
    for start in range(0, len(ids), 100):
        try:
            info.extend(api.lookup_users(user_id=ids[start:start + 100]))
        except Exception:
            # Best effort: report the failed batch and carry on with the next.
            # `Exception` (not a bare except) so that interrupting the kernel
            # actually stops the crawl.
            import traceback
            traceback.print_exc()
            print('Something went wrong, skipping...')

    # Collect the unseen users first and add them with a single concat:
    # DataFrame.append no longer exists in pandas 2.x, and growing the frame
    # one row at a time is quadratic.
    friends_list = {}
    known_ids = set(df['id'].values)
    new_rows = []
    for user in info:
        friends_list[user.id] = user.screen_name
        if user.id not in known_ids:
            known_ids.add(user.id)
            new_rows.append({
                'id': user.id,
                'screen_name': user.screen_name,
                'followers_count': user.followers_count,
                'following_count': user.friends_count,
                'verified': user.verified,
                'followers': '',
                'friends': ''})
    if new_rows:
        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

    df.loc[df['screen_name'] == screen_name,
           'friends'] = json.dumps(friends_list)
    df = df.drop_duplicates(subset=['id'], keep='first')


In [213]:
def get_mutuals(screen_name):
    """Store the mutual connections of ``screen_name`` in the global ``df``.

    Makes sure followers and friends are cached (via ``get_followers`` and
    ``get_friends``), then writes to the ``mutuals`` cell of ``screen_name``
    a JSON object holding every follower whose id is also among the friends.

    Args:
        screen_name: Screen name of a user that already has a row in ``df``.
    """
    global df

    def _cached_map(column):
        # Decode the JSON id -> screen_name mapping stored for this user.
        # After the JSON round trip the ids are strings in both mappings,
        # so they compare consistently below.
        row_mask = df['screen_name'] == screen_name
        return json.loads(df.loc[row_mask, column].values[0])

    get_followers(screen_name)
    follower_map = _cached_map('followers')

    get_friends(screen_name)
    friend_map = _cached_map('friends')

    shared = {}
    for user_id, name in follower_map.items():
        if user_id in friend_map:
            shared[user_id] = name

    df.loc[df['screen_name'] == screen_name, 'mutuals'] = json.dumps(shared)


[432092673,
 1489371551518629888,
 754080615339876352,
 1500108767299620865,
 1254865098956574722,
 1278907039310364672,
 1161779669173243905,
 1096855563810623494,
 1380566359139094532,
 1270059649916682242,
 1164565837229039616,
 1328007844185174019,
 1018777056606408706,
 1355938346346946572,
 1042889640380981253,
 59096838,
 706098229360926721,
 593797648,
 724792076,
 322537741,
 3345671957,
 2978956055,
 509325593,
 437809435,
 800793090,
 3041376541,
 1864278054,
 381713703,
 542559528,
 1280156928,
 771868716,
 2525683501,
 460535093,
 418158906,
 989329212,
 866759743,
 311362114,
 740014922,
 376781642,
 476938316,
 519400526,
 3095468878,
 1179673934,
 565424467,
 512350036,
 279684948,
 214977622,
 2830172499,
 358939736,
 2153107036,
 743219294,
 210788704,
 3978618465,
 4783081058,
 1219775076,
 250670439,
 237308263,
 403331691,
 1713903732,
 4557974133,
 201738874,
 605579388,
 1876270208,
 965897726629236736,
 1032317176773070848,
 1013893767525552128,
 541166977,
 769

In [None]:
def shortest_path_to_celeb():
    # First create a list of the path of users to reach a celeb
    path = []
    # Define max depth
    max_depth = 3
    # For now, the only rule is that user must be verified
    # Later we can add more rules like the min number of followers
    rules = {'verified': True} # , 'followers_count': 100}
    # Found celebrity
    found = False

    # Starting with me
    current_user = me
    path.append(current_user)
    # Check mutuals and sort by 
    mutuals = json.loads(
        df.loc[df['screen_name'] == current_user, 'mutuals'].values[0])
