In [79]:
%reset -f
import tweepy
import pandas as pd
import json
import warnings
import time
import os

warnings.simplefilter(action='ignore', category=FutureWarning)


In [80]:
# Load Twitter credentials from a local JSON file.
# The context manager closes the file handle promptly; the original
# `json.load(open(...))` left it open until garbage collection.
with open('twitter-api-keys.local.json') as keys_file:
    api_keys = json.load(keys_file)
bearer_token = api_keys["bearer_token"]

# Bearer-token client, configured to wait when a rate limit is hit.
client = tweepy.Client(
    bearer_token=bearer_token, wait_on_rate_limit=True)

# OAuth-authenticated API object; this is the one the fetch helpers below call.
auth = tweepy.OAuthHandler(
    consumer_key=api_keys['api_key'],
    consumer_secret=api_keys['api_key_secret'],
    access_token=api_keys['access_token'],
    access_token_secret=api_keys['access_token_secret'])
api = tweepy.API(auth, wait_on_rate_limit=True)

# Screen name used as the starting point of the analysis.
me = 'Meescapo'


In [81]:
def load_user_table(path):
    """Load the cached user table from ``path``, or build an empty one.

    The 'followers', 'friends' and 'mutuals' columns hold JSON strings, with
    ``''`` meaning "not fetched yet". ``DataFrame.to_csv`` writes ``''`` as an
    empty field and ``pd.read_csv`` reads empty fields back as NaN, so after a
    round trip a plain ``value != ''`` test would wrongly report "already
    fetched". Those columns are therefore normalised back to ``''`` here.

    Args:
        path: Location of the CSV cache.

    Returns:
        A DataFrame with (at least) the cache columns listed below.
    """
    columns = [
        'id',
        'screen_name',
        'followers_count',
        'following_count',
        'verified',
        'followers',
        'friends',
        'mutuals']
    json_columns = ['followers', 'friends', 'mutuals']

    if not os.path.isfile(path):
        return pd.DataFrame(columns=columns)

    table = pd.read_csv(path)
    for col in json_columns:
        if col in table.columns:
            # astype(object) first: an all-empty column is parsed as float64.
            table[col] = table[col].astype(object).fillna('')
        else:
            # Cache written before this column existed.
            table[col] = ''
    return table


filename = 'twitter.csv'
df = load_user_table(filename)


In [82]:
# Make sure the table contains a row for `me`, fetching my profile only when
# it is not already cached.
if me not in df['screen_name'].values:
    me_info = api.get_user(screen_name=me)
    # pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
    # and removed in pandas 2.0.
    me_row = pd.DataFrame([{
        'id': me_info.id,
        'screen_name': me_info.screen_name,
        'followers_count': me_info.followers_count,
        'following_count': me_info.friends_count,
        'verified': me_info.verified,
        # Empty string marks "not fetched yet" for the JSON columns.
        'followers': '',
        'friends': ''}])
    df = pd.concat([df, me_row], ignore_index=True)


In [83]:
def get_followers(screen_name):
    """Fetch and cache the followers of ``screen_name``.

    Follower ids are paged through ``api.get_follower_ids`` and then resolved
    to user objects in chunks of 100 via ``api.lookup_users``. Every user not
    yet present in the global ``df`` gets a new row, the ``{id: screen_name}``
    map is stored as JSON in the 'followers' cell of ``screen_name``'s row, and
    the whole table is written to ``filename``.

    Nothing is fetched when that cell already holds a value.

    Args:
        screen_name: Screen name whose followers are wanted.

    Returns:
        None. Results are stored in the global ``df`` and the CSV cache.
    """
    import traceback

    global df
    start = time.time()
    print('Getting followers')

    # The original tested len(<boolean mask>), which is the length of the
    # whole frame, so an unknown screen_name crashed on .values[0].
    row_mask = df['screen_name'] == screen_name
    if row_mask.any():
        cached = df.loc[row_mask, 'followers'].values[0]
        # Empty cells come back from CSV as NaN; treat them as "not fetched".
        if not pd.isna(cached) and cached != '':
            print('- Already have followers for ' + screen_name)
            return

    ids = list(tweepy.Cursor(
        api.get_follower_ids, screen_name=screen_name, count=5000).items())

    info = []
    for i in range(0, len(ids), 100):
        chunk = ids[i:i+100]
        try:
            print('- Getting followers info for chunk ' + str(i) + ' to ' + str(i+100) + ' of ' + str(len(ids)) + ' for ' + screen_name)
            info.extend(api.lookup_users(user_id=chunk))
        except Exception:
            # Best effort: one failed chunk must not abort the crawl, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            traceback.print_exc()
            print('Something went wrong, skipping...')

    followers_list = {}
    known_ids = set(df['id'].tolist())
    new_rows = []
    for user in info:
        followers_list[user.id] = user.screen_name
        if user.id not in known_ids:
            known_ids.add(user.id)
            new_rows.append({
                'id': user.id,
                'screen_name': user.screen_name,
                'followers_count': user.followers_count,
                'following_count': user.friends_count,
                'verified': user.verified,
                'followers': '',
                'friends': ''})

    # One concat instead of DataFrame.append per user: append is quadratic
    # in a loop and was removed in pandas 2.0.
    if new_rows:
        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

    df.loc[df['screen_name'] == screen_name,
           'followers'] = json.dumps(followers_list)
    df.to_csv(filename, index=False)
    end = time.time()
    print('Done getting followers in {} seconds'.format(end - start))
           


In [84]:
def get_friends(screen_name):
    """Fetch and cache the accounts that ``screen_name`` follows ("friends").

    Friend ids are paged through ``api.get_friend_ids`` and then resolved to
    user objects in chunks of 100 via ``api.lookup_users``. Every user not yet
    present in the global ``df`` gets a new row, the ``{id: screen_name}`` map
    is stored as JSON in the 'friends' cell of ``screen_name``'s row, and the
    whole table is written to ``filename``.

    Nothing is fetched when that cell already holds a value.

    Args:
        screen_name: Screen name whose friends are wanted.

    Returns:
        None. Results are stored in the global ``df`` and the CSV cache.
    """
    import traceback

    global df
    start = time.time()
    print('Getting friends')

    # The original tested len(<boolean mask>), which is the length of the
    # whole frame, so an unknown screen_name crashed on .values[0].
    row_mask = df['screen_name'] == screen_name
    if row_mask.any():
        cached = df.loc[row_mask, 'friends'].values[0]
        # Empty cells come back from CSV as NaN; treat them as "not fetched".
        if not pd.isna(cached) and cached != '':
            print('- Already have friends for ' + screen_name)
            return

    ids = list(tweepy.Cursor(
        api.get_friend_ids, screen_name=screen_name, count=5000).items())

    info = []
    for i in range(0, len(ids), 100):
        chunk = ids[i:i+100]
        try:
            print('- Getting friends info for chunk ' + str(i) + ' to ' + str(i+100) + ' of ' + str(len(ids)) + ' for ' + screen_name)
            info.extend(api.lookup_users(user_id=chunk))
        except Exception:
            # Best effort: one failed chunk must not abort the crawl, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            traceback.print_exc()
            print('Something went wrong, skipping...')

    friends_list = {}
    known_ids = set(df['id'].tolist())
    new_rows = []
    for user in info:
        friends_list[user.id] = user.screen_name
        if user.id not in known_ids:
            known_ids.add(user.id)
            new_rows.append({
                'id': user.id,
                'screen_name': user.screen_name,
                'followers_count': user.followers_count,
                'following_count': user.friends_count,
                'verified': user.verified,
                'followers': '',
                'friends': ''})

    # One concat instead of DataFrame.append per user: append is quadratic
    # in a loop and was removed in pandas 2.0.
    if new_rows:
        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

    df.loc[df['screen_name'] == screen_name,
           'friends'] = json.dumps(friends_list)
    df.to_csv(filename, index=False)
    end = time.time()
    print('Done getting friends in {} seconds'.format(end - start))


In [85]:
def get_mutuals(screen_name):
    """Compute and cache the mutuals of ``screen_name``.

    A mutual is an account that appears in both the 'followers' and the
    'friends' JSON maps of ``screen_name``'s row. Both maps are fetched first
    (via ``get_followers`` / ``get_friends``) if they are not cached. The
    resulting ``{id: screen_name}`` map is stored as JSON in the 'mutuals'
    cell and the table is written to ``filename``.

    Args:
        screen_name: Screen name whose mutuals are wanted; it must already
            have a row in the global ``df``.

    Returns:
        None. The result is stored in the global ``df`` and the CSV cache.
    """
    global df

    # The helpers may rebind the global df, so the row mask is recomputed
    # after each call instead of being reused.
    get_followers(screen_name)
    followers = json.loads(
        df.loc[df['screen_name'] == screen_name, 'followers'].values[0])

    get_friends(screen_name)
    friends = json.loads(
        df.loc[df['screen_name'] == screen_name, 'friends'].values[0])

    # Keys are strings on both sides because both maps come from JSON.
    mutuals = {uid: name for uid, name in followers.items() if uid in friends}

    # An all-empty 'mutuals' column is float64 after a CSV round trip; make it
    # object first so that storing a string does not rely on implicit upcasting.
    if 'mutuals' in df.columns:
        df['mutuals'] = df['mutuals'].astype(object)
    df.loc[df['screen_name'] == screen_name, 'mutuals'] = json.dumps(mutuals)

    # Persist like the followers/friends helpers do; previously the mutuals
    # were lost unless some later fetch happened to save the table.
    df.to_csv(filename, index=False)

In [99]:
def build_mutuals_df(screen_name):
    """Return a table of the mutuals of ``screen_name`` with their profile stats.

    The 'mutuals' JSON map stored in ``screen_name``'s row of the global ``df``
    is expanded to one row per mutual, and each row is enriched with the
    'followers_count', 'following_count' and 'verified' values that ``df``
    holds for that id.

    Args:
        screen_name: Screen name whose cached mutuals should be tabulated; its
            'mutuals' cell must already contain JSON.

    Returns:
        A DataFrame with columns 'id', 'screen_name', 'followers_count',
        'following_count' and 'verified', in the order of the JSON map. When
        there are no mutuals only the 'id' and 'screen_name' columns are
        present, as before. A mutual whose id is not in ``df`` gets NaN stats
        instead of raising IndexError.
    """
    global df
    mutuals_json = json.loads(
        df.loc[df['screen_name'] == screen_name, 'mutuals'].values[0])

    # Build the frame in one go; DataFrame.append per row is quadratic and
    # was removed in pandas 2.0.
    mutuals_df = pd.DataFrame(
        [{'id': int(uid), 'screen_name': name}
         for uid, name in mutuals_json.items()],
        columns=['id', 'screen_name'])
    if mutuals_df.empty:
        return mutuals_df

    # First occurrence per id, matching the original `.values[0]` lookup.
    stats = df[['id', 'followers_count', 'following_count', 'verified']] \
        .drop_duplicates(subset='id')

    # A left join keeps every mutual, in order, even if its stats are missing.
    return mutuals_df.merge(stats, on='id', how='left')

In [None]:
def shortest_path_to_celeb():
    """Walk outward from ``me`` through mutuals until a matching account is found.

    Starting at ``me``, the mutuals of the current user are fetched
    (``get_mutuals``) and tabulated (``build_mutuals_df``). If any mutual
    satisfies ``rules`` (currently: verified), the first such mutual ends the
    path. Otherwise the walk continues from the first mutual not yet visited.

    Note: this is a greedy, single-branch walk, not an exhaustive
    breadth-first search, so the path it finds is not guaranteed to be the
    shortest one.

    Returns:
        The list of screen names from ``me`` to the matching account, or
        None when no match was found within ``max_depth`` hops.
    """
    start = time.time()
    print('Getting shortest path to celeb')
    path = []
    max_depth = 2        # maximum number of users whose mutuals are expanded
    max_iterations = 5   # hard safety cap on loop turns
    rules = {'verified': True}
    found = False

    # Starting with me
    current_user = me

    i = 0

    # The original condition was `A and B or i < 5`, which parses as
    # `(A and B) or (i < 5)` and so forced at least five iterations regardless
    # of max_depth / found. All three limits must hold together.
    while len(path) < max_depth and not found and i < max_iterations:
        i += 1
        print('Iteration {}'.format(i))
        print('Current user: {}'.format(current_user))
        path.append(current_user)
        get_mutuals(current_user)
        mutuals_df = build_mutuals_df(current_user)

        # Dead end: nothing to test and nobody to continue with.
        if mutuals_df.empty:
            print('No mutuals for {}, stopping'.format(current_user))
            break

        # Check if any user in mutuals_df matches every rule
        mutuals_matching = [
            row['screen_name']
            for _, row in mutuals_df.iterrows()
            if all(row[key] == wanted for key, wanted in rules.items())]
        print('Matching users: {}'.format(mutuals_matching))

        if mutuals_matching:
            found = True
            path.append(mutuals_matching[0])
        else:
            # Continue from the first mutual not visited yet; without this
            # filter the walk can bounce straight back (I am a mutual of my
            # own mutuals).
            unvisited = [name for name in mutuals_df['screen_name']
                         if name not in path]
            if not unvisited:
                print('No unvisited mutuals for {}, stopping'.format(current_user))
                break
            current_user = unvisited[0]

        print('Built path: {}'.format(path))

    end = time.time()
    if found:
        print('Shortest path found in {} seconds'.format(end - start))
        return path
    print('No path found in {} seconds'.format(end - start))
    return None


# Run the search; progress is printed as it goes and the return value is kept in `x`.
x = shortest_path_to_celeb()


In [100]:
# Tabulate my mutuals from the cached table; this reads the 'mutuals' JSON
# stored in my row of `df`, so that cell must already be filled in.
mutuals = build_mutuals_df(me)

In [101]:
# Show my mutuals ranked from most to least followed.
# sort_values returns a new frame, so `mutuals` itself keeps its order.
mutuals.sort_values('followers_count', ascending=False)

Unnamed: 0,id,screen_name,followers_count,following_count,verified
1644,192945691,Loulogio_Pi,310410.0,1320.0,True
1224,18939115,vodafone_es,215767.0,25732.0,True
992,1092023714286370818,skereunpesado,208870.0,992.0,False
1055,399531287,joseluismolinas,171259.0,188110.0,False
346,850531868990730240,eduhaueroficial,136829.0,42576.0,False
...,...,...,...,...,...
2,1326914374167105536,Maariaces,18.0,34.0,False
839,1040313376025575424,lolaaxcf_,17.0,35.0,False
0,782944944474484736,Klorito__,15.0,101.0,False
124,1315743842809917441,inlovewithred_,12.0,32.0,False


In [109]:
# Decode the cached follower map stored as JSON in my row and display it.
f = json.loads(
    df.loc[df['screen_name'] == me, 'followers'].iloc[0])
f

{'1528166517233307648': 'LlivolMarc',
 '84703938': 'Jhonny_CM01',
 '1328762980968828928': 'DjanTry',
 '1503861554407686149': 'Lamedor25',
 '2952886942': 'salaza18_david',
 '223084671': 'Deadsmush',
 '313623514': 'azul_chavez',
 '782944944474484736': 'Klorito__',
 '808732269527625728': 'jotikaaaa18',
 '1512405428453494785': 'dpmgrimaldo1103',
 '1326914374167105536': 'Maariaces',
 '2462072906': 'charo2321',
 '1008380375146618883': 'sarasvnt',
 '1009595797': 'Aitorpt22',
 '1282453412387397632': 'punkycloud_',
 '1499130079938596865': 'LuisDanielSaa12',
 '1361251818408394752': 'asdfka5',
 '1514707687493320704': 'Fkeybroke',
 '383508361': 'elputoale',
 '1505799166433415170': 'bangaloreputa',
 '2242693404': 'Schrodinguer65',
 '1136704665750573056': 'paumina',
 '1266492932812615683': 'laugandara',
 '1140717485450285056': 'Wikidrew_',
 '3160292008': 'martalsks',
 '528875739': 'victoriarodrgzz',
 '1129726986790887429': 'ArismalNox',
 '1212441683029217280': 'miiriamexx',
 '323326907': 'Mordek_89'