In [1]:
from datetime import datetime, timezone
from pathlib import Path

import numpy as np
import pandas as pd

from utils import get_api, get_users_from_user_ids

In [None]:
# Get my followers (not necessary for now)

In [136]:
# Collect the complete list of follower ids of the target account by letting
# `iterate_over_cursors` drive `api.get_follower_ids`.
# NOTE(review): `iterate_over_cursors` is not among the names imported in the
# visible import cell — confirm where it is defined, otherwise this cell fails
# with NameError on a fresh kernel.
screen_name = 'MrCluni'
api = get_api()
follower_ids = iterate_over_cursors(screen_name=screen_name, function=api.get_follower_ids)
print(len(follower_ids))

19126


In [2]:
# Quick alternative to the full crawl: one direct `api.get_follower_ids` call,
# with no cursor iteration. In the recorded run this returned 5000 ids, versus
# 19126 for the full crawl. Note that it overwrites `follower_ids`.
screen_name = 'MrCluni'
api = get_api()
follower_ids = api.get_follower_ids(screen_name=screen_name)
print(len(follower_ids))

5000


In [5]:
# Resolve only the first `num_users` follower ids into user objects.
num_users = 201
first_follower_ids = follower_ids[:num_users]
followers = get_users_from_user_ids(first_follower_ids)

In [6]:
# Build `followers_df`: one row per fetched follower, holding the selected
# profile attributes plus a few derived columns used for filtering later on.
important_attributes = ['screen_name', 'location', 'followers_count', 'friends_count', 'created_at', 'favourites_count', 'statuses_count']
data = {k: [] for k in important_attributes}
data['last_status'] = []
for user in followers:
    attributes = user._json
    for attribute in important_attributes:
        data[attribute].append(attributes[attribute])
    # Users whose payload has no 'status' entry get NaT (not a float NaN) so the
    # column remains datetime-typed even when every value is missing.
    last_status = user.status.created_at if 'status' in attributes else pd.NaT
    data['last_status'].append(last_status)

followers_df = pd.DataFrame(data)
# Normalise to a tz-aware UTC datetime column. Without this, a batch in which
# no follower has a status yields a non-datetime column and the subtraction
# from the tz-aware `time_processed` below fails.
followers_df['last_status'] = pd.to_datetime(followers_df['last_status'], utc=True)
# pandas division does not raise on a zero `followers_count`; it yields inf
# (or NaN for 0/0), so such rows still flow through the later threshold filters.
followers_df['friends_followers_ratio'] = followers_df['friends_count'] / followers_df['followers_count']
followers_df['time_processed'] = datetime.now(timezone.utc)
# Whole days between processing time and the last status; NaN when there is none.
followers_df['days_since_last_status'] = (followers_df['time_processed'] - followers_df['last_status']).dt.days
followers_df['source_screen_name'] = screen_name


In [7]:
# Load the users already checked in earlier runs; `screen_name` from this
# frame is used below to exclude them from the next pick.
# NOTE(review): this raises if './data/users_played.csv' does not exist yet —
# confirm the file is created before the first run.
df_users_played = pd.read_csv('./data/users_played.csv')

In [8]:
# Pick users to check: randomly sample up to `num_users` followers that pass
# every filter below and have not been picked in a previous run.
num_users = 10
friends_followers_ratio_threshold = 4
days_since_last_status_threshold = 0
friends_threshold = 30

# Follows many more accounts than follow them back.
friends_followers_ratio_mask = followers_df['friends_followers_ratio'] > friends_followers_ratio_threshold
# Last status is recent enough; rows with a NaN value compare False and are dropped.
days_since_last_status_mask = followers_df['days_since_last_status'] <= days_since_last_status_threshold
# Follows more than a minimum number of accounts.
friends_mask = followers_df['friends_count'] > friends_threshold
# Not already recorded in the played-users file.
not_played_mask = ~followers_df['screen_name'].isin(df_users_played['screen_name'])

candidates_df = followers_df[friends_followers_ratio_mask & days_since_last_status_mask & friends_mask & not_played_mask]
# `DataFrame.sample` raises ValueError when asked for more rows than exist
# (sampling without replacement), so cap the request at the number of
# candidates; the result may therefore hold fewer than `num_users` rows.
df_to_follow = candidates_df.sample(n=min(num_users, len(candidates_df)))

In [9]:
# Display the users selected in the previous cell (the rows about to be
# appended to the played-users file).
df_to_follow

Unnamed: 0,screen_name,location,followers_count,friends_count,created_at,favourites_count,statuses_count,last_status,friends_followers_ratio,time_processed,days_since_last_status,source_screen_name
85,Hernan_Cai1977,"Buenos Aires, Argentina",176,1711,Fri Nov 19 09:41:30 +0000 2021,28,23,2021-11-24 10:57:03+00:00,9.721591,2021-11-24 13:15:10.730720+00:00,0.0,MrCluni
92,GonzaloVigo3,,47,229,Mon Mar 09 20:59:06 +0000 2020,14843,2448,2021-11-24 11:16:28+00:00,4.87234,2021-11-24 13:15:10.730720+00:00,0.0,MrCluni
192,debichiafala,,131,875,Thu Feb 18 16:56:38 +0000 2010,3560,1601,2021-11-23 18:34:36+00:00,6.679389,2021-11-24 13:15:10.730720+00:00,0.0,MrCluni
114,Alexa93546545,,1,130,Fri Nov 19 13:02:05 +0000 2021,54,29,2021-11-24 02:45:14+00:00,130.0,2021-11-24 13:15:10.730720+00:00,0.0,MrCluni
107,AleIncubus2,,73,584,Fri Mar 31 17:32:09 +0000 2017,1669,965,2021-11-24 02:44:57+00:00,8.0,2021-11-24 13:15:10.730720+00:00,0.0,MrCluni
100,Sole83311655,,16,589,Wed Dec 13 23:20:29 +0000 2017,2467,3525,2021-11-24 03:47:30+00:00,36.8125,2021-11-24 13:15:10.730720+00:00,0.0,MrCluni
4,enritone,"Buenos Aires, Argentina",61,599,Wed May 25 17:46:04 +0000 2011,2730,920,2021-11-23 17:33:47+00:00,9.819672,2021-11-24 13:15:10.730720+00:00,0.0,MrCluni
95,nicobacker,"London, England",367,3230,Mon Mar 02 04:48:39 +0000 2009,18580,4257,2021-11-23 17:33:33+00:00,8.80109,2021-11-24 13:15:10.730720+00:00,0.0,MrCluni
13,SSpock3,,15,205,Tue Nov 23 23:18:28 +0000 2021,21,31,2021-11-24 13:02:01+00:00,13.666667,2021-11-24 13:15:10.730720+00:00,0.0,MrCluni
134,Sofibhache,,24,549,Fri Nov 19 01:39:54 +0000 2021,121,17,2021-11-24 04:04:26+00:00,22.875,2021-11-24 13:15:10.730720+00:00,0.0,MrCluni


In [10]:
# Save users played: append the selected users to the CSV of users already checked.
users_played_path = Path('./data/users_played.csv')
# Emit the header row only when starting a new (or empty) file. Appending to an
# existing file with a header would insert the header as a data row, while
# creating the file without one would make `read_csv` treat the first user as
# the column names.
write_header = not users_played_path.exists() or users_played_path.stat().st_size == 0
df_to_follow.to_csv(users_played_path, mode='a', index=False, header=write_header)

In [11]:
# Re-read the played-users file after the append and display it to verify
# that the new rows were written.
# NOTE(review): the recorded output contains rows whose values are the column
# names themselves (index 5 and 7) — it looks like a header line was appended
# as data in earlier runs; consider cleaning the CSV.
df_users_played = pd.read_csv('./data/users_played.csv')
df_users_played

Unnamed: 0,screen_name,location,followers_count,friends_count,created_at,favourites_count,statuses_count,last_status,friends_followers_ratio,time_processed,days_since_last_status,source_screen_name
0,Alejarmex,"Playa del Carmen, México.",242,1079,Mon May 24 19:45:51 +0000 2010,22235,8747,2021-11-23 20:21:26+00:00,4.458677685950414,2021-11-24 00:19:06.168127+00:00,0.0,MrCluni
1,GinestarMatias,República Unitaria de Córdoba,72,638,Sun Dec 16 19:37:06 +0000 2018,1509,4119,2021-11-23 18:40:27+00:00,8.86111111111111,2021-11-24 00:19:06.168127+00:00,0.0,MrCluni
2,ROSADUR85441457,,17,83,Thu Nov 18 17:22:05 +0000 2021,20,23,2021-11-23 19:46:37+00:00,4.882352941176471,2021-11-24 00:19:06.168127+00:00,0.0,MrCluni
3,RafaelR85635114,,1,194,Sun Nov 21 14:54:37 +0000 2021,45,8,2021-11-23 11:05:37+00:00,194.0,2021-11-24 00:19:06.168127+00:00,0.0,MrCluni
4,catmatadorcba,Cordoba Argentina,707,5009,Sun Oct 10 02:51:55 +0000 2010,67234,26575,2021-11-23 21:29:13+00:00,7.084865629420085,2021-11-24 00:19:06.168127+00:00,0.0,MrCluni
5,screen_name,location,followers_count,friends_count,created_at,favourites_count,statuses_count,last_status,friends_followers_ratio,time_processed,days_since_last_status,source_screen_name
6,VegetaLiberal,,215,4739,Thu May 06 01:51:06 +0000 2021,3,3730,2021-11-23 20:42:15+00:00,22.04186046511628,2021-11-24 00:19:06.168127+00:00,0.0,MrCluni
7,screen_name,location,followers_count,friends_count,created_at,favourites_count,statuses_count,last_status,friends_followers_ratio,time_processed,days_since_last_status,source_screen_name
8,Hernan_Cai1977,"Buenos Aires, Argentina",176,1711,Fri Nov 19 09:41:30 +0000 2021,28,23,2021-11-24 10:57:03+00:00,9.721590909090908,2021-11-24 13:15:10.730720+00:00,0.0,MrCluni
9,GonzaloVigo3,,47,229,Mon Mar 09 20:59:06 +0000 2020,14843,2448,2021-11-24 11:16:28+00:00,4.872340425531915,2021-11-24 13:15:10.730720+00:00,0.0,MrCluni
