In [2]:

import pandas as pd
import datetime
import time
import json
import functions_twitter

# Read the tab-separated user summary table, keeping only the columns needed
# to crawl each account's "following" list.
users_data_df = pd.read_csv(
    'output_0/twitter_summary_df_0.tsv',
    sep='\t',
    usecols=['username', 'id', 'public_metrics.following_count'],
)

# Read list of secrets. Due to rate limit, multiple twitter developer accounts are recommended.
secrets_pool = pd.read_csv('twitter_dev_secrets.csv', index_col=False)
bearer_tokens = list(secrets_pool['bearer_token'])

# Keep only usernames with a non-zero following_count, ranked by
# following_count in ASCENDING order (fewest followings first), with a fresh
# 0..n-1 index.
# NOTE: the previous comment said "descending", but the code has always sorted
# ascending; that behavior is kept.
# The frame is built in a single non-mutating chain: sorting a boolean-mask
# slice with inplace=True is what raised pandas' SettingWithCopyWarning.
users_data_df_nonzero = (
    users_data_df[users_data_df['public_metrics.following_count'] > 0]
    .sort_values('public_metrics.following_count', ascending=True)
    .reset_index(drop=True)
)

# Calculate total users and following included in analysis
total_users_count = len(users_data_df_nonzero)
total_following = users_data_df_nonzero['public_metrics.following_count'].sum()

# Throughput estimate: tokens x requests per window x results per request x
# four 15-minute windows per hour.
total_bearer_token_count = min(20, len(bearer_tokens)) # Max cap is at 20 bearer_tokens
request_left = 15 # Request limit for GET /2/users/:id/following is 15 requests per 15-minute window (app auth)
following_per_batch = 1000 # followings assumed to be returned per request (presumably the page size used by functions_twitter; verify)
rate_of_req_hrly = total_bearer_token_count * request_left * following_per_batch * 4

est_time_req_hr = total_following / rate_of_req_hrly

# Human-readable summary of the planned run.
text = (
    f"Total number of Twitter Developer Accounts = {total_bearer_token_count}\n"
    f"Total number of Twitter Usernames = {total_users_count}\n"
    f"Total number of following = {total_following}\n"
    f"Estimated time required for analysis = {est_time_req_hr:.2f} hrs"
)

# Write the notice with a context manager so the file handle is closed even
# if the write raises.
with open("output_0/analytics_notice.txt", "w") as text_file:
    text_file.write(text)

# --- Crawl the "following" list of every selected user -----------------------
# Token "cartridge": a working copy of the bearer tokens. The first token is
# popped as the active one; the untouched `bearer_tokens` list is passed to
# functions_twitter.catridge_reset together with the cartridge further below.
bearer_tokens_catridge = bearer_tokens.copy()
bearer_token = bearer_tokens_catridge.pop(0)
# `request_left` is not re-initialised here: the value assigned earlier in this
# cell (15) is reused as the remaining-request counter of the active token.
rate_limit_reset_time = time.mktime(datetime.datetime.now().timetuple()) + 1000 # initial value: current local time (whole seconds) + 1000 s; lowered below to the earliest reset timestamp seen

users_following = [] # accumulated following records of all users
error_log = [] # one dict per user whose extraction failed or stopped early

for index, row in users_data_df_nonzero.iterrows():
    user_following = [] # records collected for the current user only
    origin_id = row['id']
    origin_username = row['username']
    nxt_page = True
    nxt_token = '' # empty for the first page; later holds a '&pagination_token=...' fragment
    while nxt_page:
        # Progress line: row index, username, tokens left in the cartridge, requests left.
        print(f'{index} - {origin_username} | {len(bearer_tokens_catridge)} - {request_left}')
        if request_left > 0:
            # The helper returns the response object and the updated remaining-request count.
            # NOTE(review): how request_left is derived is defined in functions_twitter - confirm there.
            request, request_left = functions_twitter.twitter_get_following_by_users(origin_id, nxt_token, bearer_token)
            if request.status_code == 200:
                try:
                    request_json = request.json()
                    user_following += request_json['data']
                    request_meta = request_json['meta']
                    if 'next_token' in request_meta.keys(): # There are more following under username
                        nxt_page = True # Continue with while loop
                        nxt_token = f'&pagination_token={request_meta["next_token"]}' # Save pagination token
                    else:
                        nxt_page = False # Stop iteration of current username
                except KeyError:
                    # The 200 response had no 'data' or no 'meta' key; the cause is only
                    # guessed in the message below.
                    error_detail = {'error' : f'Might be a private account. No access allowed.', 'username' : origin_username}
                    error_log.append(error_detail)
                    nxt_page = False # Stop iteration of current username
            else: # Requests failed. Did not get full list of following of username
                # Pages already fetched for this user stay in user_following and are
                # still appended to users_following after the while loop.
                error_detail = {'error' : f'Incomplete run. Did not finish extracting all following', 'username' : origin_username}
                error_log.append(error_detail)
                nxt_page = False # Stop iteration of current username
        else:
            # No requests left on the active token. `request` is the most recent
            # response received, which may belong to a previous user.
            request_reset_unixtime = float(request.headers['x-rate-limit-reset']) # timestamp when the request limit for bearer token is expected to reset.
            rate_limit_reset_time = min(request_reset_unixtime,rate_limit_reset_time)
            # Presumably switches to the next token, or waits and refills the cartridge
            # when it is empty; 15 is presumably the per-window request quota.
            # TODO confirm against functions_twitter.catridge_reset.
            bearer_tokens_catridge, bearer_token, request_left, rate_limit_reset_time = functions_twitter.catridge_reset(rate_limit_reset_time, bearer_tokens_catridge, bearer_tokens, 15)
    if len(user_following) == 0:
        continue
    else:
        # Tag every record with the user whose following list it came from.
        user_following = [dict(item, **{'origin_username':origin_username}) for item in user_following]
        users_following += user_following

# Persist the collected following records as CSV; the default integer index
# is written as the first column.
users_following_df = pd.DataFrame(data=users_following)
users_following_df.to_csv(path_or_buf='output_0/users_following_df_0.csv', index=True)

# Persist the per-user error log the same way.
error_log_df = pd.DataFrame(data=error_log)
error_log_df.to_csv(path_or_buf='output_0/error_log_df_0.csv', index=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  users_data_df_nonzero.sort_values('public_metrics.following_count', ascending = True, inplace = True)


0 - VictorV87691741
1 - DonJonRambo
2 - stupakvip
3 - zkSync01
4 - Operskie0
5 - AP39056879
6 - OleksanderKrav1
7 - MedyFede
8 - gmx_rekt
9 - LukaszZiebaa
10 - Bob99161782
11 - Infcamtecnologi
12 - lorenzocioc
13 - ZinhagelM
14 - GmxDailyActions
15 - itspureduh
16 - maks20000906
17 - CoinWizard7
18 - LucasTorrente8
19 - Ciroking99
20 - InuArbitrum
21 - HNY_Optimism
22 - bandimohangoud
23 - ElBu34714398
24 - KriedemannSven
25 - OoijenPeter
26 - garcevic_j
27 - lucoluc1993
28 - Lucianaaraujo07
29 - gmxunstakebot
30 - KelvinMoronari
31 - Kameha_Pools
32 - LuizFer2470527
33 - Jasspre81383334
34 - StacyMa04397816
35 - ShamsInayat
36 - Vmagalhaes90
37 - BasicoFoda
38 - dazzsnow
39 - MarcosV01810246
40 - BlueberryPulse
41 - Nils34172202
42 - DsvDegen
43 - Thx2US
44 - Nhat313
45 - gmxalerts
46 - CryptoYoked
47 - NikitaSyzonenko
48 - Minhdoa42226205
49 - bRawlaphant
50 - DavidBa29515829
51 - nammyio
52 - ViolagoKathy
53 - 0xMitu
54 - accidental_toad
55 - HoodlifeClub
56 - ShaunSeah2
57 - weiqia

KeyboardInterrupt: 

In [5]:
# Show users_following_df as the cell output (bare last expression).
users_following_df

Unnamed: 0,id,name,username,origin_username
0,1543921444131930113,The Blueberry Podcast 🎙 🫐,TheBerryPodcast,GMX_IO
1,1491005700750942208,Blueberry Pulse 🫐,BlueberryPulse,GMX_IO
2,1458678055644368900,GMX Blueberry Club 🫐,GBlueberryClub,GMX_IO
