In [1]:

import pandas as pd
import datetime
import time
import json
import functions_twitter

# Load the per-user summary table; only the username, id and following-count columns are needed.
users_data_df = pd.read_csv(
    'output_0/twitter_summary_df_0.tsv',
    sep='\t',
    usecols=['username', 'id', 'public_metrics.following_count'],
)

# Read list of secrets. Due to rate limit, multiple twitter developer accounts are recommended.
secrets_pool = pd.read_csv('twitter_dev_secrets.csv', index_col=False)
bearer_tokens = list(secrets_pool['bearer_token'])

# Keep only usernames with a non-zero following_count, rank them by following_count in
# ASCENDING order (the code has always sorted ascending; the earlier comment wrongly said
# "descending"), and renumber the rows 0..n-1.
# Built as a single chained expression that returns a new frame: sorting the filtered
# slice with inplace=True is what raised pandas' SettingWithCopyWarning.
users_data_df_nonzero = (
    users_data_df[users_data_df['public_metrics.following_count'] > 0]
    .sort_values('public_metrics.following_count', ascending=True)
    .reset_index(drop=True)
)

# Calculate total users and following included in analysis
total_users_count = len(users_data_df_nonzero)
total_following = users_data_df_nonzero['public_metrics.following_count'].sum()

total_bearer_token_count = len(bearer_tokens)
request_left = 15 # Request limit for GET /2/users/:id/following is 15 requests per 15-minute window (app auth)
# Following records assumed to be returned per request (page size) — TODO confirm against functions_twitter.
following_per_batch = 1000
# Hourly throughput: tokens x requests per window x records per request x four 15-minute windows per hour.
rate_of_req_hrly = total_bearer_token_count*request_left*following_per_batch*4

est_time_req_hr = total_following/rate_of_req_hrly

text = f"Total number of Twitter Developer Accounts = {total_bearer_token_count}\nTotal number of Twitter Usernames = {total_users_count}\nTotal number of following = {total_following}\nEstimated time required for analysis = {est_time_req_hr:.2f} hrs"
# Write the summary notice; the context manager closes the file even if the write fails.
with open('output_0/analytics_notice.txt', 'w') as text_file:
    text_file.write(text)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  users_data_df_nonzero.sort_values('public_metrics.following_count', ascending = True, inplace = True)


In [2]:
# Working copy of the token pool (the "cartridge"); bearer_tokens itself is left untouched.
bearer_tokens_catridge = list(bearer_tokens)
# The first token becomes the active one and is removed from the cartridge.
bearer_token = bearer_tokens_catridge[0]
del bearer_tokens_catridge[0]

In [3]:
# Accumulators filled by the collection loop: following records and per-user error entries.
users_following, error_log = [], []

In [4]:
# Collect the "following" list of every user via functions_twitter, page by page.
# Uses request_left, bearer_token, bearer_tokens_catridge and bearer_tokens set by earlier cells,
# and appends results to users_following and failures to error_log.
# request_left = 15 # Request limit for GET /2/users/:id/following is 15 requests per 15-minute window (app auth)
rate_limit_reset_time = time.mktime(datetime.datetime.now().timetuple()) + 1000 # current local time as a whole-second timestamp plus 1000 s; lowered below to the earliest reset time seen in response headers

# NOTE(review): 85485 is a hard-coded start position — presumably where an earlier, interrupted run stopped; confirm before re-running.
for index, row in users_data_df_nonzero[85485:].iterrows():
    user_following = []  # records gathered for this user across all pages
    origin_id = row['id']
    origin_username = row['username']
    nxt_page = True
    nxt_token = ''  # empty for the first page; later holds the '&pagination_token=...' suffix
    while nxt_page:
        # Progress line: row index - username | tokens left in the cartridge - requests left on the current token
        print(f'{index} - {origin_username} | {len(bearer_tokens_catridge)} - {request_left}')
        if request_left > 0:
            # Helper returns the response object and an updated request budget (presumably read from rate-limit headers — confirm in functions_twitter).
            request, request_left = functions_twitter.twitter_get_following_by_users(origin_id, nxt_token, bearer_token)
            if request.status_code == 200:
                try:
                    request_json = request.json()
                    user_following += request_json['data']
                    request_meta = request_json['meta']
                    if 'next_token' in request_meta.keys(): # There are more following under username
                        nxt_page = True # Continue with while loop
                        nxt_token = f'&pagination_token={request_meta["next_token"]}' # Save pagination token
                    else:
                        nxt_page = False # Stop iteration of current username
                except KeyError:
                    # A 200 response without 'data' (or 'meta') lands here; the code treats it as an inaccessible account.
                    print(f'{origin_username} - Might be a private account. No access allowed.')
                    error_detail = {'error' : f'Might be a private account. No access allowed.', 'username' : origin_username}
                    error_log.append(error_detail)
                    nxt_page = False # Stop iteration of current username
            else: # Requests failed. Did not get full list of following of username
                # NOTE(review): the printed message says "followers" while the logged error says "following".
                # NOTE(review): pages already collected for this user are kept and still appended below.
                print(f'{origin_username} - Incomplete run. Did not finish extracting all followers')
                error_detail = {'error' : f'Incomplete run. Did not finish extracting all following', 'username' : origin_username}
                error_log.append(error_detail)
                nxt_page = False # Stop iteration of current username
        else:
            # Current token has no requests left: read its reset time from the most recent response and hand over to catridge_reset.
            # NOTE(review): `request` is only bound after the first API call; if request_left were already 0 on entry this line would raise NameError.
            request_reset_unixtime = float(request.headers['x-rate-limit-reset']) # timestamp when the request limit for bearer token is expected to reset.
            rate_limit_reset_time = min(request_reset_unixtime,rate_limit_reset_time)
            # Presumably switches to the next token in the cartridge and refills/waits once it is empty — confirm in functions_twitter; 15 is the per-window request limit passed through.
            bearer_tokens_catridge, bearer_token, request_left, rate_limit_reset_time = functions_twitter.catridge_reset(rate_limit_reset_time, bearer_tokens_catridge, bearer_tokens, 15)
    # Users that yielded no records are skipped; otherwise each record is tagged with the user it came from.
    if len(user_following) == 0:
        continue
    else:
        user_following = [dict(item, **{'origin_username':origin_username}) for item in user_following]
        users_following += user_following

85485 - darshpreetsingh | 35 - 15
85485 - darshpreetsingh | 35 - 14
85485 - darshpreetsingh | 35 - 13
85486 - Khanchai256 | 35 - 12
Khanchai256 - Might be a private account. No access allowed.
85487 - LadenSyed | 35 - 11
85487 - LadenSyed | 35 - 10
85487 - LadenSyed | 35 - 9
85488 - EVfortheM | 35 - 8
85488 - EVfortheM | 35 - 7
85488 - EVfortheM | 35 - 6
85489 - JMSSS2608 | 35 - 5
85489 - JMSSS2608 | 35 - 4
85489 - JMSSS2608 | 35 - 3
85490 - 0xbilly | 35 - 2
85490 - 0xbilly | 35 - 1
85490 - 0xbilly | 35 - 0
85490 - 0xbilly | 34 - 15
85491 - SLadington | 34 - 14
85491 - SLadington | 34 - 13
85491 - SLadington | 34 - 12
85492 - eashin95016 | 34 - 11
85492 - eashin95016 | 34 - 10
85492 - eashin95016 | 34 - 9
85493 - Ilbart00 | 34 - 8
85493 - Ilbart00 | 34 - 7
85493 - Ilbart00 | 34 - 6
85494 - LanaO67 | 34 - 5
85494 - LanaO67 | 34 - 4
85494 - LanaO67 | 34 - 3
85495 - domainpay | 34 - 2
85495 - domainpay | 34 - 1
85495 - domainpay | 34 - 0
85495 - domainpay | 33 - 15
85496 - imCuPunk | 33 -

In [None]:
# users_following_df = pd.DataFrame(users_following)
# error_log_df = pd.DataFrame(error_log)

In [None]:
# users_following_df.to_csv('output_0/users_following_df_0_part2.tsv', sep = '\t')
# error_log_df.to_csv('output_0/error_log_df_0_part2.tsv', sep = '\t')

In [5]:
import json

# Persist the collected following records and the error log as JSON files,
# written in this order: following records first, then the error log.
for out_path, payload in (
    ("output_0/users_following_df_0_part7.json", users_following),
    ("output_0/error_log_df_0_part7.json", error_log),
):
    with open(out_path, "w") as outfile:
        json.dump(payload, outfile)