In [13]:
import json
import pandas as pd
import tweepy
import networkx as nx
import numpy as np
import time

In [2]:
with open("twitter_credentials.json", "r") as file:
    data = file.read()
    
credentials = json.loads(data)
client = tweepy.Client(bearer_token=credentials["BEARER_TOKEN"])

In [3]:
# build composite network and remove duplicates

df = pd.read_csv('following_list_catcontentonly.csv')
df = pd.concat([df, pd.read_csv('following_list_BenjaminNorton.csv')], ignore_index=True)
df = pd.concat([df, pd.read_csv('following_list_qaomene.csv')], ignore_index=True)
df = df.drop_duplicates(ignore_index=True)

In [4]:
G = nx.from_pandas_edgelist(df, 'source', 'target')

G.number_of_nodes()

675793

In [5]:
def get_follower_count(id):
    c = client.get_user(id = id, user_fields = ["public_metrics"])
    if c.data:
        return c.data.public_metrics['followers_count']
    else:
        return None

In [6]:
def get_name(id):
    c = client.get_user(id = id)
    if c.data:
        return c.data.username
    else:
        return None

In [7]:
num_nodes = 500

G_sorted = pd.DataFrame(sorted(G.degree, key=lambda x: x[1], reverse=True))
G_sorted.index.name = 'Rank'
G_sorted.columns = ['user_id','degree']
G_sorted = G_sorted.astype({'user_id': 'int64'}) # cast the nconst
top_central = G_sorted.head(num_nodes)

top_central

Unnamed: 0_level_0,user_id,degree
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2433526788,1540
1,36511308,1379
2,240462058,1376
3,150151712,1374
4,1634248890,1372
...,...,...
495,34798360,1029
496,26784152,1029
497,1104547601880436736,1028
498,16156174,1028


In [61]:
user_id = client.get_user(username='qaomene').data
if user_id:
    row = G_sorted.loc[G_sorted['user_id'] == user_id.id]
    print(row[['degree']])


      degree
Rank        
0       1540


In [8]:
pd.options.mode.chained_assignment = None  # default='warn', turns off warnings
# top_central['follower_count'] = top_central.apply(lambda row: client.get_user(id = int(row.nconst), user_fields = ["public_metrics"]).data.public_metrics['followers_count'] if client.get_user(id = int(row.nconst)).data else None, axis = 1)
#top_central['follower_count'] = top_central.apply(lambda row: get_follower_count(int(row.user_id)), axis = 1)
top_central['username'] = top_central.apply(lambda row: get_name(int(row.user_id)), axis = 1)

TooManyRequests: 429 Too Many Requests

In [14]:
top_central_top = top_central[:250]
top_central_bottom = top_central[250:]

time.sleep(900)
top_central_top['username'] = top_central_top.apply(lambda row: get_name(int(row.user_id)), axis = 1)
print(top_central_top)
time.sleep(900)
top_central_bottom['username'] = top_central_bottom.apply(lambda row: get_name(int(row.user_id)), axis = 1)

                 user_id  degree         username
Rank                                             
0             2433526788    1540          qaomene
1               36511308    1379      MomodouTaal
2              240462058    1376       nickwestes
3              150151712    1374     vijayprashad
4             1634248890    1372  kenklippenstein
...                  ...     ...              ...
245             15503210    1069       Antiwarcom
246   735231724976738304    1069     gumby4christ
247            534675241    1069   Dreamdefenders
248             21539378    1069     Srirachachau
249   756925335757729792    1068        IronProle

[250 rows x 3 columns]


In [26]:
top_central = top_central_top + top_central_bottom
top_central = pd.concat([top_central_top, top_central_bottom])
top_500 = top_central.reset_index()[['user_id', 'username']]
top_500

Unnamed: 0,user_id,username
0,2433526788,qaomene
1,36511308,MomodouTaal
2,240462058,nickwestes
3,150151712,vijayprashad
4,1634248890,kenklippenstein
...,...,...
495,34798360,piedadcordoba
496,26784152,Renegade_Inc
497,1104547601880436736,Liv_Agar
498,16156174,DavidpStein


In [28]:
top_500.to_csv("top_500.csv", index=False)

# Normalization Tests

In [65]:
top_central['deg_by_count'] = top_central['degree'] / top_central['follower_count']

In [66]:
top_central.sort_values(by=['deg_by_count'], ascending=False)

Unnamed: 0_level_0,user_id,degree,follower_count,deg_by_count
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
162,1155901481624768512,1100,708.0,1.553672
8,761494303688495104,1354,2677.0,0.505790
145,1247981876129693696,1108,2603.0,0.425663
189,2904138960,1091,2600.0,0.419615
190,267305922,1090,3327.0,0.327622
...,...,...,...,...
155,807095,1104,51702761.0,0.000021
82,759251,1153,56343096.0,0.000020
129,1468029408535453696,1116,,
154,1416921060788801536,1105,,


In [67]:
top_central['deg_by_log_count'] = top_central['degree'] / np.log(top_central['follower_count'])

In [68]:
top_central.sort_values(by=['deg_by_log_count'], ascending=False).head(10)

Unnamed: 0_level_0,user_id,degree,follower_count,deg_by_count,deg_by_log_count
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
8,761494303688495104,1354,2677.0,0.50579,171.556316
162,1155901481624768512,1100,708.0,1.553672,167.620476
1,36511308,1379,5621.0,0.24533,159.712497
24,1354814020751036416,1259,7192.0,0.175056,141.767712
145,1247981876129693696,1108,2603.0,0.425663,140.887696
189,2904138960,1091,2600.0,0.419615,138.746406
69,1116129122,1164,4471.0,0.260344,138.482942
0,2433526788,1540,85662.0,0.017978,135.585286
190,267305922,1090,3327.0,0.327622,134.404852
120,815376043007561728,1120,4717.0,0.237439,132.40448


In [69]:
log_base = 1.021

top_central['log_follower_count'] = np.log(top_central['follower_count'])/np.log(log_base)
top_central['deg_minus_log_follower'] = top_central['degree'] - top_central['log_follower_count']

In [70]:
top_central.sort_values(by=['deg_minus_log_follower'], ascending=False).head(10)

Unnamed: 0_level_0,user_id,degree,follower_count,deg_by_count,deg_by_log_count,log_follower_count,deg_minus_log_follower
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,2433526788,1540,85662.0,0.017978,135.585286,546.524392,993.475608
8,761494303688495104,1354,2677.0,0.50579,171.556316,379.763607,974.236393
1,36511308,1379,5621.0,0.24533,159.712497,415.457649,963.542351
6,1658279324,1367,34100.0,0.040088,130.975673,502.202959,864.797041
2,240462058,1376,73853.0,0.018632,122.749387,539.387022,836.612978
24,1354814020751036416,1259,7192.0,0.175056,141.767712,427.316629,831.683371
3,150151712,1374,98706.0,0.01392,119.479289,553.344368,820.655632
5,88702710,1369,141458.0,0.009678,115.432371,570.659727,798.340273
162,1155901481624768512,1100,708.0,1.553672,167.620476,315.767195,784.232805
20,195306655,1272,36492.0,0.034857,121.086946,505.465104,766.534896
