In [99]:
import pandas as pd
from tqdm import tqdm
# Hook tqdm into pandas so the progress_* variants of apply become available.
# No progress_apply call is visible in this part of the notebook.
tqdm.pandas()

In [100]:
# Load the two annotated tweet exports. Missing values are replaced with '' so the
# string clean-up further down can be applied to every row.
db_tweets = pd.read_csv('../data/db_annotated_tweets.csv', index_col=False).fillna('')
db_user_tweets = pd.read_csv('../data/db_annotated_user_tweets.csv', index_col=False).fillna('')

# Stack both exports into one frame. ignore_index=True renumbers the rows; without it
# each input keeps its own row labels and the combined frame carries duplicate labels,
# which makes any later label-based selection or aligned assignment ambiguous.
db_tweets = pd.concat([db_tweets, db_user_tweets], ignore_index=True)

# Normalise the handle columns: drop every '@' and every space so the same account is
# spelled identically in 'UserName' and 'retweet'.
for handle_col in ('UserName', 'retweet'):
    db_tweets[handle_col] = db_tweets[handle_col].apply(
        lambda handle: handle.replace('@', '').replace(' ', '')
    )

  interactivity=interactivity, compiler=compiler, result=result)


In [101]:
def first_fields(raw_mentions):
    """Split on ';' and keep the text before the first ',' of every piece."""
    return [piece.split(',')[0] for piece in raw_mentions.split(';')]


# One list of mentioned handles per tweet, stored on db_tweets itself.
db_tweets['MentionedUser'] = db_tweets['mentions'].map(first_fields)

# One row per (author, mentioned handle) pair.
mentions = db_tweets[['UserName', 'MentionedUser']].explode('MentionedUser')
mentions['MentionedUser'] = mentions['MentionedUser'].map(
    lambda handle: handle.replace('@', '').replace(' ', '')
)

# Keep only pairs where both ends are non-empty and the author is not mentioning itself.
has_author = mentions['UserName'] != ''
has_target = mentions['MentionedUser'] != ''
not_self = mentions['MentionedUser'] != mentions['UserName']
mentions = mentions[has_author & has_target & not_self]

# Number of rows per (UserName, MentionedUser) pair, as a flat frame with a 'count' column.
mention_counts = (
    mentions
    .groupby(['UserName', 'MentionedUser'])
    .size()
    .reset_index(name='count')
)
mention_counts

Unnamed: 0,UserName,MentionedUser,count
0,)1766,816AJ18,1
1,)1766,russiancosmist,1
2,000Dillon000,CBedfordDC,1
3,001Prometheus,Guns_Velez,1
4,001Prometheus,NoNutNoSlut,1
...,...,...,...
261200,zzzdogman,eveforamerica,1
261201,zzzdogman,rezazia,1
261202,zzzjamieg,RadioFreeTom,1
261203,zzzuperior,cityafreaks,1


In [102]:
# Per-user min-max scaling of the mention counts into a 'weights' column:
#     weight = (count - min_of_user) / (max_of_user - min_of_user)
# GroupBy.transform returns values labelled with mention_counts' own row index, so the
# column assignment does not depend on how groupby.apply labels its result (newer
# pandas versions prepend the group key there, which breaks the assignment).
mention_by_user = mention_counts.groupby('UserName')['count']
mention_lowest = mention_by_user.transform('min')
mention_spread = mention_by_user.transform('max') - mention_lowest

# When all of a user's counts are equal the spread is 0 and the ratio is 0/0 = NaN;
# those rows get weight 1.0. Only the weight column is filled.
# NOTE(review): for users whose counts differ, the least-mentioned target scales to
# exactly 0.0 -- confirm that a zero weight for a real mention is intended.
mention_counts['weights'] = (
    (mention_counts['count'] - mention_lowest) / mention_spread
).fillna(1.0)

mention_counts.sort_values(by='weights', ascending=False)

Unnamed: 0,UserName,MentionedUser,count,weights
0,)1766,816AJ18,1,1.0
162361,antisophistry,JonathanTurley,1,1.0
162328,antidealogue,Dadabase2,1,1.0
162329,antidealogue,jordanbpeterson,1,1.0
162330,antidogeatdog,theblaze,1,1.0
...,...,...,...,...
224831,mrotterpockets,DonutOperator,1,0.0
224832,mrotterpockets,IRS_CI,1,0.0
224833,mrotterpockets,MrAndyNgo,1,0.0
224834,mrotterpockets,PhantomShitlord,1,0.0


In [103]:
# Rows whose 'retweet' handle is non-empty.
is_retweet = db_tweets['retweet'].ne('')
retweets = db_tweets[is_retweet]

# Number of rows per (UserName, retweet) pair, as a flat frame with a 'count' column.
retweet_counts = (
    retweets
    .groupby(['UserName', 'retweet'])
    .size()
    .reset_index(name='count')
)
retweet_counts

Unnamed: 0,UserName,retweet,count
0,000Dillon000,CBedfordDC,1
1,0045James,imUrB00gieman,1
2,0071Holly,fake_biden,1
3,008moonside,DougCameron51,1
4,00Lilac_Sky00,WalterDavis4L,1
...,...,...,...
129466,zzz_das,FAsintomatico,1
129467,zzzdogman,McKaylaRoseJ,1
129468,zzzdogman,eveforamerica,1
129469,zzzdogman,rezazia,1


In [104]:
# How many (UserName, retweet) pairs occur once, twice, ... -- most frequent value first.
retweet_count_distribution = retweet_counts['count'].value_counts()
retweet_count_distribution

1      124566
2        3897
3         634
4         200
5          79
6          36
7          17
8          13
9          10
11          3
12          3
10          3
13          2
71          1
16          1
17          1
20          1
21          1
22          1
30          1
303         1
Name: count, dtype: int64

In [105]:
# Per-user min-max scaling of the retweet counts into a 'weights' column:
#     weight = (count - min_of_user) / (max_of_user - min_of_user)
# GroupBy.transform returns values labelled with retweet_counts' own row index, so the
# column assignment does not depend on how groupby.apply labels its result (newer
# pandas versions prepend the group key there, which breaks the assignment).
retweet_by_user = retweet_counts.groupby('UserName')['count']
retweet_lowest = retweet_by_user.transform('min')
retweet_spread = retweet_by_user.transform('max') - retweet_lowest

# Users whose counts are all equal have a spread of 0, so their weight is 0/0 = NaN.
# The NaNs are intentionally left in place in this cell.
retweet_counts['weights'] = (retweet_counts['count'] - retweet_lowest) / retweet_spread
retweet_counts

Unnamed: 0,UserName,retweet,count,weights
0,000Dillon000,CBedfordDC,1,
1,0045James,imUrB00gieman,1,
2,0071Holly,fake_biden,1,
3,008moonside,DougCameron51,1,
4,00Lilac_Sky00,WalterDavis4L,1,
...,...,...,...,...
129466,zzz_das,FAsintomatico,1,
129467,zzzdogman,McKaylaRoseJ,1,
129468,zzzdogman,eveforamerica,1,
129469,zzzdogman,rezazia,1,


In [106]:
# Frequency of each distinct non-NaN weight, most frequent value first.
retweet_weight_distribution = retweet_counts['weights'].value_counts()
retweet_weight_distribution

0.000000    19992
1.000000     3171
0.500000      356
0.333333      113
0.250000       56
0.666667       30
0.142857       27
0.200000       25
0.166667       13
0.062500       11
0.285714       10
0.750000        9
0.125000        8
0.400000        4
0.066667        4
0.052632        3
0.428571        3
0.800000        3
0.111111        3
0.090909        3
0.083333        3
0.100000        2
0.571429        2
0.714286        2
0.181818        2
0.014286        1
0.857143        1
0.187500        1
0.210526        1
0.105263        1
0.272727        1
0.444444        1
0.133333        1
0.157895        1
Name: weights, dtype: int64

In [107]:
# Give weight 1.0 to every row whose min-max weight is NaN (users whose retweet counts
# are all equal), matching what is done for the mention weights.
# The filled column is stored back on retweet_counts: a bare `retweet_counts.fillna(1.0)`
# only displays a filled copy, leaving the NaNs in the frame that is merged later, where
# they would be replaced by 0 instead of 1.0.
retweet_counts['weights'] = retweet_counts['weights'].fillna(1.0)
retweet_counts

Unnamed: 0,UserName,retweet,count,weights
0,000Dillon000,CBedfordDC,1,1.0
1,0045James,imUrB00gieman,1,1.0
2,0071Holly,fake_biden,1,1.0
3,008moonside,DougCameron51,1,1.0
4,00Lilac_Sky00,WalterDavis4L,1,1.0
...,...,...,...,...
129466,zzz_das,FAsintomatico,1,1.0
129467,zzzdogman,McKaylaRoseJ,1,1.0
129468,zzzdogman,eveforamerica,1,1.0
129469,zzzdogman,rezazia,1,1.0


In [108]:
# Outer-join the mention and retweet edge lists on (author, target handle). The target
# is 'MentionedUser' on the mention side and 'retweet' on the retweet side; the
# overlapping 'count' / 'weights' columns get the '_mention' / '_rt' suffixes.
# Pairs present on only one side get 0 in every column that would otherwise be NaN.
# NOTE(review): that blanket fill also writes 0 into the 'MentionedUser' / 'retweet'
# handle column of one-sided pairs -- confirm downstream readers expect that.
interactions = mention_counts.merge(
    retweet_counts,
    how='outer',
    left_on=['UserName', 'MentionedUser'],
    right_on=['UserName', 'retweet'],
    suffixes=('_mention', '_rt'),
).fillna(0)

In [109]:
# Persist the merged interaction table next to the input data.
interactions_csv_path = '../data/interactions.csv'
interactions.to_csv(interactions_csv_path)