In [None]:
### Network construction
# data file: preprocessed_data.pkl
# bipartite network user - tweet
# one-mode user network 
# one-mode tweet network

# other combinations:
# one-mode pro-russian / pro-ukrainian user network   
# one-mode user network before / after Twitter ban in Russia   

# export nodelist and edgelist for network visualisation

In [6]:
import pandas as pd
import networkx as nx
import csv
from networkx.algorithms import bipartite

In [43]:
data = pd.read_pickle("preprocessed_data.pkl")
data.head()

Unnamed: 0,userid,username,textid,location,language,tweetid,tweetcreatedts,retweetcount,hashtags,text,usercreatedts,following,followers,totaltweets,tags_splitted,tags_lower,pro_Russian,pro_Ukraine,Neutral,NodeType
0,1497277564200517635,GulambiLamver,0,गुलाबी_स्तान,en,1497433817086234624,2022-02-26 04:50:17,0,"[{'text': 'Ukraine', 'indices': [40, 48]}, {'t...",Coz barking is our fundamental right..\n\n#Ukr...,2022-02-25 18:29:39.000000,4,0,2,"[Ukraine, UkraineUnderAttack, RussiaUkraineWar...","[ukraine, ukraineunderattack, russiaukrainewar...",False,True,True,pro_Ukraine
1,4832941924,Mari_Berbec,1,"Berlin, Germany",en,1497476520742309891,2022-02-26 07:39:58,0,"[{'text': 'putin', 'indices': [53, 59]}, {'tex...",@charlieweissr Socialism has nothing to do wit...,2016-01-21 13:57:12.000000,921,224,2848,"[putin, trump]","[putin, trump]",False,False,True,Neutral
2,2314945207,Outspoken_Sam,2,"Texas, USA",en,1497066569263767555,2022-02-25 04:30:58,0,"[{'text': 'Ukraine', 'indices': [272, 280]}]",We're seeing repeated demonstrations of who Bi...,2014-01-28 05:46:59.000000,1162,642,3869,[Ukraine],[ukraine],False,False,True,Neutral
3,2811043316,joncreed2,3,Nottinghamshire,und,1497308839401635845,2022-02-25 20:33:40,16,"[{'text': 'Ukraine', 'indices': [20, 28]}, {'t...",#Ukraine #Nottingham #nottinghamtogether #Stan...,2014-09-15 10:10:32.000000,778,361,3960,"[Ukraine, Nottingham, nottinghamtogether, Stan...","[ukraine, nottingham, nottinghamtogether, stan...",False,True,True,pro_Ukraine
4,1239053406301335552,PChauhan_96,4,"Chamba, India",en,1497551063913619457,2022-02-26 12:36:11,276,"[{'text': 'Ukraine', 'indices': [34, 42]}]",That's a Leader ❣️ \n#Ukraine https://t.co/BpF...,2020-03-15 04:59:11.000000,25,6,374,[Ukraine],[ukraine],False,False,True,Neutral


In [44]:
# Get nodes for bipartite network
users = data["userid"]
tweets = data["textid"]

# Create edge list for bipartite network
columns = ["userid", "textid"]
edges = [(user, textid) for user, textid in data[columns].values]

# save edge list to file
with open('edges.csv','wb') as file:
    csv_out=csv.writer(out)
    csv_out.writerow(['name','num'])
    for row in data:
        csv_out.writerow(row)

NameError: name 'out' is not defined

In [45]:
print(users.nunique())
print(tweets.nunique())

130608
105158


In [46]:
# bipartite network construction
B = nx.Graph()
B.name = "Bipartite user-tweet network"
B.add_nodes_from(users, bipartite=0)
B.add_nodes_from(tweets, bipartite=1)
B.add_edges_from(edges)

In [47]:
nx.info(B)

"Graph named 'Bipartite user-tweet network' with 235745 nodes and 161429 edges"

In [48]:
users_nodes = {n for n, a in B.nodes(data=True) if a["bipartite"] == 0}
tweets_nodes = set(B) - users_nodes

B_users = bipartite.projected_graph(B, users_nodes)
B_users.name = "One-mode user network"
B_tweets = bipartite.projected_graph(B, tweets_nodes)
B_tweets.name = "One-mode tweet network"

print(nx.info(B_users))
print(nx.info(B_tweets))

Graph named 'One-mode user network' with 130619 nodes and 1436926 edges
Graph named 'One-mode tweet network' with 106650 nodes and 78517 edges


## Compare two timeframes

In [49]:
# Filter tweets by time period: before and after the tweeter ban 
time_before = data[(data['tweetcreatedts'] < '2022-03-04 24:59:59+00:00')]
time_after = data[(data['tweetcreatedts'] > '2022-03-04 24:59:59+00:00')]
print("rows of timeframe 1 dataset:",len(time_before))
print("rows of timeframe 2 dataset:",len(time_after))

rows of timeframe 1 dataset: 73333
rows of timeframe 2 dataset: 88295


### (1) timeframe1: before

In [51]:
# Get nodes for bipartite network
users_1 = time_before["userid"]
tweets_1 = time_before["textid"]

# Create edge list for bipartite network
columns = ["userid", "textid"]
edges_1 = time_before[columns]
edges_1.to_csv("edgelist_t1.csv",sep = ",", header = True, encoding = "UTF-8",index=False)
# same list-type edgelist for network construction
edges_1 = [(user, textid) for user, textid in time_before[columns].values]

print(users_1.nunique(),"unique users in timeframe 1")
print(tweets_1.nunique(),"unique tweets in timeframe 1")

66530 unique users in timeframe 1
49702 unique tweets in timeframe 1


In [52]:
# construct projected one-mode network for users
# export edgelist for visualisation
B1 = nx.Graph()
B1.name = "Bipartite user-tweet network time 1"
B1.add_nodes_from(users_1, bipartite=0)
B1.add_nodes_from(tweets_1, bipartite=1)
B1.add_edges_from(edges_1)

users_nodes = {n for n, a in B1.nodes(data=True) if a["bipartite"] == 0}
tweets_nodes = set(B1) - users_nodes

B1_users = bipartite.weighted_projected_graph(B1, users_nodes)
B1_users.name = "One-mode user network 1"
B1_tweets = bipartite.weighted_projected_graph(B1, tweets_nodes)
B1_tweets.name = "One-mode tweet network 1"

print(nx.info(B1_users))
print(nx.info(B1_tweets))

Graph named 'One-mode user network 1' with 66532 nodes and 403244 edges
Graph named 'One-mode tweet network 1' with 49760 nodes and 10645 edges


In [65]:
# write edgelist of projected network
nx.write_weighted_edgelist(B1_users, "projected_w_user_edgelist_1.csv", delimiter=',', encoding='utf-8')   
# format the edgelist file for Gephi
gephi1_edgelist = pd.read_csv("projected_w_user_edgelist_1.csv",header=None)
gephi1_edgelist.columns=['Source','Target','weight']
gephi1_edgelist.to_csv("projected_w_user_edgelist_1.csv",sep = ",", header = True, encoding = "UTF-8",index=False)


### (2) timeframe2: After

In [54]:
# Get nodes for bipartite network
users_2 = time_after["userid"]
tweets_2 = time_after["textid"]

# Create edge list for bipartite network
columns = ["userid", "textid"]
edges_2 = time_after[columns]
edges_2.to_csv("edgelist_t2.csv",sep = ",", header = True, encoding = "UTF-8",index=False)
# same list-type edgelist for network construction
edges_2 = [(user, textid) for user, textid in time_after[columns].values]

print(users_2.nunique(),"unique users in timeframe 2")
print(tweets_2.nunique(),"unique tweets in timeframe 2")

72022 unique users in timeframe 2
56463 unique tweets in timeframe 2


In [55]:
# construct projected one-mode network for users
# export edgelist for visualisation
B2 = nx.Graph()
B2.name = "Bipartite user-tweet network time 2"
B2.add_nodes_from(users_2, bipartite=0)
B2.add_nodes_from(tweets_2, bipartite=1)
B2.add_edges_from(edges_2)

users_nodes = {n for n, a in B2.nodes(data=True) if a["bipartite"] == 0}
tweets_nodes = set(B2) - users_nodes

B2_users = bipartite.weighted_projected_graph(B2, users_nodes)
B2_users.name = "One-mode user network 2"
B2_tweets = bipartite.weighted_projected_graph(B2, tweets_nodes)
B2_tweets.name = "One-mode tweet network 2"

print(nx.info(B2_users))
print(nx.info(B2_tweets))

Graph named 'One-mode user network 2' with 72025 nodes and 959639 edges
Graph named 'One-mode tweet network 2' with 57184 nodes and 40248 edges


In [66]:
# write edgelist of projected network
nx.write_weighted_edgelist(B2_users, "projected_w_user_edgelist_2.csv", delimiter=',', encoding='utf-8') 
# format the edgelist file for Gephi
gephi2_edgelist = pd.read_csv("projected_w_user_edgelist_2.csv",header=None)
gephi2_edgelist.columns=['Source','Target','weight']
gephi2_edgelist.to_csv("projected_w_user_edgelist_2.csv",sep = ",", header = True, encoding = "UTF-8",index=False)


### Export nodelist for two timeframes seperately

In [79]:
# create node info database from pre-processed and labelled dataframe
node_columns = ["userid","username","usercreatedts","following","followers","totaltweets","NodeType"]
nodeslist = data[node_columns]
nodeslist.head()

Unnamed: 0,userid,username,usercreatedts,following,followers,totaltweets,NodeType
0,1497277564200517635,GulambiLamver,2022-02-25 18:29:39.000000,4,0,2,pro_Ukraine
1,4832941924,Mari_Berbec,2016-01-21 13:57:12.000000,921,224,2848,Neutral
2,2314945207,Outspoken_Sam,2014-01-28 05:46:59.000000,1162,642,3869,Neutral
3,2811043316,joncreed2,2014-09-15 10:10:32.000000,778,361,3960,pro_Ukraine
4,1239053406301335552,PChauhan_96,2020-03-15 04:59:11.000000,25,6,374,Neutral


In [86]:
# mark the nodes appeared in the projected user network
nodelist1 = list(B1_users.nodes)
nodelist2 = list(B2_users.nodes)

# lambda functions marking political orientation if related hashtags were found 
node1_check = lambda node: int(node) in nodelist1 # return true if userid of the nodes in overlap list
node2_check = lambda node: int(node) in nodelist2

data['nodeInG1'] = data['userid'].apply(node1_check)
data['nodeInG2'] = data['userid'].apply(node2_check)

# format node ID for Gephi
data["ID"] = data["userid"].apply(lambda x: int(x))

In [87]:
data.head()

Unnamed: 0,userid,username,textid,location,language,tweetid,tweetcreatedts,retweetcount,hashtags,text,...,totaltweets,tags_splitted,tags_lower,pro_Russian,pro_Ukraine,Neutral,NodeType,nodeInG1,nodeInG2,ID
0,1497277564200517635,GulambiLamver,0,गुलाबी_स्तान,en,1497433817086234624,2022-02-26 04:50:17,0,"[{'text': 'Ukraine', 'indices': [40, 48]}, {'t...",Coz barking is our fundamental right..\n\n#Ukr...,...,2,"[Ukraine, UkraineUnderAttack, RussiaUkraineWar...","[ukraine, ukraineunderattack, russiaukrainewar...",False,True,True,pro_Ukraine,True,False,1497277564200517635
1,4832941924,Mari_Berbec,1,"Berlin, Germany",en,1497476520742309891,2022-02-26 07:39:58,0,"[{'text': 'putin', 'indices': [53, 59]}, {'tex...",@charlieweissr Socialism has nothing to do wit...,...,2848,"[putin, trump]","[putin, trump]",False,False,True,Neutral,True,True,4832941924
2,2314945207,Outspoken_Sam,2,"Texas, USA",en,1497066569263767555,2022-02-25 04:30:58,0,"[{'text': 'Ukraine', 'indices': [272, 280]}]",We're seeing repeated demonstrations of who Bi...,...,3869,[Ukraine],[ukraine],False,False,True,Neutral,True,False,2314945207
3,2811043316,joncreed2,3,Nottinghamshire,und,1497308839401635845,2022-02-25 20:33:40,16,"[{'text': 'Ukraine', 'indices': [20, 28]}, {'t...",#Ukraine #Nottingham #nottinghamtogether #Stan...,...,3960,"[Ukraine, Nottingham, nottinghamtogether, Stan...","[ukraine, nottingham, nottinghamtogether, stan...",False,True,True,pro_Ukraine,True,False,2811043316
4,1239053406301335552,PChauhan_96,4,"Chamba, India",en,1497551063913619457,2022-02-26 12:36:11,276,"[{'text': 'Ukraine', 'indices': [34, 42]}]",That's a Leader ❣️ \n#Ukraine https://t.co/BpF...,...,374,[Ukraine],[ukraine],False,False,True,Neutral,True,False,1239053406301335552


In [92]:
# export
nodelist1 = data[data['nodeInG1']]
nodelist2 = data[data['nodeInG2']]
print(nodelist1["ID"].nunique(),"unique users identified in gephi graph in timeframe 1")
print(nodelist2["ID"].nunique(),"users identified in gephi graph in timeframe 2")
nodelist1.to_csv("nodelist1_poli.csv",sep = ",", header = True, encoding = "UTF-8",index=False)
nodelist2.to_csv("nodelist2_poli.csv",sep = ",", header = True, encoding = "UTF-8",index=False)

66527 unique users identified in gephi graph in timeframe 1
72019 users identified in gephi graph in timeframe 2
