# Network Analysis

In [None]:
# https://programminghistorian.org/en/lessons/exploring-and-analyzing-network-data-with-python

In [1]:
import pandas as pd
from collections import defaultdict
import itertools

import networkx as nx
#import nxviz
#from nxviz.plots import MatrixPlot, ArcPlot, CircosPlot
import matplotlib.pyplot as plt


from community import community_louvain

from pyvis.network import Network
#import plotly.offline as py
#import plotly.graph_objects as go

In [2]:
def create_edgelist(entity_list, retweets):
    """
    Creates a weighted edgelist based on entity co-occurence.
    
        Parameters:
            entity_list (list): List of lists of entities occuring together.
            retweets (list): Retweet counts, aligned element-wise with entity_list.
        
        Returns:
            df_edgelist (dataframe): Pandas dataframe with one row per unordered entity pair and the
                columns Entity_1, Entity_2, Co_Occurence (how often the pair was seen) and
                Retweet (sum of "retweet count + 1" over the lists containing the pair).
                The frame is empty, but still has these columns, when no pair co-occurs.
    """
    
    print("\nCreating Edgelist...\n\n")
    
    cooccurence_dict = defaultdict(lambda: defaultdict(int))
    
    # Iterate over all entity co-occurences
    for cooccurence, retweet in zip(entity_list, retweets):
        # Sorting makes (a, b) and (b, a) land on the same key. combinations()
        # yields nothing for fewer than two entities, so no length check is needed.
        for combination in itertools.combinations(sorted(cooccurence), 2):
            # Increment count of entity co-occurences by retweet count + 1
            cooccurence_dict[combination]["Co_Occurence"] += 1
            cooccurence_dict[combination]["Retweet"] += (retweet + 1)
    
    # Build the frame column by column so the expected columns exist even when
    # no pair was found (the previous from_dict/reset_index route raised a KeyError then).
    pairs = list(cooccurence_dict)
    df_edgelist = pd.DataFrame({
        "Entity_1": [pair[0] for pair in pairs],
        "Entity_2": [pair[1] for pair in pairs],
        "Co_Occurence": [cooccurence_dict[pair]["Co_Occurence"] for pair in pairs],
        "Retweet": [cooccurence_dict[pair]["Retweet"] for pair in pairs],
    })
    
    # An entity listed twice in one list produces a self-pair; drop those.
    # .copy() lets callers add columns without chained-assignment warnings.
    df_edgelist = df_edgelist[df_edgelist["Entity_1"] != df_edgelist["Entity_2"]].copy()
    
    # Return edgelist dataframe
    return df_edgelist

In [3]:
def get_entity_count(series):
    """
    Counts how often every non-empty entity appears.
    
        Parameters:
            series (iterable): Iterable of entity collections (e.g. one list of hashtags per row).
        
        Returns:
            counts (defaultdict): Maps each entity to its number of occurrences; zero-length entities are ignored.
    """
    counts = defaultdict(int)
    
    # Walk through all entities of all rows as one flat stream
    for entity in itertools.chain.from_iterable(series):
        if len(entity) == 0:
            continue
        counts[entity] += 1
    
    return counts

In [4]:
def create_overall_graph(df,df_edgelist,entity = "hashtags"):
    """
    Builds the full co-occurence graph and attaches centrality metadata to its nodes.
    
        Parameters:
            df (dataframe): Source dataframe; df[entity] holds one collection of entities per row.
            df_edgelist (dataframe): Edgelist with the columns Entity_1, Entity_2 and Weight.
            entity (str): Name of the entity column in df (also used in the printed summary).
        
        Returns:
            G (networkx.Graph): Graph whose nodes carry the attributes "entity_count",
                "Degree Centrality", "Eigenvector Centrality" and "size".
            df_node_attributes (dataframe): One row per node with these attributes,
                sorted by degree centrality in descending order.
    """
    print("\nCreating the Overall Graph...\n\n")

    G = nx.Graph()
    
    # The weight is stored twice: as "weight" for the centrality computation below
    # and as "width" (presumably used as the drawn edge thickness when plotting).
    for entity_1,entity_2,weight in zip(df_edgelist["Entity_1"],
                                        df_edgelist["Entity_2"],
                                        df_edgelist["Weight"]):
        G.add_edge(entity_1,entity_2, weight = weight, width=weight)
    
    
    # nx.info() was removed in NetworkX 3.0; print an equivalent summary there.
    if hasattr(nx, "info"):
        print(nx.info(G))
    else:
        print(f"Graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")
    print("\n----------------------------------------------\n\n")

    entity_count = get_entity_count(df[entity])
    degree_centrality = nx.degree_centrality(G)
    eigenvector_centrality = nx.eigenvector_centrality(G, weight = "weight")
    
    # Node size: degree centrality rescaled so that the best-connected node gets 30.
    # The maximum is computed once instead of once per node.
    max_degree_centrality = max(degree_centrality.values())
    size = {k:v/max_degree_centrality*30 for (k,v) in degree_centrality.items()}

    nx.set_node_attributes(G, entity_count, "entity_count" )
    nx.set_node_attributes(G, degree_centrality, "Degree Centrality")
    nx.set_node_attributes(G, eigenvector_centrality, "Eigenvector Centrality")
    nx.set_node_attributes(G, size, "size")
    
    df_node_attributes = pd.DataFrame.from_dict(dict(G.nodes(data=True)),
                                            orient="index").reset_index().rename({"index":"entity"}, axis = 1)
    df_node_attributes = df_node_attributes.sort_values(by="Degree Centrality", ascending=False).reset_index(drop=True)
    
    print(f"Most important {entity} in the network:")
    print(df_node_attributes[["entity","entity_count","Degree Centrality"]].head(15))
    print("\n----------------------------------------------\n\n")
    
    return G, df_node_attributes

In [5]:
def create_subgraph(df_node_attributes, G, node_count = 50, filename = "abc.html"):
    """
    Plots the best-connected part of the graph as an interactive HTML network coloured by cluster.
    
        Parameters:
            df_node_attributes (dataframe): Node table sorted by "Degree Centrality" (descending)
                with a 0..n-1 index, as returned by create_overall_graph.
            G (networkx.Graph): Graph whose nodes carry a "Degree Centrality" attribute.
            node_count (int): Rank used as cut-off; only nodes with a strictly higher degree
                centrality than the node at this rank are kept. All nodes are kept when the
                table has no row with that label.
            filename (str): Name of the HTML file the network is written to.
        
        Returns:
            None. The "Partition" and "color" node attributes are set on the subgraph.
            
        Note:
            Cluster colours are read from the module-level `colors` dict.
    """
    print("\nCreating Subgraph...\n\n")
    
    if node_count in df_node_attributes.index:
        cut_off = df_node_attributes.loc[node_count,"Degree Centrality"]
        selected_nodes = [n for n,v in G.nodes(data=True) if v["Degree Centrality"]>cut_off]
    else:
        # Fewer ranked nodes than requested: there is nothing to cut off.
        selected_nodes = list(G.nodes)
    
    G_sub = G.subgraph(selected_nodes)
    
    partition = community_louvain.best_partition(G_sub, random_state=42)
    n_clusters = len(set(partition.values()))
    print(f"Identified {n_clusters} clusters.")
    
    # Cycle through the palette so that more clusters than colours no longer raise a KeyError.
    palette = [colors[k] for k in sorted(colors)]
    node_colors = {k:palette[v % len(palette)] for (k,v) in partition.items()}
    
    nx.set_node_attributes(G_sub, partition, "Partition")
    nx.set_node_attributes(G_sub, node_colors, "color")
    
    print("\nPlotting Graph...\n\n")
    n = Network(height = "1500px", width ="100%", font_color ="#616a6b ")
    #n.show_buttons()
    n.from_nx(G_sub)
    n.barnes_hut()
    n.show_buttons(filter_=["nodes","edges","physics"])
    n.show(filename)
    print("\nGraph succesfully plotted\n\n")

In [6]:
def all_in_one(df, entity, filename,node_count=50, weighting = "Retweet",
               max_weight = 25, retweet_column = "retweet_count_sum"):
    """
    Runs the whole pipeline: edgelist -> overall graph -> plotted subgraph.
    
        Parameters:
            df (dataframe): Source dataframe with the entity column and the retweet column.
            entity (str): Column holding one collection of entities per row (e.g. "hashtags", "mentions").
            filename (str): Name of the HTML file the network plot is written to.
            node_count (int): Cut-off rank passed on to create_subgraph.
            weighting (str): Edgelist column used as edge weight ("Retweet" or "Co_Occurence").
            max_weight (number): Weight assigned to the strongest edge; all others are scaled proportionally.
            retweet_column (str): Column of df holding the retweet counts.
        
        Returns:
            None. Prints summaries and writes the HTML plot.
    """
    
    df_edgelist = create_edgelist(list(df[entity]), list(df[retweet_column]))
    
    # Rescale the chosen measure so that the strongest edge gets max_weight.
    # assign() returns a new frame, so no column is written onto a filtered slice.
    scale = df_edgelist[weighting].max()/max_weight
    df_edgelist = df_edgelist.assign(Weight=df_edgelist[weighting]/scale)
    df_edgelist = df_edgelist.sort_values(by="Weight", ascending=False)
    
    # Name the entity actually analysed (the heading used to say "hashtags" for mentions too).
    print(f"Most frequently co-occuring {entity} sorted by {weighting}:")
    print(df_edgelist[["Entity_1","Entity_2","Co_Occurence","Retweet"]].head(15))
    print("\n----------------------------------------------\n\n")
    
    G, df_node_attributes = create_overall_graph(df,
                                                 df_edgelist,
                                                 entity = entity)
    
    create_subgraph(df_node_attributes, G, node_count = node_count, filename = filename)

In [7]:
# Colour palette for the detected clusters: cluster id -> hex colour.
# Every value must be a plain "#rrggbb" string without surrounding whitespace.
colors = {0:"#6495ED",
          1:"#9FE2BF",
          2:"#FFBF00",
          3:"#717d7e",
          4:"#cb4335",
          5:"#7d3c98",
          6:"#273746",
          7:"#196f3d"}

## Import the data

In [8]:
# Load the Greece dataset. The three entity columns are read as text and
# split into lists of strings (presumably they were saved as stringified lists).
df_greece = pd.read_csv(
    r"C:\Users\jawo19ad\Dropbox (CBS)\Master thesis data\Event Dataframes\Clean\df_greece_clean.csv",
    converters=dict.fromkeys(
        ("hashtags", "mentions", "annotations"),
        lambda cell: cell.strip("[]").replace("'", "").split(", "),
    ),
).drop(columns="Unnamed: 0")  # drop the unnecessary index column
df_greece.head()

Unnamed: 0,source,text,lang,id,created_at,author_id,retweet_count,reply_count,like_count,quote_count,...,migrant,immigrant,asylum_seeker,other,date,text_coherent,retweet_count_sum,count,text_alphanum,text_stm
0,Hootsuite Inc.,PA Ambassador in Bosnia &amp; Herzegovina says...,en,1227019550912372737,2020-02-11 00:00:32+00:00,81136269,1,0,1,0,...,False,False,False,False,2020-02-11,PA Ambassador in Bosnia & Herzegovina says lif...,1,1,pa ambassador in bosnia herzegovina says life...,ambassador bosnia herzegovina say life europe ...
1,u.fooo.ooo,[🔴 NEWS] Greece plans floating sea border wall...,en,1227019556167864321,2020-02-11 00:00:33+00:00,1052191553802854407,0,0,0,0,...,False,False,False,False,2020-02-11,[ NEWS] Greece plans floating sea border wall ...,0,1,news greece plans floating sea border wall to...,news greece plan floating border wall keep
2,Twitter Web Client,Latest Battle for Idlib Could Send Another Wav...,en,1227021374780313601,2020-02-11 00:07:47+00:00,18570470,0,0,1,1,...,False,False,False,False,2020-02-11,Latest Battle for Idlib Could Send Another Wav...,8,5,latest battle for idlib could send another wav...,latest battle idlib send another wave europe w...
3,Tweepsmap,UNHCR calls for decisive action to end alarmin...,en,1227021789525614594,2020-02-11 00:09:26+00:00,62632306,0,0,0,0,...,False,False,False,False,2020-02-11,UNHCR calls for decisive action to end alarmin...,9,3,unhcr calls for decisive action to end alarmin...,unhcr call decisive action alarming condition ...
4,Twitter for iPhone,"It is not your own feet sinking in the mud, bo...",en,1227022233484308481,2020-02-11 00:11:12+00:00,2729959018,9,1,29,1,...,False,False,False,False,2020-02-11,"It is not your own feet sinking in the mud, boy.",9,1,it is not your own feet sinking in the mud boy.,foot sinking


In [9]:
# Load the Channel dataset. The three entity columns are read as text and
# split into lists of strings (presumably they were saved as stringified lists).
df_channel = pd.read_csv(
    r"C:\Users\jawo19ad\Dropbox (CBS)\Master thesis data\Event Dataframes\Clean\df_channel_clean.csv",
    converters=dict.fromkeys(
        ("hashtags", "mentions", "annotations"),
        lambda cell: cell.strip("[]").replace("'", "").split(", "),
    ),
).drop(columns="Unnamed: 0")  # drop the unnecessary index column
#df_channel.head()

In [10]:
# Load the Tigray dataset. The three entity columns are read as text and
# split into lists of strings (presumably they were saved as stringified lists).
df_tigray = pd.read_csv(
    r"C:\Users\jawo19ad\Dropbox (CBS)\Master thesis data\Event Dataframes\Clean\df_tigray_clean.csv",
    converters=dict.fromkeys(
        ("hashtags", "mentions", "annotations"),
        lambda cell: cell.strip("[]").replace("'", "").split(", "),
    ),
).drop(columns="Unnamed: 0")  # drop the unnecessary index column
#df_tigray.head()

In [11]:
# Load the Rohingya dataset. The three entity columns are read as text and
# split into lists of strings (presumably they were saved as stringified lists).
df_rohingya = pd.read_csv(
    r"C:\Users\jawo19ad\Dropbox (CBS)\Master thesis data\Event Dataframes\Clean\df_rohingya_clean.csv",
    converters=dict.fromkeys(
        ("hashtags", "mentions", "annotations"),
        lambda cell: cell.strip("[]").replace("'", "").split(", "),
    ),
).drop(columns="Unnamed: 0")  # drop the unnecessary index column
#df_rohingya.head()

## Create Networks

### Greece

In [52]:
# Count how often each annotation entity occurs in the Channel dataset.
test = get_entity_count(df_channel["annotations"])

In [53]:
# Show the annotation counts, most frequent first.
{
    entity: count
    for entity, count in sorted(test.items(), key=lambda pair: pair[1], reverse=True)
}

{'uk': 56001,
 'france': 32722,
 'europe': 11823,
 'eu': 11785,
 'britain': 10704,
 'brexit': 5529,
 'farage': 4540,
 'england': 3991,
 'u.k.': 3805,
 'priti patel': 3574,
 'germany': 3436,
 'bbc': 3355,
 'calais': 2817,
 'dover': 2592,
 'nigel farage': 2152,
 'greece': 2124,
 'boris': 2082,
 'turkey': 1958,
 'spain': 1773,
 'us': 1713,
 'patel': 1696,
 'dublin': 1624,
 'italy': 1591,
 'syria': 1529,
 'kent': 1285,
 'tories': 1174,
 'bbc news': 1154,
 'london': 1029,
 'africa': 1006,
 'ben': 985,
 'nhs': 981,
 'libya': 936,
 'boris johnson': 910,
 'jerry': 905,
 'johnson': 900,
 'scotland': 897,
 'america': 800,
 'australia': 787,
 'iraq': 786,
 'nigel': 774,
 'tory': 771,
 'trump': 725,
 'usa': 713,
 'royal navy': 656,
 'labour': 606,
 'india': 551,
 'lebanon': 546,
 'canada': 527,
 'priti': 526,
 'twitter': 519,
 'ireland': 518,
 'isis': 515,
 'sweden': 491,
 'covid': 490,
 'poland': 488,
 'paris': 465,
 'china': 464,
 'god': 462,
 'middle east': 460,
 'iran': 429,
 'tommy robinson':

In [16]:
# Hashtag network for the Greece dataset, weighted by co-occurrence.
all_in_one(
    df=df_greece,
    entity="hashtags",
    filename="nw_greece_hashtags.html",
    weighting="Co_Occurence",
)


Creating Edgelist...


Most frequently co-occuring hashtags sorted by Co_Occurence:
                Entity_1          Entity_2  Co_Occurence  Retweet
48                greece            turkey          3955    44678
575               greece          refugees          2645    19062
1373            refugees            turkey          2586    15474
102                syria            turkey          2345    16120
291                   eu            turkey          1820     9683
107             refugees             syria          1686     8280
218                   eu            greece          1543    12143
226                idlib             syria          1468    12428
37                europe            turkey          1401     9012
1496             erdogan            turkey          1312    15234
170               greece          migrants          1288     6837
155                   eu          refugees          1232     5203
202             migrants            turkey          1141 

In [17]:
# Mention network for the Greece dataset, weighted by co-occurrence.
all_in_one(
    df=df_greece,
    entity="mentions",
    filename="nw_greece_mentions_retweet.html",
    weighting="Co_Occurence",
)


Creating Edgelist...


Most frequently co-occuring hashtags sorted by Co_Occurence:
              Entity_1         Entity_2  Co_Occurence  Retweet
21961    eucopresident      vonderleyen           522     3471
1209     eu_commission      vonderleyen           470     1471
52728    eucopresident  primeministergr           341      726
125           refugees               un           321      863
21990     ep_president      vonderleyen           303      713
74193     ep_president    eucopresident           270      632
1180           skynews    stone_skynews           259      330
29609    eu_commission    eucopresident           249      632
1082   mindedmusically       the_leaver           210      227
969        jag11814459  mindedmusically           210      227
977        jag11814459       the_leaver           210      227
425         begentle50         dw2essex           209      226
872           dw2essex       the_leaver           209      226
437         begentle50  mindedmus

### Channel

In [18]:
# Hashtag network for the Channel dataset, weighted by co-occurrence.
all_in_one(
    df=df_channel,
    entity="hashtags",
    filename="nw_channel_hashtags.html",
    weighting="Co_Occurence",
)


Creating Edgelist...


Most frequently co-occuring hashtags sorted by Co_Occurence:
      Entity_1         Entity_2  Co_Occurence  Retweet
1739  migrants               uk           266      746
20    migrants         refugees           215     1995
123     france               uk           201     1012
2885    france         migrants           194      512
1413  refugees  refugeeswelcome           168      503
1400  refugees               uk           167      574
2207    brexit         migrants           164      887
1884  migrants       pritipatel           159      907
387         eu         refugees           156      938
370         eu         migrants           153     1180
2235   bbcnews          skynews           147      487
15      europe         refugees           132     1500
1314     dover         migrants           131      662
1779    brexit               eu           128      425
1390        eu               uk           123      903

----------------------------------

In [19]:
# Mention network for the Channel dataset, weighted by co-occurrence.
all_in_one(
    df=df_channel,
    entity="mentions",
    filename="nw_channel_mentions_retweet.html",
    weighting="Co_Occurence",
)


Creating Edgelist...


Most frequently co-occuring hashtags sorted by Co_Occurence:
               Entity_1       Entity_2  Co_Occurence  Retweet
755        borisjohnson     pritipatel          5505    30845
3905       nigel_farage     pritipatel          3689    10659
111609   benandjerrysuk     pritipatel          2860     4009
4088       borisjohnson   nigel_farage          1706     6488
1230         pritipatel   ukhomeoffice          1639     6109
13670        davidlammy     pritipatel          1148     1860
746     10downingstreet   borisjohnson           778     3346
2541       borisjohnson  conservatives           741     3023
2724      conservatives     pritipatel           729     2922
750     10downingstreet     pritipatel           715     2815
2525            bbcnews        skynews           626     2046
4943       nigel_farage   ukhomeoffice           565     3465
6501              iromg      talkradio           442     1052
37665       britishalba     pritipatel         

### Tigray

In [20]:
# Hashtag network for the Tigray dataset, weighted by co-occurrence.
all_in_one(
    df=df_tigray,
    entity="hashtags",
    filename="nw_tigray_hashtags.html",
    weighting="Co_Occurence",
)


Creating Edgelist...


Most frequently co-occuring hashtags sorted by Co_Occurence:
                 Entity_1        Entity_2  Co_Occurence  Retweet
5                  tigray  tigraygenocide          3088    39241
41               ethiopia          tigray          1893    38665
7382  allowaccesstotigray  tigraygenocide          1280    15710
174               eritrea          tigray          1234    22856
426       bidentakeaction  tigraygenocide          1201    10745
114               eritrea        ethiopia          1178    23201
147        tigraycantwait  tigraygenocide          1143     8874
195    eritreaoutoftigray  tigraygenocide           802     3853
4         stopwarontigray  tigraygenocide           798     7297
3342       tigraygenocide      unscactnow           754    13740
334              ethiopia  tigraygenocide           729     9157
121              eritrean          tigray           681     8864
7381  allowaccesstotigray          tigray           640     8854
616  

In [21]:
# Mention network for the Tigray dataset, weighted by co-occurrence.
all_in_one(
    df=df_tigray,
    entity="mentions",
    filename="nw_tigray_mentions.html",
    weighting="Co_Occurence",
)


Creating Edgelist...


Most frequently co-occuring hashtags sorted by Co_Occurence:
              Entity_1       Entity_2  Co_Occurence  Retweet
19            refugees             un          3300    28452
20            refugees  unhumanrights          2551    30234
675            amnesty       refugees          2387    52415
684                hrw       refugees          2297    29978
674            amnesty            hrw          1747    31150
20505         refugees     secblinken          1745    22296
1142               hrw             un          1650    22598
12344            potus       refugees          1532     9756
55       eu_commission       refugees          1526    25934
3      antonioguterres       refugees          1511    12486
14228            potus             un          1497    26285
20496            potus     secblinken          1468    28074
21280       secblinken             un          1441    32955
257           joebiden       refugees          1388    19664


### Myanmar

In [22]:
# Hashtag network for the Rohingya dataset, weighted by co-occurrence.
all_in_one(
    df=df_rohingya,
    entity="hashtags",
    filename="nw_rohingya_hashtags.html",
    weighting="Co_Occurence",
)


Creating Edgelist...


Most frequently co-occuring hashtags sorted by Co_Occurence:
                 Entity_1                 Entity_2  Co_Occurence  Retweet
12             bangladesh                 rohingya          1068    27946
13                myanmar                 rohingya           577    25261
14314           mar30coup  whatshappeninginmyanmar           574     9379
271       milkteaalliance  whatshappeninginmyanmar           572    21011
1031              myanmar  whatshappeninginmyanmar           558    47855
105              refugees                 rohingya           547    12009
40                  karen  whatshappeninginmyanmar           508    16630
6682            mar18coup  whatshappeninginmyanmar           502    16423
21016  noflyzoneinmyanmar  whatshappeninginmyanmar           500    11277
17790            apr4coup  whatshappeninginmyanmar           464    15053
95                refugee                 rohingya           390     8184
6771     internetshutdown  

In [23]:
# Mention network for the Rohingya dataset, weighted by co-occurrence.
all_in_one(
    df=df_rohingya,
    entity="mentions",
    filename="nw_rohingya_mentions.html",
    weighting="Co_Occurence",
)


Creating Edgelist...


Most frequently co-occuring hashtags sorted by Co_Occurence:
          Entity_1         Entity_2  Co_Occurence  Retweet
73        refugees               un           615     1126
82        refugees    unhumanrights           562     2329
277   rapporteurun  schranerburgen1           532     1432
689     freya_cole          kenroth           531     1219
276   rapporteurun         refugees           521     4644
431     freya_cole     rapporteurun           508     1159
83        refugees           un_hrc           498      777
833   rapporteurun           un_hrc           426     1059
278   rapporteurun               un           414     1095
1066    freya_cole        tostevinm           408      953
690        kenroth     rapporteurun           395      754
281       refugees  schranerburgen1           384      719
1137       kenroth        tostevinm           356      924
794             un           un_hrc           352     1696
1067    freya_cole           u

In [123]:
# List the columns available in the Rohingya dataframe.
df_rohingya.columns

Index(['source', 'text', 'lang', 'id', 'created_at', 'author_id',
       'retweet_count', 'reply_count', 'like_count', 'quote_count',
       'withheld.scope', 'hashtags', 'mentions', 'annotations', 'text_clean',
       'year', 'calendar_week', 'year_month', 'year_calendar_week', 'date',
       'refugee', 'migrant', 'immigrant', 'asylum_seeker', 'other',
       'text_coherent', 'retweet_count_sum', 'count', 'text_alphanum',
       'text_stm'],
      dtype='object')

## Create edgelist

In [162]:
# Hashtag edgelist for the Tigray dataset, plus both measures rescaled
# so that the strongest edge of each measure gets the value 10.
df_edgelist_hash = create_edgelist(
    list(df_tigray["hashtags"]),
    list(df_tigray["retweet_count_sum"]),
).assign(
    Co_Occurence_Norm=lambda edges: edges["Co_Occurence"] / (max(edges["Co_Occurence"]) / 10),
    Retweet_Norm=lambda edges: edges["Retweet"] / (max(edges["Retweet"]) / 10),
)
df_edgelist_hash


Creating Edgelist...




Unnamed: 0,Entity_1,Entity_2,Co_Occurence,Retweet,Co_Occurence_Norm,Retweet_Norm
0,stopabiy,stopwarontigray,34,99,0.110104,0.025229
1,stopabiy,tigray,20,48,0.064767,0.012232
2,stopabiy,tigraygenocide,38,88,0.123057,0.022426
3,stopwarontigray,tigray,447,4283,1.447539,1.091460
4,stopwarontigray,tigraygenocide,798,7297,2.584197,1.859535
...,...,...,...,...,...,...
37616,refugeecamps,somalia,1,2,0.003238,0.000510
37617,refugeecamps,southsudan,1,2,0.003238,0.000510
37618,canadians,cdnpoli,1,6,0.003238,0.001529
37619,canadians,tigray,1,6,0.003238,0.001529


In [163]:
#df_edgelist_hash = create_edgelist(list(df_greece["hashtags"])).sort_values(by="Weight",ascending=False).reset_index()
#df_edgelist_hash = df_edgelist_hash[df_edgelist_hash["Entity_1"] != df_edgelist_hash["Entity_2"]]

## Create graph

In [164]:
# Build the undirected hashtag graph; the normalised co-occurrence is stored
# on every edge both as "weight" and as "width".
G_hashtag = nx.Graph()
G_hashtag.add_edges_from(
    (source, target, {"weight": strength, "width": strength})
    for source, target, strength in zip(
        df_edgelist_hash["Entity_1"],
        df_edgelist_hash["Entity_2"],
        df_edgelist_hash["Co_Occurence_Norm"],
    )
)

In [165]:
# nx.info() was removed in NetworkX 3.0; print an equivalent summary there.
if hasattr(nx, "info"):
    print(nx.info(G_hashtag))
else:
    print(f"Graph with {G_hashtag.number_of_nodes()} nodes and {G_hashtag.number_of_edges()} edges")

Name: 
Type: Graph
Number of nodes: 5212
Number of edges: 37417
Average degree:  14.3580


## Provide node metadata

In [166]:
# How often each hashtag occurs in the Tigray dataset
hashtag_count = get_entity_count(df_tigray["hashtags"])

# Degree centrality of every node
degree_centrality = nx.degree_centrality(G_hashtag)

# Eigenvector centrality, taking the edge weights into account
eigenvector_centrality = nx.eigenvector_centrality(G_hashtag, weight = "weight")

# Node size: degree centrality rescaled so that the best-connected node gets 10.
# The maximum is computed once instead of once per node.
max_degree_centrality = max(degree_centrality.values())
size = {k:v/max_degree_centrality*10 for (k,v) in degree_centrality.items()}

In [167]:
# Largest degree centrality in the graph (the value the node sizes are normalised by).
max(degree_centrality.values())

0.3273843791978507

In [168]:
# Attach the computed metadata to the nodes of the full hashtag graph.
nx.set_node_attributes(G_hashtag, values=hashtag_count, name="hashtag_count")
nx.set_node_attributes(G_hashtag, values=degree_centrality, name="Degree Centrality")
nx.set_node_attributes(G_hashtag, values=eigenvector_centrality, name="Eigenvector Centrality")
nx.set_node_attributes(G_hashtag, values=size, name="size")

## Calculate network measures

In [169]:
# Density: Ratio of actual edges in the network to all possible edges
#density = nx.density(G_hashtag)
# Transitivity: Ratio of all triangles over all possible triangles (triadic closure)
#trans = nx.transitivity(G_hashtag)
# Reciprocity:
#reci = nx.reciprocity(G_hashtag)

#print(f"Density: {density}\nTransitivity: {trans}\nReciprocity: {reci}")

In [170]:
# Diameter: Longest of all shortest paths in network
#nx.is_connected(G_hashtag)
# nx.diameter(G_hashtag)
# nx.average_shortest_path_length(G)

#components = nx.connected_components(G_hashtag)
#largest_component = max(components, key=len)
#subgraph = G_hashtag.subgraph(largest_component)
#diameter = nx.diameter(subgraph)
#print("Network diameter of largest component:", diameter)

## Create df with node attributes

In [171]:
# One row per node with all of its attributes, best-connected nodes first.
df_node_attributes = (
    pd.DataFrame.from_dict(dict(G_hashtag.nodes(data=True)), orient="index")
    .reset_index()
    .rename(columns={"index": "hashtag"})
    .sort_values(by="Degree Centrality", ascending=False)
    .reset_index(drop=True)
)
df_node_attributes.head(15)

Unnamed: 0,hashtag,hashtag_count,Degree Centrality,Eigenvector Centrality,size
0,ethiopia,4668,0.327384,0.333967,10.0
1,tigray,8474,0.308962,0.543133,9.43728
2,tigraygenocide,9415,0.230474,0.518959,7.039859
3,refugees,940,0.191518,0.090212,5.849941
4,sudan,1491,0.181155,0.106225,5.533411
5,eritrea,2751,0.176358,0.250274,5.38687
6,tplf,2695,0.147189,0.130918,4.495897
7,eritrean,1645,0.130877,0.130827,3.997655
8,un,492,0.091729,0.05105,2.801876
9,ethiopian,682,0.090961,0.053681,2.778429


## Create subgraph that only consists of top 50 nodes with highest degree centrality

In [172]:
# The degree centrality of the node at rank 50 serves as threshold;
# only nodes strictly above it are kept.
cut_off = df_node_attributes.loc[50, "Degree Centrality"]
selected_nodes = [
    node
    for node, attributes in G_hashtag.nodes(data=True)
    if attributes["Degree Centrality"] > cut_off
]

In [173]:
# Restrict the graph to the selected high-centrality nodes.
G_hashtag_sub = G_hashtag.subgraph(selected_nodes)

## Identify clusters

In [174]:
# Detect communities in the subgraph with the Louvain implementation; random_state
# is fixed, presumably so that re-runs yield the same partition.
partition = community_louvain.best_partition(G_hashtag_sub, random_state=42)
print(f"Identified {len(set(partition.values()))} clusters.")

Identified 2 clusters.


In [175]:
# Print the members of every cluster (assumes the cluster ids are 0..k-1).
for i in range(len(set(partition.values()))):
    print([node for node, cluster in partition.items() if cluster == i])
    print("------")

['africa', 'somalia', 'tigrayans', 'eu', 'tplf', 'yemen', 'ethiopiaprevails', 'syria', 'tplfcrimes', 'tplfisaterroristgroup', 'maikadramassacre', 'us', 'ethiopia', 'covid19', 'eritrean', 'southsudan', 'unityforethiopia', 'biden', 'ethiopian', 'migrants', 'usa', 'humanitarian', 'libya', 'canada', 'genocide', 'sudan', 'amhara', 'refugees', 'maikadra', 'europe', 'tplfisthecause', 'un', 'unhcr', 'immigrants', 'refugee', 'humanrights', 'eritrea']
------
['tigraycantwait', 'bidenactnow', 'unscactnow', 'tigray', 'humanitariancorridor', 'eritreaoutoftigray', 'unsc', 'tigraygenocide', 'abiyahmedali', 'eritreantroopsoutoftigray', 'stopwarontigray', 'bidentakeaction', 'allowaccesstotigray']
------


In [176]:
# Display the node -> cluster id mapping.
partition

{'africa': 0,
 'somalia': 0,
 'tigraycantwait': 1,
 'tigrayans': 0,
 'eu': 0,
 'bidenactnow': 1,
 'tplf': 0,
 'yemen': 0,
 'ethiopiaprevails': 0,
 'syria': 0,
 'tplfcrimes': 0,
 'unscactnow': 1,
 'tplfisaterroristgroup': 0,
 'maikadramassacre': 0,
 'tigray': 1,
 'us': 0,
 'ethiopia': 0,
 'covid19': 0,
 'eritrean': 0,
 'southsudan': 0,
 'unityforethiopia': 0,
 'biden': 0,
 'ethiopian': 0,
 'humanitariancorridor': 1,
 'eritreaoutoftigray': 1,
 'migrants': 0,
 'usa': 0,
 'unsc': 1,
 'tigraygenocide': 1,
 'humanitarian': 0,
 'abiyahmedali': 1,
 'libya': 0,
 'canada': 0,
 'genocide': 0,
 'eritreantroopsoutoftigray': 1,
 'sudan': 0,
 'amhara': 0,
 'stopwarontigray': 1,
 'refugees': 0,
 'maikadra': 0,
 'bidentakeaction': 1,
 'europe': 0,
 'tplfisthecause': 0,
 'un': 0,
 'unhcr': 0,
 'immigrants': 0,
 'refugee': 0,
 'allowaccesstotigray': 1,
 'humanrights': 0,
 'eritrea': 0}

In [177]:
# Keep only as many palette entries as there are clusters.
colors_partition = {
    cluster_id: hex_code
    for cluster_id, hex_code in colors.items()
    if cluster_id < len(set(partition.values()))
}

In [178]:
# Give every node the colour of the cluster it belongs to.
node_colors = {
    node: colors_partition[cluster_id]
    for node, cluster_id in partition.items()
}

In [179]:
# Store cluster membership and colour on the nodes of the subgraph.
nx.set_node_attributes(G_hashtag_sub, values=partition, name="Partition")
nx.set_node_attributes(G_hashtag_sub, values=node_colors, name="color")

In [180]:
# Node table of the subgraph, grouped by cluster and ordered by degree
# centrality (highest first) within each cluster.
df_node_attributes1 = (
    pd.DataFrame.from_dict(dict(G_hashtag_sub.nodes(data=True)), orient="index")
    .reset_index()
    .rename(columns={"index": "hashtag"})
    .sort_values(by=["Partition", "Degree Centrality"], ascending=[True, False])
    .reset_index(drop=True)
)
df_node_attributes1

Unnamed: 0,hashtag,hashtag_count,Degree Centrality,Eigenvector Centrality,size,Partition,color
0,ethiopia,4668,0.327384,0.333967,10.0,0,#6495ED
1,refugees,940,0.191518,0.090212,5.849941,0,#6495ED
2,sudan,1491,0.181155,0.106225,5.533411,0,#6495ED
3,eritrea,2751,0.176358,0.250274,5.38687,0,#6495ED
4,tplf,2695,0.147189,0.130918,4.495897,0,#6495ED
5,eritrean,1645,0.130877,0.130827,3.997655,0,#6495ED
6,un,492,0.091729,0.05105,2.801876,0,#6495ED
7,ethiopian,682,0.090961,0.053681,2.778429,0,#6495ED
8,refugee,277,0.076761,0.02209,2.344666,0,#6495ED
9,africa,139,0.076377,0.011029,2.332943,0,#6495ED


## Draw network graph

In [None]:
# Inspect the nodes of the subgraph together with their attribute dictionaries.
G_hashtag_sub.nodes(data=True)

In [None]:
# Create the pyvis network canvas that the subgraph is drawn on.
n = Network(height = "1500px", width ="100%", font_color ="#616a6b ")
#n.show_buttons()
# Copy nodes and edges (including their attributes) from the networkx subgraph.
n.from_nx(G_hashtag_sub)
# Select the Barnes-Hut physics layout.
n.barnes_hut()
# Only offer the node, edge and physics settings in the configuration panel.
n.show_buttons(filter_=["nodes","edges","physics"])
# Write the network to an HTML file and display it.
n.show("network_test.html")