# Network analysis based on topic extraction
In this part we are performing a network analysis.

## Imports

In [1]:
#basic packages
import numpy as np
import pandas as pd 
import datetime
import math
import ast #for transforming dataframes
from time import time
from tqdm import tqdm

#Network analysis 
import seaborn as sns
from networkx.algorithms import bipartite
from networkx.drawing.layout import bipartite_layout
import networkx as nx
from community import community_louvain

#data visualizations
import plotly.express as px
import matplotlib.pyplot as plt

# Adittional dataviz
import nxviz as nv
import altair as alt

# Visualization of NA
import holoviews as hv
from holoviews import opts
hv.extension('bokeh')
from bokeh.plotting import show
kwargs = dict(width=800, height=800, xaxis=None, yaxis=None)
opts.defaults(opts.Nodes(**kwargs), opts.Graph(**kwargs))
from holoviews.operation.datashader import datashade, bundle_graph

from sklearn.metrics.pairwise import cosine_distances

sns.set(color_codes=False, rc={'figure.figsize':(10,8)})

nxviz has a new API! Version 0.7.4 onwards, the old class-based API is being
deprecated in favour of a new API focused on advancing a grammar of network
graphics. If your plotting code depends on the old API, please consider
pinning nxviz at version 0.7.4, as the new API will break your old code.

To check out the new API, please head over to the docs at
https://ericmjl.github.io/nxviz/ to learn more. We hope you enjoy using it!

(This deprecation message will go away in version 1.0.)



## Network Analysis
We are going to explore co-authorship network in authors that wrote about AI. 
Our goal is to get the eigenvector centrality to establish our well connected author in order to set up the recommender system. 

In [2]:
#Read the docs that had the topic modelling done with the BERTopic
docs = pd.read_csv('/Users/yasminesarraj/Documents/GitHub/M3-Assignment-Deep-Learning/Assignment_4/data/scopus.csv')

#check out the other preprocess from NA in MarAI

In [3]:
docs

Unnamed: 0,Authors,Author(s) ID,Title,Year,Source title,Volume,Issue,Art. No.,Page start,Page end,...,ISBN,CODEN,PubMed ID,Language of Original Document,Abbreviated Source Title,Document Type,Publication Stage,Open Access,Source,EID
0,"Taplin R., Nowak A.Z.",57988586500;57211105275;,"Climate change, pandemics and artificial intel...",2023,"Artificial Intelligence, Intellectual Property...",,,,92,108,...,9781000824193; 9781032418872,,,English,"Artif. Intell., Intellect. Prop., Cyber Risk a...",Book Chapter,Final,,Scopus,2-s2.0-85143113025
1,Taplin R.,57988586500;,"Artificial intelligence, intellectual property...",2023,"Artificial Intelligence, Intellectual Property...",,,,1,13,...,9781000824193; 9781032418872,,,English,"Artif. Intell., Intellect. Prop., Cyber Risk a...",Book Chapter,Final,,Scopus,2-s2.0-85143111643
2,Taplin R.,57988586500;,"Artificial intelligence, intellectual property...",2023,"Artificial Intelligence, Intellectual Property...",,,,1,156,...,9781000824193; 9781032418872,,,English,"Artif. Intell., Intellect. Prop., Cyber Risk a...",Book,Final,,Scopus,2-s2.0-85143106152
3,Bartenev V.,57988658100;,The physical concept of information and artifi...,2023,"Artificial Intelligence, Intellectual Property...",,,,41,63,...,9781000824193; 9781032418872,,,English,"Artif. Intell., Intellect. Prop., Cyber Risk a...",Book Chapter,Final,,Scopus,2-s2.0-85143104459
4,Friedman K.,57988586900;,Artificial intelligence: A looming economic an...,2023,"Artificial Intelligence, Intellectual Property...",,,,109,124,...,9781000824193; 9781032418872,,,English,"Artif. Intell., Intellect. Prop., Cyber Risk a...",Book Chapter,Final,,Scopus,2-s2.0-85143098366
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,"Alzahrani N., Al-Baity H.H.",57523515700;55481129500;,Object Recognition System for the Visually Imp...,2023,Electronics (Switzerland),12,3,541,,,...,,,,English,Electronics (Switzerland),Article,Final,"All Open Access, Gold",Scopus,2-s2.0-85147814486
1996,"Tian Y., Zhang Y., Zhang H.",9272686000;57214808837;55685619100;,Recent Advances in Stochastic Gradient Descent...,2023,Mathematics,11,3,682,,,...,,,,English,Mathematics,Review,Final,"All Open Access, Gold, Green",Scopus,2-s2.0-85147814051
1997,"Sharma K., Doriya R., Shastri S., Aljrees T., ...",57207403333;54917442000;58098500200;5721536020...,Development of Cloud Autonomous System for Enh...,2023,Electronics (Switzerland),12,3,683,,,...,,,,English,Electronics (Switzerland),Article,Final,"All Open Access, Gold, Green",Scopus,2-s2.0-85147812633
1998,"Elia S., Pompeo E., Santone A., Rigoli R., Chi...",7003353842;7003385498;6603700255;58098472700;2...,Radiomics and Artificial Intelligence Can Pred...,2023,Diagnostics,13,3,384,,,...,,,,English,Diagn.,Article,Final,"All Open Access, Gold, Green",Scopus,2-s2.0-85147811866


In [4]:
docs.columns

Index(['Authors', 'Author(s) ID', 'Title', 'Year', 'Source title', 'Volume',
       'Issue', 'Art. No.', 'Page start', 'Page end', 'Page count', 'Cited by',
       'DOI', 'Link', 'Affiliations', 'Authors with affiliations', 'Abstract',
       'Author Keywords', 'Index Keywords', 'Correspondence Address',
       'Editors', 'Publisher', 'ISSN', 'ISBN', 'CODEN', 'PubMed ID',
       'Language of Original Document', 'Abbreviated Source Title',
       'Document Type', 'Publication Stage', 'Open Access', 'Source', 'EID'],
      dtype='object')

In [5]:
docs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 33 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Authors                        2000 non-null   object 
 1   Author(s) ID                   2000 non-null   object 
 2   Title                          2000 non-null   object 
 3   Year                           2000 non-null   int64  
 4   Source title                   2000 non-null   object 
 5   Volume                         1990 non-null   object 
 6   Issue                          1223 non-null   object 
 7   Art. No.                       1336 non-null   object 
 8   Page start                     648 non-null    object 
 9   Page end                       596 non-null    object 
 10  Page count                     1 non-null      float64
 11  Cited by                       231 non-null    float64
 12  DOI                            1998 non-null   o

In [6]:
def clean_up(data):
    """
    Compact function to clean up the data

    """
    for i in tqdm(range(len(docs)), desc = 'Clean up'):
        df = data.copy()
        
        #Delete missings
        df_clean = df.dropna(subset=['Author Keywords'])
                
        #Format the dtypes of year column + extract the year
        df_clean = df_clean.rename(columns={"Year": "date"}) #rename year column
        df_clean["date"] = pd.to_datetime(df_clean["date"]) #format to datetime
    return df_clean

In [7]:
#Execute function
data_clean = clean_up(docs)

data_clean.head()

Clean up: 100%|██████████| 2000/2000 [00:03<00:00, 650.44it/s]


Unnamed: 0,Authors,Author(s) ID,Title,date,Source title,Volume,Issue,Art. No.,Page start,Page end,...,ISBN,CODEN,PubMed ID,Language of Original Document,Abbreviated Source Title,Document Type,Publication Stage,Open Access,Source,EID
6,"Krishnan P., Jain K., Aldweesh A., Prabu P., B...",57194605863;57215744374;57200513576;5720079815...,OpenStackDP: a scalable network security frame...,1970-01-01 00:00:00.000002023,Journal of Cloud Computing,12,1,26,,,...,,,,English,J. Cloud Comput.,Article,Final,"All Open Access, Gold",Scopus,2-s2.0-85149016620
10,"Cai J., Xu Z., Sun X., Guo X., Fu X.",57715240900;58115802100;57211301540;5811503830...,Validity and reliability of the Chinese versio...,1970-01-01 00:00:00.000002023,Psicologia: Reflexao e Critica,36,1,5,,,...,,,,English,Psicol. Reflexao Crit.,Article,Final,"All Open Access, Gold, Green",Scopus,2-s2.0-85148711886
11,"Tlili A., Shehata B., Adarkwah M.A., Bozkurt A...",57188567626;57782639700;57219025710;5656618160...,What if the devil is my guardian angel: ChatGP...,1970-01-01 00:00:00.000002023,Smart Learning Environments,10,1,15,,,...,,,,English,Smart Learn. Environ.,Article,Final,"All Open Access, Gold",Scopus,2-s2.0-85148704172
12,"Cheikh Youssef S., Haram K., Noël J., Patel V....",57891446400;58113891400;57212704902;8564080000...,Evolution of the digital operating room: the p...,1970-01-01 00:00:00.000002023,Langenbeck's Archives of Surgery,408,1,95,,,...,,LASUF,36807211.0,English,Langenbeck's Arch. Surg.,Review,Final,,Scopus,2-s2.0-85148679306
19,"Wang J., Dou J., Han J., Li G., Tao J.",57192107513;58108655300;58108630800;5595179220...,A population-based study to assess two convolu...,1970-01-01 00:00:00.000002023,BMC Oral Health,23,1,109,,,...,,,36803132.0,English,BMC Oral Health,Article,Final,"All Open Access, Gold",Scopus,2-s2.0-85148394539


We start by creating different edges that we will use as both edges and or as nodes attributes
- Author_paper: each author and the paper that they have wrote this will be the base of our bipartite graph 

It's important to note that we are using ID's instead of names as they are more Institution ID's, this might be due to the face that some institution have the same name.

In [8]:
author_paper= []

for i in data_clean.iterrows(): #iterate through each row
  targets_x = [i.strip() for i in i[1]['Author(s) ID'].split(';')]
  edges_x = [(i[1]['Title'], j) for j in targets_x] #connect to the paper
  
    #append each of the found edges to the list
  author_paper.extend(edges_x)

In [15]:
paper_abstract= []

for i in data_clean.iterrows(): #iterate through each row
  targets_y = [i.strip() for i in i[1]['Title']]
  edges_y = [(i[1]['Abstract'], j) for j in targets_x] #connect to the paper
  
    #append each of the found edges to the list
  paper_abstract.extend(edges_y)

We create a dictionnary from the latest info of each author that will be the nodes attributes. 

In [12]:
#Here we make a dictionnary that will be used to feed the node attributes 
topic_author = dict(list(zip(data_clean.author_id, data_clean.dropout_binary_update)))

### Creating the graph 
In our case of co-authorship we will create a bipartite weighted graph. The collaboration weighted projection is the projection of the bipartite network B onto the specified nodes with weights assigned using Newman’s collaboration model. 

In [9]:
#make a df with only authors and get the unique values of the authors and title that will be our nodes
df5 = pd.DataFrame(author_paper)
author_node = df5[1].drop_duplicates()
title_node = df5[0].drop_duplicates()

In [21]:
df3 = pd.DataFrame(author_country)
author_node = df3[1].drop_duplicates()
title_node = df3[0].drop_duplicates()

In [16]:
#make a df with only authors and get the unique values of the authors and title that will be our nodes
df4 = pd.DataFrame(author_institution_name)
author_node = df4[1].drop_duplicates()
institution_node = df4[0].drop_duplicates()

In [10]:
#Creating the Bipartitie graph
#connecting authors to papers
B = nx.path_graph(author_node)
B.add_edges_from(author_paper)
B.add_nodes_from(title_node, bipartite='title')
B.add_nodes_from(author_node, bipartite='author')
G = bipartite.collaboration_weighted_projected_graph(B, B.nodes)

In [11]:
G = bipartite.collaboration_weighted_projected_graph(B, B.nodes)

### Nodes attributes
There is a total of 6 nodes attributes
- Eigenvector centrality: an algorithm that measures the transitive influence of nodes
- Community louvain partition: Yields partitions for each level of the Louvain Community Detection Algorithm
- Author share: Calculated previously
- Institution Name
- Dropout year: Calculated previously
- Type author: Made previously 

In [12]:
# Here we can calculate different centrality indicators as well as partition (community detection)
centrality_eig = nx.eigenvector_centrality_numpy(G, weight = 'weight') #WE MAINLY FOCUS ON EIGENVECTOR 
partition = community_louvain.best_partition(G)

In [13]:
#We set the node attributes to the centralities, partition, institution and type of author. 
#This is gonna help reading the graph easily 
#We set it as colors for the graph
nx.set_node_attributes(G, author_share, 'share')
nx.set_node_attributes(G, centrality_eig, 'eig')
nx.set_node_attributes(G, partition, 'partition')
nx.set_node_attributes(G, institution, 'institution')
nx.set_node_attributes(G, dropout_year, 'dropout year')
nx.set_node_attributes(G, type_author, 'switcher')
nx.set_node_attributes(G, country_author, 'country')
nx.set_node_attributes(G, year, 'year')




In [14]:
nx.set_node_attributes(G, author_share_all, 'share')
nx.set_node_attributes(G, centrality_eig, 'eig')
nx.set_node_attributes(G, partition, 'partition')
nx.set_node_attributes(G, institution_all, 'institution')
nx.set_node_attributes(G, dropout_year_all, 'dropout year')
nx.set_node_attributes(G, type_author_all, 'switcher')
nx.set_node_attributes(G, country_author_all, 'country')
nx.set_node_attributes(G, year_all, 'year')

In [15]:
#This is how you turn a Graph object (NetworkX) to a Dataframe
nodes_df = pd.DataFrame.from_dict(
    dict(G.nodes(data=True)), orient='index', columns=
['bipartite','share','eig','partition','institution','switcher','year','country','dropout year']
)

In [16]:
#This is how you turn a Graph object (NetworkX) to a Dataframe
nodes_df_2 = pd.DataFrame.from_dict(
    dict(G.nodes(data=True)), orient='index', columns=
['bipartite','share','eig','partition','institution','switcher','year','country','dropout year']
)

In [17]:
#Here we set the aurhor set of nodes as author
author = (nodes_df['bipartite'] == 'author')
nodes_df[author]

# How many communities are there (identified automatically)
nodes_df[author].partition.nunique()

169

In [18]:
# selecting rows based on condition
df_2018 = nodes_df[author].loc[nodes_df['year'] == 2018]
df_2017 = nodes_df[author].loc[nodes_df['year'] == 2017]
df_2019 = nodes_df[author].loc[nodes_df['year'] == 2019]
df_2020 = nodes_df[author].loc[nodes_df['year'] == 2020]



In [19]:
# selecting rows based on condition
df_2017 = nodes_df_2[author].loc[nodes_df_2['year'] == 2017]
df_2018 = nodes_df_2[author].loc[nodes_df_2['year'] == 2018]
df_2019 = nodes_df_2[author].loc[nodes_df_2['year'] == 2019]
df_2020 = nodes_df_2[author].loc[nodes_df_2['year'] == 2020]

### Subsetting and observing the results
In order to get a readable graph we will subset the nodes to the top writers. We focus first on authors that have switched and sort them with the highest amount of shares. Firstly we create some tables to see the different between the two sector and finally we'll comment on the NA graph

In [20]:
#We start first by subsettting with author that have switched 
nodes_df_switcher = nodes_df[author][nodes_df[author].switcher.isin([1])]

In [21]:
# For that we can e.g. sort the dataframe by eigenvector (only first 10 rows)
# We see here that they are all within the subset of researcher that left academia
nodes_df_switcher[author].sort_values('eig', ascending=False)[:10]

  nodes_df_switcher[author].sort_values('eig', ascending=False)[:10]


Unnamed: 0,bipartite,share,eig,partition,institution,switcher,year,country,dropout year
A2656544774,author,0.0,0.000224,20,University of Minnesota,1.0,2014.0,US,2015.0
A2790042352,author,2.53125,0.000157,86,Apple,1.0,2019.0,IL,2019.0
A2342647531,author,0.0,0.000106,47,Columbia University,1.0,2012.0,US,2015.0
A2150708589,author,1.752778,4.8e-05,150,Korea University,1.0,2017.0,KR,2013.0
A2130435799,author,3.090909,4.6e-05,150,University of North Carolina at Chapel Hill,1.0,2016.0,US,2020.0
A2067332573,author,0.0,1e-05,30,Northwestern University,1.0,2008.0,PH,2009.0
A2309502574,author,0.0,1e-05,0,Massachusetts Institute of Technology,1.0,2016.0,US,2018.0
A2031945151,author,1.005405,7e-06,4,Apple,1.0,2017.0,IL,2017.0
A2112462370,author,1.380687,7e-06,26,"University of California, Merced",1.0,2019.0,US,2018.0
A2112716162,author,1.443431,7e-06,0,Google,1.0,2019.0,US,2008.0


In [22]:
# For that we can e.g. sort the dataframe by avg_shr_cit (only first 10 rows)
# We see here that they are all within the subset of researcher that stayed in academia but some appearing from private
nodes_df_switcher[author].sort_values('share', ascending=False)[:10]

  nodes_df_switcher[author].sort_values('share', ascending=False)[:10]


Unnamed: 0,bipartite,share,eig,partition,institution,switcher,year,country,dropout year
A2140581490,author,47.0,3.238797e-11,44,"National University of Ireland, Galway",1.0,2018.0,IE,2020.0
A2618037599,author,42.0,4.995408e-09,0,Facebook,1.0,2017.0,IL,2017.0
A2589938230,author,41.0,1.922605e-08,0,Carnegie Mellon University,1.0,2015.0,US,2015.0
A2422898687,author,39.0,3.212452e-10,20,IBM,1.0,2018.0,US,2018.0
A1967404238,author,31.5,2.654498e-09,4,Stanford University,1.0,2016.0,US,2018.0
A2552119883,author,26.0,4.274833e-10,4,Google,1.0,2018.0,US,2018.0
A2298435663,author,25.0,8.332202e-11,21,Mission College,1.0,2020.0,US,2020.0
A2281662583,author,25.0,9.561671e-10,4,Carnegie Mellon University,1.0,2017.0,US,2019.0
A2952465156,author,21.8,2.305359e-10,5,DeepMind,1.0,2017.0,GB,2017.0
A2116967331,author,21.666667,4.36992e-12,59,University of Cambridge,1.0,2016.0,GB,2017.0


We see here that there is two ways we could subset the switcher by their share of citation or the eigenvector centrality. It's interesting to note that some author that have switched look like they still work within academia. This is due to the fact that we have not considered author that went back to the public sector. We are sure this is the case as we have selecting the nodes attributes on the latest work they have provided. 

### Partitions

We'll observe the partitions in order to see how they are divided and which companies they have within them. 
We'll start by looking at most promienent partitions when summing the collective avg_cit_sh and the sum of authors that they contains. 

In [23]:
nodes_df_switcher[author].groupby(['partition'])['share'].sum().nlargest(10).reset_index()

  nodes_df_switcher[author].groupby(['partition'])['share'].sum().nlargest(10).reset_index()


Unnamed: 0,partition,share
0,4,304.547205
1,0,303.381226
2,5,81.541193
3,26,63.950585
4,21,59.798512
5,20,55.118991
6,44,47.975439
7,68,25.64979
8,59,24.333333
9,67,22.623528


In [24]:
nodes_df_switcher[author].groupby(['partition'])['switcher'].sum().nlargest(10).reset_index()

  nodes_df_switcher[author].groupby(['partition'])['switcher'].sum().nlargest(10).reset_index()


Unnamed: 0,partition,switcher
0,0,123.0
1,4,116.0
2,5,34.0
3,26,32.0
4,67,21.0
5,20,18.0
6,21,16.0
7,10,14.0
8,18,11.0
9,38,10.0


We observe that partitions 0, 3, 1, 15 appear in the top 3 of both highest share of authors and highest share of average of citations. We'll extract the institutions names to see which companies will fall within these partitions. 

In [25]:
nodes_df_switcher.groupby(['partition', 'institution']).sum().nlargest(10,'share')

  nodes_df_switcher.groupby(['partition', 'institution']).sum().nlargest(10,'share')


Unnamed: 0_level_0,Unnamed: 1_level_0,share,eig,switcher,year,dropout year
partition,institution,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4,Google,91.542344,7.70692e-08,23.0,46381.0,46358.0
0,Facebook,63.096866,2.957468e-07,11.0,22200.0,22190.0
44,"National University of Ireland, Galway",47.0,3.238797e-11,1.0,2018.0,2020.0
0,Carnegie Mellon University,44.36,2.28268e-06,6.0,12088.0,12107.0
5,DeepMind,40.704255,6.576408e-08,9.0,18173.0,18163.0
20,IBM,39.0,3.212452e-10,1.0,2018.0,2018.0
0,Google,37.65369,1.009676e-05,26.0,52414.0,52382.0
4,Stanford University,34.504749,1.564481e-07,9.0,18107.0,18160.0
4,Carnegie Mellon University,26.154472,2.470728e-07,3.0,6038.0,6054.0
21,Mission College,25.0,8.332202e-11,1.0,2020.0,2020.0


In [26]:
nodes_df_switcher.groupby(['institution','partition']).sum().nlargest(10,'switcher')

  nodes_df_switcher.groupby(['institution','partition']).sum().nlargest(10,'switcher')


Unnamed: 0_level_0,Unnamed: 1_level_0,share,eig,switcher,year,dropout year
institution,partition,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Google,0,37.65369,1.009676e-05,26.0,52414.0,52382.0
Google,4,91.542344,7.70692e-08,23.0,46381.0,46358.0
Facebook,0,63.096866,2.957468e-07,11.0,22200.0,22190.0
Microsoft,4,18.9509,2.599445e-07,10.0,20179.0,20172.0
DeepMind,5,40.704255,6.576408e-08,9.0,18173.0,18163.0
Stanford University,4,34.504749,1.564481e-07,9.0,18107.0,18160.0
Adobe Systems,0,11.485672,3.933961e-07,7.0,14120.0,14114.0
Google,5,18.419283,2.7711e-08,7.0,14124.0,14119.0
Carnegie Mellon University,0,44.36,2.28268e-06,6.0,12088.0,12107.0
University of Montreal,4,18.638298,3.578967e-08,5.0,10062.0,10078.0


Within the companies we observe similar results to the EDA analysis where most prominent institutions are big tech companies such as Google, Facebook, DeepMind and Microsoft. Our only 'outlier' would be Standford university this is due to our definition of the dropout variable. 

In [27]:
top_central_2017 = df_2017[author][df_2017[author].eig > df_2017[author].eig.quantile(0.8)].index
top_central_2018 = df_2018[author][df_2018[author].eig > df_2018[author].eig.quantile(0.8)].index
top_central_2019 = df_2019[author][df_2019[author].eig > df_2019[author].eig.quantile(0.8)].index
top_central_2020 = df_2020[author][df_2020[author].eig > df_2020[author].eig.quantile(0.8)].index


  top_central_2017 = df_2017[author][df_2017[author].eig > df_2017[author].eig.quantile(0.8)].index
  top_central_2018 = df_2018[author][df_2018[author].eig > df_2018[author].eig.quantile(0.8)].index
  top_central_2019 = df_2019[author][df_2019[author].eig > df_2019[author].eig.quantile(0.8)].index
  top_central_2020 = df_2020[author][df_2020[author].eig > df_2020[author].eig.quantile(0.8)].index


In [28]:
#keeping only top nodes (extreme subsetting)
top_central_nodes_eig = nodes_df_switcher[author][nodes_df_switcher[author].eig > nodes_df_switcher[author].eig.quantile(0.8)].index

  top_central_nodes_eig = nodes_df_switcher[author][nodes_df_switcher[author].eig > nodes_df_switcher[author].eig.quantile(0.8)].index


In [29]:
#keeping only top nodes (extreme subsetting)
top_central_nodes_share = nodes_df_switcher[author][nodes_df_switcher[author].share > nodes_df_switcher[author].share.quantile(0.6)].index

  top_central_nodes_share = nodes_df_switcher[author][nodes_df_switcher[author].share > nodes_df_switcher[author].share.quantile(0.6)].index


In [30]:
colors = ['#000000']+hv.Cycle('Category20').values

# Create subset graph
g_sub_2017 = nx.subgraph(G, top_central_2017)
# Create and save a layout.
g_layout = nx.layout.spring_layout(g_sub_2017) 
g_plot_2017 = hv.Graph.from_networkx(g_sub_2017, g_layout).opts(tools=['hover'], node_color='dropout year')
# make the plot
g_plot_2017.opts(cmap=colors, node_size=10, edge_line_width=1,
              node_line_color='gray', node_color='switcher')
bundled_2017 = bundle_graph(g_plot_2017)
# show the plot
show(hv.render(bundled_2017, text_color='black', bgcolor='white'))



In [31]:
hv.save(bundled_2017,"check.png", fmt='png')

In [32]:
# Create subset graph
g_sub_2018 = nx.subgraph(G, top_central_2018)
# Create and save a layout.
g_layout = nx.layout.spring_layout(g_sub_2018) 
g_plot_2018 = hv.Graph.from_networkx(g_sub_2018, g_layout).opts(tools=['hover'], node_color='dropout year')
# make the plot
g_plot_2018.opts(cmap=colors, node_size=10, edge_line_width=1,
              node_line_color='gray', node_color='switcher')
bundled_2018 = bundle_graph(g_plot_2018)
# show the plot
show(hv.render(bundled_2018, text_color='black', bgcolor='white'))



In [33]:
# Create subset graph
g_sub_2019 = nx.subgraph(G, top_central_2019)
# Create and save a layout.
g_layout = nx.layout.spring_layout(g_sub_2019) 
g_plot_2019 = hv.Graph.from_networkx(g_sub_2019, g_layout).opts(tools=['hover'], node_color='dropout year')
# make the plot
g_plot_2019.opts(cmap=colors, node_size=10, edge_line_width=1,
              node_line_color='gray', node_color='switcher')
bundled_2019 = bundle_graph(g_plot_2019)
# show the plot
show(hv.render(bundled_2019, text_color='black', bgcolor='white'))



In [34]:
# Create subset graph
g_sub_2020 = nx.subgraph(G, top_central_2020)
# Create and save a layout.
g_layout = nx.layout.spring_layout(g_sub_2020) 
g_plot_2020 = hv.Graph.from_networkx(g_sub_2020, g_layout).opts(tools=['hover'], node_color='dropout year')
# make the plot
g_plot_2020.opts(cmap=colors, node_size=10, edge_line_width=1,
              node_line_color='gray', node_color='switcher')
bundled_2020 = bundle_graph(g_plot_2020)
# show the plot
show(hv.render(bundled_2020, text_color='black', bgcolor='white'))



In [35]:
# Create subset graph
g_sub_eig = nx.subgraph(G, top_central_nodes_eig)
# Create and save a layout.
g_layout = nx.layout.spring_layout(g_sub_eig) 
g_plot_eig = hv.Graph.from_networkx(g_sub_eig, g_layout).opts(tools=['hover'], node_color='country')
# make the plot
g_plot_eig.opts(cmap=colors, node_size=10, edge_line_width=1,
              node_line_color='gray', node_color='country')
bundled_eig = bundle_graph(g_plot_eig)
# show the plot
show(hv.render(bundled_eig, text_color='black', bgcolor='white'))



In [36]:
# Create subset graph
g_sub_eig = nx.subgraph(G, top_central_nodes_eig)
# Create and save a layout.
g_layout = nx.layout.spring_layout(g_sub_eig) 
g_plot_eig = hv.Graph.from_networkx(g_sub_eig, g_layout).opts(tools=['hover'], node_color='dropout year')
# make the plot
g_plot_eig.opts(cmap=colors, node_size=10, edge_line_width=1,
              node_line_color='gray', node_color='dropout year')
bundled_eig = bundle_graph(g_plot_eig)
# show the plot
show(hv.render(bundled_eig, text_color='black', bgcolor='white'))



In [37]:
colors = ['#000000']+hv.Cycle('Category20').values

# Create subset graph
g_sub_share = nx.subgraph(G, top_central_nodes_share)
# Create and save a layout.
g_layout = nx.layout.spring_layout(g_sub_share) 
g_plot_share = hv.Graph.from_networkx(g_sub_share, g_layout).opts(tools=['hover'], node_color='dropout year')
# make the plot
g_plot_share.opts(cmap=colors, node_size=10, edge_line_width=1,
              node_line_color='gray', node_color='institution')
bundled_share = bundle_graph(g_plot_share)
# show the plot
show(hv.render(bundled_share, text_color='black', bgcolor='white'))


