In [1]:
# Importing Basic Libraries
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import networkx as nx
import itertools
import requests
import pickle
import re

from bs4 import BeautifulSoup
from tqdm.notebook import tqdm

# Creating PrettyPrinter Instance
import pprint
pp = pprint.PrettyPrinter(indent=2)

In [2]:
# Load the faculty table from CSV into the notebook-wide DataFrame `df`.
df = pd.read_csv("Lye_En_Lih_updated.csv")
# Lift pandas' row-display limit so that displaying a DataFrame shows every row.
pd.set_option('display.max_rows', None)
df

Unnamed: 0,Full Name,Email,DR-NTU URL,Website URL,DBLP URL,Citations Count,Google Scholar URL
0,Wai Kin Adams Kong,adamskong@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00834,,https://dblp.org/pid/16/3792,7563,https://scholar.google.com/citations?hl=en&use...
1,Luu Anh Tuan,anhtuan.luu@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp01296,https://tuanluu.github.io/,https://dblp.org/pid/81/8329,4242,https://scholar.google.com/citations?hl=en&use...
2,Anupam Chattopadhyay,anupam@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp01076,https://scholar.google.co.in/citations?user=TI...,https://dblp.org/pid/99/4535,6226,https://scholar.google.com/citations?hl=en&use...
3,Anwitaman Datta,anwitaman@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00706,https://personal.ntu.edu.sg/anwitaman/,https://dblp.org/pid/d/AnwitamanDatta,8047,https://scholar.google.com/citations?hl=en&use...
4,Arvind Easwaran,arvinde@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00687,https://cps-research-group.github.io/,https://dblp.org/pid/73/1708,2817,https://scholar.google.com/citations?hl=en&use...
5,"Vun Chan Hua, Nicholas",aschvun@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00839,,https://dblp.org/pid/69/8028,0,
6,Kwoh Chee Keong,asckkwoh@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00799,https://personal.ntu.edu.sg/asckkwoh,https://dblp.org/pid/32/228,8704,https://scholar.google.com/citations?hl=en&use...
7,Yeo Chai Kiat,asckyeo@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00963,https://personal.ntu.edu.sg/asckyeo/,https://dblp.org/pid/52/910,4242,https://scholar.google.com/citations?hl=en&use...
8,Lau Chiew Tong,asctlau@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00670,,https://dblp.org/pid/30/6609,0,
9,Miao Chun Yan,ascymiao@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00084,,https://dblp.org/pid/m/ChunyanMiao,17708,https://scholar.google.com/citations?hl=en&use...


## Pickle Functions

In [3]:
# Function to load the data stored in a pickle file
def load_pickle(file_path):
    """
    Read a pickle file and hand back the object stored in it.

    :param file_path: The path of the pickle file to read.
    :return: The unpickled object.
    """
    with open(file_path, 'rb') as handle:
        return pickle.load(handle)

def write_pickle(data, file_path):
    """
    Serialise an object to disk with pickle.

    Any failure is reported with a printed message instead of being raised,
    so the caller always gets ``None`` back.

    :param data: The object to serialise.
    :param file_path: Destination path of the pickle file.
    """
    try:
        with open(file_path, 'wb') as handle:
            pickle.dump(data, handle)
    except Exception as err:
        print(f'Error writing to {file_path}: {err}')

## Determining Collaboration Network of Professors

### Within SCSE

To find the collaboration network within SCSE, I will use each professor's DBLP name and search their publications for co-authors whose names match other SCSE faculty members.

To determine the collaboration network, I will use the co-author information from each professor's Google Scholar profile.

In [4]:
# Retrieve each professor's name in DBLP's own format from their DBLP page.
# dblp_names stays aligned with df's rows: the string "NaN" marks a row for
# which no name could be obtained (later cells test for this sentinel).
DBLP_TIMEOUT_SECONDS = 30  # without a timeout a stalled request would hang the cell

dblp_names = []
for _, row in tqdm(df.iterrows(), total=len(df), desc = 'SCSE Faculty Members'):
    url = row['DBLP URL']
    # Rows without a DBLP URL do not hold a string here (pandas reads an empty cell as NaN).
    if not isinstance(url, str):
        dblp_names.append("NaN")
        continue
    response = requests.get(url, timeout=DBLP_TIMEOUT_SECONDS)
    # Fail with the HTTP status instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    # find() returns None when nothing matches, so check before chaining.
    headline = soup.find('header', id='headline')
    name_tag = headline.find('span', class_='name primary') if headline is not None else None
    if name_tag is None:
        # Unexpected page layout: keep the list aligned and make the gap visible
        # rather than aborting the whole scrape with an AttributeError.
        print(f'No DBLP name found at {url}')
        dblp_names.append("NaN")
        continue
    # Strip all digits from the displayed name (presumably DBLP's numeric
    # suffix for authors sharing a name -- TODO confirm).
    cleaned_name = re.sub(r'[\d]', '', name_tag.text.strip()).rstrip()
    dblp_names.append(cleaned_name)

SCSE Faculty Members:   0%|          | 0/86 [00:00<?, ?it/s]

In [5]:
dblp_names

['Adams Wai-Kin Kong',
 'Anh Tuan Luu',
 'Anupam Chattopadhyay',
 'Anwitaman Datta',
 'Arvind Easwaran',
 'Nicholas C. H. Vun',
 'Chee Keong Kwoh',
 'Chai Kiat Yeo',
 'Chiew Tong Lau',
 'Chunyan Miao',
 'Douglas L. Maskell',
 'Deepu Rajan',
 'Chng Eng Siong',
 'Li Fang',
 'Hiok Chai Quek',
 'Seah Hock Soon',
 'Jagath C. Rajapakse',
 'Jianmin Zheng',
 'Liang-Tien Chia',
 'A. S. Madhukumar',
 'Syin Chan',
 'Siu Cheung Hui',
 'Siew-Kei Lam',
 'Sourav S. Bhowmick',
 'Alexei Sourin',
 'Shell-Ying Huang',
 'Tat-Jen Cham',
 'Thambipillai Srikanthan',
 'Wooi-Boon Goh',
 'Wentong Cai',
 'Xueyan Tang',
 'Yew-Soon Ong',
 'Wee Keong Ng',
 'Aixin Sun',
 'Bo An',
 'Boyang Li',
 'Wei Yang Bryan Lim',
 'Long Cheng',
 'Erik Cambria',
 'Chen Change Loy',
 'Chee-Wei Tan',
 'Chin Ann Ong',
 'Cuntai Guan',
 'Dmitrii Ustiugov',
 'Dusit Niyato',
 'Bu-Sung Lee',
 'Gao Cong',
 'Guosheng Lin',
 'Han Yu',
 'Hanwang Zhang',
 'Hong Lye Oh',
 'Josephine Chong',
 'Jun Luo',
 'Jun Zhao',
 'NaN',
 'Kwok-Yan Lam',
 'Mo

In [6]:
# Insert DBLP names into Dataframe at column position 1.
# DataFrame.insert raises ValueError when the column already exists, which is
# the case when this cell is re-run or when the loaded CSV already carries a
# 'DBLP Names' column; in that situation refresh the values in place instead.
if 'DBLP Names' in df.columns:
    df['DBLP Names'] = dblp_names
else:
    df.insert(1, 'DBLP Names', dblp_names)

In [7]:
df

Unnamed: 0,Full Name,DBLP Names,Email,DR-NTU URL,Website URL,DBLP URL,Citations Count,Google Scholar URL
0,Wai Kin Adams Kong,Adams Wai-Kin Kong,adamskong@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00834,,https://dblp.org/pid/16/3792,7563,https://scholar.google.com/citations?hl=en&use...
1,Luu Anh Tuan,Anh Tuan Luu,anhtuan.luu@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp01296,https://tuanluu.github.io/,https://dblp.org/pid/81/8329,4242,https://scholar.google.com/citations?hl=en&use...
2,Anupam Chattopadhyay,Anupam Chattopadhyay,anupam@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp01076,https://scholar.google.co.in/citations?user=TI...,https://dblp.org/pid/99/4535,6226,https://scholar.google.com/citations?hl=en&use...
3,Anwitaman Datta,Anwitaman Datta,anwitaman@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00706,https://personal.ntu.edu.sg/anwitaman/,https://dblp.org/pid/d/AnwitamanDatta,8047,https://scholar.google.com/citations?hl=en&use...
4,Arvind Easwaran,Arvind Easwaran,arvinde@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00687,https://cps-research-group.github.io/,https://dblp.org/pid/73/1708,2817,https://scholar.google.com/citations?hl=en&use...
5,"Vun Chan Hua, Nicholas",Nicholas C. H. Vun,aschvun@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00839,,https://dblp.org/pid/69/8028,0,
6,Kwoh Chee Keong,Chee Keong Kwoh,asckkwoh@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00799,https://personal.ntu.edu.sg/asckkwoh,https://dblp.org/pid/32/228,8704,https://scholar.google.com/citations?hl=en&use...
7,Yeo Chai Kiat,Chai Kiat Yeo,asckyeo@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00963,https://personal.ntu.edu.sg/asckyeo/,https://dblp.org/pid/52/910,4242,https://scholar.google.com/citations?hl=en&use...
8,Lau Chiew Tong,Chiew Tong Lau,asctlau@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00670,,https://dblp.org/pid/30/6609,0,
9,Miao Chun Yan,Chunyan Miao,ascymiao@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00084,,https://dblp.org/pid/m/ChunyanMiao,17708,https://scholar.google.com/citations?hl=en&use...


In [26]:
# Gather every value of the 'DBLP Names' column into a set (this includes the
# 'NaN' placeholder of rows without a DBLP name) and cache the set on disk.
scse_authors = {
    faculty_row['DBLP Names']
    for _, faculty_row in tqdm(df.iterrows(), total=len(df), desc = 'SCSE Faculty Members')
}
write_pickle(scse_authors,'scse_network_authors.pkl')

SCSE Faculty Members:   0%|          | 0/86 [00:00<?, ?it/s]

In [14]:
scse_authors

{'A. S. Madhukumar',
 'Adams Wai-Kin Kong',
 'Aixin Sun',
 'Alexei Sourin',
 'Anh Tuan Luu',
 'Anupam Chattopadhyay',
 'Anwitaman Datta',
 'Arvind Easwaran',
 'Bo An',
 'Boyang Li',
 'Bu-Sung Lee',
 'C.-H. Luke Ong',
 'Chai Kiat Yeo',
 'Chee Keong Kwoh',
 'Chee-Wei Tan',
 'Chen Change Loy',
 'Chiew Tong Lau',
 'Chin Ann Ong',
 'Chng Eng Siong',
 'Chunyan Miao',
 'Cuntai Guan',
 'Deepu Rajan',
 'Dmitrii Ustiugov',
 'Douglas L. Maskell',
 'Dusit Niyato',
 'Erik Cambria',
 'Gao Cong',
 'Guosheng Lin',
 'Han Yu',
 'Hanwang Zhang',
 'Hiok Chai Quek',
 'Hong Lye Oh',
 'Jagath C. Rajapakse',
 'Jianmin Zheng',
 'Jie Zhang',
 'Josephine Chong',
 'Jun Luo',
 'Jun Zhao',
 'K. Vidya Sudarshan',
 'Kavallur Gopi Smitha',
 'Kemao Qian',
 'Kwok-Yan Lam',
 'Li Fang',
 'Li Yi',
 'Liang-Tien Chia',
 'Long Cheng',
 'Mo Li',
 'Mohamed M. Sabry',
 'NaN',
 'Nicholas C. H. Vun',
 'Owen Noel Newton Fernando',
 'Rui Tan',
 'Seah Hock Soon',
 'Shafiq R. Joty',
 'Shangwei Lin',
 'Shell-Ying Huang',
 'Shijian Lu',

In [34]:
from pyvis.network import Network

# Using Pyvis Network
# Module-level graph shared across cells: create_scse_graph adds its nodes and
# edges to this object, and the cells below save it to HTML.
G = Network(height='1000px', bgcolor='#242424', font_color='white')

# This function creates the graph for the selected professor; to show the whole network without highlighting, leave out the colour-coding part
def create_scse_graph(selected_prof):
    """
    Fill the shared Pyvis graph ``G`` with the SCSE co-authorship network and
    highlight one professor together with their direct collaborators.

    Nodes are the entries of ``scse_authors`` (minus the 'NaN' placeholder).
    For every row of ``df`` the pickled publication list
    ``publication_set/publications_<Full Name>.pkl`` is loaded, and an edge is
    added between the row's DBLP name and each co-author that is also a node.

    :param selected_prof: Name (as it appears on the node labels) of the
        professor to highlight in '#FB0303'; nodes sharing an edge with it
        are coloured '#FB8E03'.
    """
    # G, df and scse_authors are module-level objects that are only read or
    # mutated here, never rebound, so no ``global`` declaration is needed.

    # Add the nodes and remember which names really became nodes. Pyvis'
    # add_edge expects both endpoints to exist, so the edge test below uses
    # this set rather than scse_authors (which still holds 'NaN').
    faculty = set()
    for prof_name in scse_authors:
        if pd.isna(prof_name) or prof_name == 'NaN':
            continue
        faculty.add(prof_name)
        G.add_node(prof_name, label=prof_name)

    # Create edges between authors in the Pyvis graph. Each unordered pair is
    # passed to add_edge once instead of once per shared publication.
    linked_pairs = set()
    for _, row in tqdm(df.iterrows(), total=len(df), desc='SCSE Faculty Members'):
        name = row['Full Name']
        dblp_name = row['DBLP Names']
        if pd.isna(dblp_name) or dblp_name == 'NaN':
            continue
        # Publication files are keyed by the CSV's full name, not the DBLP name.
        publications = load_pickle(f'publication_set/publications_{name}.pkl')
        for pub in publications:
            for author in pub['authors']:
                # Skip self-loops and anybody who is not a faculty node.
                if author == dblp_name or author not in faculty:
                    continue
                pair = frozenset((dblp_name, author))
                if pair in linked_pairs:
                    continue
                linked_pairs.add(pair)
                G.add_edge(dblp_name, author)

    # Collect the direct neighbours of the selected professor.
    connect_nodes = set()
    for edge in G.edges:
        if edge['from'] == selected_prof:
            connect_nodes.add(edge['to'])
        elif edge['to'] == selected_prof:
            connect_nodes.add(edge['from'])

    # Colour-code the selection and its neighbours.
    for node in G.nodes:
        if node['label'] == selected_prof:
            node['color'] = '#FB0303'
        elif node['label'] in connect_nodes:
            node['color'] = '#FB8E03'
        else:
            # G persists between calls: drop a highlight left over from an
            # earlier selection so only the current one is coloured.
            node.pop('color', None)

# Call the function to create the graph for selected professor
create_scse_graph("Anwitaman Datta")

# Customize the Pyvis graph if needed
# G.show_buttons(filter_=['nodes', 'edges', 'physics'])
#G.barnes_hut()

# Write the Pyvis graph to an HTML file; open that file in a browser to view it
G.save_graph('scse_network.html')

SCSE Faculty Members:   0%|          | 0/86 [00:00<?, ?it/s]

In [93]:
# This function creates the graph for the selected professor; to show the whole network without highlighting, leave out the colour-coding part
def create_scse_graph(selected_prof):
    """
    Fill the shared Pyvis graph ``G`` with the SCSE co-authorship network and
    highlight one professor together with their direct collaborators.

    NOTE(review): this cell redefines ``create_scse_graph`` from an earlier
    cell and shadows it; this version iterates ``df`` without a progress bar.
    Consider keeping a single definition.

    Nodes are the entries of ``scse_authors`` (minus the 'NaN' placeholder).
    For every row of ``df`` the pickled publication list
    ``publication_set/publications_<Full Name>.pkl`` is loaded, and an edge is
    added between the row's DBLP name and each co-author that is also a node.

    :param selected_prof: Name (as it appears on the node labels) of the
        professor to highlight in '#FB0303'; nodes sharing an edge with it
        are coloured '#FB8E03'.
    """
    # G, df and scse_authors are module-level objects that are only read or
    # mutated here, never rebound, so no ``global`` declaration is needed.

    # Add the nodes and remember which names really became nodes. Pyvis'
    # add_edge expects both endpoints to exist, so the edge test below uses
    # this set rather than scse_authors (which still holds 'NaN').
    faculty = set()
    for prof_name in scse_authors:
        if pd.isna(prof_name) or prof_name == 'NaN':
            continue
        faculty.add(prof_name)
        G.add_node(prof_name, label=prof_name)

    # Create edges between authors in the Pyvis graph. Each unordered pair is
    # passed to add_edge once instead of once per shared publication.
    linked_pairs = set()
    for _, row in df.iterrows():
        name = row['Full Name']
        dblp_name = row['DBLP Names']
        if pd.isna(dblp_name) or dblp_name == 'NaN':
            continue
        # Publication files are keyed by the CSV's full name, not the DBLP name.
        publications = load_pickle(f'publication_set/publications_{name}.pkl')
        for pub in publications:
            for author in pub['authors']:
                # Skip self-loops and anybody who is not a faculty node.
                if author == dblp_name or author not in faculty:
                    continue
                pair = frozenset((dblp_name, author))
                if pair in linked_pairs:
                    continue
                linked_pairs.add(pair)
                G.add_edge(dblp_name, author)

    # Collect the direct neighbours of the selected professor.
    connect_nodes = set()
    for edge in G.edges:
        if edge['from'] == selected_prof:
            connect_nodes.add(edge['to'])
        elif edge['to'] == selected_prof:
            connect_nodes.add(edge['from'])

    # Colour-code the selection and its neighbours.
    for node in G.nodes:
        if node['label'] == selected_prof:
            node['color'] = '#FB0303'
        elif node['label'] in connect_nodes:
            node['color'] = '#FB8E03'
        else:
            # G persists between calls: drop a highlight left over from an
            # earlier selection so only the current one is coloured.
            node.pop('color', None)

# Call the function to create the graph for selected professor
create_scse_graph("Anwitaman Datta")
# Write the Pyvis graph to an HTML file; open that file in a browser to view it
G.save_graph('scse_network_2.html')

## Within NTU

I will first search DBLP for all authors with publications who are affiliated with NTU, then I will repeat the process above to check the collaboration network with professors across NTU.

In [68]:
# Probe DBLP's search page for authors affiliated with NTU.
# coauthors_ntu is created here but not filled in by this cell.
coauthors_ntu = []
url = "https://dblp.org/search?q=affiliation%3A%22Nanyang+Technological+University%22"
# A timeout keeps a stalled request from hanging the cell; raise_for_status
# surfaces HTTP errors instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup_source = response.text
soup = BeautifulSoup(soup_source,'lxml')
# find() returns None when nothing matches, so guard before chaining a second find().
# In the recorded run this cell printed None: the 'publ-list' element was found
# but held no <li> entry. TODO: confirm how DBLP serves these search results.
publ_list = soup.find('ul', class_='publ-list')
print(publ_list.find('li') if publ_list is not None else None)

None


## Outside NTU

In [89]:
from pyvis.network import Network

def create_outsideNTU_graph(selected_prof, output_path='outsideNTU_network.html'):
    """
    Build and save a star-shaped Pyvis graph linking one professor to every
    co-author who is not listed in ``scse_authors``.

    Despite the name, the only filter applied is membership in the
    module-level ``scse_authors`` set, so co-authors from other NTU schools
    are not excluded.

    :param selected_prof: Professor's name. It is used both as the key of the
        publication file ``publication_set/publications_<selected_prof>.pkl``
        and as the author name to skip inside each publication.
    :param output_path: HTML file the graph is written to. Defaults to the
        file name this function has always used.
    """
    publications = load_pickle(f'publication_set/publications_{selected_prof}.pkl')

    # Create a Pyvis Network instance
    net = Network(height='1000px', bgcolor='#242424', font_color='white')

    # Gather the distinct external co-authors in a single pass. Dict keys
    # de-duplicate while keeping first-appearance order, so the saved graph
    # lists nodes in the same order on every run.
    coauthors = {}
    for pub in publications:
        for author in pub['authors']:
            if author != selected_prof and author not in scse_authors:
                coauthors[author] = None

    # Central node for the selected professor, then one node and one edge per
    # co-author (no second scan over the publications is needed for edges).
    net.add_node(selected_prof, color='orange', title=selected_prof)
    for author in coauthors:
        net.add_node(author, title=author)
        net.add_edge(selected_prof, author)

    # Save the graph
    net.save_graph(output_path)

create_outsideNTU_graph('Anwitaman Datta')

{'Ekanshi Agrawal', 'Hoang-Vu Dang', 'Frédérique E. Oggier', 'Michael J. Franklin', 'Qi He', 'Xin Liu', 'Pietro Michiardi', 'Siddharth Singh', 'Vivekanand Gopalkrishnan', 'Marinho P. Barcellos', 'Hani Salah', 'Henning Schulzrinne', 'Jun Shao', 'Sally Ang', 'Daniël Reijsbergen', 'Spyros Voulgaris', 'Sajal K. Das', 'Roberto Ripamonti', 'Chih Wei Ling', 'Liviu Iftode', 'Jussi Keppo', 'Michael Militzer', 'Wan-Hee Cho', 'Kyumars Sheykh Esmaili', 'Gokhan Sagirlar', 'Quach Vinh Thanh', 'Aung Maw', 'Sarunas Girdzijauskas', 'Pascal Felber', 'Andreas Wombacher', 'Bhawani Shanker Bhati', 'Chenliang Li', 'Chua Chiah Soon', 'Iva Bojic', 'Thomas Paul', 'Radoslaw Nielek', 'Shun Hanli Hanley', 'Adamas Aqsa Fahreza', 'Prasenjit Dey', 'Kuiyu Chang', 'Martin Hasler', 'Marios D. Dikaiakos', 'Alberto Montresor', 'Vigneshwaran Shankaran', 'Tien Tuan Anh Dinh', 'Shahin Salavati', 'Bretislav Hajek', 'Karl Aberer', 'Dick H. J. Epema', 'Anthony Ventresque', 'Rongxing Lu', 'Jordan Ivanchev', 'Minh-Tam Le', 'Zhij

In [8]:
# Saving New Dataframe as CSV
# NOTE: this writes to the same path that the notebook reads with pd.read_csv
# in its first data cell, so the input file is overwritten and a later run
# loads a CSV that already contains the 'DBLP Names' column.
df.to_csv('Lye_En_Lih_updated.csv', encoding = 'utf-8', index = False)

In [21]:
df

Unnamed: 0,Full Name,DBLP Names,Email,DR-NTU URL,Website URL,DBLP URL,Citations Count,Google Scholar URL
0,Wai Kin Adams Kong,Adams Wai-Kin Kong,adamskong@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00834,,https://dblp.org/pid/16/3792,7563,https://scholar.google.com/citations?hl=en&use...
1,Luu Anh Tuan,Anh Tuan Luu,anhtuan.luu@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp01296,https://tuanluu.github.io/,https://dblp.org/pid/81/8329,4242,https://scholar.google.com/citations?hl=en&use...
2,Anupam Chattopadhyay,Anupam Chattopadhyay,anupam@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp01076,https://scholar.google.co.in/citations?user=TI...,https://dblp.org/pid/99/4535,6226,https://scholar.google.com/citations?hl=en&use...
3,Anwitaman Datta,Anwitaman Datta,anwitaman@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00706,https://personal.ntu.edu.sg/anwitaman/,https://dblp.org/pid/d/AnwitamanDatta,8047,https://scholar.google.com/citations?hl=en&use...
4,Arvind Easwaran,Arvind Easwaran,arvinde@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00687,https://cps-research-group.github.io/,https://dblp.org/pid/73/1708,2817,https://scholar.google.com/citations?hl=en&use...
5,"Vun Chan Hua, Nicholas",Nicholas C. H. Vun,aschvun@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00839,,https://dblp.org/pid/69/8028,0,
6,Kwoh Chee Keong,Chee Keong Kwoh,asckkwoh@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00799,https://personal.ntu.edu.sg/asckkwoh,https://dblp.org/pid/32/228,8704,https://scholar.google.com/citations?hl=en&use...
7,Yeo Chai Kiat,Chai Kiat Yeo,asckyeo@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00963,https://personal.ntu.edu.sg/asckyeo/,https://dblp.org/pid/52/910,4242,https://scholar.google.com/citations?hl=en&use...
8,Lau Chiew Tong,Chiew Tong Lau,asctlau@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00670,,https://dblp.org/pid/30/6609,0,
9,Miao Chun Yan,Chunyan Miao,ascymiao@ntu.edu.sg,https://dr.ntu.edu.sg/cris/rp/rp00084,,https://dblp.org/pid/m/ChunyanMiao,17708,https://scholar.google.com/citations?hl=en&use...
