In [1]:
# Prerequisite: pyvis must be installed in the kernel's environment (e.g. run `%pip install pyvis` once).

In [2]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import seaborn as sns
# Apply a global seaborn style (white grid, muted palette, default font scale).
sns.set(style='whitegrid', palette='muted', font_scale=1)

## Load data

In [3]:
# Location of the input workbook. Set the LINEAGE_DATA_PATH environment variable
# to run the notebook on another machine; when it is unset, the original
# author's local path is used so existing behaviour is unchanged.
DATA_PATH = Path(
    os.environ.get(
        "LINEAGE_DATA_PATH",
        "/Users/fredcheng/Desktop/network_analysis/data1.xlsx",
    )
)

# Each row holds src_* and dst_* columns plus a process_seen count.
df = pd.read_excel(DATA_PATH)

# Preview the first rows only instead of rendering the whole frame.
df.head(10)

Unnamed: 0,src_syterm,src_campus,src_biz_process,src_owning_division,dst_syterm,dst_campus,dst_biz_process,dst_owning_division,process_seen
0,star trader,hk,input trade,equities,razor,jp,send trade to market,equities,1990
1,bloomberg client,kr,get market data,market data,bloomberg,hk,market data connection head,market data,5000
2,murax,hk,input fx order,ficc,fx-pool,jp,match fx trade,ficc,563
3,black tunnel,hk,input black pool order,equities,black pool,sg,black pool trade matching,equities,32324
4,black tunnel,sg,input black pool order,equities,black pool,sg,black pool trade matching,equities,996
5,black tunnel,jp,input black pool order,equities,black pool,sg,black pool trade matching,equities,4555
6,black tunnel,kr,input black pool order,equities,black pool,sg,black pool trade matching,equities,2323
7,black tunnel,kr,input black pool order,equities,black pool,sg,black pool trade matching,equities,1500
8,Market AnalyticsX Client,sg,query market data,market data,Market AnalyticsX,hk,market data connection head,market data,111
9,Market AnalyticsX Client,hk,query market data,market data,Market AnalyticsX,hk,market data connection head,market data,10000


In [4]:
# Build a "<system> [<campus>]" label for each side of a connection; these
# labels are what the graph cells further down use as node identifiers.
for side in ("src", "dst"):
    df[f"{side}_sys+campus"] = df[f"{side}_syterm"] + ' [' + df[f"{side}_campus"] + ']'

In [5]:
# Preview two rows to check the combined "system [campus]" label columns.
df.head(2)

Unnamed: 0,src_syterm,src_campus,src_biz_process,src_owning_division,dst_syterm,dst_campus,dst_biz_process,dst_owning_division,process_seen,src_sys+campus,dst_sys+campus
0,star trader,hk,input trade,equities,razor,jp,send trade to market,equities,1990,star trader [hk],razor [jp]
1,bloomberg client,kr,get market data,market data,bloomberg,hk,market data connection head,market data,5000,bloomberg client [kr],bloomberg [hk]


## Create the graph

In [6]:
# Column roles for the directed graph: every row contributes a
# source -> target edge that carries its process_seen value as an attribute.
edge_columns = {
    "source": 'src_sys+campus',
    "target": 'dst_sys+campus',
    "edge_attr": 'process_seen',
}

# NOTE(review): a DiGraph keeps one edge per ordered node pair, so rows that
# repeat the same source/target labels presumably collapse into a single edge
# here -- confirm this is intended before relying on process_seen from G.
G = nx.from_pandas_edgelist(df, create_using=nx.DiGraph(), **edge_columns)

print("No of unique system:", G.number_of_nodes())
print("No of lineage:", G.number_of_edges())

No of unique system: 20
No of lineage: 14


## Prepare colours by campus location

In [7]:
# Every campus code that appears on either side of a connection:
# source campuses first, then destination campuses.
all_campuses = [*df['src_campus'], *df['dst_campus']]

# dict keys keep insertion order, so this removes duplicates while preserving
# the order in which each campus was first seen.
campus_list = list(dict.fromkeys(all_campuses))
campus_list

['hk', 'kr', 'sg', 'jp', 'au', 'de', 'uk']

In [8]:
def campus_colour(campus):
    """Return the hex colour string used for nodes at the given campus.

    Args:
        campus: Campus code such as "hk", "kr", "sg", "jp", "au" or "de".

    Returns:
        The colour mapped to ``campus``; any other value (including "uk")
        gets the fallback colour "#6495ED".
    """
    palette = {
        "hk": "#40E0D0",
        "kr": "#FF7F50",
        "sg": "#FFBF00",
        "jp": "#DE3163",
        "au": "#DFFF00",
        "de": "#CCCCFF",
    }
    # Fallback colour, originally annotated as the one for 'uk'.
    return palette.get(campus, "#6495ED")

In [9]:
# Flatten the frame into plain Python lists, one per row, with the values in
# DataFrame column order.
connection_list = df.to_numpy().tolist()

# Show the first row to see which position each field ends up in.
connection_list[0]

['star trader',
 'hk',
 'input trade',
 'equities',
 'razor',
 'jp',
 'send trade to market',
 'equities',
 1990,
 'star trader [hk]',
 'razor [jp]']

## Visualise the lineage graph with pyvis

In [10]:
from pyvis.network import Network

# Interactive, directed pyvis network on a dark canvas with white labels.
net1 = Network(notebook=True, width="75%", height="1500px", directed=True, bgcolor="#222222", font_color="white")

# Read every connection by column name instead of by list position, so adding
# or reordering DataFrame columns cannot silently shift which field is used.
for row in df.to_dict("records"):
    # Add the source endpoint first, then the destination, each coloured by its
    # campus and given a hover tooltip.
    # NOTE(review): pyvis appears to ignore add_node for an id that already
    # exists, so a node's tooltip would come from the first row it appears in
    # -- confirm against the pyvis version in use.
    for side in ("src", "dst"):
        net1.add_node(
            row[f"{side}_sys+campus"],
            title="Campus: {campus} | Division: {division} | Process: {process}".format(
                campus=row[f"{side}_campus"],
                division=row[f"{side}_owning_division"],
                process=row[f"{side}_biz_process"],
            ),
            color=campus_colour(row[f"{side}_campus"]),
        )

    # The edge's `value` is the row's process_seen count.
    net1.add_edge(row["src_sys+campus"], row["dst_sys+campus"], value=row["process_seen"])

# Show only the "physics" group of configuration buttons.
net1.show_buttons(filter_=["physics"])

net1.show("lineage.html")