In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import itertools
import networkx as nx 
import matplotlib.pyplot as plt
from IPython.display import Image
import warnings
warnings.filterwarnings("ignore") 

In [None]:
def Remove(duplicate):
    """Return the items of ``duplicate`` as a list with repeats dropped.

    The first occurrence of every item is kept and the original order is
    preserved.

    Parameters
    ----------
    duplicate : iterable
        Items to de-duplicate. It is read once and never modified.

    Returns
    -------
    list
        A new list holding each distinct item exactly once.
    """
    items = list(duplicate)
    try:
        # dict keys are unique and keep insertion order, which gives a
        # linear-time, order-preserving de-duplication for hashable items.
        return list(dict.fromkeys(items))
    except TypeError:
        # Unhashable items (e.g. lists) cannot be dict keys; fall back to the
        # slower equality scan so such inputs keep working.
        final_list = []
        for num in items:
            if num not in final_list:
                final_list.append(num)
        return final_list

In [None]:
# Sementor 'XREF_PROG_DB APP' ingestion
# The workbook is opened in a `with` block so its file handle is released as
# soon as the sheet has been parsed (inspect `xl.sheet_names` inside the block
# to list the available sheets).
with pd.ExcelFile('XREF_PROG_DB APP.xlsx') as xl:
    db = xl.parse('XREF_PROG_DB APP')

In [None]:
# Distinct values of the four columns the graph is built from.
sp, schema, app1, app2 = (
    db[column].unique()
    for column in ('Source Program', 'DMS2 Schema Definition', 'String', 'String.1')
)
# Values of both 'String' columns merged into one list, repeats dropped
# (first occurrence wins, order kept).
app = Remove(itertools.chain(app1, app2))

In [None]:
# Build the reference graph: one node per schema value and per application
# value, plus two edges for every row of `db`.
g = nx.Graph()
for name in schema:
    g.add_node(name, color='deepskyblue', type='db')
for name in app:
    g.add_node(name, color='darkorange', type='app')
for i in range(len(db)):
    # Columns are addressed by position (1, 2, 3), so these edges depend on
    # the sheet's column order -- TODO confirm which columns these are.
    g.add_edge(db.iloc[i, 2], db.iloc[i, 1], type='ext', color='blue', label='I', weight=1)
    g.add_edge(db.iloc[i, 3], db.iloc[i, 2], type='ext', color='blue', label='II', weight=2)

# Per-node drawing attributes, derived from the graph itself in node order.
# Building them here (instead of appending while calling add_node) guarantees
# exactly one colour and one size per node even when a name occurs in both
# `schema` and `app`, or when an edge introduced a node that has no 'type'.
node_style = {'db': ('deepskyblue', 2000), 'app': ('darkorange', 800)}
fallback_style = ('lightgrey', 800)  # nodes that only appear as edge endpoints
color_map = []
size = []
for _, attr in g.nodes(data=True):
    node_color, node_size = node_style.get(attr.get('type'), fallback_style)
    color_map.append(node_color)
    size.append(node_size)

In [None]:
# Drawing Graph
plt.figure(1, figsize=(30, 30))
edges = g.edges()
colors = [g[u][v]['color'] for u, v in edges]
nx.draw(g, node_size=size, font_size=15, node_color=color_map, edge_color=colors, with_labels=True)

# Legend built from explicit proxy markers, one per node type. Passing the
# bare string 'DB' as labels makes matplotlib iterate it character by
# character ('D', 'B') and attach those to whatever artists are on the axes.
legend_handles = [
    plt.Line2D([], [], linestyle='', marker='o', markersize=20, color='deepskyblue', label='DB'),
    plt.Line2D([], [], linestyle='', marker='o', markersize=20, color='darkorange', label='App'),
]
plt.legend(handles=legend_handles, loc=1, numpoints=1, fontsize=20)

# Save before show: the PNG is written from the still-open figure, then the
# figure is rendered in the notebook.
plt.savefig('graph_db.png')
plt.show()

In [None]:
# Reference-graph facts & metrics
print('Reference Graph')
print('Do we have a fully connected graph? ', nx.is_connected(g))

# Directed copy of g; get_subgraph() also reads this global.
h = g.to_directed()
N = h.order()
# NOTE(review): K is the edge count of the directed copy, presumably twice the
# number of undirected edges in g -- confirm that is what "# Edges" should show.
K = h.size()
avg_deg = K / float(N)

for label, value in (
    ("# Applications: ", len(app)),
    ("# Databases: ", len(schema)),
    ("# Nodes: ", N),
    ("# Edges: ", K),
    ("Average connectivity degree: ", avg_deg),
):
    print(label, value)

# Component counts of the directed copy; each counter is called only when its
# line is printed.
for label, counter in (
    ("SCC-strongly connected components: ", nx.number_strongly_connected_components),
    ("WCC-weakly connected components: ", nx.number_weakly_connected_components),
):
    print(label, counter(h))

# Per-node in-degree of h.
# NOTE(review): presumably a degree view rather than a plain dict on current
# networkx, so `.values()` may not be available -- confirm before using it.
in_degrees = h.in_degree()

In [None]:
def get_subgraph(node):
    """Return the subgraph of ``h`` around ``node``.

    The node set is ``nx.ancestors(g, node)`` plus ``node`` itself; the
    ancestors are looked up in the global graph ``g`` while the subgraph is
    taken from the global directed copy ``h``.
    """
    members = set(nx.ancestors(g, node)) | {node}
    return nx.subgraph(h, members)
def draw_subgraph(graph,db):
    """Draw ``graph`` with type-coloured nodes, save it as a PNG and show it.

    Parameters
    ----------
    graph : networkx graph
        Graph to draw. Nodes whose ``type`` attribute is ``'db'`` are drawn in
        deepskyblue and ``'app'`` nodes in darkorange; nodes with any other or
        no ``type`` are drawn in lightgrey.
    db : object
        Label used for the output file name ``sub_graph_<db>.png``. It is
        converted with ``str`` so non-string labels do not break the
        concatenation.

    Returns
    -------
    None
        The figure is written to disk, shown and then closed.
    """
    type_colors = {'db': 'deepskyblue', 'app': 'darkorange'}
    # Exactly one colour per node, in node order, so the list always lines up
    # with the nodes networkx draws (a missing 'type' no longer raises).
    color_map = [
        type_colors.get(attr.get('type'), 'lightgrey')
        for _, attr in graph.nodes(data=True)
    ]

    fig = plt.figure(3, figsize=(25, 25))
    # `size` is not a networkx drawing keyword and recent releases reject
    # unknown keywords, so it is no longer passed.
    nx.draw(graph, node_size=1000, node_color=color_map, edge_color='grey', width=1.0, with_labels=True)

    name = 'sub_graph_' + str(db) + '.png'
    plt.savefig(name)
    plt.show()
    # Release the figure explicitly: this function is called once per schema,
    # and figure number 3 would otherwise be reused with stale content.
    plt.close(fig)

In [None]:
# DB-centric view: draw one sub-graph figure for every schema value
for node in schema:
    z = get_subgraph(node)
    draw_subgraph(z, node)

In [None]:
# Preview the first five rows of the ingested sheet
db.head()