# Initial data preprocessing

In [None]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
%matplotlib inline  
#from IPython.display import Image
from collections import Counter
import networkx as nx 
import warnings
# Suppress every warning for the remainder of the session.
warnings.filterwarnings("ignore")

# Read the reference ("soll") list of registered NPAs from a
# semicolon-separated, Latin-1 encoded CSV file.
data = pd.read_csv('NPA_soll.csv', sep=";", encoding='latin1')

# Normalise the three text columns to upper case so that values differing
# only in letter case collapse to a single entry below.
data = data.assign(**{
    name: data[name].str.upper()
    for name in ('App name', 'NPA Name', 'Account Type (System / Service)')
})

# Arrays of the distinct values found in each normalised column.
apps = pd.unique(data['App name'])
npas = pd.unique(data['NPA Name'])
types = pd.unique(data['Account Type (System / Service)'])


# Graph Creation

In [None]:
# Build the reference graph from `data`: one node per application and per
# NPA, plus one edge per row, coloured by the value in column 11.
g = nx.Graph()

# The 'type' attribute is what later decides each node's colour. A name that
# occurs in both lists ends up typed 'npa' because NPAs are added last.
for app_name in apps:
    g.add_node(app_name, type='app')
for npa_name in npas:
    g.add_node(npa_name, type='npa')

# Edge colour per account type; anything unrecognised (or missing) is black.
edge_colors = {
    'system': 'grey',
    'service': 'green',
    'generic': 'blue',
    'locked': 'red',
}

# One pass over the three positional columns instead of a scalar `iloc`
# lookup per cell.
# NOTE(review): columns 4 / 0 / 11 are presumably app name, NPA name and
# account type -- confirm against the CSV header.
for source, target, kind in zip(data.iloc[:, 4], data.iloc[:, 0], data.iloc[:, 11]):
    # Match case-insensitively: the account-type column is upper-cased during
    # preprocessing, so a case-sensitive test against these lower-case keys
    # would send every row to the 'black' fallback.
    key = kind.lower() if isinstance(kind, str) else kind
    g.add_edge(source, target, color=edge_colors.get(key, 'black'), weight=1)

# Exactly one colour per node, in g.nodes() order, as nx.draw expects.
# Nodes without a known 'type' (e.g. created implicitly by add_edge) get a
# neutral colour rather than raising KeyError or leaving the list short.
node_colors = {'app': 'lime', 'npa': 'cyan'}
color_map = [
    node_colors.get(attr.get('type'), 'lightgrey')
    for _, attr in g.nodes(data=True)
]


# Graph Query (External) 

In [None]:
# App-to-app query: for the entered application, list every other node that
# is reachable through a shared neighbour (the NPA), together with that NPA.
# Example input: NSP-SF. Enter 'exit' to stop.
query = input('Application:')
while query != 'exit':
    # Try the input exactly as typed first; otherwise fall back to the
    # trimmed, upper-cased form, since node labels are upper-cased during
    # preprocessing.
    node = query if query in g else query.strip().upper()
    if node not in g:
        # nx.all_neighbors raises NetworkXError for an unknown node, which
        # would end the whole session on a simple typo.
        print('Unknown application:', query)
    else:
        for npa in nx.all_neighbors(g, node):
            for other in nx.all_neighbors(g, npa):
                if other != node:
                    print(node, '-2App-', other, '(using NPA:', npa, ')')
    print()
    query = input('Application:')

In [None]:
# Display the array of unique application names (the labels of the 'app' nodes).
apps

# Graph Metrics 

In [None]:
# Extract reference graph facts & metrics
print('Reference Graph')
# nx.is_connected raises on a graph without nodes, so guard that case.
print('Do we have a fully connected graph? ',
      g.number_of_nodes() > 0 and nx.is_connected(g))

# nx.connected_component_subgraphs was removed in NetworkX 2.4; build the
# per-component subgraphs explicitly. `.copy()` keeps them independent of `g`,
# as the removed helper did by default.
d = [g.subgraph(component).copy() for component in nx.connected_components(g)]
print('The graph contains', len(d), 'sub-graph')

# nx.isolates returns a lazy iterator; materialise it so the result is usable.
isolated_nodes = list(nx.isolates(g))
print('Isolated nodes:', len(isolated_nodes))

# Directed view of the graph: every undirected edge becomes two arcs, so
# K counts arcs and K / N equals the average undirected degree.
h = g.to_directed()
N, K = h.order(), h.size()
avg_deg = float(K) / N if N else 0.0
print ("# Nodes: ", N, "(Applications & NPAs)")
print ("# Edges: ", K)
print ("Average Degree: ", avg_deg)

# In-degree per node as a plain dictionary node -> degree (in_degree() is a
# view object in NetworkX 2.x). sorted(set(in_degrees.values())) then yields
# the distinct degree values.
in_degrees = dict(h.in_degree())

# Graph Drawing

In [None]:
# Draw the labelled graph on figure 3 using a 90x90 inch canvas.
plt.figure(3, figsize=(90, 90))
edges = g.edges()
# Per-edge colour taken from each edge's 'color' attribute, in edge order.
colors = [edge_attrs['color'] for _, _, edge_attrs in g.edges(data=True)]
nx.draw(
    g,
    with_labels=True,
    node_color=color_map,
    edge_color=colors,
)
plt.show()
#plt.savefig('graph_test1.png')
#Image(filename='graph_test1.png')
