In [6]:
#base modules
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
from datetime import date,datetime
import math
from matplotlib.dates import date2num, num2date

#bokeh
from bokeh.io import show, output_notebook, output_file, curdoc
from bokeh.plotting import figure
from bokeh.layouts import gridplot, widgetbox, layout
from bokeh.models import (
    ColumnDataSource,
    Circle,
    HoverTool,
    Range1d,
    Plot,
    MultiLine)
from bokeh.transform import factor_cmap, linear_cmap
from bokeh.models.graphs import from_networkx, NodesAndLinkedEdges, EdgesAndLinkedNodes
from bokeh.models.widgets import MultiSelect

#config
from myutils import *

In [7]:
# Load the notebook configuration and the input dataset.
# NOTE(review): getconfig is not defined in this notebook; presumably it comes
# from `from myutils import *` -- verify.
_modulename = 'repayment_patterns_interactive'
cfgname="DEFAULT"
cfg = getconfig(_modulename, cfgname)

# The configured "datafolder" value is a format template; the user name is
# substituted into it here.
# NOTE(review): the user name is hard-coded, so this cell only resolves the
# path for that user.
datafolder = cfg["datafolder"].format("MichaelBoguslavsky") # user specific path
loadprefix = cfg["loadprefix"]
repdate = cfg["reportdate"]
filename = cfg["filename"]

print("Reading the input dataset...")
# The full path is built by plain string concatenation (no separator is added).
# CAUTION: unpickling can execute arbitrary code -- only load trusted files.
inst = pd.read_pickle(datafolder + loadprefix+ filename)

Reading the input dataset...


In [None]:
# Preview the first rows of `inst`, transposed so that each column is shown as a row.
inst.head().transpose()

In [8]:
#class to time code blocks
# usage:
#     with CodeTimer("label"):
#         ...code to time...
import time

class CodeTimer:
    """Context manager that prints the wall-clock duration of a code block.

    Parameters
    ----------
    name : str, optional
        Label included (in single quotes) in the printed message. When it is
        omitted or empty, no label is printed.

    Attributes
    ----------
    name : str
        The formatted label (`" '<name>'"`) or an empty string.
    start : float
        `time.perf_counter()` reading taken on entry.
    took : float
        Elapsed time in milliseconds, set on exit.
    """

    def __init__(self, name=None):
        self.name = " '"  + name + "'" if name else ''

    def __enter__(self):
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed intervals.
        self.start = time.perf_counter()
        # Return the timer so `with CodeTimer(...) as t:` can read `t.took`.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.took = (time.perf_counter() - self.start) * 1000.0
        print('Code block' + self.name + ' took: ' + str(self.took) + ' ms')
        # Implicitly returns None, so exceptions raised inside the block propagate.

In [51]:
import networkx as nx
#place n labelled points on concentric circles ("layers") around the centre (xc,yc)
#all circle radii lie within maxlayeroffset of the base radius r
#instead of fixing the number of layers, init_layout derives it from a points-per-layer count (nperlayer)

# Candidate angular strides used by innercircle_vec (the table starts at 1 and omits 53).
primetable = np.array([1,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,59,61,67,71,73])


def innercircle_vec(labels, r=.25, nlayers = 3, step=7, maxlayeroffset=.1, xc=.5, yc=.5):
    """Place labelled points on `nlayers` concentric circles around (xc, yc).

    Parameters
    ----------
    labels : indexable sequence
        Point labels; `labels[k]` becomes the key of the k-th point.
    r : float
        Mean radius of the circles.
    nlayers : int
        Number of circles. Their radii are spread evenly over
        [r - maxlayeroffset, r + maxlayeroffset]; a single layer sits at r.
    step : int
        Ignored: the angular stride is always derived per layer from
        `primetable`. Kept so existing callers keep working.
    maxlayeroffset : float
        Largest deviation of a layer radius from `r`.
    xc, yc : float
        Centre of the circles.

    Returns
    -------
    dict
        Maps each label to a length-2 numpy array `[x, y]`.

    Notes
    -----
    Each layer gets a random angular offset from `np.random.random`, so the
    result depends on numpy's global random state.
    """
    n = len(labels)
    if nlayers == 1:
        layeroffset = 0.
    else:
        layeroffset = 2 * maxlayeroffset / (nlayers - 1)
    mid = (nlayers - 1) / 2

    # capacity of each layer, proportional to the layer radius
    layernp = []
    for i in range(nlayers):
        share = ((n + nlayers) / (nlayers * r)) * (r + (i - mid) * layeroffset)
        layernp.append(int(np.floor(share)))

    phi0 = np.random.random(len(layernp)) * 1.
    filled = np.cumsum(layernp)   # points that fit up to and including each layer
    stride_of = {}                # layer index -> angular stride, computed on first use

    pos = {}
    for k in range(n):
        layeridx = np.flatnonzero(filled > k)[0]   # first layer with room for point k
        npt = layernp[layeridx]
        if layeridx not in stride_of:
            # table entries that do not divide the layer size; pick the one
            # closest to npt/3.5
            candidates = primetable[[npt % p > 0 for p in primetable]]
            stride_of[layeridx] = min(candidates, key=lambda p: abs(p - (npt / 3.5)))
        stride = stride_of[layeridx]

        rl = r + (layeridx - mid) * layeroffset
        phi = 2*np.pi*k*stride/npt + phi0[layeridx]
        pos[labels[k]] = np.array([xc + rl * np.cos(phi), yc + rl * np.sin(phi)])
    return pos
    
    
def init_layout(G, nodes, R=.25, nperlayer=29):
    """Compute initial positions for every node: sellers on rings, buyers around sellers.

    Parameters
    ----------
    G : networkx graph
        Graph whose `neighbors(seller)` are the buyers linked to a seller.
    nodes : pandas.DataFrame
        Must have columns "name", "type" and "centrality". Rows with
        type == "customer" are treated as sellers.
    R : float
        Mean radius of the seller rings (passed to innercircle_vec as `r`).
    nperlayer : int
        Points per layer used to derive the number of layers
        (`count // nperlayer + 1`).

    Returns
    -------
    dict
        Maps node name to a length-2 numpy array `[x, y]`.
    """
    #sellers, most central first
    sellers = (nodes.loc[nodes["type"] == "customer", :]
               .sort_values(by="centrality", ascending=False)
               .reset_index(drop=True))
    seller_names = list(sellers["name"])
    pos = innercircle_vec(seller_names, r=R, nlayers=len(seller_names)//nperlayer+1)

    #buyers - around the first seller linked to them
    for seller in seller_names:
        bnotalloc = [b for b in G.neighbors(seller) if b not in pos]

        nb = len(bnotalloc)
        if nb > 0:
            spread = 0.004*np.sqrt(nb//nperlayer)
            # dict.update (rather than dict(pos, **new)) so that node labels
            # need not be strings.
            pos.update(innercircle_vec(bnotalloc, r=.01+spread,
                                       nlayers=nb//nperlayer+1, maxlayeroffset=spread,
                                       xc=pos[seller][0], yc=pos[seller][1]))
    return pos


In [48]:
def show_network(inst0, from_nodes=None, from_f= "customer_name_1", to_f= "debtor_name_1",
                 edge_color_attr="has_impairment1", edge_thick_attr="invoice_amount",
                 tools = "box_select,tap,wheel_zoom,reset,pan,save",
                 title = 'Network Graph',
                 plot_w = 1200,
                 plot_h = 1200,
                 nx_param = {"layout_function": nx.spring_layout,
                            "k": 0.035,
                            "iterations": 0,
                            "scale": 1.,
                            "dim": 2},
                 nodes_palette = [TTQcolor['sky'], TTQcolor['richPeach'], TTQcolor['Salmon']],
                 edges_palette = [TTQcolor['azureBlue'], TTQcolor['warningRed']]):
    """Build and show an interactive Bokeh network graph of instruments.

    Each distinct (`from_f`, `to_f`) pair in the data becomes one directed
    edge; the graph is laid out with `init_layout` as starting positions and
    rendered with `bokeh.io.show`.

    Parameters
    ----------
    inst0 : pandas.DataFrame
        Instrument table containing the columns named by `from_f`, `to_f`,
        `edge_color_attr` and `edge_thick_attr`.
    from_nodes : iterable, optional
        If given, only rows whose `from_f` value is in this collection are used.
    from_f, to_f : str
        Column names of the edge source and edge target.
    edge_color_attr : str
        Column aggregated with `.any()` per edge; stored as the edge
        "highlight" value that drives the edge colour.
    edge_thick_attr : str
        Column summed per edge and mapped to a thickness bucket 1..5. The
        value is computed but not used by the current glyphs (line widths are fixed).
    tools, title, plot_w, plot_h :
        Passed to `bokeh.plotting.figure`.
    nx_param : dict
        Extra keyword arguments for `from_networkx` (layout function and its
        options). Read only; not modified here.
    nodes_palette, edges_palette : list
        Colours for node types and for edge highlight values.

    Returns
    -------
    None
    """
    # narrow the instrument set to from_nodes as source nodes
    if from_nodes is not None:
        inst = inst0.loc[inst0[from_f].isin(list(from_nodes)), :]
    else:
        inst = inst0

    g_e = inst.groupby([from_f, to_f])

    def thicknessfunc(x, thirng = [1,2,3,4,5], xmin=1., xmax=1e7):
        # map the summed amount linearly from [xmin, xmax] onto the thickness buckets
        t = np.round(np.interp(x.sum(), np.linspace(xmin, xmax, len(thirng)), thirng))
        return t

    edge_color_agg = lambda x: x.any()
    edge_thick_agg = thicknessfunc

    # group instruments to edges
    edges = g_e.agg({from_f: "first", to_f: "first",
                     edge_color_attr: edge_color_agg, edge_thick_attr: edge_thick_agg})

    edges = edges.rename(columns = {from_f: "from_f", to_f: "to_f", edge_color_attr: "edge_color",
                                    edge_thick_attr: "edge_thick"})

    edge_tuples = list(zip(edges["from_f"], edges["to_f"]))
    G = nx.DiGraph()
    G.add_edges_from(edge_tuples)

    # centrality calculation
    centrality = nx.degree_centrality(G)
    nodes = pd.DataFrame(list(centrality.items()), columns=["name", "centrality"]).sort_values(
        by="centrality", ascending=False).reset_index(drop=True)

    # a node is a "customer" if it is the source of at least one edge;
    # the set is built once instead of re-creating a list for every node
    customers = set(edges["from_f"])
    nodes["type"] = nodes.name.apply(lambda x: "customer" if x in customers else "buyer")

    nodes["markersize"] = np.ones(nodes.shape[0])*4
    nodes.loc[nodes.type=="customer", "markersize"] = 8

    # nodes nx attributes
    node_size = {k:v for k, v in zip(nodes.name, nodes.markersize)}
    node_type = {k:v for k, v in zip(nodes.name, nodes.type)}
    # pair the values positionally with zip; integer [] lookup on the
    # MultiIndex-ed Series relied on the deprecated positional fallback
    edges_highlight = dict(zip(edge_tuples, edges["edge_color"]))
    nx.set_node_attributes(G, name='size', values=node_size)
    nx.set_node_attributes(G, name='type', values=node_type)
    nx.set_edge_attributes(G, name='highlight', values=edges_highlight)

    position1 = init_layout(G, nodes)
    graph = from_networkx(G, pos=position1, **nx_param)

    # The edge rows of the renderer follow G.edges() order, which differs from
    # the groupby order of `edges` whenever a node is both a target and a
    # source. Look each value up by its (start, end) pair so that colours stay
    # attached to the right edge.
    edge_data = graph.edge_renderer.data_source.data
    edge_data['highlight'] = [edges_highlight[(u, v)]
                              for u, v in zip(edge_data['start'], edge_data['end'])]
    graph.edge_renderer.glyph = MultiLine(line_color=linear_cmap('highlight', edges_palette, False,True), #setting edge colors
                                          line_alpha=0.7, line_width=0.2)

    # node columns are all replaced in the same (centrality-sorted) order, so rows stay consistent
    graph.node_renderer.data_source.data['index'] = [i for i in nodes['name']] #setting the company names
    graph.node_renderer.data_source.data['size'] = [s for s in nodes['markersize']] #setting node sizes
    graph.node_renderer.data_source.data['type'] = [s for s in nodes['type']] #setting types
    graph.node_renderer.glyph = Circle(size='size',
        fill_color=factor_cmap('type', nodes_palette, nodes['type'].unique()), fill_alpha=0.8)
    graph.node_renderer.nonselection_glyph = Circle(size='size',
        fill_color=factor_cmap('type', nodes_palette, nodes['type'].unique()),
        fill_alpha=0.1, line_alpha=0.05)
    graph.edge_renderer.nonselection_glyph = MultiLine(
        line_color=linear_cmap('highlight', edges_palette, False,True), #creating non-selected edges
        line_alpha=0.05, line_width=0.05)

    graph.node_renderer.hover_glyph = Circle(size='size', fill_alpha=0.0, line_width=3,
                                             line_color='green') #creating hover settings for circles
    graph.edge_renderer.hover_glyph = MultiLine(line_color='#abdda4', line_width=0.8) #creating hover settings for edges

    graph.inspection_policy = NodesAndLinkedEdges()
    TOOLTIPS = [('Company Name', '@index'), ('Company Type', '@type')] #hover tooltips

    plot = figure(title = title, plot_width = plot_w, plot_height=plot_h, tools=tools,
                  x_range=(-1.2,1.2), y_range=(-1.2,1.2))
    plot.add_tools(HoverTool(tooltips=TOOLTIPS))
    plot.renderers.append(graph)
    plot.xgrid.grid_line_color = None
    plot.ygrid.grid_line_color = None
    plot.axis.visible = False

    show(plot)

In [53]:
# Rank customers by their total of has_impairment1 (highest first), keep the
# first 40 and plot the network restricted to those customers.
custlist = list(
    inst.groupby("customer_name_1")
        .agg({"has_impairment1": "sum"})
        .sort_values(by="has_impairment1", ascending=False)
        .index[:40]
)
show_network(inst, from_nodes=custlist)

In [52]:
# Plot the network for the full instrument set (no filter on source nodes).
show_network(inst)

In [60]:
# Build an undirected graph with one edge per distinct (customer, debtor) pair.
from_f, to_f = "customer_name_1", "debtor_name_1"
g_e = inst.groupby([from_f, to_f])

# collapse instruments to one row per edge and normalise the column names
edges = g_e.agg({from_f: "first", to_f: "first"}).rename(
    columns={from_f: "from_f", to_f: "to_f"})

edge_tuples = list(zip(edges["from_f"], edges["to_f"]))
G = nx.Graph()
G.add_edges_from(edge_tuples)

In [65]:
# Node set of the largest connected component of the undirected graph G.
largest_cc = max(nx.connected_components(G), key=len)



17
79
3
2585
2
4
10
9
197
3
26
20
3
2
12
10
4
42
3
7
16
2
23
8
2
45
5
9
5
5
13
11
8
3
5
59
14
5
2
28
13
7
14
7
2
