In [2]:
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 27 21:35:59 2017

Visualize the cluster structure produced by SAS PROC VARCLUS and show where
LexisNexis variables fit into the existing clusters

@author: Jingmin
"""

import networkx as nx
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.plotly as py

# Start from an empty undirected graph; later cells add nodes and edges to it.
G = nx.Graph()

# Load the two cluster-structure exports.
# NOTE(review): these are absolute paths on the author's machine; the notebook
# will not run elsewhere without editing them.
fmcc = pd.read_csv(r'C:\Users\GlowingToilet\Google Drive\Projects\clus-network\VC1.csv')
ln = pd.read_csv(r'C:\Users\GlowingToilet\Google Drive\Projects\clus-network\VC2.csv')

# Clean up var_name: drop every 'WOE_' occurrence so only the bare name remains.
for frame in (fmcc, ln):
    frame['var_name'] = frame['var_name'].apply(
        lambda name: name.replace('WOE_', '')
    )

print('Done')

Done


In [3]:

# Add nodes and edges

# Minimum R-squared a variable must have with a cluster for an edge to be drawn.
RSQ_THRESHOLD = 0.25


def add_cluster_structure(graph, frame, var_type, threshold=RSQ_THRESHOLD):
    """Add the clusters and variables described by ``frame`` to ``graph``.

    Each row of ``frame`` must unpack (via ``itertuples``) into exactly seven
    fields: the index, a cluster label, a variable name, two R-squared values,
    a second cluster label and one trailing field that is ignored.
    Presumably the second R-squared / cluster pair is the "next closest"
    cluster from the PROC VARCLUS output -- TODO confirm against the CSV.

    Parameters
    ----------
    graph : networkx.Graph
        Graph that is modified in place.
    frame : pandas.DataFrame
        Cluster-structure table with the column layout described above.
    var_type : str
        Value stored in the ``type`` attribute of every variable node
        (cluster nodes always get ``type='clus'``).
    threshold : float, optional
        An edge is added only when its R-squared is strictly greater than
        this value.
    """
    for row in frame.itertuples():
        _, clus, var, rsq, rsq_n, clus_n, _ = tuple(row)

        graph.add_node(clus, type='clus', cluster=clus)
        graph.add_node(var, type=var_type, cluster=clus)
        graph.add_node(clus_n, type='clus', cluster=clus_n)

        if rsq > threshold:
            graph.add_edge(clus, var, rsq=rsq)
            # The edge to the second cluster is only considered when the edge
            # to the variable's own cluster was drawn.
            # NOTE(review): nesting kept from the original -- confirm intended.
            if rsq_n > threshold:
                graph.add_edge(clus_n, var, rsq=rsq_n)


add_cluster_structure(G, fmcc, 'fmcc var')
add_cluster_structure(G, ln, 'ln var')
print('Done')

Done


In [4]:
# Generate layout
# spring_layout starts from random node positions. Seed NumPy's global random
# generator first so that Restart & Run All reproduces the same picture.
LAYOUT_SEED = 42
np.random.seed(LAYOUT_SEED)
pos = nx.spring_layout(G)
print('Done')

Done


In [9]:
# Customize the plot

# Edge trace: every edge contributes two end points followed by None, so all
# edges live in one trace and the None breaks the line between them.
# NOTE(review): Plotly documents scatter `line.width` as a single number; the
# per-edge width list below is kept from the original but is probably not
# honoured when rendering -- confirm, or draw one trace per edge.
edge_trace = go.Scatter(
    x=[],
    y=[],
    line=go.Line(
        width=[],
        color='#CCC'
    ),
    hoverinfo='none',   # the string 'none' disables hover; None leaves the default
    mode='lines'        # 'line' is not a valid scatter mode
)

for src, dst in G.edges():
    x0, y0 = pos[src]
    x1, y1 = pos[dst]
    edge_trace['x'] += [x0, x1, None]
    edge_trace['y'] += [y0, y1, None]
    edge_trace['line']['width'].append(G[src][dst]['rsq'] / 2)

# Node trace: one marker per graph node, coloured by cluster number; size and
# symbol tell cluster nodes and the two kinds of variable nodes apart.
node_trace = go.Scatter(
    x=[],
    y=[],
    text=[],
    marker=go.Marker(
        showscale=True,
        colorscale='Portland',
        color=[],
        size=[],
        symbol=[],
        colorbar=dict(
            thickness=15,
            title='Cluster',
            xanchor='left',
            titleside='right'
        ),
        line=dict(width=1)
    ),
    mode='markers',
    hoverinfo='text',
)

for node in G.nodes():
    attrs = G.node[node]
    x, y = pos[node]
    node_trace['x'].append(x)
    node_trace['y'].append(y)

    if 'var' in attrs['type']:
        # Variable nodes: small markers, circle for 'fmcc var', x otherwise.
        size = 5
        symbol = 'circle' if attrs['type'] == 'fmcc var' else 'x'
    else:
        # Cluster nodes: larger diamonds.
        size = 15
        symbol = 'diamond'
    node_trace['marker']['size'].append(size)
    node_trace['marker']['symbol'].append(symbol)

    # The colour is the integer formed by the last two characters of the
    # cluster label -- assumes labels end in a one- or two-digit number;
    # TODO confirm against the data.
    node_trace['marker']['color'].append(int(attrs['cluster'][-2:]))
    node_trace['text'].append(node)
print('Done')

Done


In [12]:
# Make the plot on Plotly

# Footer annotation: link to the repository plus a short marker legend.
caption = (
    "Github: <a href='https://github.com/jingmin1987/clus-network/'>"
    'https://github.com/jingmin1987/clus-network/ </a>'
    '<br>x: LN vars; diamond: Cluster Component'
)

# Both axes are purely positional, so hide grid, zero line and tick labels.
hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)

layout = go.Layout(
    title='<br>Variable Cluster Structure for Medium FICO without LN Clusters',
    titlefont=dict(size=16),
    showlegend=False,
    hovermode='closest',
    margin=dict(b=20, l=5, r=5, t=40),
    annotations=[
        dict(
            showarrow=False,
            text=caption,
            xref="paper",
            yref="paper",
            x=0.005,
            y=-0.002,
        )
    ],
    xaxis=go.XAxis(**hidden_axis),
    yaxis=go.YAxis(**hidden_axis),
)

# Edges first, then nodes, so the markers are drawn on top of the lines.
fig = go.Figure(data=go.Data([edge_trace, node_trace]), layout=layout)

py.iplot(fig, filename='varclus')