In [None]:
import pandas as pd
import plotly.graph_objects as go
import networkx as nx
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the HELOC table and encode the target column numerically
# ('Bad' -> 1, 'Good' -> 0); every other column is left as read from disk.
risk_encoding = {'RiskPerformance' : { 'Bad' : 1, 'Good' : 0}}
df = pd.read_csv('data/FICO/heloc.csv').replace(risk_encoding)

In [None]:
# Load the node table; each row is treated as one graph node by the cells below.
nodes = pd.read_csv('./data/FICO/heloc.csv')
# Plain NumPy view of the node table (all columns, including the target column).
node_features = nodes.to_numpy()

# Load the whitespace-separated edge list (no header row; columns 0 and 1 are
# the two endpoints of each edge).
# - dtype is given as the string 'int32' so this cell does not depend on a
#   NumPy import that the notebook never performs.
# - sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True.
edges = pd.read_csv(
    './data/FICO/heloc_edges.txt',
    sep=r'\s+',
    header=None,
    dtype='int32',
)

In [None]:
## Plot full graph
# Build an undirected graph from the edge table (column 0 <-> column 1).
# Adding all edges in one call avoids building a Series per row with iterrows().
G = nx.Graph()
G.add_edges_from(zip(edges[0].tolist(), edges[1].tolist()))

# Assign positions with the spring layout. The seed is fixed so that
# Restart & Run All reproduces the same picture (the layout is randomised
# otherwise). The positions are also stored on the nodes under 'pos'.
pos = nx.spring_layout(G, seed=42)
nx.set_node_attributes(G, pos, 'pos')

# Edge coordinates: each edge contributes (x0, x1, None) / (y0, y1, None);
# the None entries break the line so edges are drawn as separate segments.
edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=0.5, color='#888'),
    mode='lines')

# Node coordinates, plus the degree of each node. The degree drives the marker
# colour (so the 'Node Connections' colorbar is meaningful) and the hover text.
node_ids = list(G.nodes())
node_x = [pos[node][0] for node in node_ids]
node_y = [pos[node][1] for node in node_ids]
node_degree = [G.degree[node] for node in node_ids]
node_text = [
    f'Node {node}<br># of connections: {degree}'
    for node, degree in zip(node_ids, node_degree)
]

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    text=node_text,
    marker=dict(
        showscale=True,
        colorscale='YlGnBu',
        color=node_degree,
        size=10,
        # Nested title dict instead of the deprecated `titleside` shorthand.
        colorbar=dict(
            thickness=15,
            title=dict(text='Node Connections', side='right'),
            xanchor='left'
        ),
        line_width=2))

# Nested title dict instead of the deprecated `titlefont_size` shorthand.
# The former text-less annotation is dropped: it carried no content.
fig = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        title=dict(text='<br>Complete Network graph', font=dict(size=16)),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
fig.show()


In [None]:
## Plot a specific node and its direct connections
# Node to focus on; change this single value to inspect another node.
focus_node = 5

# Index labels of the rows whose 'RiskPerformance' is 'Bad'.
# NOTE(review): this assumes a graph node id equals the row label in `nodes`
# -- confirm against how the edge file was generated.
default_nodes = nodes[nodes['RiskPerformance'] == 'Bad'].index.tolist()
# Set copy for constant-time membership tests while colouring the nodes.
default_node_set = set(default_nodes)

# Subgraph made of the focus node and its neighbours
ego_net = nx.ego_graph(G, focus_node)

# Reuse the full-graph layout, restricted to the nodes of the subgraph
sub_pos = {node: pos[node] for node in ego_net.nodes()}

# Edge coordinates; None separates consecutive segments in the line trace
edge_x = []
edge_y = []
for source, target in ego_net.edges():
    x0, y0 = sub_pos[source]
    x1, y1 = sub_pos[target]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=0.5, color='#888'),
    mode='lines')

# Node coordinates, colours and hover labels
node_x = []
node_y = []
node_color = []
node_text = []
for node in ego_net.nodes():
    x, y = sub_pos[node]
    node_x.append(x)
    node_y.append(y)
    if node == focus_node:
        node_color.append('blue')   # Focus node in blue
        node_text.append(f'Node {node} (focus)')
    elif node in default_node_set:
        node_color.append('red')    # Default ('Bad') nodes in red
        node_text.append(f'Node {node} (Bad)')
    else:
        node_color.append('green')  # Non-default nodes in green
        node_text.append(f'Node {node} (Good)')

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    text=node_text,
    marker=dict(
        size=10,
        color=node_color,  # Use node-specific colors
        line=dict(width=2, color='DarkSlateGrey')))

# Create the figure. The title uses the nested dict form instead of the
# deprecated `titlefont_size` shorthand, and the former text-less annotation
# is dropped because it carried no content.
fig = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        title=dict(
            text=f'<br>Network graph focused on Node {focus_node}',
            font=dict(size=16)),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

fig.show()


In [None]:
## Check if node 5 is defaulter
# Look the node up by index label (.loc) rather than by position (.iloc), so
# this cell resolves node ids the same way as the cells that build
# `default_nodes` and `nodes_subset`. On the default 0..n-1 index both agree.
# NOTE(review): assumes node id 5 corresponds to row label 5 -- confirm.
node_5_row = nodes.loc[5]

# Value of the 'RiskPerformance' column for node 5
risk_performance_node_5 = node_5_row['RiskPerformance']

# Print the risk performance of node 5
print(f"Risk performance of node 5: {risk_performance_node_5}")

In [None]:
# Collect the ids of every node in the ego network (the focus node plus its
# neighbours), in the order the subgraph reports them.
nodes_to_display = [node_id for node_id in ego_net.nodes()]

# Pull the matching rows (all columns) out of the node table by index label.
nodes_subset = nodes.loc[nodes_to_display, :]

# Show the selected rows as the cell output.
nodes_subset

In [None]:
# Summary statistics for the rows selected into `nodes_subset`.
nodes_subset.describe()

In [None]:
# One histogram per feature of the selected subset, laid out on a 12x8 canvas.
hist_figsize = (12, 8)
nodes_subset.hist(figsize=hist_figsize)
plt.tight_layout()
plt.show()

In [None]:
# Fixed y-axis range shared by all boxes (adjust as needed).
y_limit = (-5, 170)

# Box plot per feature, drawn on an explicitly created axes object.
box_fig, box_ax = plt.subplots(figsize=(12, 8))
sns.boxplot(data=nodes_subset, ax=box_ax)
box_ax.set_ylim(y_limit)  # Clamp the y-axis to the chosen range
plt.setp(box_ax.get_xticklabels(), rotation=45)  # Tilt the feature names
plt.tight_layout()
plt.show()