# Nifty Network Analysis - Prototype

This notebook demonstrates network analysis on stock market data from the NSE Nifty 500 index. We construct a correlation network from stock price movements and analyze its topology to identify scale-free characteristics.

## Setup


In [1]:
import niftynet as nn
import numpy as np
import pandas as pd
import networkx as nx
from plotly import graph_objects as go

### Configure Analysis Parameters

Define the time range and feature to analyze for the stock data.

In [2]:
START = "2022-04-01"
END = "2025-04-01"
FEATURE_COLUMN = "Close"

### Fetch Index Data

Retrieve the list of companies from the NSE archives.

In [3]:
# Fetch and catch all companies data
index_df = nn.data.fetch_index(
    url = "https://nsearchives.nseindia.com/content/equities/EQUITY_L.csv",
    ticker_column = "SYMBOL",
)

index_df.head(5)

Unnamed: 0,Symbol,NAME OF COMPANY,SERIES,DATE OF LISTING,PAID UP VALUE,MARKET LOT,ISIN NUMBER,FACE VALUE
0,20MICRONS,20 Microns Limited,EQ,06-OCT-2008,5,1,INE144J01027,5
1,21STCENMGM,21st Century Management Services Limited,EQ,03-MAY-1995,10,1,INE253B01015,10
2,360ONE,360 ONE WAM LIMITED,EQ,19-SEP-2019,1,1,INE466L01038,1
3,3IINFOLTD,3i Infotech Limited,EQ,22-OCT-2021,10,1,INE748C01038,10
4,3MINDIA,3M India Limited,EQ,13-AUG-2004,10,1,INE470A01017,10


### Fetch Stock Price Data

Download historical closing prices for all companies in the index for the specified date range.

In [4]:
# Fetch and catch stock prices for all nifty 500 companies and cache them in user data
stock_prices = nn.data.fetch_stock_data(
    start_date=START,
    end_date=END,
    column="Close"
)

stock_prices.head(5)

Unnamed: 0_level_0,20MICRONS.NS,21STCENMGM.NS,360ONE.NS,3IINFOLTD.NS,3MINDIA.NS,3PLAND.NS,5PAISA.NS,63MOONS.NS,A2ZINFRA.NS,AAATECH.NS,...,ZENTEC.NS,ZFCVINDIA.NS,ZIMLAB.NS,ZODIAC.NS,ZODIACLOTH.NS,ZOTA.NS,ZUARI.NS,ZUARIIND.NS,ZYDUSLIFE.NS,ZYDUSWELL.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-01,79.83754,28.229397,387.545227,53.799999,18554.824219,15.15,369.5,265.963104,10.1,44.370197,...,201.103104,7577.655273,,87.435066,104.699997,316.900177,138.449997,176.822998,343.822418,308.238831
2022-04-04,83.229652,28.229397,386.734863,56.450001,18968.603516,15.85,378.950012,276.177277,10.6,45.656288,...,206.687927,7646.504395,,88.782524,107.900002,324.030487,166.100006,198.907455,352.553772,311.20343
2022-04-05,84.311188,28.762028,383.756256,59.25,19773.583984,16.5,376.700012,289.944183,11.1,45.656288,...,204.643494,7893.84668,,89.231682,108.5,319.227417,173.800003,196.600861,354.542664,312.39917
2022-04-06,86.03183,29.246237,398.033264,62.200001,19993.988281,17.299999,374.049988,304.401947,11.5,45.656288,...,203.596329,7891.757324,,93.673302,110.449997,320.267273,174.75,195.423035,358.471741,319.059601
2022-04-07,88.391556,29.004133,401.822174,67.550003,19606.037109,18.25,371.299988,303.612427,11.95,45.656288,...,213.569214,7860.964355,,98.314545,108.75,305.709656,167.949997,191.398743,358.132172,314.958588


### Data Quality Check

Remove stocks with consecutive missing values to ensure data quality for correlation calculations.

In [5]:
# remove columns that have NaN on two consecutive rows

consec_nan = (stock_prices.isna() & stock_prices.isna().shift(1)).any(axis=0)
stock_prices = stock_prices.loc[:, ~consec_nan]

stock_prices

Unnamed: 0_level_0,20MICRONS.NS,21STCENMGM.NS,360ONE.NS,3IINFOLTD.NS,3MINDIA.NS,3PLAND.NS,5PAISA.NS,63MOONS.NS,A2ZINFRA.NS,AAATECH.NS,...,ZENITHSTL.NS,ZENSARTECH.NS,ZENTEC.NS,ZFCVINDIA.NS,ZODIACLOTH.NS,ZOTA.NS,ZUARI.NS,ZUARIIND.NS,ZYDUSLIFE.NS,ZYDUSWELL.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-01,79.837540,28.229397,387.545227,53.799999,18554.824219,15.150000,369.500000,265.963104,10.100000,44.370197,...,1.75,356.796722,201.103104,7577.655273,104.699997,316.900177,138.449997,176.822998,343.822418,308.238831
2022-04-04,83.229652,28.229397,386.734863,56.450001,18968.603516,15.850000,378.950012,276.177277,10.600000,45.656288,...,1.80,362.376831,206.687927,7646.504395,107.900002,324.030487,166.100006,198.907455,352.553772,311.203430
2022-04-05,84.311188,28.762028,383.756256,59.250000,19773.583984,16.500000,376.700012,289.944183,11.100000,45.656288,...,1.85,365.734406,204.643494,7893.846680,108.500000,319.227417,173.800003,196.600861,354.542664,312.399170
2022-04-06,86.031830,29.246237,398.033264,62.200001,19993.988281,17.299999,374.049988,304.401947,11.500000,45.656288,...,1.90,362.897003,203.596329,7891.757324,110.449997,320.267273,174.750000,195.423035,358.471741,319.059601
2022-04-07,88.391556,29.004133,401.822174,67.550003,19606.037109,18.250000,371.299988,303.612427,11.950000,45.656288,...,1.95,363.180756,213.569214,7860.964355,108.750000,305.709656,167.949997,191.398743,358.132172,314.958588
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-24,203.281937,64.769997,967.858887,23.719999,28048.984375,40.369999,393.000000,824.934814,16.209999,81.635117,...,7.01,688.238770,1519.958374,12032.494141,93.150002,806.791382,193.979996,252.946686,913.238586,335.028717
2025-03-25,201.350647,66.059998,966.919373,22.500000,27984.884766,41.250000,378.450012,827.880981,15.460000,77.378792,...,6.87,706.449036,1465.385254,11980.119141,93.290001,803.793457,186.309998,244.265640,897.073669,332.725586
2025-03-26,194.282578,67.370003,948.971436,21.670000,27476.615234,39.410000,366.500000,819.541748,14.850000,74.730843,...,6.46,693.124451,1440.819946,12671.083984,90.339996,804.243164,182.369995,239.730759,897.222046,322.645721
2025-03-27,191.554886,68.040001,960.590698,21.980000,27420.873047,38.000000,373.600006,829.628723,14.100000,71.151222,...,6.24,679.306396,1455.149658,12636.932617,84.709999,811.388245,184.910004,234.488235,880.661743,337.112488


### Calculate Correlation Matrix

Compute the Pearson correlation coefficients between all pairs of stocks to identify relationships in price movements.

# Constructing the Network

## Correlation Matrix

In [6]:
corr_mat = nn.graph.return_correlation_matrix(price_data=stock_prices)

corr_mat

Unnamed: 0,20MICRONS.NS,21STCENMGM.NS,360ONE.NS,3IINFOLTD.NS,3MINDIA.NS,3PLAND.NS,5PAISA.NS,63MOONS.NS,A2ZINFRA.NS,AAATECH.NS,...,ZENITHSTL.NS,ZENSARTECH.NS,ZENTEC.NS,ZFCVINDIA.NS,ZODIACLOTH.NS,ZOTA.NS,ZUARI.NS,ZUARIIND.NS,ZYDUSLIFE.NS,ZYDUSWELL.NS
20MICRONS.NS,1.000000,0.105878,0.160837,0.257864,0.123279,0.180876,0.222435,0.161437,0.177369,0.232494,...,0.118122,0.214476,0.222738,0.089103,0.240157,0.230273,0.264915,0.282933,0.174524,0.179393
21STCENMGM.NS,0.105878,1.000000,0.068376,0.066487,0.010294,0.045988,0.036599,0.104943,0.053897,0.054572,...,0.052976,0.027655,0.060868,0.090163,0.050555,0.068983,0.082858,0.143728,0.065738,0.039057
360ONE.NS,0.160837,0.068376,1.000000,0.178522,0.114812,0.127147,0.157618,0.206344,0.138506,0.112942,...,0.088536,0.152735,0.238372,0.026992,0.112344,0.122784,0.158415,0.227684,0.070033,0.173506
3IINFOLTD.NS,0.257864,0.066487,0.178522,1.000000,0.135449,0.154288,0.313303,0.230050,0.156580,0.230009,...,0.185775,0.289864,0.228887,0.129892,0.242373,0.210568,0.337105,0.353974,0.197061,0.216526
3MINDIA.NS,0.123279,0.010294,0.114812,0.135449,1.000000,0.073565,0.140870,0.124720,0.146946,0.118606,...,0.065209,0.180239,0.148242,0.154816,0.201648,0.096671,0.114740,0.150749,0.101287,0.118651
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZOTA.NS,0.230273,0.068983,0.122784,0.210568,0.096671,0.084644,0.172249,0.223893,0.160920,0.153297,...,0.070294,0.167863,0.260242,0.088935,0.200646,1.000000,0.256534,0.260719,0.176819,0.134281
ZUARI.NS,0.264915,0.082858,0.158415,0.337105,0.114740,0.126566,0.273876,0.287595,0.193714,0.196275,...,0.123483,0.234590,0.263231,0.141282,0.292698,0.256534,1.000000,0.615109,0.205451,0.231867
ZUARIIND.NS,0.282933,0.143728,0.227684,0.353974,0.150749,0.107418,0.261712,0.188250,0.160544,0.227948,...,0.116117,0.276286,0.250071,0.127177,0.280398,0.260719,0.615109,1.000000,0.212461,0.208190
ZYDUSLIFE.NS,0.174524,0.065738,0.070033,0.197061,0.101287,0.109939,0.206242,0.178693,0.067284,0.115485,...,0.045434,0.216654,0.137848,0.099215,0.169548,0.176819,0.205451,0.212461,1.000000,0.245257


### Set Threshold Range

Define the range of correlation thresholds to test for network construction.

## Thresholding for graph creation

In [21]:
INIT_THRESHOLD = 0.2
LAST_THRESHOLD = 1
STEP_THRESHOLD = 0.1

### Build Correlation Networks

Create network graphs for each threshold value. Two stocks are connected if their correlation exceeds the threshold.

In [22]:
g = []

for i in np.arange(INIT_THRESHOLD, LAST_THRESHOLD, STEP_THRESHOLD):
    G = nn.graph.build_correlation_graph(
        corr_mat,
        threshold=i,
    )
    g.append(G)

### Network Statistics

Display key metrics for each network: number of nodes, edges, and average degree.

In [27]:
for i, gi in enumerate(g):
    N = gi.number_of_nodes()
    L = gi.number_of_edges()
    K = sum(dict(gi.degree()).values()) / N

    print(f"Threshold: {INIT_THRESHOLD + i * STEP_THRESHOLD:.1f}")
    print(f"Number of nodes N: {N}")
    print(f"Number of connections L: {L}")
    print(f"Average degree K: {K:.3f}")
    print("-----")

Threshold: 0.2
Number of nodes N: 1672
Number of connections L: 503297
Average degree K: 602.030
-----
Threshold: 0.3
Number of nodes N: 1672
Number of connections L: 86510
Average degree K: 103.481
-----
Threshold: 0.4
Number of nodes N: 1672
Number of connections L: 6549
Average degree K: 7.834
-----
Threshold: 0.5
Number of nodes N: 1672
Number of connections L: 863
Average degree K: 1.032
-----
Threshold: 0.6
Number of nodes N: 1672
Number of connections L: 246
Average degree K: 0.294
-----
Threshold: 0.7
Number of nodes N: 1672
Number of connections L: 65
Average degree K: 0.078
-----
Threshold: 0.8
Number of nodes N: 1672
Number of connections L: 12
Average degree K: 0.014
-----
Threshold: 0.9
Number of nodes N: 1672
Number of connections L: 1
Average degree K: 0.001
-----


### Power Law Analysis

Fit power-law distributions to the degree distributions of each network and visualize them on log-log plots. The R² value indicates how well each network follows a power-law distribution.

In [None]:
# Create a grid of subplots for all graphs
num_graphs = len(g)
num_cols = 2
num_rows = (num_graphs + num_cols - 1) // num_cols

fig_grid = go.Figure()
fig_grid = fig_grid.set_subplots(
    rows=num_rows,
    cols=num_cols,
    subplot_titles=[f"Threshold={INIT_THRESHOLD + i * STEP_THRESHOLD:.1f}" for i in range(num_graphs)],
    specs=[[{"secondary_y": False} for _ in range(num_cols)] for _ in range(num_rows)]
)

# Update axes to log scale
for i in range(1, num_graphs + 1):
    fig_grid.update_xaxes(type="log", row=(i - 1) // num_cols + 1, col=(i - 1) % num_cols + 1)
    fig_grid.update_yaxes(type="log", row=(i - 1) // num_cols + 1, col=(i - 1) % num_cols + 1)

# Generate plots for each graph
for idx, gi in enumerate(g):
    row = idx // num_cols + 1
    col = idx % num_cols + 1

    degrees, counts = nn.graph.degree_distribution(gi)
    model, error = nn.metrics.log_linear_fitting(degrees, counts)

    print(f"Threshold: {INIT_THRESHOLD + idx * STEP_THRESHOLD:.1f}, R²: {error:.4f}")

    fig_grid = nn.visuals.plot_degree_distribution(degrees, counts, model, fig_grid, row, col)

# Update layout
fig_grid.update_layout(height=800, showlegend=False)
fig_grid.show()

Threshold: 0.2, R²: 0.0430
Threshold: 0.3, R²: 0.5739
Threshold: 0.4, R²: 0.7981
Threshold: 0.5, R²: 0.8407
Threshold: 0.6, R²: 0.9088
Threshold: 0.7, R²: 0.8237
Threshold: 0.8, R²: 1.0000
Threshold: 0.9, R²: nan


### Final Verdict

I will choose threshold 0.4 as it provides a good balance between network connectivity and sparsity, while also exhibiting a degree distribution that closely follows a power-law, indicating a scale-free structure.

## Network Analysis

In [55]:
# We finally build the graph with threshold 0.4
G = nn.graph.build_correlation_graph(
    corr_mat,
    threshold=0.4
)

### Compute Centrality Measures

Calculate various centrality metrics to identify the most important nodes in the network. These metrics reveal which stocks have the highest influence or connectivity.

In [56]:
# Compute all centrality measures
centralities = nn.metrics.compute_all_centralities(G)
centralities.head(10)

Unnamed: 0_level_0,degree,betweenness,closeness,eigenvector,pagerank
node,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
20MICRONS.NS,0.0,0.0,,2.664955e-20,0.000194
21STCENMGM.NS,0.0,0.0,,2.664955e-20,0.000194
360ONE.NS,0.0,0.0,,2.664955e-20,0.000194
3IINFOLTD.NS,0.003591,1.2e-05,0.385517,0.006810733,0.000526
3MINDIA.NS,0.0,0.0,,2.664955e-20,0.000194
3PLAND.NS,0.0,0.0,,2.664955e-20,0.000194
5PAISA.NS,0.0,0.0,,2.664955e-20,0.000194
63MOONS.NS,0.0,0.0,,2.664955e-20,0.000194
A2ZINFRA.NS,0.0,0.0,,2.664955e-20,0.000194
AAATECH.NS,0.0,0.0,,2.664955e-20,0.000194


### Top Nodes by Degree Centrality

Display the most connected stocks in the network.

In [57]:
# Get top 15 nodes by degree centrality
degree_centrality = nn.metrics.compute_degree_centrality(G)
top_degree = nn.metrics.get_top_nodes(degree_centrality, top_n=15)
print("Top 15 nodes by Degree Centrality:")
print(top_degree)

# Visualize as bar chart
fig_degree = nn.visuals.create_centrality_bar_chart(
    pd.Series(degree_centrality),
    title="Top 15 Stocks by Degree Centrality",
    top_n=15
)
fig_degree.show()

Top 15 nodes by Degree Centrality:
TATAPOWER.NS     0.144823
HFCL.NS          0.119090
SAIL.NS          0.106523
ANDHRSUGAR.NS    0.103531
RCF.NS           0.102334
BHEL.NS          0.097546
CANBK.NS         0.097546
IRCTC.NS         0.095751
HINDCOPPER.NS    0.094554
DLF.NS           0.087373
HEMIPROP.NS      0.085577
GIPCL.NS         0.081987
LTF.NS           0.077199
BEL.NS           0.076601
NATIONALUM.NS    0.075404
dtype: float64


### Community Detection

Identify clusters of stocks that move together. Communities represent groups of highly correlated companies.

In [58]:
# Detect communities using greedy modularity optimization
communities = nn.metrics.compute_community_structure(G, algorithm="greedy")

# Create a mapping of community to nodes
community_map = {}
for node, comm_id in communities.items():
    if comm_id not in community_map:
        community_map[comm_id] = []
    community_map[comm_id].append(node)

print(f"Number of communities detected: {len(community_map)}")
print("\nCommunity sizes:")
for comm_id, nodes in sorted(community_map.items(), key=lambda x: len(x[1]), reverse=True):
    print(f"Community {comm_id}: {len(nodes)} nodes")
    if len(nodes) <= 10:
        print(f"  Nodes: {nodes}")
    else:
        print(f"  Sample nodes: {nodes[:5]}...")

Number of communities detected: 1091

Community sizes:
Community 0: 256 nodes
  Sample nodes: ['KELLTONTEC.NS', 'DEEPAKFERT.NS', 'SNOWMAN.NS', 'GICHSGFIN.NS', 'RAJSREESUG.NS']...
Community 1: 211 nodes
  Sample nodes: ['COCHINSHIP.NS', 'GMRAIRPORT.NS', 'CUB.NS', 'GOACARBON.NS', 'GRAPHITE.NS']...
Community 2: 19 nodes
  Sample nodes: ['LTTS.NS', 'BSOFT.NS', 'WIPRO.NS', 'LTIM.NS', 'MASTEK.NS']...
Community 3: 14 nodes
  Sample nodes: ['VHL.NS', 'PILANIINVS.NS', 'CAMS.NS', 'NSIL.NS', 'CDSL.NS']...
Community 4: 13 nodes
  Sample nodes: ['SHREYANIND.NS', 'PDMJEPAPER.NS', 'ORISSAMINE.NS', 'ANDHRAPAP.NS', 'SATIA.NS']...
Community 5: 12 nodes
  Sample nodes: ['GRANULES.NS', 'CIPLA.NS', 'DIVISLAB.NS', 'BIOCON.NS', 'GLENMARK.NS']...
Community 6: 8 nodes
  Nodes: ['COROMANDEL.NS', 'PIIND.NS', 'NAVINFLUOR.NS', 'DEEPAKNTR.NS', 'ALKYLAMINE.NS', 'SRF.NS', 'BALAMINES.NS', 'ATUL.NS']
Community 7: 8 nodes
  Nodes: ['NESTLEIND.NS', 'HINDUNILVR.NS', 'DABUR.NS', 'GODREJCP.NS', 'TATACONSUM.NS', 'BRITANNIA.N

### Network Visualization

Create an interactive visualization of the correlation network with nodes colored by their community membership and sized by degree centrality.

In [59]:
# Convert community assignments to numeric values for coloring
community_colors = {node: float(comm_id) for node, comm_id in communities.items()}

# Visualize the network
fig_network = nn.visuals.create_network_plot(
    G,
    node_colors=community_colors,
    node_size_metric=degree_centrality,
    layout="spring",
    title="Stock Correlation Network (Threshold=0.4)"
)
fig_network.show()

### Clustering Coefficient

Analyze local clustering to understand how tightly-knit neighborhoods are in the network.

In [60]:
# Compute clustering coefficient
clustering_coeff = nn.metrics.compute_clustering_coefficient(G)

# Summary statistics
clustering_values = list(clustering_coeff.values())
print("Clustering Coefficient Statistics:")
print(f"Mean: {np.mean(clustering_values):.4f}")
print(f"Median: {np.median(clustering_values):.4f}")
print(f"Std Dev: {np.std(clustering_values):.4f}")
print(f"Min: {np.min(clustering_values):.4f}")
print(f"Max: {np.max(clustering_values):.4f}")
print(f"\nGlobal Clustering Coefficient: {nx.average_clustering(G):.4f}")

Clustering Coefficient Statistics:
Mean: 0.2067
Median: 0.0000
Std Dev: 0.3442
Min: 0.0000
Max: 1.0000

Global Clustering Coefficient: 0.2067


### Betweenness Centrality Analysis

Identify stocks that act as bridges between different parts of the network.

In [61]:
# Compute betweenness centrality
betweenness_centrality = nn.metrics.compute_betweenness_centrality(G)
top_betweenness = nn.metrics.get_top_nodes(betweenness_centrality, top_n=15)

print("Top 15 nodes by Betweenness Centrality (Bridge nodes):")
print(top_betweenness)

# Visualize as bar chart
fig_betweenness = nn.visuals.create_centrality_bar_chart(
    pd.Series(betweenness_centrality),
    title="Top 15 Stocks by Betweenness Centrality",
    top_n=15
)
fig_betweenness.show()

Top 15 nodes by Betweenness Centrality (Bridge nodes):
TATAPOWER.NS     0.014344
HFCL.NS          0.010308
ANDHRSUGAR.NS    0.009054
DLF.NS           0.008307
LTF.NS           0.007425
RCF.NS           0.005777
BHEL.NS          0.004569
CANBK.NS         0.004074
SAIL.NS          0.003985
SUTLEJTEX.NS     0.003761
HEMIPROP.NS      0.003684
DEN.NS           0.003615
GIPCL.NS         0.003436
IRCTC.NS         0.003382
RIIL.NS          0.003373
dtype: float64


### PageRank Analysis

Calculate the influence of each stock based on its position and connections in the network.

In [62]:
# Compute PageRank
pagerank = nn.metrics.compute_pagerank(G)
top_pagerank = nn.metrics.get_top_nodes(pagerank, top_n=15)

print("Top 15 nodes by PageRank:")
print(top_pagerank)

# Visualize as bar chart
fig_pagerank = nn.visuals.create_centrality_bar_chart(
    pd.Series(pagerank),
    title="Top 15 Stocks by PageRank",
    top_n=15
)
fig_pagerank.show()

Top 15 nodes by PageRank:
TATAPOWER.NS     0.012952
HFCL.NS          0.010910
ANDHRSUGAR.NS    0.009205
RCF.NS           0.008305
SAIL.NS          0.007968
BHEL.NS          0.007615
CANBK.NS         0.007414
IRCTC.NS         0.007339
DLF.NS           0.007284
HINDCOPPER.NS    0.007119
HEMIPROP.NS      0.006783
LTF.NS           0.006631
GIPCL.NS         0.006600
RIIL.NS          0.006077
BEL.NS           0.005773
dtype: float64


## Summary Statistics

Display key network properties and insights.

In [63]:
import math

# Calculate network statistics
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()
avg_degree = 2 * num_edges / num_nodes
density = nx.density(G)
avg_clustering = nx.average_clustering(G)

# Connected components
num_components = nx.number_connected_components(G)
if nx.is_connected(G):
    diameter = nx.diameter(G)
    avg_shortest_path = nx.average_shortest_path_length(G)
else:
    largest_cc = max(nx.connected_components(G), key=len)
    subgraph = G.subgraph(largest_cc)
    diameter = nx.diameter(subgraph)
    avg_shortest_path = nx.average_shortest_path_length(subgraph)
    num_nodes_largest = len(largest_cc)

print("=" * 60)
print("NETWORK ANALYSIS SUMMARY")
print("=" * 60)
print(f"\nBasic Properties:")
print(f"  Number of nodes (stocks): {num_nodes}")
print(f"  Number of edges (correlations): {num_edges}")
print(f"  Network density: {density:.4f}")
print(f"  Average degree: {avg_degree:.2f}")

print(f"\nTopological Properties:")
print(f"  Number of connected components: {num_components}")
if not nx.is_connected(G):
    print(f"  Size of largest component: {num_nodes_largest} nodes ({100*num_nodes_largest/num_nodes:.1f}%)")
print(f"  Network diameter: {diameter}")
print(f"  Average shortest path length: {avg_shortest_path:.2f}")

print(f"\nClustering:")
print(f"  Average clustering coefficient: {avg_clustering:.4f}")
print(f"  Number of communities: {len(community_map)}")

print(f"\nCentrality Summary:")
print(f"  Highest degree centrality: {max(degree_centrality.values()):.4f}")
print(f"  Highest betweenness centrality: {max(betweenness_centrality.values()):.4f}")
print(f"  Highest PageRank: {max(pagerank.values()):.6f}")

print("=" * 60)

NETWORK ANALYSIS SUMMARY

Basic Properties:
  Number of nodes (stocks): 1672
  Number of edges (correlations): 6549
  Network density: 0.0047
  Average degree: 7.83

Topological Properties:
  Number of connected components: 1074
  Size of largest component: 560 nodes (33.5%)
  Network diameter: 8
  Average shortest path length: 2.70

Clustering:
  Average clustering coefficient: 0.2067
  Number of communities: 1091

Centrality Summary:
  Highest degree centrality: 0.1448
  Highest betweenness centrality: 0.0143
  Highest PageRank: 0.012952
