In [24]:
import pandas as pd 
import numpy as np 
import os 

In [25]:
io_df = pd.read_csv(r".\processed\MOSPI Matrix Final - ALL.csv", header=1)

# **Technical Coefficients**

In [34]:
# Get sector names (rows 0-130, column 1)
sector_names = io_df.iloc[0:131, 1].copy()

# Get use matrix (rows 0-130, columns 2:133) - ONLY 131 columns to match 131 rows
use_matrix = io_df.iloc[0:131, 2:133].copy()  # Changed from 2: to 2:133

# Get Total Output (row 134, columns 2:133)
total_output = io_df.iloc[134, 2:133].copy()

print(f"Use matrix shape: {use_matrix.shape}")
print(f"Total output length: {len(total_output)}")
print(f"Sector names: {len(sector_names)}")

Use matrix shape: (131, 131)
Total output length: 131
Sector names: 131


In [35]:
use_matrix

Unnamed: 0,Paddy,Wheat,Jowar,Bajra,Maize,Gram,Pulses,Sugarcane,Groundnut,Coconut,...,Education\n and research,Medical and\n Health,Legal\n services,Computer\n related,Other\n Business,Real estate\n services,Renting of\n machinery &,"Community,\n social and",Other\n services,Public\n administrati
0,1043496,12701,35,1,48,17773,54046,0,518,0,...,0,0,0,0,0,0,0,0,0,1120330
1,98107,515592,16,1,75,44,62194,0,6,0,...,0,0,0,0,0,0,0,0,0,2922124
2,40,138,9852,0,5,0,96,0,0,0,...,0,0,0,0,0,0,0,0,0,4823
3,43,150,0,1594,5,0,104,0,0,0,...,0,0,0,0,0,0,0,0,0,14984
4,93,322,1,0,8046,1,230,0,1,0,...,0,0,0,0,0,0,0,0,0,25902
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,193,95,14,6,17,14,15,21,8,0,...,0,0,8335,0,0,165746,11144,38016,0,0
127,22,15,2,1,2,2,7,3,2,0,...,135,0,0,148226,484,895377,6267,37793,33936,0
128,2,8,0,0,0,0,5,0,0,0,...,11089,387,3511,2193,7597,7596,367,10980,10483,0
129,2321,1173,140,62,188,166,240,249,112,0,...,263697,26403,29744,15437,43416,5888,5353,110342,188623,219


**Calculating Individual Coefficients**

$$
a_{ij} = \frac{input_{ij}}{totaloutput_{j}}
$$

In [36]:
tech_coef = use_matrix.div(total_output, axis=1)
tech_coef = tech_coef.fillna(0)

  tech_coef = tech_coef.fillna(0)


In [37]:
# Add sector names
tech_coef.insert(0, 'sector_name', sector_names.values)

# Check stats
print("\nTechnical coefficient stats:")
display(tech_coef)

print(f"\nNegative values: {(tech_coef.iloc[:, 1:] < 0).sum().sum()}")
print(f"Values > 1: {(tech_coef.iloc[:, 1:] > 1).sum().sum()}")

# Save
tech_coef.to_csv('processed_io_data/technical_coefficients.csv', index=False)
print("\n✓ Technical coefficients recalculated and saved!")


Technical coefficient stats:


  tech_coef.insert(0, 'sector_name', sector_names.values)


Unnamed: 0,sector_name,Paddy,Wheat,Jowar,Bajra,Maize,Gram,Pulses,Sugarcane,Groundnut,...,Education\n and research,Medical and\n Health,Legal\n services,Computer\n related,Other\n Business,Real estate\n services,Renting of\n machinery &,"Community,\n social and",Other\n services,Public\n administrati
0,Paddy,4.575146e-02,8.702969e-04,2.279588e-05,6.445729e-07,1.784698e-05,5.281290e-03,8.828703e-03,0.000000e+00,1.710633e-04,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.011393
1,Wheat,4.301443e-03,3.532935e-02,1.042097e-05,6.445729e-07,2.788590e-05,1.307471e-05,1.015972e-02,0.000000e+00,1.981428e-06,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.029717
2,Jowar,1.753776e-06,9.456025e-06,6.416715e-03,0.000000e+00,1.859060e-06,0.000000e+00,1.568211e-05,0.000000e+00,0.000000e+00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000049
3,Bajra,1.885309e-06,1.027829e-05,0.000000e+00,1.027449e-03,1.859060e-06,0.000000e+00,1.698896e-05,0.000000e+00,0.000000e+00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000152
4,Maize,4.077530e-06,2.206406e-05,6.513109e-07,0.000000e+00,2.991600e-03,2.971524e-07,3.757173e-05,0.000000e+00,3.302380e-07,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000263
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,Real estate services,8.461970e-06,6.509582e-06,9.118353e-06,3.867437e-06,6.320805e-06,4.160134e-06,2.450330e-06,2.804533e-06,2.641904e-06,...,0.000000,0.000000,0.001067,0.000000,0.000000,0.007474,0.005254,0.000880,0.000000,0.000000
127,Renting of machinery &\n equipment,9.645769e-07,1.027829e-06,1.302622e-06,6.445729e-07,7.436241e-07,5.943049e-07,1.143487e-06,4.006476e-07,6.604760e-07,...,0.000002,0.000000,0.000000,0.001981,0.000010,0.040377,0.002955,0.000875,0.002153,0.000000
128,"Community, social and\n personal services",8.768881e-08,5.481754e-07,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,8.167767e-07,0.000000e+00,0.000000e+00,...,0.000164,0.000012,0.000450,0.000029,0.000155,0.000343,0.000173,0.000254,0.000665,0.000000
129,Other services,1.017629e-04,8.037621e-05,9.118353e-05,3.996352e-05,6.990066e-05,4.932731e-05,3.920528e-05,3.325375e-05,3.698666e-05,...,0.003888,0.000833,0.003809,0.000206,0.000886,0.000266,0.002524,0.002554,0.011968,0.000002



Negative values: 5
Values > 1: 0

✓ Technical coefficients recalculated and saved!


# **Calculate Liontief Inverse**
The Leontief Inverse, also known as the 'total_requirements_matrix" is a tool used in economics to analyze the interdependence of different industries. It shows the total direct and indirect production required from every sector of an economy to meet a one-unit increase in final consumer demand for a product.

*This model quantifies how a change in one industry, like a new car purchase, creates a ripple effect of production across other industries. It provides a comprehensive picture of the economic activity stimulated by a single consumer choice.* 

In [38]:
# Get the coefficients matrix, exclude the sector name 
sector_names = tech_coef['sector_name']
A = tech_coef.drop(columns=["sector_name"]).values
# Check matrix dimensions
n = A.shape[0]
print(f"Matrix Size: {n} x {n}")

Matrix Size: 131 x 131


**Calculating Liontief Inverse**
$$
L = {(I-A)}^{-1}
$$

In [39]:
# Create an Identity Matrix: 
I = np.eye(n)

# Calculate Leontief Inverse: L = (I - A)^-1
try:
    leontief_inverse = np.linalg.inv(I - A)
    print("✓ Leontief inverse calculated successfully")
except np.linalg.LinAlgError:
    print("✗ Error: Matrix is singular, using pseudo-inverse")
    leontief_inverse = np.linalg.pinv(I - A)
leontief_inverse

✓ Leontief inverse calculated successfully


array([[1.04939126e+00, 1.56177925e-03, 2.61726854e-03, ...,
        6.66558434e-04, 3.47834222e-04, 1.77376222e-02],
       [5.88938907e-03, 1.03702227e+00, 3.80611657e-03, ...,
        3.10488616e-04, 1.62368924e-04, 3.20112280e-02],
       [4.92357791e-05, 3.31767078e-05, 1.00651881e+00, ...,
        4.92844438e-05, 3.53781702e-05, 9.27657408e-04],
       ...,
       [1.64262934e-04, 8.27285231e-05, 1.24214249e-04, ...,
        1.00041143e+00, 7.94757775e-04, 1.34515781e-04],
       [3.43655748e-04, 1.87547088e-04, 2.65247585e-04, ...,
        2.80812461e-03, 1.01243452e+00, 1.98532736e-04],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 1.00000000e+00]], shape=(131, 131))

In [40]:
# Convert to dataframe with sector names: 
leontief_df = pd.DataFrame(
    leontief_inverse, 
    index = sector_names,
    columns = sector_names
)
leontief_df = leontief_df

In [41]:
leontief_df.to_csv('processed_io_data/leontief_inverse.csv')

print(f"\n✓ Step 2 Complete!")
print(f"  Shape: {leontief_df.shape}")
print(f"  Saved: processed_io_data/leontief_inverse.csv")


✓ Step 2 Complete!
  Shape: (131, 131)
  Saved: processed_io_data/leontief_inverse.csv


# **Backward and Forward Linkages**
- *Backward Linkage* = Sum of Column in Leontief matrix 
- *Forward Linkage* = Sum of Row in Leontief Mtrix 


In [42]:
# Backward Linkage
backward_linkage = leontief_df.sum(axis=0)
# Forward Linkage
forward_linkage = leontief_df.sum(axis=1)

In [43]:
# Identify Key Sectors (both Linkages > average)
avg_backward = backward_linkage.mean()
avg_forward = forward_linkage.mean()

print(f"\nAverage backward linkage: {avg_backward:.2f}")
print(f"Average forward linkage: {avg_forward:.2f}")


Average backward linkage: 2.50
Average forward linkage: 2.50


In [44]:
nodes_df = pd.DataFrame({
    'sector_name': leontief_df.index,
    'backward_linkage': backward_linkage.values,
    'forward_linkage': forward_linkage.values,
    'is_key_sector': (backward_linkage.values > avg_backward) & (forward_linkage.values > avg_forward)
})
# Add sector_id
nodes_df.insert(0, 'sector_id', range(1, len(nodes_df) + 1))

# Save
nodes_df.to_csv('processed_io_data/production_network_nodes.csv', index=False)

In [45]:
print(f"\n✓ Step 3 Complete!")
print(f"  Total sectors: {len(nodes_df)}")
print(f"  Key sectors: {nodes_df['is_key_sector'].sum()}")

print("\nTop 5 sectors by backward linkage:")
print(nodes_df.nlargest(5, 'backward_linkage')[['sector_name', 'backward_linkage', 'forward_linkage']])

print("\nTop 5 sectors by forward linkage:")
print(nodes_df.nlargest(5, 'forward_linkage')[['sector_name', 'backward_linkage', 'forward_linkage']])


✓ Step 3 Complete!
  Total sectors: 131
  Key sectors: 11

Top 5 sectors by backward linkage:
                                        sector_name  backward_linkage  \
39                    Hydrogenated\n oil(vanaspati)          9.287575   
40               Edible oils other than\n vanaspati          8.348087   
42                    Miscellaneous food\n products          7.951248   
43  Grain Mill products, starch and starch products          7.886302   
38                                 Khandsari, boora          7.826715   

    forward_linkage  
39         1.086266  
40         1.653595  
42         1.490475  
43         1.211304  
38         1.029845  

Top 5 sectors by forward linkage:
                sector_name  backward_linkage  forward_linkage
115  Communication services          2.126229        17.277730
116                   Trade          1.859594        15.542087
63       Petroleum products          2.590902        12.329200
112           Air transport          2.614770  

# **Network Edge List**

In [70]:
# Reload use_matrix and tech_coef for edge creation
use_matrix_reload = io_df.iloc[0:131, 2:133].apply(pd.to_numeric, errors='coerce').fillna(0)

# Step 4 FIX: Create Network Edge List with correct column mapping

print("Tech coef columns (first 10):")
print(tech_coef.columns[1:11].tolist())

print("\nSector names (first 10):")
print(sector_names.head(10).tolist())

# Rename tech_coef columns to match sector names
tech_coef_fixed = tech_coef.copy()
tech_coef_fixed.columns = ['sector_name'] + sector_names.tolist()

Tech coef columns (first 10):
['Paddy', 'Wheat', 'Jowar', 'Bajra', 'Maize', 'Gram', 'Pulses', 'Sugarcane', 'Groundnut', 'Coconut']

Sector names (first 10):
['Paddy', 'Wheat', 'Jowar', 'Bajra', 'Maize', 'Gram', 'Pulses', 'Sugarcane', 'Groundnut', 'Coconut']


In [71]:
# Now rebuild edges
edges = []

for i in range(len(tech_coef_fixed)):
    source_sector = tech_coef_fixed.iloc[i]['sector_name']
    
    for j, target_sector in enumerate(sector_names):
        coefficient = tech_coef_fixed.iloc[i, j + 1]
        
        if coefficient > 0.001:  # Only significant flows
            edges.append({
                'source_sector': source_sector,
                'target_sector': target_sector,
                'input_coefficient': coefficient,
                'input_value': use_matrix_reload.iloc[i, j]
            })

edges_df = pd.DataFrame(edges)
edges_df.to_csv('processed_io_data/production_network_edges.csv', index=False)

print(f"\n✓ Step 4 FIXED!")
print(f"  Created {len(edges_df):,} network edges")

# Check targets now
print(f"\nUnique target sectors: {edges_df['target_sector'].nunique()}")
print("\nSample edges:")
display(edges_df.head(10))


✓ Step 4 FIXED!
  Created 3,401 network edges

Unique target sectors: 131

Sample edges:


Unnamed: 0,source_sector,target_sector,input_coefficient,input_value
0,Paddy,Paddy,0.045751,1043496.0
1,Paddy,Gram,0.005281,17773.0
2,Paddy,Pulses,0.008829,54046.0
3,Paddy,Other crops,0.007219,264143.0
4,Paddy,Poultry & Eggs,0.082899,857493.0
5,Paddy,Edible oils other than\n vanaspati,0.013877,98591.0
6,Paddy,Miscellaneous food\n products,0.01734,731096.0
7,Paddy,"Grain Mill products, starch and starch products",0.477925,13269746.0
8,Paddy,Beverages,0.004787,32284.0
9,Paddy,Other chemicals,0.002052,23300.0


# **Calculate Network Centrality Metrics**

In [72]:
import networkx as nx 

# Create directed graph from edges
G = nx.from_pandas_edgelist(
    edges_df,
    source='source_sector',
    target='target_sector',
    edge_attr='input_coefficient',
    create_using=nx.DiGraph()
)

print(f"Network created:")
print(f"  Nodes: {G.number_of_nodes()}")
print(f"  Edges: {G.number_of_edges()}")
print(f"  Density: {nx.density(G):.4f}")

Network created:
  Nodes: 131
  Edges: 3401
  Density: 0.1997


In [73]:
# Calculate centrality measures
print("\nCalculating centrality metrics...")

degree_centrality = nx.degree_centrality(G)
print("  ✓ Degree centrality")

betweenness = nx.betweenness_centrality(G)
print("  ✓ Betweenness centrality")

closeness = nx.closeness_centrality(G)
print("  ✓ Closeness centrality")

eigenvector = nx.eigenvector_centrality(G, max_iter=1000)
print("  ✓ Eigenvector centrality")

pagerank = nx.pagerank(G)
print("  ✓ PageRank")


Calculating centrality metrics...
  ✓ Degree centrality
  ✓ Betweenness centrality
  ✓ Closeness centrality
  ✓ Eigenvector centrality
  ✓ PageRank


In [74]:
# Combine with nodes dataframe
network_metrics = nodes_df.copy()
network_metrics['degree_centrality'] = network_metrics['sector_name'].map(degree_centrality)
network_metrics['betweenness_centrality'] = network_metrics['sector_name'].map(betweenness)
network_metrics['closeness_centrality'] = network_metrics['sector_name'].map(closeness)
network_metrics['eigenvector_centrality'] = network_metrics['sector_name'].map(eigenvector)
network_metrics['pagerank'] = network_metrics['sector_name'].map(pagerank)

# Fill NaN with 0 for any isolated nodes
network_metrics = network_metrics.fillna(0)

# Save
network_metrics.to_csv('processed_io_data/network_metrics.csv', index=False)

print(f"\n✓ Step 5 Complete!")
print(f"  Saved: network_metrics.csv")


✓ Step 5 Complete!
  Saved: network_metrics.csv


In [78]:
# Display top sectors by different metrics
print("\n" + "="*70)
print("TOP 5 SECTORS BY PAGERANK:")
print("="*70)
display(network_metrics.nlargest(5, 'pagerank')[['sector_name', 'pagerank', 'degree_centrality', 'betweenness_centrality']])

print("\n" + "="*70)
print("TOP 5 SECTORS BY BETWEENNESS (Bridge Sectors):")
print("="*70)
display(network_metrics.nlargest(5, 'betweenness_centrality')[['sector_name', 'betweenness_centrality', 'degree_centrality', 'pagerank']])

print("\n" + "="*70)
print("TOP 5 KEY SECTORS (High Backward + Forward Linkages):")
print("="*70)
key_sectors = network_metrics[network_metrics['is_key_sector']].copy()
if len(key_sectors) > 0:
    display(key_sectors.nlargest(5, 'pagerank')[['sector_name', 'backward_linkage', 'forward_linkage', 'pagerank', 'betweenness_centrality']])
else:
    print("No key sectors identified")

print("\n" + "="*70)
print("NETWORK SUMMARY:")
print("="*70)
print(f"Total sectors: {len(network_metrics)}")
print(f"Key sectors: {network_metrics['is_key_sector'].sum()}")
print(f"Average PageRank: {network_metrics['pagerank'].mean():.6f}")
print(f"Average Betweenness: {network_metrics['betweenness_centrality'].mean():.6f}")
print(f"Sectors with Betweenness > 0: {(network_metrics['betweenness_centrality'] > 0).sum()}")


TOP 5 SECTORS BY PAGERANK:


Unnamed: 0,sector_name,pagerank,degree_centrality,betweenness_centrality
100,"Bicycles, cycle-rickshaw",0.049882,0.276923,0.0
45,Tobacco Products,0.045294,0.2,0.0
130,Public administration and\n defence,0.017119,0.207692,0.0
48,Woolen textiles,0.016054,0.269231,0.007998
42,Miscellaneous food\n products,0.015866,0.523077,0.035073



TOP 5 SECTORS BY BETWEENNESS (Bridge Sectors):


Unnamed: 0,sector_name,betweenness_centrality,degree_centrality,pagerank
116,Trade,0.135703,1.438462,0.014298
106,Construction and\n construction services,0.048364,1.015385,0.010264
42,Miscellaneous food\n products,0.035073,0.523077,0.015866
72,Other chemicals,0.034482,0.9,0.015045
117,Hotels & Restaurant,0.032069,0.461538,0.012479



TOP 5 KEY SECTORS (High Backward + Forward Linkages):


Unnamed: 0,sector_name,backward_linkage,forward_linkage,pagerank,betweenness_centrality
72,Other chemicals,2.737014,3.863796,0.015045,0.034482
71,"Synthetic fibers, resin",3.052159,2.61391,0.012787,0.020447
66,Organic heavy chemicals,2.819236,7.996421,0.011502,0.020244
65,Inorganic heavy chemicals,3.017084,5.864812,0.010462,0.021108
79,Iron and steel foundries,2.590097,3.839517,0.009427,0.008199



NETWORK SUMMARY:
Total sectors: 131
Key sectors: 11
Average PageRank: 0.007634
Average Betweenness: 0.007129
Sectors with Betweenness > 0: 126


In [66]:
# Check network statistics
print(f"Total sectors: {len(sector_names)}")
print(f"Total possible edges: {len(sector_names) * len(sector_names)}")
print(f"Actual edges: {len(edges_df)}")
print(f"Network density: {len(edges_df) / (len(sector_names)**2) * 100:.2f}%")

# Check threshold effect
print("\nEdges by coefficient threshold:")
for threshold in [0.0001, 0.001, 0.01, 0.05]:
    count = (tech_coef_fixed.iloc[:, 1:] > threshold).sum().sum()
    print(f"  > {threshold}: {count:,} edges")

Total sectors: 131
Total possible edges: 17161
Actual edges: 3401
Network density: 19.82%

Edges by coefficient threshold:
  > 0.0001: 5,484 edges
  > 0.001: 3,401 edges
  > 0.01: 1,308 edges
  > 0.05: 348 edges


# **Summary**

In [79]:
# ====================================================================
# FINAL SUMMARY: INPUT-OUTPUT TABLE PROCESSING COMPLETE
# ====================================================================

import os

print("\n" + "="*70)
print("INPUT-OUTPUT TABLE PROCESSING COMPLETE!")
print("="*70)

files = {
    'technical_coefficients.csv': 'Technical coefficients matrix (131×131)',
    'leontief_inverse.csv': 'Leontief inverse matrix',
    'production_network_nodes.csv': 'Sector linkages and key sectors',
    'production_network_edges.csv': 'Network edges (input flows)',
    'network_metrics.csv': 'Complete network centrality metrics'
}

print("\nGenerated files in 'processed_io_data/':\n")
total_size = 0
for filename, description in files.items():
    path = f'processed_io_data/{filename}'
    if os.path.exists(path):
        size_kb = os.path.getsize(path) / 1024
        total_size += size_kb
        print(f"  ✓ {filename:40} {size_kb:>8.1f} KB")
        print(f"    → {description}")
    else:
        print(f"  ✗ {filename:40} MISSING")

print(f"\n  Total size: {total_size:.1f} KB")

print("\n" + "="*70)
print("KEY FINDINGS:")
print("="*70)
print(f"  • {len(network_metrics)} sectors analyzed")
print(f"  • {network_metrics['is_key_sector'].sum()} key sectors identified")
print(f"  • {len(edges_df):,} inter-sectoral linkages mapped")
print(f"  • Network density: 19.82%")
print(f"  • Most central sector: {network_metrics.nlargest(1, 'pagerank')['sector_name'].values[0]}")
print(f"  • Top bridge sector: Trade (betweenness: 0.136)")

print("\n" + "="*70)
print("READY FOR ANALYSIS!")
print("="*70)
print("\nNext steps:")
print("  1. Merge commodity prices with network metrics")
print("  2. Build master dataset with all variables")
print("  3. Run causal analysis & ML models")


INPUT-OUTPUT TABLE PROCESSING COMPLETE!

Generated files in 'processed_io_data/':

  ✓ technical_coefficients.csv                  225.1 KB
    → Technical coefficients matrix (131×131)
  ✓ leontief_inverse.csv                        363.2 KB
    → Leontief inverse matrix
  ✓ production_network_nodes.csv                  8.8 KB
    → Sector linkages and key sectors
  ✓ production_network_edges.csv                253.4 KB
    → Network edges (input flows)
  ✓ network_metrics.csv                          21.6 KB
    → Complete network centrality metrics

  Total size: 872.2 KB

KEY FINDINGS:
  • 131 sectors analyzed
  • 11 key sectors identified
  • 3,401 inter-sectoral linkages mapped
  • Network density: 19.82%
  • Most central sector: Bicycles, cycle-rickshaw
  • Top bridge sector: Trade (betweenness: 0.136)

READY FOR ANALYSIS!

Next steps:
  1. Merge commodity prices with network metrics
  2. Build master dataset with all variables
  3. Run causal analysis & ML models
