# **Causal Analysis: Instrumental Variables, Synthetic Control and VAR**

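This notebook estimates causal relationships between commodity price shocks and sectoral IIP (Index of Industrial Production) using three complementary methods: instrumental variables (IV), synthetic control, and vector autoregression (VAR).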

In [4]:
# Importing All Libraries
import pandas as pd 
import numpy as np 
import statsmodels as st 
import scipy.stats as stp
# from linearmodels.iv.IV2SLS
import matplotlib.pyplot as plt 
import seaborn as sns 
import warnings 
import os 
import datetime


# Reproducibility: fix the seed of NumPy's global random generator.
np.random.seed(42)

# Display: lift pandas' column and row truncation limits when rendering frames.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Plot theme, then suppress every warning for the rest of the session.
plt.style.use('seaborn-v0_8')
warnings.filterwarnings('ignore')


In [8]:
# All file paths (relative to the notebook's working directory)
root_path = r"../"
processed_data_path = "../data/processed/"
io_data_path = "../data/processed_io_data/"
output_sprint2 = "./sprint2_outputs/"
# Derive the figures folder from output_sprint2 so the two cannot drift apart.
# The previous hard-coded literal was misspelled ("spirnt2_outputs"), which made
# makedirs create a stray sibling directory instead of a subfolder of
# output_sprint2.
figures_path = output_sprint2 + "figures/"

# Create the output directories. makedirs also creates the parent
# output_sprint2 folder, and exist_ok=True keeps the cell safe to re-run.
os.makedirs(figures_path, exist_ok=True)
print("Output Directories Intiated")

Output Directories Intiated


**The Master Dataset**

In [10]:
# Read the processed master dataset from disk and preview its first three rows
master_csv_path = f"{processed_data_path}master_dataset.csv"
masterDF = pd.read_csv(master_csv_path)
masterDF.head(3)

Unnamed: 0,sector_name,date,iip_index,iip_mom_growth,iip_yoy_growth,is_energy_intensive,io_sector_name,sector_id,backward_linkage,forward_linkage,is_key_sector,degree_centrality,betweenness_centrality,closeness_centrality,eigenvector_centrality,pagerank,5,CRUDE_PETRO,WHEAT_US_HRW,RICE_05,COPPER,ALUMINUM,CRUDE_PETRO_logret,WHEAT_US_HRW_logret,RICE_05_logret,COPPER_logret,ALUMINUM_logret,CRUDE_PETRO_vol_3m,CRUDE_PETRO_vol_6m,CRUDE_PETRO_vol_12m,WHEAT_US_HRW_vol_3m,WHEAT_US_HRW_vol_6m,WHEAT_US_HRW_vol_12m,RICE_05_vol_3m,RICE_05_vol_6m,RICE_05_vol_12m,COPPER_vol_3m,COPPER_vol_6m,COPPER_vol_12m,ALUMINUM_vol_3m,ALUMINUM_vol_6m,ALUMINUM_vol_12m,CRUDE_PETRO_shock,WHEAT_US_HRW_shock,RICE_05_shock,COPPER_shock,ALUMINUM_shock,Year,Month,ONI,Month_num,ENSO_Phase,ONI_lag_1m,ONI_lag_3m,ONI_lag_6m,wpi_(a)__food_articles,wpi_ii_fuel_and_power,wpi_iii___manufactured_products,gdp_constant,gdp_current,gdp_growth_yoy,ARG,AUS,BRA,CAN,CHN,DEU,EA20,EU27_2020,FRA,GBR,IDN,IND,ITA,KOR,SAU,TUR,USA,ZAF,g20_avg_cpi_growth,energy_trade_value,total_trade_value,oil_shock_x_pagerank,oil_shock_x_betweenness,CRUDE_PETRO_lag1,WHEAT_US_HRW_lag1,RICE_05_lag1,COPPER_lag1,ALUMINUM_lag1,iip_yoy_growth_lag1,year,month,quarter
0,Manufacture of beverages,2012-04-01,134.2,,,False,Beverages,45.0,6.735902,1.047036,False,0.3,0.00212,0.512674,0.080434,0.015732,633,113.6655,266.323922,547.75,8289.48,2049.67,-0.035601,-0.063832,-0.000456,-0.021635,-0.063553,0.048197,0.036931,0.043585,0.046484,0.036586,0.054928,0.014263,0.040527,0.043094,0.035629,0.031643,0.058625,0.046561,0.046538,0.036056,0,0,0,0,0,2012.0,AMJ,-0.3,4.0,Neutral,-0.4,-0.7,-1.0,6.2,6.3,3.5,2205223.0,2313203.0,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,2012,4,2
1,Manufacture of beverages,2012-05-01,147.1,9.612519,,False,Beverages,45.0,6.735902,1.047036,False,0.3,0.00212,0.512674,0.080434,0.015732,634,104.086034,264.358724,600.5,7955.642857,2007.630952,-0.088042,-0.007406,0.091944,-0.041106,-0.020723,0.066614,0.053685,0.045979,0.043505,0.035919,0.049914,0.048649,0.058719,0.048654,0.022344,0.039656,0.057612,0.028027,0.043486,0.035953,0,0,1,0,0,2012.0,MJJ,-0.1,5.0,Neutral,-0.3,-0.4,-0.9,6.7,6.7,4.2,2205223.0,2313203.0,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,113.6655,266.323922,547.75,8289.48,2049.67,,2012,5,2
2,Manufacture of beverages,2012-06-01,130.5,-11.28484,,False,Beverages,45.0,6.735902,1.047036,False,0.3,0.00212,0.512674,0.080434,0.015732,635,90.728254,276.189919,600.0,7423.02381,1890.178571,-0.137349,0.043782,-0.000833,-0.069295,-0.060284,0.050882,0.077419,0.060171,0.053828,0.037356,0.049639,0.053456,0.054457,0.046589,0.023962,0.050967,0.059555,0.02384,0.048468,0.037558,0,0,0,0,0,2012.0,JJA,0.1,6.0,Neutral,-0.1,-0.4,-0.5,9.3,5.5,4.4,2205223.0,2313203.0,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,104.086034,264.358724,600.5,7955.642857,2007.630952,,2012,6,2


In [11]:
# Print a structural summary of the frame: index range, column names,
# non-null counts and dtypes.
masterDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3476 entries, 0 to 3475
Data columns (total 93 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   sector_name                      3476 non-null   object 
 1   date                             3476 non-null   object 
 2   iip_index                        3476 non-null   float64
 3   iip_mom_growth                   3454 non-null   float64
 4   iip_yoy_growth                   3190 non-null   float64
 5   is_energy_intensive              3476 non-null   bool   
 6   io_sector_name                   3476 non-null   object 
 7   sector_id                        3476 non-null   float64
 8   backward_linkage                 3476 non-null   float64
 9   forward_linkage                  3476 non-null   float64
 10  is_key_sector                    3476 non-null   bool   
 11  degree_centrality                3476 non-null   float64
 12  betweenness_centrali

In [15]:
# Show each distinct sector name once, wrapped in a Series for rich display
print("Unique Sectors:")
sector_names = masterDF["sector_name"].unique()
display(pd.Series(sector_names))

Unique Sectors:


0                              Manufacture of beverages
1     Manufacture of other non-metallic mineral prod...
2                               Manufacture of textiles
3     Manufacture of pharmaceuticals, medicinal chem...
4                   Manufacture of electrical equipment
5     Manufacture of computer, electronic and optica...
6                              Manufacture of furniture
7                           Manufacture of basic metals
8           Manufacture of leather and related products
9                                   Other manufacturing
10                         Manufacture of food products
11    Manufacture of fabricated metal products, exce...
12    Manufacture of motor vehicles, trailers and se...
13       Manufacture of chemicals and chemical products
14        Manufacture of machinery and equipment n.e.c.
15             Manufacture of other transport equipment
16              Manufacture of paper and paper products
17    Manufacture of coke and refined petroleum 

# **Part A**: Instrumental Variables (IV) analysis

### **Valid Instruments from Literature Survey**

#### Instrument 1: OPEC Production Quotas

- **Exogeneity:** OPEC production decisions are made collectively by member countries based on geopolitical interests and internal cartel dynamics, independent of India’s manufacturing or industrial output. This provides strong exogeneity because the shocks originate far outside the scope of Indian domestic policy or demand fluctuations.
- **Relevance:** OPEC quotas have an immediate and measurable impact on global oil supply, which in turn influences international oil prices. These price changes transmit to India through import costs and can affect downstream input costs in manufacturing.
- **Literature Support:**
  - Hamilton (2009), “Causes and Consequences of the Oil Shock of 2007-08”: Demonstrates how OPEC’s supply decisions drive international oil price shocks and impact real economic activity.
  - Kilian (2009), “Not All Oil Price Shocks Are Alike”: Differentiates exogenous oil supply shocks from those related to demand and elaborates on the implications for empirical identification strategies.
  - Baumeister & Hamilton (2019): Presents advanced modeling of structural oil market shocks, emphasizing OPEC-driven events as valid external instruments.
- **Expected First-Stage Relationship:** Reductions in OPEC quotas are expected to cause increases in oil prices, which statistically would appear as a negative coefficient in a first-stage regression using quotas as an instrument.

***

#### Instrument 2: El Niño-Southern Oscillation (ONI Index)

- **Exogeneity:** The ONI index measures oceanic temperature anomalies driving El Niño events that are entirely exogenous to Indian economic output. ENSO cycles originate globally and are uninfluenced by regional policy or production trends.
- **Relevance:** El Niño conditions are statistically linked to reduced precipitation and heat shocks in South Asia, which result in crop failures, lower yields, and higher food commodity prices—particularly for staples such as rice and wheat.
- **Literature Support:**
  - Hsiang & Meng (2015), “Tropical Economics”: Reviews mechanisms by which global climate variability impacts agricultural productivity and economic outcomes.
  - Dell et al. (2014), “What Do We Learn from the Weather?”: Explores empirical connections between weather shocks (including ENSO) and macroeconomic performance.
  - Cashin et al. (2017): Quantifies relationships between climate-induced shocks and international commodity prices.
- **Expected First-Stage Relationship:** Positive ONI values (El Niño episodes) are associated with increased food prices in India, especially wheat and rice—generally expressed as a positive coefficient.

***

#### Instrument 3: Rainfall Deviations (Standardized Anomalies)

- **Exogeneity:** Rainfall deviations (measured as standardized anomalies) in key agricultural regions of India reflect stochastic climate shocks, fundamentally external to the domestic production system. These deviations are considered valid instruments because they are unpredictable, originate from atmospheric conditions, and cannot be easily influenced by policy or economic actors.
- **Relevance:** Deviations from long-run mean rainfall affect crop output directly, pushing commodity prices higher or lower as supply fluctuates. For India, erratic monsoon rainfall remains a dominant determinant of food price volatility.
- **Literature Support:** Cashin et al. (2017) and Dell et al. (2014) provide empirical support for using both ONI and direct rainfall measures as instruments for commodity price shocks.
- **Expected First-Stage Relationship:** Negative rainfall anomalies (drought conditions) lead to higher food prices, while positive anomalies moderate prices or induce declines. The first-stage coefficient on the rainfall anomaly is therefore expected to be negative.

***

### 2025-Oriented Data Context

- As of 2025, OPEC has continued to exert substantial influence over oil prices through quota management, with recent supply reductions in 2023–2025 contributing to elevated oil benchmarks globally.
- The 2023–2024 El Niño was classified as strong, resulting in severe drought impacts and commodity price spikes in multiple Asian countries, including India.
- Rainfall anomalies during the 2024 and early 2025 monsoons saw record-setting deviations in central and northern India, driving up wheat and pulse prices nationwide.

***

**References:**  
 Hamilton (2009), Causes and Consequences of the Oil Shock of 2007-08  
 Kilian (2009), Not All Oil Price Shocks Are Alike  
 Baumeister & Hamilton (2019)  
 Hsiang & Meng (2015), Tropical Economics  
 Dell et al. (2014), What Do We Learn from the Weather?  
 Cashin et al. (2017)

Check ONI Instrument

In [20]:
# Descriptive statistics for the ONI column and its lag columns
oni_cols = ["ONI", "ONI_lag_1m", "ONI_lag_3m", "ONI_lag_6m"]
print("ONI Summary Statistics")
oni_stats = masterDF.loc[:, oni_cols].describe()
display(oni_stats)

ONI Summary Statistics


Unnamed: 0,ONI,ONI_lag_1m,ONI_lag_3m,ONI_lag_6m
count,1408.0,1430.0,1474.0,1496.0
mean,0.24375,0.233846,0.210448,0.180882
std,0.784274,0.782239,0.782366,0.800315
min,-0.8,-0.8,-0.8,-1.0
25%,-0.3,-0.3,-0.3,-0.325
50%,0.0,0.0,-0.1,-0.1
75%,0.6,0.6,0.6,0.525
max,2.3,2.3,2.3,2.3


In [23]:
# Tabulate ENSO phases (skipped silently when the column is absent)
if "ENSO_Phase" in masterDF.columns:
    enso_counts = masterDF["ENSO_Phase"].value_counts()
    print("ENSO Phase Distribution:")
    display(enso_counts)
    # Share of each phase among rows that carry a phase label; value_counts
    # excludes NaN, so the sum of counts is the number of labelled rows.
    total = enso_counts.sum()
    for phase_name, n_rows in enso_counts.items():
        print(f" {phase_name}: {n_rows} ({n_rows / total * 100:.1f}%)")

ENSO Phase Distribution:


ENSO_Phase
Neutral    902
El Nino    418
La Nina    176
Name: count, dtype: int64

 Neutral: 902 (60.3%)
 El Nino: 418 (27.9%)
 La Nina: 176 (11.8%)


In [25]:
# Count NaNs per ONI column, then report the frame's row count for context
oni_frame = masterDF[oni_cols]
oni_missing = oni_frame.isna().sum()
print("Missing Values in ONI Variables")
display(oni_missing)
print(f"Total Number of Data Points: {oni_frame.shape[0]}")

Missing Values in ONI Variables


ONI           2068
ONI_lag_1m    2046
ONI_lag_3m    2002
ONI_lag_6m    1980
dtype: int64

Total Number of Data Points: 3476


Verify Endogenous Variables (Commodity Prices)

In [27]:
commodity_cols = ['CRUDE_PETRO', 'WHEAT_US_HRW', 'RICE_05', 'COPPER', 'ALUMINUM']

# For each commodity price column print its mean, standard deviation, range
# and number of non-missing observations; report columns that are absent.
for name in commodity_cols:
    if name not in masterDF.columns:
        print(f"  {name}: NOT FOUND")
        continue

    prices = masterDF[name]
    observed = prices.notna().sum()
    print(f"    {name}:")
    print(f"    Mean: ${prices.mean():.2f}")
    print(f"    Std Dev: ${prices.std():.2f}")
    print(f"    Range: ${prices.min():.2f} - ${prices.max():.2f}")
    print(f"    Non-missing: {observed:,} observations")
    print("     ===========")

    CRUDE_PETRO:
    Mean: $72.03
    Std Dev: $23.01
    Range: $21.04 - $116.80
    Non-missing: 3,476 observations
    WHEAT_US_HRW:
    Mean: $264.60
    Std Dev: $78.89
    Range: $141.83 - $522.29
    Non-missing: 3,476 observations
    RICE_05:
    Mean: $462.63
    Std Dev: $75.05
    Range: $357.00 - $660.00
    Non-missing: 3,476 observations
    COPPER:
    Mean: $7118.63
    Std Dev: $1498.80
    Range: $4471.79 - $10230.89
    Non-missing: 3,476 observations
    ALUMINUM:
    Mean: $2022.26
    Std Dev: $374.10
    Range: $1459.93 - $3498.37
    Non-missing: 3,476 observations
