# In [None]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests

# Location of the Excel workbook to analyse (change to your file path).
file_path = "interpolated_kerala_data_1991_2011.xlsx"

# Read worksheet "Sheet1" of that workbook into a DataFrame.
df = pd.read_excel(io=file_path, sheet_name="Sheet1")

# Candidate variable pairs for the Granger causality test.
# Each tuple is (hypothesised cause, effect): the question asked of every
# pair is whether the first column helps predict the second.
variable_pairs = [
    ("LIT_RATE", "PROP_WORK"),      # Literacy Rate → Workforce Participation
    ("SEX_RATIO", "PROP_WORK"),     # Sex Ratio → Workforce Participation
    ("POP", "PROP_WORK"),           # Population → Workforce Participation
    ("NO_HOUSEHOLD", "PROP_WORK"),  # No. of Households → Workforce Participation
]

# Store results: maps (cause, effect) -> smallest p-value across tested lags
granger_results = {}

# Test each pair for Granger causality
for col1, col2 in variable_pairs:
    # grangercausalitytests checks whether the series in the SECOND column
    # Granger-causes the series in the FIRST column. To test the labelled
    # direction "col1 → col2", the effect (col2) must therefore be passed
    # first and the hypothesised cause (col1) second.
    test_result = grangercausalitytests(
        df[[col2, col1]], maxlag=3, verbose=False
    )

    # Extract the SSR-based F-test p-value for each lag (lag 1, 2, 3);
    # each 'ssr_ftest' entry is a tuple whose index 1 is the p-value.
    p_values = [test_result[lag][0]["ssr_ftest"][1] for lag in test_result]

    # Store the minimum p-value (strongest evidence of causality).
    # NOTE(review): the minimum over several lags is not corrected for
    # multiple testing, so treat it as a screening statistic only.
    granger_results[(col1, col2)] = min(p_values)

# Sort results by p-value in ascending order (smallest p-value first)
sorted_granger_results = sorted(granger_results.items(), key=lambda x: x[1])

# Display the Granger causality relationships, one "cause → effect" per line
for (cause, effect), p_val in sorted_granger_results:
    print(f"{cause} → {effect} | p-value: {p_val:.5f}")