<a href="https://colab.research.google.com/github/feeble-menace23/2023B3PS0982P_Swayam/blob/main/IE_Data_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Conduct comprehensive econometric analysis
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.stats.stattools import durbin_watson

In [5]:
# Read the remittances / macro-indicator CSV (path relative to the working
# directory) into the `data` DataFrame used by the cells below.
data = pd.read_csv(filepath_or_buffer='india_remittances_economic_data.csv')

# 1. DESCRIPTIVE STATISTICS

In [7]:
# Report banner, then the section heading, emitted as one multi-line print.
print(
    "=" * 60,
    "COMPREHENSIVE ECONOMETRIC ANALYSIS",
    "Impact of Remittances on Economic Growth in India",
    "=" * 60,
    "\n1. DESCRIPTIVE STATISTICS",
    "-" * 30,
    sep="\n",
)

# Summary statistics (count, mean, std, min, quartiles, max) for the five
# headline indicators, shown to three decimals.
desc_stats = data.loc[:, [
    'Remittances_USD_Billions',
    'GDP_Growth_Rate',
    'Inflation_Rate',
    'Unemployment_Rate',
    'Exchange_Rate_INR_USD',
]].describe()
print(desc_stats.round(3))

COMPREHENSIVE ECONOMETRIC ANALYSIS
Impact of Remittances on Economic Growth in India

1. DESCRIPTIVE STATISTICS
------------------------------
       Remittances_USD_Billions  GDP_Growth_Rate  Inflation_Rate  \
count                    35.000           35.000          35.000   
mean                     43.785            5.994           6.732   
std                      35.515            2.959           2.445   
min                       2.100           -7.300           2.949   
25%                      10.770            5.061           5.077   
50%                      37.200            6.352           6.398   
75%                      66.179            7.700           8.490   
max                     135.460           10.300          12.673   

       Unemployment_Rate  Exchange_Rate_INR_USD  
count             35.000                 35.000  
mean               4.286                 51.913  
std                1.335                 16.938  
min                1.918                 25.

# 2. CORRELATION ANALYSIS

In [8]:
print("\n2. CORRELATION ANALYSIS", "-" * 30, sep="\n")

# Pairwise correlations (DataFrame.corr() with its default settings) between
# the macro indicators, this time including the trade balance.
correlation_matrix = data.loc[:, [
    'Remittances_USD_Billions',
    'GDP_Growth_Rate',
    'Inflation_Rate',
    'Unemployment_Rate',
    'Trade_Balance_USD_Billions',
    'Exchange_Rate_INR_USD',
]].corr()
print(correlation_matrix.round(3))


2. CORRELATION ANALYSIS
------------------------------
                            Remittances_USD_Billions  GDP_Growth_Rate  \
Remittances_USD_Billions                       1.000           -0.044   
GDP_Growth_Rate                               -0.044            1.000   
Inflation_Rate                                 0.120            0.070   
Unemployment_Rate                             -0.378            0.189   
Trade_Balance_USD_Billions                    -0.766            0.107   
Exchange_Rate_INR_USD                          0.929           -0.092   

                            Inflation_Rate  Unemployment_Rate  \
Remittances_USD_Billions             0.120             -0.378   
GDP_Growth_Rate                      0.070              0.189   
Inflation_Rate                       1.000              0.592   
Unemployment_Rate                    0.592              1.000   
Trade_Balance_USD_Billions          -0.262              0.308   
Exchange_Rate_INR_USD                0.059

## Key correlation between remittances and GDP growth



In [9]:
# Single cell of the correlation matrix: remittances (row) vs GDP growth (column).
rem_gdp_corr = correlation_matrix.at['Remittances_USD_Billions', 'GDP_Growth_Rate']
print(
    f"\nKey Finding: Correlation between Remittances and GDP Growth: {rem_gdp_corr:.4f}"
)


Key Finding: Correlation between Remittances and GDP Growth: -0.0437


# 3. STATIONARITY TESTS (ADF Test)

## Test for stationarity

In [12]:
print("\n3. UNIT ROOT TESTS (ADF Test)")
print("-" * 30)

def adf_test(series, name, alpha=0.05):
    """Run an Augmented Dickey-Fuller test on one series and print a short report.

    Parameters
    ----------
    series : pandas.Series
        Series to test; missing values are dropped before the test is run.
    name : str
        Label printed as the heading of the report.
    alpha : float, default 0.05
        Significance level. The series is reported as stationary when the
        ADF p-value is strictly below this value.

    Returns
    -------
    bool
        True when the p-value is below ``alpha`` (reported as "Stationary: Yes"),
        False otherwise.
    """
    result = adfuller(series.dropna())
    adf_stat, p_value, critical_values = result[0], result[1], result[4]

    # Plain Python bool so the value stored by callers prints as True/False
    # rather than a NumPy scalar.
    is_stationary = bool(p_value < alpha)

    # Format the critical values explicitly: printing the raw dict exposes
    # ``np.float64(...)`` wrappers and 15+ digits of noise.
    formatted_critical = ", ".join(
        f"{level}: {value:.4f}" for level, value in critical_values.items()
    )

    print(f"{name}:")
    print(f"  ADF Statistic: {adf_stat:.6f}")
    print(f"  p-value: {p_value:.6f}")
    print(f"  Critical Values: {formatted_critical}")
    print(f"  Stationary: {'Yes' if is_stationary else 'No'}")
    return is_stationary
# Series to run through adf_test, keyed by the label shown in the report.
variables = {
    label: data[column]
    for label, column in (
        ('Remittances (USD Billions)', 'Remittances_USD_Billions'),
        ('GDP Growth Rate', 'GDP_Growth_Rate'),
        ('Log Remittances', 'Log_Remittances'),
        ('Inflation Rate', 'Inflation_Rate'),
    )
}

# Test each series in turn, remembering its verdict and leaving a blank line
# between the printed reports.
stationarity_results = {}
for name, series in variables.items():
    verdict = adf_test(series, name)
    stationarity_results[name] = verdict
    print()


3. UNIT ROOT TESTS (ADF Test)
------------------------------
Remittances (USD Billions):
  ADF Statistic: 4.232984
  p-value: 1.000000
  Critical Values: {'1%': np.float64(-3.7112123008648155), '5%': np.float64(-2.981246804733728), '10%': np.float64(-2.6300945562130176)}
  Stationary: No

GDP Growth Rate:
  ADF Statistic: -5.829780
  p-value: 0.000000
  Critical Values: {'1%': np.float64(-3.639224104416853), '5%': np.float64(-2.9512301791166293), '10%': np.float64(-2.614446989619377)}
  Stationary: Yes

Log Remittances:
  ADF Statistic: -5.017250
  p-value: 0.000021
  Critical Values: {'1%': np.float64(-3.639224104416853), '5%': np.float64(-2.9512301791166293), '10%': np.float64(-2.614446989619377)}
  Stationary: Yes

Inflation Rate:
  ADF Statistic: -4.881092
  p-value: 0.000038
  Critical Values: {'1%': np.float64(-3.7377092158564813), '5%': np.float64(-2.9922162731481485), '10%': np.float64(-2.635746736111111)}
  Stationary: Yes



# 4. REGRESSION ANALYSIS

In [13]:
print("\n4. MULTIPLE REGRESSION ANALYSIS", "-" * 30, sep="\n")

# Model 1 specification: dependent variable and its four regressors.
# NOTE(review): the ADF cell reports the remittances level as non-stationary
# while GDP growth is stationary — confirm that regressing on the level
# (rather than a log or differenced series) is intended.
y_var = 'GDP_Growth_Rate'
X_vars = ['Remittances_USD_Billions', 'Inflation_Rate', 'Unemployment_Rate', 'Exchange_Rate_INR_USD']

# Estimation sample: only rows where every model variable is observed.
reg_data = data.loc[:, [*X_vars, y_var]].dropna()
y = reg_data[y_var]
X = reg_data[X_vars]

# Prepend the intercept column to the regressors.
X_with_const = sm.add_constant(X)

# Ordinary least squares fit; `model` is reused later for residual diagnostics.
model = sm.OLS(endog=y, exog=X_with_const).fit()

print("Model 1: GDP Growth = f(Remittances, Inflation, Unemployment, Exchange Rate)")
print(model.summary())


4. MULTIPLE REGRESSION ANALYSIS
------------------------------
Model 1: GDP Growth = f(Remittances, Inflation, Unemployment, Exchange Rate)
                            OLS Regression Results                            
Dep. Variable:        GDP_Growth_Rate   R-squared:                       0.079
Model:                            OLS   Adj. R-squared:                 -0.043
Method:                 Least Squares   F-statistic:                    0.6468
Date:                Sat, 13 Sep 2025   Prob (F-statistic):              0.633
Time:                        06:23:56   Log-Likelihood:                -85.679
No. Observations:                  35   AIC:                             181.4
Df Residuals:                      30   BIC:                             189.1
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t| 

# 5. SIMPLE BIVARIATE ANALYSIS

In [14]:
print("\n5. BIVARIATE ANALYSIS: GDP Growth vs Remittances")
print("-" * 50)

# Simple regression: GDP growth on remittances plus an intercept.
X_simple = sm.add_constant(data['Remittances_USD_Billions'])
y_simple = data['GDP_Growth_Rate']

# missing='drop' tells statsmodels to discard any row with a NaN in either
# variable, mirroring the dropna() applied before the multiple regression.
# The default (missing='none') performs no missing-value handling, so the two
# models would otherwise treat incomplete rows differently.
simple_model = sm.OLS(y_simple, X_simple, missing='drop').fit()

print(simple_model.summary())


5. BIVARIATE ANALYSIS: GDP Growth vs Remittances
--------------------------------------------------
                            OLS Regression Results                            
Dep. Variable:        GDP_Growth_Rate   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                 -0.028
Method:                 Least Squares   F-statistic:                   0.06322
Date:                Sat, 13 Sep 2025   Prob (F-statistic):              0.803
Time:                        06:24:02   Log-Likelihood:                -87.093
No. Observations:                  35   AIC:                             178.2
Df Residuals:                      33   BIC:                             181.3
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
----------------

## Calculate elasticity at means

In [15]:
# Elasticity evaluated at the sample means:
#   bivariate slope * (mean remittances / mean GDP growth)
mean_gdp_growth = data['GDP_Growth_Rate'].mean()
mean_remittances = data['Remittances_USD_Billions'].mean()
elasticity = simple_model.params['Remittances_USD_Billions'] * (mean_remittances / mean_gdp_growth)

print(
    f"\nElasticity of GDP Growth w.r.t. Remittances: {elasticity:.4f}",
    f"Interpretation: A 1% increase in remittances is associated with a {elasticity:.4f}% change in GDP growth rate",
    sep="\n",
)


Elasticity of GDP Growth w.r.t. Remittances: -0.0266
Interpretation: A 1% increase in remittances is associated with a -0.0266% change in GDP growth rate


# 6. TIME SERIES ANALYSIS


In [16]:
print("\n6. TIME SERIES CHARACTERISTICS", "-" * 30, sep="\n")

# Derived columns added to `data`: row-over-row percentage change in
# remittances, and GDP growth shifted down by one row.
data['Remittances_Growth'] = data['Remittances_USD_Billions'].pct_change().mul(100)
data['GDP_Growth_Lag1'] = data['GDP_Growth_Rate'].shift(periods=1)

# Durbin-Watson statistic on the residuals of the multiple-regression `model`.
# Values strictly between 1.5 and 2.5 are labelled "No autocorrelation".
dw_stat = durbin_watson(model.resid)
if 1.5 < dw_stat < 2.5:
    dw_verdict = 'No autocorrelation'
else:
    dw_verdict = 'Potential autocorrelation'

print(f"Durbin-Watson statistic: {dw_stat:.4f}")
print(f"Autocorrelation assessment: {dw_verdict}")



6. TIME SERIES CHARACTERISTICS
------------------------------
Durbin-Watson statistic: 2.1644
Autocorrelation assessment: No autocorrelation


# 7. THRESHOLD ANALYSIS (based on literature findings)


In [17]:
print("\n7. THRESHOLD ANALYSIS", "-" * 30, sep="\n")

# Partition the rows at the median remittance level; rows equal to the median
# fall into the low group.
remittance_median = data['Remittances_USD_Billions'].median()
low_remittance = data.loc[data['Remittances_USD_Billions'].le(remittance_median)]
high_remittance = data.loc[data['Remittances_USD_Billions'].gt(remittance_median)]

# GDP growth within each group
low_growth = low_remittance['GDP_Growth_Rate']
high_growth = high_remittance['GDP_Growth_Rate']

print(f"Median remittance level: ${remittance_median:.1f} billion")
print(f"Low remittance period (n={len(low_remittance)}): Mean GDP Growth = {low_growth.mean():.2f}%")
print(f"High remittance period (n={len(high_remittance)}): Mean GDP Growth = {high_growth.mean():.2f}%")

# Independent two-sample t-test on mean GDP growth, using SciPy's defaults.
# NOTE(review): the default assumes equal group variances — confirm that is
# appropriate here or consider equal_var=False.
t_stat, p_value = stats.ttest_ind(low_growth, high_growth)
print(f"T-test for difference in means: t-statistic = {t_stat:.4f}, p-value = {p_value:.4f}")


7. THRESHOLD ANALYSIS
------------------------------
Median remittance level: $37.2 billion
Low remittance period (n=18): Mean GDP Growth = 6.17%
High remittance period (n=17): Mean GDP Growth = 5.81%
T-test for difference in means: t-statistic = 0.3550, p-value = 0.7248



# 8. POLICY IMPLICATIONS ANALYSIS

In [18]:
print("\n8. POLICY IMPLICATIONS", "-" * 30, sep="\n")

# Baseline inputs for the scenario work: the slope of the bivariate regression
# and the remittance figure in the last row of the dataset.
# NOTE(review): the "(2024)" label printed below is hard-coded — confirm the
# final row really is the 2024 observation.
coefficient = simple_model.params['Remittances_USD_Billions']
recent_remittances = data['Remittances_USD_Billions'].iat[-1]

print(f"Current remittance level (2024): ${recent_remittances:.1f} billion")
print(f"Estimated impact coefficient: {coefficient:.6f}")


8. POLICY IMPLICATIONS
------------------------------
Current remittance level (2024): $135.5 billion
Estimated impact coefficient: -0.003643


## Scenario analysis


In [19]:
# Hypothetical remittance levels (USD billions) to compare against the latest
# observed value.
# NOTE(review): 83.15 was annotated as the 2020 level in the original cell,
# yet the scenario is labelled "pre-COVID" — confirm which year is intended.
scenarios = dict([
    ("10% increase in remittances", recent_remittances * 1.1),
    ("20% decrease in remittances", recent_remittances * 0.8),
    ("Return to pre-COVID levels", 83.15),
])

print("\nScenario Analysis:")
for scenario, rem_level in scenarios.items():
    # Implied change in GDP growth = bivariate slope * change in remittances
    change_in_remittances = rem_level - recent_remittances
    impact = coefficient * change_in_remittances
    print(f"  {scenario}: GDP growth impact = {impact:+.3f} percentage points")


Scenario Analysis:
  10% increase in remittances: GDP growth impact = -0.049 percentage points
  20% decrease in remittances: GDP growth impact = +0.099 percentage points
  Return to pre-COVID levels: GDP growth impact = +0.191 percentage points


In [20]:
# Save results to CSV
def _describe_correlation(r):
    """Return a plain-language label for a correlation coefficient.

    Strength is taken from rule-of-thumb bands on ``abs(r)``: below 0.1 is
    negligible, below 0.3 weak, below 0.5 moderate, otherwise strong.
    Direction comes from the sign of ``r``.
    """
    if pd.isna(r):
        return 'Correlation undefined'
    if r == 0:
        return 'No correlation'

    magnitude = abs(r)
    if magnitude < 0.1:
        strength = 'Negligible'
    elif magnitude < 0.3:
        strength = 'Weak'
    elif magnitude < 0.5:
        strength = 'Moderate'
    else:
        strength = 'Strong'

    direction = 'positive' if r > 0 else 'negative'
    return f'{strength} {direction} correlation'


# One row per headline metric. The correlation label is derived from the
# computed value rather than hard-coded, so the text cannot contradict the
# number saved next to it.
results_summary = pd.DataFrame({
    'Metric': ['Correlation (Remittances-GDP)', 'R-squared (Simple Model)', 'Beta Coefficient',
               'Elasticity', 'DW Statistic', 'Mean GDP Growth (Low Rem)', 'Mean GDP Growth (High Rem)'],
    'Value': [rem_gdp_corr, simple_model.rsquared, simple_model.params['Remittances_USD_Billions'],
              elasticity, dw_stat, low_remittance['GDP_Growth_Rate'].mean(),
              high_remittance['GDP_Growth_Rate'].mean()],
    'Interpretation': [_describe_correlation(rem_gdp_corr), 'Model explanatory power',
                      'Marginal effect per billion USD', 'Growth elasticity', 'Autocorrelation test',
                      'GDP growth in low remittance period', 'GDP growth in high remittance period']
})

# Written to the working directory without the DataFrame index.
results_summary.to_csv('econometric_analysis_results.csv', index=False)



Detailed results saved to 'econometric_analysis_results.csv'
Analysis completed successfully!
