In [12]:
import csv
import os
import copy
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
import pickle as pkl
import matplotlib.pyplot as plt
import random
import statsmodels.api as sm
import warnings
import pymc as pm

# Silence pandas' PerformanceWarning (raised e.g. when a DataFrame becomes
# fragmented by inserting many columns one at a time).
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
# NOTE(review): this ignores *every* warning category for the rest of the
# session, which makes the targeted filter above redundant and also hides
# pandas chained-assignment warnings and library deprecation notices.
# Consider narrowing it to specific categories once the notebook is stable.
warnings.filterwarnings('ignore')

In [45]:
# Load the prepared regression table, discard every row that has at least one
# missing value, and renumber the surviving rows 0..n-1.
gdp_regression_data = (
    pd.read_csv("../data/regression/gdp_regression_data.csv")
    .dropna()
    .reset_index(drop=True)
)

In [46]:
# Specification of the regression to fit.
#   covariates          - column-name templates; the literal "[weight]" token is
#                         substituted with the "weights" value when the columns
#                         are selected from the data.
#   fixed_effects       - for each entry, every data column whose name ends in
#                         "<entry>_fixed_effect" is added to the design matrix.
#   incremental_effects - count n; columns ending in "incremental_effect_<i>"
#                         for i in 0..n-1 are added to the design matrix.
#   weights             - weighting scheme name spliced into the templates.
#   target              - name of the dependent-variable column.
model = {
    "covariates": [
        # humidity terms
        'fd_humidity_[weight]',
        'fd_humidity_[weight]_2',
        'fd_humidity_annual_std_[weight]',
        'fd_humidity_annual_std_[weight]_2',
        # precipitation terms
        'fd_precip_[weight]',
        'fd_precip_[weight]_2',
        'fd_precip_[weight]_3',
        'fd_precip_annual_std_[weight]',
        'fd_precip_daily_std_[weight]',
        'fd_precip_daily_std_[weight]_2',
        'fd_precip_daily_std_[weight]_3',
        # temperature variability terms
        'fd_temp_annual_std_[weight]',
        'fd_temp_annual_std_[weight]_2',
        'fd_temp_annual_std_[weight]_3',
        'fd_temp_daily_std_[weight]',
        'fd_temp_daily_std_[weight]_2',
        # level terms
        'humidity_daily_std_[weight]',
        'precip_[weight]',
        'precip_daily_std_[weight]',
        'temp_[weight]',
        'temp_[weight]_2',
        'temp_[weight]_3',
        # wildfire indicators
        'wildfire',
        'wildfire_heat_wave',
    ],
    "fixed_effects": ["country", "year"],
    "incremental_effects": 3,
    "weights": "unweighted",
    "target": "fd_ln_gdp",
}

In [47]:
# Resolve the model specification into the concrete list of data columns that
# make up the design matrix.

# 1) Covariates: substitute the chosen weighting scheme into each template.
vars_to_grab = [
    covar.replace("[weight]", model["weights"]) for covar in model["covariates"]
]

# 2) Fixed-effect columns, followed by the incremental-effect columns.
data_columns = list(gdp_regression_data.columns)
for fe in model["fixed_effects"]:
    vars_to_grab.extend(
        col for col in data_columns if col.endswith(f"{fe}_fixed_effect")
    )
    for i in range(model["incremental_effects"]):
        vars_to_grab.extend(
            col for col in data_columns if col.endswith(f"incremental_effect_{i}")
        )

# The incremental-effect filter does not depend on `fe`, so the loop above
# collects the same incremental-effect columns once per fixed effect. Selecting
# a column twice would put duplicate labels (and perfectly collinear duplicated
# regressors) into the design matrix, so keep only the first occurrence of each
# name. dict.fromkeys preserves insertion order.
vars_to_grab = list(dict.fromkeys(vars_to_grab))

In [48]:
# Design matrix: the selected covariate / fixed-effect / incremental-effect
# columns. An explicit copy is taken because later cells add and drop columns
# on this frame; working on an independent copy keeps those edits from being
# treated as writes to a slice of gdp_regression_data.
covariate_data = gdp_regression_data[vars_to_grab].copy()

# Dependent variable named by the model specification.
target_data = gdp_regression_data[model["target"]]

# Scale the data

In [49]:
# Standardise every covariate column; fixed-effect and incremental-effect
# columns are left untouched. Each covariate gets its own fitted StandardScaler
# (kept in `covar_scalers`, aligned by position with `covar_cols`) and is
# replaced by a column named "<original>_scaled" appended after the untouched
# columns.
covar_cols = [
    col
    for col in covariate_data.columns
    if "incremental_effect" not in col and "fixed_effect" not in col
]

# Guard against running this cell twice: a second pass would re-scale the
# "_scaled" columns and overwrite the fitted scalers.
already_scaled = [col for col in covar_cols if col.endswith("_scaled")]
if already_scaled:
    raise RuntimeError(
        "covariate_data already contains scaled columns "
        f"(e.g. {already_scaled[0]!r}); rebuild covariate_data before re-running this cell."
    )

covar_scalers = [StandardScaler() for _ in covar_cols]

# Build all scaled columns first and attach them with a single concat, rather
# than inserting/dropping one column at a time (which fragments the frame and
# mutates it in place).
scaled_covariates = pd.DataFrame(
    {
        f"{col}_scaled": scaler.fit_transform(
            covariate_data[col].to_numpy().reshape(-1, 1)
        ).ravel()
        for col, scaler in zip(covar_cols, covar_scalers)
    },
    index=covariate_data.index,
)
covariate_data = pd.concat(
    [covariate_data.drop(columns=covar_cols), scaled_covariates], axis=1
)

In [50]:
# Display the design matrix after scaling (pandas elides the middle rows and
# columns of a frame this size).
covariate_data

Unnamed: 0,ABW_country_fixed_effect,AFG_country_fixed_effect,AGO_country_fixed_effect,ALB_country_fixed_effect,AND_country_fixed_effect,ARG_country_fixed_effect,ARM_country_fixed_effect,ASM_country_fixed_effect,ATG_country_fixed_effect,AUS_country_fixed_effect,...,fd_temp_daily_std_unweighted_scaled,fd_temp_daily_std_unweighted_2_scaled,humidity_daily_std_unweighted_scaled,precip_unweighted_scaled,precip_daily_std_unweighted_scaled,temp_unweighted_scaled,temp_unweighted_2_scaled,temp_unweighted_3_scaled,wildfire_scaled,wildfire_heat_wave_scaled
0,0,1,0,0,0,0,0,0,0,0,...,-0.472018,-0.634019,-0.889244,-0.842560,-0.874153,-0.910390,-1.133507,-1.143882,-0.200037,-0.073883
1,0,1,0,0,0,0,0,0,0,0,...,-0.576544,-0.762871,-0.962669,-0.879713,-0.855830,-1.034438,-1.220360,-1.193785,-0.200037,-0.073883
2,0,1,0,0,0,0,0,0,0,0,...,-0.592436,-0.769573,-0.934163,-0.854164,-0.838400,-0.780149,-1.033857,-1.081315,-0.200037,-0.073883
3,0,1,0,0,0,0,0,0,0,0,...,-0.348926,-0.444686,-0.833475,-0.670873,-0.771789,-0.938759,-1.154063,-1.156097,-0.200037,-0.073883
4,0,1,0,0,0,0,0,0,0,0,...,-2.131928,-2.626404,-0.840840,-0.542775,-0.689343,-0.733057,-0.995693,-1.055948,-0.200037,-0.073883
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8468,0,0,0,0,0,0,0,0,0,0,...,0.793592,0.954015,-0.073602,-0.691125,-0.646604,0.307462,0.136706,-0.025183,-0.200037,-0.073883
8469,0,0,0,0,0,0,0,0,0,0,...,-0.179552,-0.213815,0.157047,-0.522360,-0.417123,0.331692,0.169666,0.011587,-0.200037,-0.073883
8470,0,0,0,0,0,0,0,0,0,0,...,-2.949127,-3.372045,0.073150,-0.289471,-0.143470,0.276852,0.095494,-0.070685,-0.200037,-0.073883
8471,0,0,0,0,0,0,0,0,0,0,...,2.365515,2.681517,0.069663,-0.580230,-0.441113,0.231115,0.034810,-0.136717,-0.200037,-0.073883


# Standard regression to get prior estimates

In [53]:
# Ordinary least squares of the target on the design matrix. The coefficient
# table supplies point estimates and standard errors.
# NOTE(review): this rebinds `model`, which earlier in the notebook holds the
# specification dict; cells that index `model[...]` cannot be re-run after this
# one without re-running the specification cell first.
# NOTE(review): no constant is added to covariate_data here - confirm this is
# intended.
model = sm.OLS(target_data, covariate_data)

# Fit once and reuse the result; fitting and summarising separately for each
# column repeats the same estimation.
ols_results = model.fit()
ols_coef_table = ols_results.summary2().tables[1]

# Despite its name, `covar_coef_errors` holds the coefficient estimates
# (the "Coef." column); the standard errors are in `covar_std_errors`.
covar_coef_errors = ols_coef_table["Coef."]
covar_std_errors = ols_coef_table["Std.Err."]