In [2]:
import itertools as it
import math
import pickle as pkl
import random
from pathlib import Path

import arviz as az
import country_converter as cc
import matplotlib.colors as clr
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyfixest as pf
import pymc as pm
import scipy.stats as stats
import seaborn as sns
from pytensor import tensor as pt
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from statsmodels.tsa.statespace.tools import diff
from statsmodels.tsa.tsatools import add_lag



In [10]:
# Load the Stata file that every later cell works from.
data = pd.read_stata("../data/kotz/data/T_econ.dta")

# First difference of T5_mean.
# NOTE(review): the difference (and the lag below) is taken down the whole
# column. The frame also carries an "ID" column, so if several regions are
# stacked, the first row of each region is differenced against the last row of
# the previous region -- confirm this is intended, or difference within "ID".
data["T5_mean_diff"] = diff(data["T5_mean"])

# Lagged copy of the differenced series: take the lag column from add_lag's
# output and prepend a NaN at position 0.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
data["T5_mean_diff_lag"] = np.insert(add_lag(data["T5_mean_diff"])[:,1], 0, np.nan)

# Interaction terms, built as plain element-wise products of existing columns.
data["T5_seas_diff_mXT5_varm"] = data["T5_seas_diff_m"] * data["T5_varm"]
data["T5_mean_mXT5_mean_diff"] = data["T5_mean_m"] * data["T5_mean_diff"]
data["T5_mean_mXT5_mean_diff_lag"] = data["T5_mean_m"] * data["T5_mean_diff_lag"]

In [13]:
# Columns used by the models. Other cells read the first entry as the response
# (model_vars[0]) and the remaining entries as covariates (model_vars[1:]).
model_vars = [
    "dlgdp_pc_usd",
    "T5_varm",
    "T5_seas_diff_mXT5_varm",
    "T5_mean_diff",
    # "T5_mean_mXT5_mean_diff",
    # "T5_mean_diff_lag",
    # "T5_mean_mXT5_mean_diff_lag",
    # "P5_totalpr"
]

# Keep only rows that are complete for the selected columns, then renumber them
# from zero so positional and label-based indexing agree.
data = data.dropna(subset=model_vars).reset_index(drop=True)

# One StandardScaler per column; the fitted scalers stay available in `scalers`
# and the standardised values (1-D arrays) are stored in `scaled_data`.
scalers = {var: StandardScaler() for var in model_vars}
scaled_data = {
    var: scalers[var].fit_transform(data[[var]].to_numpy()).ravel()
    for var in model_vars
}

In [14]:
# Gather the standardised columns into a single frame and copy over the two
# panel identifiers from `data`; the rows are matched on the index.
scaled_df = pd.DataFrame(scaled_data).assign(ID=data["ID"], yearn=data["yearn"])

In [15]:
# Least-squares reference fit on the standardised data, using the same response
# and covariates as `model_vars`. The coefficient table is the cell's output.
ols_formula = """
    dlgdp_pc_usd ~ 
    T5_varm +
    T5_seas_diff_mXT5_varm +
    T5_mean_diff
    | 0
    """
ols_fit = pf.feols(ols_formula, data=scaled_df)
ols_fit.tidy()

Unnamed: 0_level_0,Estimate,Std. Error,t value,Pr(>|t|),2.5%,97.5%
Coefficient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Intercept,-3.7313550000000004e-17,0.005831,-6.399086e-15,1.0,-0.011429,0.011429
T5_varm,-0.15495,0.018065,-8.577264,0.0,-0.190359,-0.119541
T5_seas_diff_mXT5_varm,0.2004163,0.018062,11.09586,0.0,0.165014,0.235819
T5_mean_diff,-0.01270498,0.005835,-2.177405,0.029458,-0.024142,-0.001268


In [None]:
# Seed passed to every sampling call so a Restart & Run All reproduces the draws.
RANDOM_SEED = 42

# Create the output folder before sampling: a missing directory would otherwise
# only surface at the final open(), after the whole run has been spent.
output_path = Path('output/models/bayes_models/tfp_bayes_yfe_cre_for_drought_full.pkl')
output_path.parent.mkdir(parents=True, exist_ok=True)

# --- Fixed-effect indicator matrices ---------------------------------------
# Each matrix has one row per level (year or region) and one column per
# observation; entry [k, i] is 1.0 when observation i belongs to level k.
# They are 2-D float arrays and are only consumed by the fixed-effect terms
# that are currently commented out inside the model.
data_len = len(scaled_df)
min_year = min(scaled_df.yearn)

# Years are coded by their rank among the distinct values, so gaps in the year
# sequence or a non-integer dtype cannot yield an invalid or out-of-range index.
year_levels, year_codes = np.unique(scaled_df["yearn"].to_numpy(), return_inverse=True)
year_mult_mat = (year_codes[None, :] == np.arange(len(year_levels))[:, None]).astype(float)

# Regions are coded in order of first appearance; unlike counting changes of ID
# from one row to the next, this does not require each region's rows to be contiguous.
region_codes, region_levels = pd.factorize(scaled_df["ID"])
region_mult_mat = (region_codes[None, :] == np.arange(len(region_levels))[:, None]).astype(float)

# Covariates as a plain (n_obs, n_covariates) array so the product with the
# coefficient vector is an ordinary broadcast rather than tensor * DataFrame.
covariate_names = model_vars[1:]
covariate_values = scaled_df[covariate_names].to_numpy()

with pm.Model() as model:

    # One coefficient per covariate; the linear predictor is the row-wise sum
    # of coefficient * covariate. There is no intercept term.
    covar_coefs = pm.Normal("covar_coefs", 0, 10, shape=len(covariate_names))
    covar_terms = pm.Deterministic("covar_terms", pt.sum(covar_coefs * covariate_values, axis=1))

    # year_coefs = pt.expand_dims(pm.Normal("year_coefs", 0, 10, shape=(len(set(scaled_df.yearn))-1)),axis=1)
    # year_coefs = pm.math.concatenate([[[0]],year_coefs])
    # year_fixed_effects = pm.Deterministic("year_fixed_effects",pt.sum(year_coefs*year_mult_mat,axis=0))

    # region_coefs = pt.expand_dims(pm.Normal("region_coefs", 0, 10, shape=(len(set(scaled_df.ID))-1)),axis=1)
    # region_coefs = pm.math.concatenate([[[0]],region_coefs])
    # region_fixed_effects = pm.Deterministic("region_fixed_effects",pt.sum(region_coefs*region_mult_mat,axis=0))

    # Mean of the likelihood. The variable names are kept unchanged because
    # they become the keys of the saved inference data.
    gdp_prior = pm.Deterministic(
        "gdp_prior",
        covar_terms #+
        # year_fixed_effects +
        # region_fixed_effects
    )

    # Hierarchical noise scale: the observation sd has a HalfNormal prior whose
    # own scale is given a HalfNormal(10) hyperprior.
    gdp_std_scale = pm.HalfNormal("gdp_std_scale", 10)
    gdp_std = pm.HalfNormal("gdp_std", sigma=gdp_std_scale)
    # Likelihood of the observed (standardised) response.
    gdp_posterior = pm.Normal("gdp_posterior", gdp_prior, gdp_std, observed=scaled_df[model_vars[0]])

    prior = pm.sample_prior_predictive(random_seed=RANDOM_SEED)
    trace = pm.sample(target_accept=.99, cores=4, random_seed=RANDOM_SEED)
    posterior = pm.sample_posterior_predictive(trace, extend_inferencedata=True, random_seed=RANDOM_SEED)

# Persist the sampling results together with the variable list that defines
# the column order of "covar_coefs".
# NOTE(review): the file name mentions "yfe"/"cre" while the fixed-effect terms
# above are disabled -- confirm the name still describes this model.
with open(output_path, 'wb') as buff:
    pkl.dump({
        "prior":prior,
        "trace":trace,
        "posterior":posterior,
        "var_list":model_vars
    },buff)

Sampling: [covar_coefs, gdp_posterior, gdp_std, gdp_std_scale]


In [None]:
# Posterior mean of the first covariate coefficient, pooled over all chains and
# draws. Reads from `trace` (the sampling result defined in the model cell);
# the previous name `post` is not defined anywhere in this notebook.
np.mean(trace.posterior.covar_coefs[:,:,0].data.flatten())