In [7]:
import matplotlib.pyplot as plt
from scipy import stats
import xarray as xr
import pandas as pd
import numpy as np
import pymc as pm
import random
from pytensor import tensor as pt

In [25]:
# create formatted dataset from pop-weighted country temp data by month
result_dict = {"Country":[],"Year":[],"Avg_PopWeighted_Temp":[]}
# data = pd.read_csv("../data/burke/data/input/nc/pop_weighted_country_temps_by_month.csv")
data = pd.read_csv("../data/burke/data/input/nc/unweighted_country_temps_by_month.csv")
col_prefix = "unweighted_monthly_temp.mean.X"
years = [str(year) for year in list(range(1900,2018))]
months = [str(month) if month >= 10 else "0"+str(month) for month in list(range(1,13))]
for _, row in data.iterrows():
    country = row["country"]
    for year in years:
        all_vals_by_year = []
        for month in months:
            col_name = col_prefix + year + "." + month + ".01"
            all_vals_by_year.append(row[col_name])
        result_dict["Country"].append(country)
        result_dict["Year"].append(year)
        result_dict["Mean_Temp"].append(np.nanmean(all_vals_by_year))
pd.DataFrame.from_dict(result_dict).to_csv("../data/burke/data/input/custom_monthly_unweighted_temp_by_country.csv")

  result_dict["Avg_PopWeighted_Temp"].append(np.nanmean(all_vals_by_year))


In [6]:
# create formatted dataset from unweighted country precip data by month
result_dict = {"Country":[],"Year":[],"Unweighted_Precipitation":[]}
# data = pd.read_csv("../data/burke/data/input/nc/pop_weighted_country_temps_by_month.csv")
data = pd.read_csv("../data/burke/data/input/nc/unweighted_country_precip_by_month.csv")
col_prefix = "unweighted_monthly_precip.mean.precip_clipped_by_country_mask_"
years = [str(year) for year in list(range(1900,2018))]
months = [str(month) if month >= 10 else "0"+str(month) for month in list(range(1,13))]
month_count = 0
for _, row in data.iterrows():
    month_count += 1
    country = row["country"]
    for year in years:
        all_vals_by_year = []
        for month in months:
            col_name = col_prefix + str(month_count)
            all_vals_by_year.append(row[col_name])
        result_dict["Country"].append(country)
        result_dict["Year"].append(year)
        result_dict["Unweighted_Precipitation"].append(np.nanmean(all_vals_by_year))
pd.DataFrame.from_dict(result_dict).to_csv("../data/burke/data/input/custom_monthly_unweighted_precip_by_country.csv")

  result_dict["Unweighted_Precipitation"].append(np.nanmean(all_vals_by_year))


In [47]:
cont_data = [i for i in range(100)]
disc_data = []
for i in range(len(cont_data)):
    disc_data.append(random.choice([1 if i2 < i else 0 for i2 in range(len(cont_data))]))
for index in range(len(disc_data)):
    if disc_data[index] == 1:
        disc_data[index] = random.choice([0,1])

with pm.Model() as model:
    temp_prior = pm.Normal("temp_prior", 50, 30)
    temp_std = pm.HalfNormal("temp_std", 30)
    temp_posterior = pm.Normal("temp_posterior", temp_prior, temp_std, observed=cont_data)
    drought_temp_coef_matrix = pm.Normal("drought_temp_coef_matrix", np.zeros((1, 2)), 10)
    drought_temp_likelihood_coefs = pm.Deterministic("drought_temp_likelihood_coefs",  pt.expand_dims(temp_posterior, axis=1) / drought_temp_coef_matrix)
    drought_prior = pm.Deterministic("drought_prior", pm.math.softmax(drought_temp_likelihood_coefs, axis=-1))
    drought_posterior = pm.Categorical("drought_posterior", drought_prior, observed=disc_data)
    prior = pm.sample_prior_predictive()
    trace = pm.sample()
    posterior = pm.sample_posterior_predictive(trace, extend_inferencedata=True)

Sampling: [drought_posterior, drought_temp_coef_matrix, temp_posterior, temp_prior, temp_std]
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [temp_prior, temp_std, drought_temp_coef_matrix]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 28 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details
There were 385 divergences after tuning. Increase `target_accept` or reparameterize.
Sampling: [drought_posterior, temp_posterior]


In [48]:
print(posterior["posterior"]["drought_temp_coef_matrix"][0][0].data)
print(posterior["posterior_predictive"]["temp_posterior"][0][0].data)
print(posterior["posterior_predictive"]["drought_posterior"][0][0].data)

[[-14.26782194 -12.55610603]]
[ 40.3102402   14.83095789  26.5531689   19.74284033  26.95302559
  30.60122259  -5.27218147  33.56728039   4.60855218  38.52839184
  44.53427288  70.34300879  63.04655999  29.05991549  70.9671835
  13.76689078  78.3575228   48.25463962  90.63366081  10.16378188
  16.64943001  28.07147489  31.56158126  35.23919002  39.91932769
  77.50856782  20.61026046  29.41375854 127.13766116  98.22415167
 -12.05451785  23.16673241  46.72218159  11.66926631  81.24860185
  23.42020287  15.51476525  25.10838599  26.612293    36.05452247
  82.5847512    2.95345251  32.97057062   6.32683881  89.45406727
  82.50293908  35.10941193  38.17476144  35.35456465  36.5212818
  76.26779663  49.88048189  31.50412594  36.69483123  26.82544515
  80.79949935  66.81841882  56.67701219  39.74987116  45.20926887
  50.07489458  54.80316994  75.44914393  24.58162264  95.74527566
  26.19675354 115.04195107  56.3259031   72.09887669  60.63196094
  74.27352916   8.5398681   70.41773322  74.7534

In [50]:
low_temp = []
high_temp = []
print(np.mean(posterior["posterior_predictive"]["drought_posterior"][1][0].data))
for i in range(len(posterior["posterior_predictive"]["drought_posterior"][1][0].data)):
    if posterior["posterior_predictive"]["temp_posterior"][1][0].data[i] < 50:
        low_temp.append(posterior["posterior_predictive"]["drought_posterior"][1][0].data[i])
    else:
        high_temp.append(posterior["posterior_predictive"]["drought_posterior"][1][0].data[i])
print(np.mean(low_temp))
print(np.mean(high_temp))

0.43
0.5952380952380952
0.3103448275862069
