In [1]:
import csv
import os
import pymc as pm
from pymc import do, observe
import pandas as pd
import numpy as np
import arviz as az
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OrdinalEncoder
from pytensor import tensor as pt
import pickle as pkl
import matplotlib.pyplot as plt
import scipy.stats as stats
import seaborn as sns
import itertools as it
import country_converter as cc

In [3]:
integrated_data = pd.read_csv("../data/processed/five_node_model_integrated_data.csv")
temp_scaler, precip_scaler, tfp_scaler = StandardScaler(), MinMaxScaler(), StandardScaler()
normalized_temp = temp_scaler.fit_transform(np.array(integrated_data["Unweighted_Temp"]).reshape(-1,1)).flatten()
normalized_precip = precip_scaler.fit_transform(np.array(integrated_data["Precipitation"]).reshape(-1,1)).flatten()
normalized_tfp = tfp_scaler.fit_transform(np.array(integrated_data["TFP_Change"]).reshape(-1,1)).flatten()

### 12-19: I noticed that the global effect of temperature on ag tfp is lower than expected (-.05 TFP for 1.5C warming, -.1 TFP for 2.5C warming.

#### Possible explanations:
1. Most of the effect of temp on tfp goes through precipitation and drought (-.7 TFP when drought)
2. Only the warmer regions are negatively effected by rising temperature; the colder regions may even be helped by rising temperature

#### Investigating explanation 1
- Build model with only temperature and compare coefficients

In [26]:
with pm.Model() as observed_temp_model:

    temp_mw = pm.Dirichlet("temp_mixture_weights", np.array([1]*3))
    temp_prior = pm.Normal("temp_prior", 0, 1, shape=(1,3))
    temp_std = pm.HalfNormal("temp_std", 1, shape=3)
    temp_posterior = pm.NormalMixture(
        "temp_posterior", 
        temp_mw, 
        temp_prior, 
        temp_std,
        observed = normalized_temp
    )
    temp_tfp_coef = pm.Normal('temp_tfp_coef',0,10)
    temp_tfp_coef2 = pm.Normal('temp_tfp_coef2',0,10)
    tfp_intercept = pm.Normal('tfp_intercept',0,10)
    tfp_prior = pm.Deterministic(
        "tfp_prior",
        tfp_intercept +
        (temp_tfp_coef * temp_posterior) +
        (temp_tfp_coef2 * pt.sqr(temp_posterior))
    )
    tfp_std = pm.HalfNormal('tfp_std', sigma=10)
    tfp_posterior = pm.Normal('tfp_posterior', mu=tfp_prior, sigma=tfp_std, observed = normalized_tfp)

    temp_only_trace = pm.sample()

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [temp_mixture_weights, temp_prior, temp_std, temp_posterior_unobserved, temp_tfp_coef, temp_tfp_coef2, tfp_intercept, tfp_std, tfp_posterior_unobserved]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 226 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


In [38]:
print(np.mean(np.array(temp_only_trace.posterior.temp_tfp_coef).flatten()))
print(np.std(np.array(temp_only_trace.posterior.temp_tfp_coef).flatten()))
print(np.mean(np.array(temp_only_trace.posterior.temp_tfp_coef2).flatten()))
print(np.std(np.array(temp_only_trace.posterior.temp_tfp_coef2).flatten()))

-0.03541401491077579
0.01506787277690856
-0.02464423278170637
0.011379041775752847


In [42]:
global_trace = pd.read_pickle(f"../models/temp_precip_drought_tfp_global.pkl")["trace"]
global_temp_coef1 = np.mean(np.array(global_trace.posterior.temp_tfp_coef).flatten())
print(global_temp_coef1)
print(np.std(np.array(global_trace.posterior.temp_tfp_coef).flatten()))
global_temp_coef2 = np.mean(np.array(global_trace.posterior.temp_tfp_coef2).flatten())
print(global_temp_coef2)
print(np.std(np.array(global_trace.posterior.temp_tfp_coef2).flatten()))

-0.028864015482541968
0.015021390871132459
-0.023729767349794005
0.011037244625251057


In [56]:
model_0_res = []
model_1_5_res = []
model_2_0_res = []
model_2_5_res = []

mean_temp = temp_scaler.transform(np.array([val for val in integrated_data["Unweighted_Temp"] if not np.isnan(val)]).reshape(-1,1)).flatten()
mean_temp_1_5 = temp_scaler.transform(np.array([1.5 + val for val in integrated_data["Unweighted_Temp"] if not np.isnan(val)]).reshape(-1,1)).flatten()
mean_temp_2_0 = temp_scaler.transform(np.array([2.0 + val for val in integrated_data["Unweighted_Temp"] if not np.isnan(val)]).reshape(-1,1)).flatten()
mean_temp_2_5 = temp_scaler.transform(np.array([2.5 + val for val in integrated_data["Unweighted_Temp"] if not np.isnan(val)]).reshape(-1,1)).flatten()

for i in range(len(mean_temp)):
    if not np.isnan(i):
        model_0_res.append((mean_temp[i] * global_temp_coef1) + (mean_temp[i] * mean_temp[i] * global_temp_coef2))
        model_1_5_res.append((mean_temp_1_5[i] * global_temp_coef1) + (mean_temp_1_5[i] * mean_temp_1_5[i] * global_temp_coef2))
        model_2_0_res.append((mean_temp_2_0[i] * global_temp_coef1) + (mean_temp_2_0[i] * mean_temp_2_0[i] * global_temp_coef2))
        model_2_5_res.append((mean_temp_2_5[i] * global_temp_coef1) + (mean_temp_2_5[i] * mean_temp_2_5[i] * global_temp_coef2))

tfp_transformed_0 = tfp_scaler.inverse_transform(np.array(model_0_res).reshape(-1,1)).flatten()
tfp_transformed_1_5 = tfp_scaler.inverse_transform(np.array(model_1_5_res).reshape(-1,1)).flatten()
tfp_transformed_2_0 = tfp_scaler.inverse_transform(np.array(model_2_0_res).reshape(-1,1)).flatten()
tfp_transformed_2_5 = tfp_scaler.inverse_transform(np.array(model_2_5_res).reshape(-1,1)).flatten()

print(np.mean(tfp_transformed_0) - np.mean(tfp_transformed_1_5))
print(np.mean(tfp_transformed_0) - np.mean(tfp_transformed_2_0))
print(np.mean(tfp_transformed_0) - np.mean(tfp_transformed_2_5))

0.044528382540510725
0.06192501074844725
0.08059855597026694


In [57]:
global_trace = pd.read_pickle(f"../models/temp_precip_drought_tfp_global_no_scale.pkl")["trace"]
global_temp_coef1 = np.mean(np.array(global_trace.posterior.temp_tfp_coef).flatten())
print(global_temp_coef1)
print(np.std(np.array(global_trace.posterior.temp_tfp_coef).flatten()))
global_temp_coef2 = np.mean(np.array(global_trace.posterior.temp_tfp_coef2).flatten())
print(global_temp_coef2)
print(np.std(np.array(global_trace.posterior.temp_tfp_coef2).flatten()))

0.06783436911284663
0.03740649393484065
-0.0024944679445515813
0.001204912392930932


In [62]:
model_0_res = []
model_1_5_res = []
model_2_0_res = []
model_2_5_res = []

temp_data = [val for val in integrated_data["Unweighted_Temp"] if not np.isnan(val)]

for i in range(len(temp_data)):
    if not np.isnan(i):
        model_0_res.append((temp_data[i] * global_temp_coef1) + (temp_data[i] * temp_data[i] * global_temp_coef2))
        model_1_5_res.append(((temp_data[i] + 1.5) * global_temp_coef1) + ((temp_data[i] + 1.5) * (temp_data[i] + 1.5) * global_temp_coef2))
        model_2_0_res.append(((temp_data[i] + 2.0) * global_temp_coef1) + ((temp_data[i] + 2.0) * (temp_data[i] + 2.0) * global_temp_coef2))
        model_2_5_res.append(((temp_data[i] + 2.5) * global_temp_coef1) + ((temp_data[i] + 2.5) * (temp_data[i] + 2.5) * global_temp_coef2))

print(np.mean(model_0_res) - np.mean(model_1_5_res))
print(np.mean(model_0_res) - np.mean(model_2_0_res))
print(np.mean(model_0_res) - np.mean(model_2_5_res))

0.044001461286752835
0.06116308299355533
0.07957193867263365


### It appears that the effect size is roughly the same between a temp-only model and the full model, and that the data scaling is not the issue
#### This validates yesterday's approach and results

#### Investigating explanation 2

In [69]:
countries_by_region = {}
data_by_region = {}
for country in set(integrated_data["Country"]):
    region = cc.convert(country, to="UNregion")
    if region not in countries_by_region:
        countries_by_region[region] = []
    countries_by_region[region].append(country)
for region, countries in countries_by_region.items():
    regional_data = integrated_data.loc[(integrated_data["Country"].isin(countries))]
    data_by_region[region] = regional_data

In [76]:
regional_model_files = os.listdir("../models/regional_models")
for file in regional_model_files:
    region = file.split("tfp_")[1].split(".")[0]
    print(region)
    regional_model = pd.read_pickle(f"../models/regional_models/{file}")
    regional_temp_scaler = regional_model["temp_scaler"]
    regional_tfp_scaler = regional_model["tfp_scaler"]
    regional_trace = regional_model["trace"]
    temp_coef_1 = np.mean(np.array(regional_trace.posterior.temp_tfp_coef).flatten())
    temp_coef_2 = np.mean(np.array(regional_trace.posterior.temp_tfp_coef2).flatten())
    scaled_temp_data = regional_temp_scaler.transform(np.array(data_by_region[region]["Unweighted_Temp"]).reshape(-1,1)).flatten()
    model_0_res, model_1_5_res, model_2_0_res, model_2_5_res = [], [], [], []
    for temp in scaled_temp_data:
        if not np.isnan(temp):
            model_0_res.append((temp * global_temp_coef1) + (temp * temp * global_temp_coef2))
            model_1_5_res.append(((temp + 1.5) * temp_coef_1) + ((temp + 1.5) * (temp + 1.5) * temp_coef_2))
            model_2_0_res.append(((temp + 2.0) * temp_coef_1) + ((temp + 2.0) * (temp + 2.0) * temp_coef_2))
            model_2_5_res.append(((temp + 2.5) * temp_coef_1) + ((temp + 2.5) * (temp + 2.5) * temp_coef_2))

    tfp_transformed_0 = regional_tfp_scaler.inverse_transform(np.array(model_0_res).reshape(-1,1)).flatten()
    tfp_transformed_1_5 = regional_tfp_scaler.inverse_transform(np.array(model_1_5_res).reshape(-1,1)).flatten()
    tfp_transformed_2_0 = regional_tfp_scaler.inverse_transform(np.array(model_2_0_res).reshape(-1,1)).flatten()
    tfp_transformed_2_5 = regional_tfp_scaler.inverse_transform(np.array(model_2_5_res).reshape(-1,1)).flatten()

    print("Mean temperature:", np.mean(data_by_region[region]["Unweighted_Temp"]))
    print(np.mean(tfp_transformed_0) - np.mean(tfp_transformed_1_5))
    print(np.mean(tfp_transformed_0) - np.mean(tfp_transformed_2_0))
    print(np.mean(tfp_transformed_0) - np.mean(tfp_transformed_2_5))

Middle Africa
Mean temperature: 24.91930546669173
0.003228036267629947
0.0054682133004346455
0.008549307758330925
Australia and New Zealand
Mean temperature: 15.827711562763158
0.12735970013726639
0.16229503980764326
0.19155835500233068
Southern Asia
Mean temperature: 18.918324394881576
-0.010947922371868615
-0.016081722086597874
-0.02236296120386417
Northern America
Mean temperature: 0.5982907999298248
0.028504899205202515
0.038767987698627486
0.049566661738681225
South America
Mean temperature: 21.134287506415205
-0.0089205667068877
-0.012980359387851146
-0.017899516687323327
Eastern Asia
Mean temperature: 7.192924345127193
0.055751700005376964
0.08944997817444458
0.13445034584034346
South-eastern Asia
Mean temperature: 25.237569583114038
0.0026189036201683894
0.004220099507007259
0.006311708684412734
Central America
Mean temperature: 24.05471369350877
0.01946246378051418
0.02759573907508503
0.03692789945252726
Northern Africa
Mean temperature: 21.98016042204678
-0.001020205641392060

#### At first glance there is not a regional relationship between mean temperature/effect on temp increase on ag tfp

# Use hist-nat and historical temp values from 2015-2020

In [111]:
# process data
hist_nat = pd.read_csv("../data/ortiz-bobea/data2/CMIP6_co/hist-nat_BCC-CSM2-MR_1948-2020_both.csv")
hist = pd.read_csv("../data/ortiz-bobea/data2/CMIP6_co/historical_BCC-CSM2-MR_1948-2020_both.csv")
hist_nat = pd.concat([hist_nat["tasmax"] - 273, hist_nat["tasmin"] - 273],axis=1)
hist_nat_mean = np.array(np.mean(hist_nat, axis=1))
print(hist_nat_mean)
hist = pd.concat([hist["tasmax"] - 273, hist["tasmin"] - 273],axis=1)
hist_mean = np.array(np.mean(hist, axis=1))

[ 1.0265657   1.69431661  6.44849383 ... 25.21599029 23.81814192
 22.96967276]


In [107]:
global_trace = pd.read_pickle(f"../models/temp_precip_drought_tfp_global.pkl")["trace"]
global_temp_coef1 = np.mean(np.array(global_trace.posterior.temp_tfp_coef).flatten())
print(global_temp_coef1)
print(np.std(np.array(global_trace.posterior.temp_tfp_coef).flatten()))
global_temp_coef2 = np.mean(np.array(global_trace.posterior.temp_tfp_coef2).flatten())
print(global_temp_coef2)
print(np.std(np.array(global_trace.posterior.temp_tfp_coef2).flatten()))

-0.028864015482541968
0.015021390871132459
-0.023729767349794005
0.011037244625251057


In [113]:
hist_res = []
hist_nat_res = []

hist_scaled = temp_scaler.transform(np.array(hist_mean).reshape(-1,1)).flatten()
hist_nat_scaled = temp_scaler.transform(np.array(hist_nat_mean).reshape(-1,1)).flatten()

for i in range(len(hist_scaled)):
    hist_res.append((hist_scaled[i] * temp_coef_1) + (hist_scaled[i] * hist_scaled[i] * temp_coef_2))
    hist_nat_res.append((hist_nat_scaled[i] * temp_coef_1) + (hist_nat_scaled[i] * hist_nat_scaled[i] * temp_coef_2))

tfp_transformed_hist = tfp_scaler.inverse_transform(np.array(hist_res).reshape(-1,1)).flatten()
tfp_transformed_hist_nat = tfp_scaler.inverse_transform(np.array(hist_nat_res).reshape(-1,1)).flatten()

print(np.mean(tfp_transformed_hist_nat - tfp_transformed_hist))

-0.025119151714475707


In [112]:
print(np.mean(hist_nat_mean))
print(np.mean(hist_mean))

18.104758744823663
18.485149190098202
