In [None]:
%matplotlib inline
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import pickle
import os
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
from wildfires.data.datasets import data_map_plot
from wildfires.data.datasets import DATA_DIR
with open(os.path.join(DATA_DIR, 'mean_cubes.pickle'), 'rb') as f:
    mean_cubes = pickle.load(f)
print(mean_cubes)
# so that analysis below can be replicated for other kinds of cubes
cubes = mean_cubes

In [None]:
# Pull the 'pftNoLand' cube out of the collection and derive a boolean mask
# from it: True wherever the cube's underlying values are numerically equal
# to 1 (presumably grid cells that contain no land - confirm against the
# dataset definition).
no_land_constraint = iris.Constraint(name='pftNoLand')
land_cube = cubes.extract_strict(no_land_constraint)
land_mask = np.isclose(land_cube.data.data, 1.)
fig = data_map_plot(land_mask, name='Land Mask')

# The cube is only needed for the mask, so drop it from the CubeList.
# NOTE: this mutates `cubes` in place; once the cube has been removed, the
# extraction above can no longer find it when this cell is executed again.
cubes.remove(land_cube)

In [None]:
# Draw a map for every cube currently held in `cubes`. `fig` is overwritten on
# each iteration, so after the loop it refers to the last call's return value
# only.
for cube in cubes:
    fig = data_map_plot(cube)

In [None]:
from copy import deepcopy

# Masking strategy used in this cell:
#   * 'monthly burned area' keeps its own mask and its data are left untouched;
#   * for every other variable, masked cells are filled with 0;
#   * afterwards all variables receive one shared mask - the burned area mask
#     combined with land_mask - so that only data over land is considered.

# Every cube's data must expose a `mask` attribute.
assert np.all([hasattr(cube.data, 'mask') for cube in cubes])

burned_area_cube = cubes.extract_strict(iris.Constraint(name='monthly burned area'))
burned_area_mask = burned_area_cube.data.mask
combined_mask = burned_area_mask | land_mask

# Operate on a deep copy so the cubes in `cubes` are not modified.
cubes_mod = deepcopy(cubes)
assert isinstance(cubes_mod, iris.cube.CubeList)

# Step 1: zero-fill the gaps of everything except burned area.
for cube in cubes_mod:
    if cube.name() == 'monthly burned area':
        continue
    cube.data.data[cube.data.mask] = 0

# Step 2: give every cube (burned area included) the shared combined mask.
for cube in cubes_mod:
    cube.data.mask = combined_mask

# Step 3: plot the modified cubes with a larger default figure size.
mpl.rcParams['figure.figsize'] = (10, 7)
for cube in cubes_mod:
    fig = data_map_plot(cube)

In [None]:
# Sanity check: among the unmasked values of every cube there must be neither
# infinities nor NaNs.
for cube in cubes_mod:
    unmasked_values = cube.data.data[~cube.data.mask]
    assert not np.isinf(unmasked_values).any()
    assert not np.isnan(unmasked_values).any()

In [None]:
# Exclude high latitudes: build a 2D boolean mask that is True for every grid
# cell whose latitude exceeds MAX_LATITUDE, then add it to the mask of each cube.
# NOTE(review): the threshold is presumably in degrees north - confirm the
# units of the 'latitude' coordinate.
MAX_LATITUDE = 60

# Read both coordinates from the SAME cube. (Previously latitude came from
# cubes_mod[0] and longitude from cubes_mod[1], which silently relied on the
# two cubes sharing a grid.)
reference_cube = cubes_mod[0]
lats = reference_cube.coord('latitude').points
lons = reference_cube.coord('longitude').points
# With indexing='ij' the first meshgrid output has shape (len(lats), len(lons))
# and holds the latitude of every cell, so the mask assumes cube data are laid
# out as (latitude, longitude).
lat_mask = np.meshgrid(lats, lons, indexing='ij')[0] > MAX_LATITUDE

# Visual check on a copy of the 'precip' cube: plot it before and after the
# latitude mask is applied.
cube = deepcopy(cubes_mod.extract_strict(iris.Constraint(name='precip')))

fig = data_map_plot(cube)
data = cube.data
data.mask[lat_mask] = True
fig = data_map_plot(data)

# Apply the latitude mask to a fresh deep copy of all cubes, leaving
# `cubes_mod` itself unchanged.
cubes_mod2 = deepcopy(cubes_mod)

for cube in cubes_mod2:
    cube.data.mask[lat_mask] = True

In [None]:
# Response variable: the unmasked 'monthly burned area' values as a 1D Series.
burned_area_cube = cubes_mod2.extract_strict(iris.Constraint(name='monthly burned area'))
endog_data = pd.Series(burned_area_cube.data.data[~burned_area_cube.data.mask])

# Explanatory variables: one column per remaining cube, holding that cube's
# unmasked values. Each entry of `data` is a column vector so that hstack
# places the variables side by side.
predictor_cubes = [cube for cube in cubes_mod2 if cube.name() != 'monthly burned area']
names = [cube.name() for cube in predictor_cubes]
data = [cube.data.data[~cube.data.mask].reshape(-1, 1) for cube in predictor_cubes]
exog_data = pd.DataFrame(np.hstack(data), columns=names)

# Swap the minimum temperature for the temperature range (maximum - minimum);
# the maximum temperature column is kept. `pop` returns the column and removes
# it from the frame in one step.
exog_data['temperature range'] = exog_data['maximum temperature'] - exog_data.pop('minimum temperature')

# `names` still lists the cube names as extracted, i.e. before any of the
# column replacements in this cell.
print(names)

# Log transformation for select variables. A constant 0.01 is added first
# (presumably so that zero values stay finite under the log); the assertion
# guards against values that were negative before the shift. The transformed
# column replaces the original one.
log_var_names = ['temperature range',
                 'dry_days']

for name in log_var_names:
    shifted = exog_data[name] + 0.01
    assert np.all(shifted >= (0.01 - 1e-8)), '{:}'.format(name)
    exog_data['log ' + name] = np.log(shifted)
    exog_data.pop(name)

# Square root transformation, again replacing the original column. The
# variable must be non-negative for the square root to be defined.
sqrt_var_names = ['Combined Flash Rate Time Series', 'popd']
for name in sqrt_var_names:
    assert np.all(exog_data[name] >= 0), '{:}'.format(name)
    exog_data['sqrt ' + name] = np.sqrt(exog_data[name])
    exog_data.pop(name)

In [None]:
import statsmodels.api as sm


def masked_grid(values, mask):
    """Scatter 1D `values` back onto a grid shaped like `mask`.

    Parameters
    ----------
    values : array-like
        One value per unmasked (False) cell of `mask`, in the order produced
        by boolean indexing with `~mask`.
    mask : numpy.ndarray of bool
        True marks cells that carry no data.

    Returns
    -------
    numpy.ma.MaskedArray
        float64 array with `values` in the unmasked cells (0 underneath the
        masked ones) and `mask` applied.
    """
    grid = np.zeros_like(mask, dtype=np.float64)
    grid[~mask] = values
    return np.ma.MaskedArray(grid, mask=mask)


# Fit a GLM of burned area on the explanatory variables using the Binomial
# family. The keyword must be spelled `family`: the previous spelling `faimly`
# was not recognised as the family argument, so the requested Binomial family
# was never applied and the model was fit with the default family instead.
# NOTE(review): the Binomial family expects the response to lie in [0, 1] -
# confirm that burned area is expressed as a fraction.
# NOTE(review): exog_data contains no constant column, so the model has no
# intercept - confirm this is intended (sm.add_constant would add one).
model = sm.GLM(endog_data, exog_data, family=sm.families.Binomial())
model_results = model.fit()
print(model_results.summary())

# Observed vs. fitted values as a log-scaled hexbin density.
plt.figure(figsize=(12, 9))
plt.hexbin(endog_data, model_results.fittedvalues, bins='log')
plt.xlabel('real data')
plt.ylabel('prediction')
plt.colorbar()
# plt.savefig('real_vs_prediction.png')

# The burned area mask selects exactly the cells that entered the regression,
# so it is used to place both predictions and observations back on the map.
global_mask = burned_area_cube.data.mask

ba_predicted = masked_grid(model_results.fittedvalues, global_mask)
fig = data_map_plot(
        ba_predicted,
        name='Predicted Mean Burned Area',
        filename='predicted_mean.png',
        log=True)

ba_data = masked_grid(endog_data.values, global_mask)
fig = data_map_plot(
        ba_data,
        name='Mean observed burned area (GFEDv4)',
        filename='observed_mean_ba.png',
        log=True)

In [None]:
# Show the pairwise correlation matrix of the explanatory variables, with the
# column names as tick labels on both axes. Results are assigned to `_` to
# keep the cell from echoing return values.
corr_matrix = exog_data.corr()
tick_labels = exog_data.columns
tick_positions = range(len(tick_labels))

_ = plt.matshow(corr_matrix)
_ = plt.xticks(tick_positions, tick_labels, rotation='vertical')
_ = plt.yticks(tick_positions, tick_labels)