# PlotTrainingData

This notebook plots a large grid of histograms, one panel per variable, covering all the covariates we're interested in modeling and analyzing.

In [None]:
import os
import numpy as np
import pandas as pd
import rasterio as rio
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn import metrics

In [None]:
# Input and output locations, expressed relative to the notebook's working directory.
data = os.path.join('..', 'data')
training = os.path.join(data, 'fia_calif_plot_level_climate_model.csv')
plots = os.path.join('..', 'docs', 'img')

# Vegetation covariate names: one '<metric>_<stat>' label per pair, with the
# metric varying slowest (CC_MN, CC_VA, CC_SK, CC_KU, CH_MN, ...).
# The stat codes presumably stand for mean/variance/skewness/kurtosis -- TODO confirm.
v_metrics = ['CC', 'CH', 'LF', 'LC']
stats = ['MN', 'VA', 'SK', 'KU']
v_labels = [f'{metric}_{stat}' for metric in v_metrics for stat in stats]

# Vegetation rasters and a .vrt path under the data directory.
# NOTE(review): this list runs CanopyCover, CanopyHeight, CanopyLayerCount,
# LadderFuelDensity, while v_metrics lists 'LF' before 'LC'. If those codes map
# to these layers, confirm nothing pairs the two lists by position.
veg_vrt = os.path.join(data, 'veg.vrt')
vegs = [
    'gs://cfo-public/vegetation/California-Vegetation-CanopyCover-2020-Summer-00010m.tif',
    'gs://cfo-public/vegetation/California-Vegetation-CanopyHeight-2020-Summer-00010m.tif',
    'gs://cfo-public/vegetation/California-Vegetation-CanopyLayerCount-2020-Summer-00010m.tif',
    'gs://cfo-public/vegetation/California-Vegetation-LadderFuelDensity-2020-Summer-00010m.tif',
]

# Climate covariate names, the per-variable raster file names, and the path of
# the combined climate raster under the data directory.
# NOTE(review): the entries of `clims` are bare file names with no directory
# component -- confirm whether they were meant to live under `data`.
c_labels = ['aet', 'aprpck', 'cwd', 'ppt', 'tmn', 'tmx']
clim_tif = os.path.join(data, 'clim-utm.tif')
clims = [f'{c_label}_sierra_sierra.tif' for c_label in c_labels]

# read the training data: load the CSV at `training` into a DataFrame; the
# following cells select columns from it by name (xvars / yvars)
df = pd.read_csv(training)

In [None]:
# Split the training table by column name: x takes every vegetation and
# climate covariate column, y takes the columns listed in yvars
# (presumably the quantities being modeled -- TODO confirm).
yvars = ['BAPA', 'TPA', 'AG_biomass_gm2', 'CARBON_A']
xvars = [*v_labels, *c_labels]
x = df[xvars]
y = df[yvars]

In [None]:
# Grid layout: the yvars fill the top row(s) and the xvars start on a fresh row
# beneath them. Each group gets its own row count so that no variable is dropped
# (or indexed out of bounds) when len(yvars) is not exactly ncols.
ncols = 4
n_yrows = int(np.ceil(len(yvars) / ncols))
n_xrows = int(np.ceil(len(xvars) / ncols))
nrows = n_yrows + n_xrows


def _plot_clipped_hist(ax, values, name, color):
    """Draw a histogram of `values` on `ax`, limited to its 2nd-98th percentile range.

    Args:
        ax: the matplotlib Axes to draw on.
        values: a pandas Series holding the column to plot.
        name: used as the panel title and the artist label.
        color: fill color for the bars.

    Missing values are dropped before the percentiles are computed, since a
    NaN percentile would make the histogram range non-finite. A column with no
    valid values leaves the panel empty apart from its title.
    """
    ax.set_title(name)
    values = values.dropna()
    if values.empty:
        return
    vmin, vmax = np.percentile(values, [2, 98])
    ax.hist(
        values,
        range=(vmin, vmax),
        color=color,
        edgecolor='black',
        rwidth=1.0,
        align='left',
        label=name,
    )


# create the figure; squeeze=False keeps axs two-dimensional for any grid shape
fig, axs = plt.subplots(nrows, ncols, figsize=(10, 16), squeeze=False)
y_axes = axs[:n_yrows].ravel()
x_axes = axs[n_yrows:].ravel()

for axes, names, color in ((y_axes, yvars, 'orange'), (x_axes, xvars, 'green')):
    for ax, name in zip(axes, names):
        _plot_clipped_hist(ax, df[name], name, color)
    # hide the trailing panels of this group that have no variable to show
    for ax in axes[len(names):]:
        ax.set_visible(False)

fig.tight_layout()

# save through the figure handle rather than relying on the implicit current figure
fig.savefig(os.path.join(plots, 'CovariateHistograms.png'), dpi=200)