# 6. Various Plots

<div class="alert alert-block alert-info">
    <b>About:</b>
    This notebook refers to the studies presented in <b>Chapters 5.3 and 5.4</b> of the Ph.D. thesis [3].
    We cannot guarantee the completeness or correctness of the code.
    If you find bugs or have suggestions on how to improve the code, we encourage you to post your ideas as a <a href="https://github.com/felixriese/alpaca-processing/issues">GitHub issue</a>.
</div>

In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# (such as the local `utils` module used below) are reloaded before each cell
# is executed and edits to them take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import cross_validate, train_test_split
import sklearn.metrics as me
from scipy.signal import savgol_filter
import pandas as pd
from tqdm.notebook import tqdm

import utils

## Load data

In [None]:
# Upper limit for the soil moisture values (plotted in %) that are kept.
# CHANGE to use a different threshold.
max_sm = 40

# Measurement areas to include.
# CHANGE: pick from ["1", "2_1", "2_2", "3", "4", "5"]
areas = [
    "1",
    "2_1", "2_2",
    "3",
    "4",
    "5",
]

# The same area labels grouped per plot panel / curve:
# the sub-areas "2_1" and "2_2" are shown together as one group.
areas_stacked = [
    ["1"],
    ["2_1", "2_2"],
    ["3"],
    ["4"],
    ["5"],
]

In [None]:
# Plot color per area group, keyed by the leading character of the area label
# (e.g. "2_1" and "2_2" both map to the entry "2").
color_dict = dict(zip(
    ("1", "2", "3", "4", "5"),
    ("tab:blue", "tab:green", "tab:red", "tab:purple", "tab:orange"),
))

In [None]:
# Load the measurement table; the first CSV column becomes the index.
# The "area" labels are read explicitly as strings so that the `isin(areas)`
# filter below compares strings with strings instead of depending on pandas'
# dtype inference for a column that mixes labels such as "1" and "2_1".
df = pd.read_csv("data/peru_data.csv", index_col=0, dtype={"area": str})

# Keep only the selected areas and drop rows whose soil moisture exceeds max_sm
df = df[df["area"].isin(areas) & (df["soilmoisture"] <= max_sm)]

# Define hyperspectral bands.
# NOTE(review): presumably `True` returns only the bands that are used and
# `False` the complete band list (the plots below embed `hypbands` into
# `hypbands_all`) — confirm against utils.getHyperspectralBands.
hypbands = utils.getHyperspectralBands(True)
hypbands_all = utils.getHyperspectralBands(False)

# Create arrays: the band values (as strings) are the names of the spectral
# columns in the table; the soil moisture column is the target.
X = df[hypbands.astype("str")].values
y = df["soilmoisture"].values

In [None]:
# Select the samples whose soil moisture lies strictly below the limit and
# apply the same selection to features and targets.
# NOTE(review): this comparison is strict (`<`), while the DataFrame filter in
# the loading cell uses `<=`. Samples exactly at max_sm therefore remain in
# `df` but are removed from X / y — confirm which bound is intended.
mask = y < max_sm
X, y = X[mask], y[mask]
print(X.shape, y.shape)

## Soil Moisture Histogram

In [None]:
fontsize = 18

# Bin edges of width 1 spanning the full range [0, max_sm].
# np.arange excludes its stop value, so the stop is max_sm + 1; with a stop of
# max_sm the last edge would be max_sm - 1 and the histogram would silently
# drop every sample above it although the x-axis extends to max_sm.
bins = np.arange(0, max_sm + 1, 1)

fig, ax = plt.subplots(1, 1, figsize=(10, 4))
n, bins, patches = ax.hist(y, bins=bins)
ax.set_xlim(0, max_sm)
# leave 30 % headroom above the tallest bar
ax.set_ylim(0, np.max(n)*1.3)
ax.set_xlabel("Soil moisture in %", fontsize=fontsize, labelpad=12)
ax.set_ylabel("Number of datapoints", fontsize=fontsize, labelpad=12)

# Set the tick label size via tick_params: the `Tick.label` attribute used
# previously was removed in Matplotlib 3.8 and raises an AttributeError there.
ax.tick_params(axis="both", which="major", labelsize=fontsize)

fig.tight_layout()
fig.savefig("plots/hist_soilmoisture.pdf", bbox_inches="tight")

In [None]:
fontsize = 18

# Bin edges of width 3. The stop is extended by one bin width because
# np.arange excludes its stop value: this guarantees that the last edge is
# >= max_sm, so no sample up to max_sm falls outside the histogram range.
bins = np.arange(0, max_sm + 3, 3)

# One panel per area group on a fixed 2x3 grid with a shared y-axis
fig, ax_list = plt.subplots(2, 3, figsize=(12, 8), sharey=True)
for i, area in enumerate(areas_stacked):
    ax = ax_list[i//3, i%3]

    # The first character of the group's first label identifies the group,
    # e.g. ["2_1", "2_2"] -> "2"; it selects the color and the panel title.
    area_id = str(area[0][0])

    n, bins, patches = ax.hist(df[df["area"].isin(area)]["soilmoisture"].values,
                               bins=bins,
                               alpha=1.0, histtype="bar", stacked=True,
                               color=color_dict[area_id])

    # y < 1 places the title inside the upper part of the panel
    ax.set_title("Area A"+area_id, fontsize=fontsize, y=0.88)

    # only the left-most column carries the (shared) y-axis label
    if i % 3 == 0:
        ax.set_ylabel("Number of datapoints", fontsize=fontsize, labelpad=15)
    ax.set_xlabel("Soil moisture in %", fontsize=fontsize)

    # `Tick.label` was removed in Matplotlib 3.8; tick_params is the supported
    # way to set the tick label size.
    ax.tick_params(axis="both", which="major", labelsize=fontsize)

# Remove every panel of the grid that received no area group (exactly the
# bottom-right panel for five groups) instead of always deleting the last one.
for unused_ax in ax_list.flat[len(areas_stacked):]:
    fig.delaxes(unused_ax)

fig.tight_layout()
fig.savefig("plots/hist_perarea_soilmoisture.pdf", bbox_inches="tight")

## Spectra per area

In [None]:
fontsize = 18

# Column positions of the used bands within the full band list. The spectra
# are written into these columns of a NaN-filled array, so the unused bands
# stay NaN and appear as gaps in the plotted line, see
# https://matplotlib.org/examples/pylab_examples/nan_test.html
indices = [i for i, a in enumerate(hypbands_all) if a in hypbands]

fig, ax = plt.subplots(1, 1, figsize=(12, 4))
# Iterate over the configured area groups. `df` only contains the labels
# listed in `areas`, so the groups have to be built from exactly those labels;
# other labels would select no rows and produce empty (all-NaN) curves.
for area in areas_stacked:
    area_spectra = df[df["area"].isin(area)][hypbands.astype("str")]
    if area_spectra.empty:
        # no rows for this group (e.g. after changing `areas`): skip it rather
        # than averaging over an empty array
        continue

    # one row per spectrum, one column per band of the full band list
    area_spectra_nan = np.full(shape=(area_spectra.shape[0], len(hypbands_all)),
                               fill_value=np.nan)
    area_spectra_nan[:, indices] = area_spectra.values

    # the first character of the group's first label selects legend entry and color
    ax.step(hypbands_all, np.mean(area_spectra_nan, axis=0), label="Area A"+area[0][0], linestyle="solid",
            color=color_dict[area[0][0]])

ax.legend(ncol=1, fontsize=fontsize, frameon=False, bbox_to_anchor=(1.0, 0.85))
ax.set_xlabel("Wavelength in nm", fontsize=fontsize, labelpad=12)
ax.set_ylabel("Reflectance", fontsize=fontsize, labelpad=12)
ax.set_xlim(800, 2400)

# `Tick.label` was removed in Matplotlib 3.8; tick_params is the supported
# way to set the tick label size.
ax.tick_params(axis="both", which="major", labelsize=fontsize)

fig.tight_layout()
fig.savefig("plots/spectra_mean_all.pdf", bbox_inches="tight")

## Removed bands

In [None]:
fontsize = 18
leg_factor = 1.0

# Indices into `hypbands_all` of the bands that are marked as removed in the plot.
# Previous hard-coded list, kept for reference:
# remove_bands_full = [0, 1, 49, 50, 51, 52, 53, 98, 99, 100, 101, 102, 103, 104, 105,
#                      106, 107, 108, 109, 110, 111, 154, 155, 156, 157, 158, 159, 160,
#                      161, 162, 163, 164, 165, 166, 167, 168, 169]
remove_bands_full = utils.getIgnoredBands()

# Spectra of area "1" embedded into the full band list: columns of bands that
# are not part of `hypbands` stay NaN and show up as gaps in the plot.
area_spectra = df[df["area"] == "1"][hypbands.astype("str")]
area_spectra_nan = np.full(shape=(area_spectra.shape[0], len(hypbands_all)),
                           fill_value=np.nan)
indices = [i for i, a in enumerate(hypbands_all) if a in hypbands]
area_spectra_nan[:, indices] = area_spectra.values

# per-band statistics over all spectra of the area (NaN for the unused bands)
mean_spectrum = np.mean(area_spectra_nan, axis=0)
std_spectrum = np.std(area_spectra_nan, axis=0)

fig, ax = plt.subplots(1, 1, figsize=(12, 4))
ax.step(hypbands_all, mean_spectrum, color="tab:blue",
        label="Mean spectrum")
ax.errorbar(hypbands_all, mean_spectrum,
            yerr=std_spectrum, label="Standard deviation",
            fmt="none", color="tab:blue", alpha=0.5)

# One vertical line per removed band; only the first line gets a real label
# so that the legend contains a single "Removed bands" entry.
for i, b in enumerate(remove_bands_full):
    ax.axvline(x=hypbands_all[b], c="tab:orange", alpha=1.0,
               label="Removed bands" if i == 0 else "_nolegend_")

# pad the x-range by 20 (axis unit: nm) on both sides of the band range
ax.set_xlim(hypbands_all[0]-20, hypbands_all[-1]+20)
ax.set_ylim(0, 0.6)

ax.set_xlabel("Wavelength in nm", fontsize=fontsize, labelpad=12)
ax.set_ylabel("Reflectance", fontsize=fontsize, labelpad=12)

# `Tick.label` was removed in Matplotlib 3.8; tick_params is the supported
# way to set the tick label size.
ax.tick_params(axis="both", which="major", labelsize=fontsize)

ax.legend(fontsize=fontsize*leg_factor, ncol=1, bbox_to_anchor=(1.0, 0.7), frameon=False)
fig.tight_layout()
fig.savefig("plots/remove_bands.pdf", bbox_inches="tight")