## _GeoCryoAI Codebase_
#### Journal: Environmental Research Letters Manuscript (ERL-116349)
#### Title: Investigating Permafrost Carbon Dynamics in Alaska with Artificial Intelligence
#### Author: Bradley A. Gay
#### Date: 08/20/2022 (Updated: 10/01/2023)

# Step 1: Load Libraries and Functions

In [1]:
import os, sys, datetime, re, glob, requests, warnings, intake
#
import bs4, eofs, pyts, pyarrow, polars, pickle, math, time, cftime, tqdm
import numpy as np
import pandas as pd
import xarray as xr
#
import progressbar, codecs, netrc, zipfile, getpass
#
from datetime import datetime
from datetime import datetime as dt
from datetime import timedelta
#
from numpy import isnan, array, count_nonzero
from pandas import read_csv, DataFrame, concat
from itertools import groupby, islice
from operator import itemgetter
from pathlib import Path
#
#
import IPython
import ipywidgets as widgets
from IPython.display import display
#
import dask
import dask.dataframe as dd
import dask.array as da
from dask.distributed import Client
import dask_ml
from dask_ml.model_selection import train_test_split
from dask.diagnostics import ProgressBar
import vaex
import vaex.ml
import vaex.ml.tensorflow
import flox
#
import torch
import tensorflow as tf
import tensorflow.compat.v1 as tf
# import tensorflow_addons as tfa
# from tensorflow_addons.metrics.r_square import RSquare
from tensorflow.keras import utils
import tensorflow.keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import *
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.python.keras.utils import conv_utils, tf_utils
import tensorboard
tensorboard.__version__
# Clear any logs from previous runs
#rm -rf ./logs/
# Load the TensorBoard notebook extension.
%load_ext tensorboard
#
from tensorflow import keras
import tensorflow.keras.backend as K
import keras.backend as K
import keras.optimizers
from keras import *
from keras import layers, optimizers, models
from keras import backend as K
from keras.layers import *
from keras.layers import Masking, Dense, InputLayer, Dropout, Flatten, BatchNormalization, Conv1D, Conv2D, Conv3D, \
Conv1DTranspose, Conv2DTranspose, Conv3DTranspose, Bidirectional, MaxPool1D, MaxPool2D, MaxPool3D, Reshape
from keras.preprocessing import *
from keras.models import Model, Sequential
from keras.activations import swish, elu, gelu, selu, sigmoid, relu, tanh, linear, softmax, swish
from keras.utils import plot_model
import keras_tuner
from keras_tuner import *
from keras_tuner import HyperModel, HyperParameters, RandomSearch, BayesianOptimization, Hyperband
from keras.callbacks import ReduceLROnPlateau
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience = 2, verbose=1,factor=0.75, min_lr=0.00001)
#
import sklearn, statsmodels, pyrsgis
import arch
import arch.unitroot
import scipy.signal as signal
from scipy.stats import pearsonr
#
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as col
import matplotlib.cm as cm
%matplotlib inline
mpl.rcParams['agg.path.chunksize'] = 10000
import seaborn as sns
import graphviz
#
import csv, io, h5py
import cv2 as cv
import netCDF4 as nc
import geopandas as gpd
import rioxarray as rxr
import spectral, pydot, pydotplus
import earthpy as et
import rasterio as rio
#
import xbatcher, tiledb
import buteo as beo
#
from arch.unitroot import ADF
from arch.unitroot import KPSS
#
from pyrsgis import raster
from shapely.geometry import box, mapping
from pywaffle import Waffle
from eofs.standard import Eof
#
import statsmodels.tsa.stattools as tsa
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tools.tools import add_constant
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
#
from sklearn import linear_model
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, PowerTransformer, Normalizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, accuracy_score, confusion_matrix, \
ConfusionMatrixDisplay, consensus_score, explained_variance_score,r2_score, roc_auc_score, roc_curve, jaccard_score, \
mean_squared_error, nan_euclidean_distances, precision_score
#
from spectral import *
from libpysal.weights import lat2W
from esda.moran import Moran
from rasterio.enums import Resampling
from pyrsgis.ml import array_to_chips
from shapely import geometry
from rasterio.mask import mask
from pyrsgis import ml
from pyrsgis import ml, raster, convert
from dateutil.rrule import DAILY,rrule
from time import gmtime, strftime
from netCDF4 import Dataset
from rasterio.plot import show
from rasterio.plot import show_hist
from osgeo import gdal 
from tqdm import tqdm
from packaging import version
from skimage import data, io 
from skimage.color import rgb2gray
from dataclasses import dataclass
from typing import Iterable
from xbatcher import BatchGenerator
from xbatcher.loaders.torch import IterableDataset, MapDataset

print("TensorFlow version: ", tf.__version__)
assert version.parse(tf.__version__).release[0] >= 2, \
    "This notebook requires TensorFlow 2.0 or above."

# Prettier plotting with seaborn
sns.set(font_scale=1.5, style="white")
# 1. Initialize the notebook using the necessary libraries

#tf.disable_v2_behavior()
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Suppress warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings("ignore", category=RuntimeWarning)
#os.environ['HDF5_USE_FILE_LOCKING']='FALSE'

ProgressBar().register()

Using TensorFlow backend
TensorFlow version:  2.13.0


In [None]:
### FUNCTIONS ###

def divisorGenerator(n):
    """Yield every positive divisor of ``n`` in ascending order.

    Divisors up to ``sqrt(n)`` are yielded as soon as they are found; the
    matching co-divisors are buffered and yielded afterwards in reverse, which
    keeps the overall sequence sorted.

    Parameters
    ----------
    n : int
        Number to factor. ``n = 0`` yields nothing; a negative ``n`` raises
        ``ValueError`` (from ``math.sqrt``) on the first iteration.

    Yields
    ------
    int
        Each divisor of ``n``, smallest first.
    """
    large_divisors = []
    for i in range(1, int(math.sqrt(n) + 1)):
        if n % i == 0:
            yield i
            if i*i != n:
                # Floor division keeps the co-divisor an integer; true
                # division produced floats (e.g. 6.0) for the upper half.
                large_divisors.append(n // i)
    yield from reversed(large_divisors)

def compute_vif(features):
    """Compute the variance inflation factor (VIF) of every column in ``features``.

    A constant ``'intercept'`` column of ones is appended to a private copy of
    the data before the VIFs are computed, and the row for that helper column
    is removed from the result.

    Parameters
    ----------
    features : pandas.DataFrame
        Candidate predictors, one per column. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Variable'`` (feature name) and ``'VIF'``, one row per
        column of ``features``.
    """
    # Work on an explicit copy. ``features.iloc[:, :]`` is not a copy: on
    # pandas < 2.0 it is the caller's own frame, so adding the helper column
    # leaked an 'intercept' column into the input.
    X = features.copy()
    X['intercept'] = 1
    # Materialise the design matrix once instead of once per column.
    design = X.values
    vif = pd.DataFrame()
    vif["Variable"] = X.columns
    vif["VIF"] = [variance_inflation_factor(design, i) for i in range(design.shape[1])]
    vif = vif[vif['Variable']!='intercept']
    return vif

def series_to_supervised(data, lags = 1, forecasting_steps = 1, dropna=True):
    """Reframe a time series as a lagged, supervised-learning table.

    The input is shifted ``lags`` .. 1 steps back (labelled ``'t-k'``) and
    0 .. ``forecasting_steps - 1`` steps ahead (labelled ``'t'``, ``'t+k'``);
    all shifted copies are concatenated side by side.

    Parameters
    ----------
    data : list or 2-D array-like / DataFrame
        Observations, one row per time step. A ``list`` is treated as a
        single variable; anything else must expose ``shape[1]``.
    lags : int
        Number of past steps to include as inputs.
    forecasting_steps : int
        Number of steps (starting at ``t``) to include as outputs.
    dropna : bool
        When true, rows containing any NaN introduced by shifting are removed.

    Returns
    -------
    pandas.DataFrame
        Columns are ``(original column, time label)`` pairs; duplicated
        column labels are dropped, keeping the first occurrence.
    """
    width = 1 if type(data) is list else data.shape[1]
    frame = pd.DataFrame(data)

    def _labelled(tag):
        # Pair each of the first ``width`` source columns with a time label.
        return [(frame.columns[k], tag) for k in range(width)]

    shifted, labels = [], []
    # Input block: t-lags, ..., t-1
    for back in reversed(range(1, lags + 1)):
        shifted.append(frame.shift(back))
        labels.extend(_labelled(str('t-%d') % back))
    # Output block: t, t+1, ..., t+forecasting_steps-1
    for ahead in range(forecasting_steps):
        shifted.append(frame.shift(-ahead))
        labels.extend(_labelled(str('t') if ahead == 0 else str('t+%d') % ahead))

    table = pd.concat(shifted, axis=1)
    table.columns = labels
    table = table.loc[:, ~table.columns.duplicated()]
    return table.dropna() if dropna else table

def create_dataset(dataset, look_back=3):
    """Slice ``dataset`` into sliding windows and their next-step targets.

    Parameters
    ----------
    dataset : sequence or numpy.ndarray
        Ordered observations; must support ``len`` and slicing.
    look_back : int
        Number of consecutive observations in each input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``dataX[k]`` is ``dataset[k:k + look_back]`` and ``dataY[k]`` is
        ``dataset[k + look_back]``. Both are empty when ``dataset`` has no
        more than ``look_back`` elements.
    """
    dataX, dataY = [], []
    # Targets run from index ``look_back`` to ``len(dataset) - 1`` inclusive,
    # giving ``len(dataset) - look_back`` windows. The earlier extra ``- 1``
    # silently discarded the final valid sample.
    for i in range(len(dataset) - look_back):
        dataX.append(dataset[i:i + look_back])
        dataY.append(dataset[i + look_back])
    return np.array(dataX), np.array(dataY)

# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    """Cut a univariate sequence into fixed-length inputs and next-value targets.

    Parameters
    ----------
    sequence : sequence
        Ordered observations; must support ``len`` and slicing.
    n_steps : int
        Length of each input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X[k]`` is ``sequence[k:k + n_steps]`` and ``y[k]`` is
        ``sequence[k + n_steps]``.
    """
    final_index = len(sequence) - 1
    # A window starting at ``s`` is usable only while its target index
    # ``s + n_steps`` is still inside the sequence.
    starts = [s for s in range(len(sequence)) if s + n_steps <= final_index]
    windows = [sequence[s:s + n_steps] for s in starts]
    targets = [sequence[s + n_steps] for s in starts]
    return array(windows), array(targets)

def calculate_pvalues(df):
    """Build the matrix of pairwise Pearson-correlation p-values for ``df``.

    For each pair of columns, rows where either value is null are excluded
    before ``scipy.stats.pearsonr`` is evaluated; the p-value is rounded to
    four decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric columns to compare.

    Returns
    -------
    pandas.DataFrame
        Square frame labelled by the columns of ``df``; the entry at row ``c``,
        column ``r`` is the p-value for the pair ``(r, c)``.
    """
    # Empty frame whose index and columns are both the column labels of ``df``.
    dfcols = pd.DataFrame(columns=df.columns)
    pvalues = dfcols.transpose().join(dfcols, how='outer')
    for r in df.columns:
        for c in df.columns:
            tmp = df[df[r].notnull() & df[c].notnull()]
            # Single-step .loc assignment: the chained form ``pvalues[r][c] = ...``
            # writes to a temporary under pandas Copy-on-Write and leaves the
            # result full of NaN.
            pvalues.loc[c, r] = round(pearsonr(tmp[r], tmp[c])[1], 4)
    return pvalues

def adf_test(timeseries):
    """Print a labelled summary of the augmented Dickey-Fuller test.

    Calls ``adfuller`` with ``autolag="AIC"`` and prints the test statistic,
    p-value, lag count, observation count and every critical value it
    returns. Nothing is returned.

    Parameters
    ----------
    timeseries : array-like
        Series passed straight through to ``adfuller``.
    """
    print("Results of Dickey-Fuller Test:")
    outcome = adfuller(timeseries, autolag="AIC")
    headline_labels = [
        "Test Statistic",
        "p-value",
        "#Lags Used",
        "Number of Observations Used",
    ]
    report = pd.Series(outcome[0:4], index=headline_labels)
    # Element 4 maps significance level -> critical value.
    critical_values = outcome[4]
    for level in critical_values:
        report["Critical Value (%s)" % level] = critical_values[level]
    print(report)

def kpss_test(timeseries):
    """Print a labelled summary of the KPSS stationarity test.

    Calls ``kpss`` with ``regression="c"`` and ``nlags="auto"`` and prints the
    test statistic, p-value, lag count and every critical value it returns.
    Nothing is returned.

    Parameters
    ----------
    timeseries : array-like
        Series passed straight through to ``kpss``.
    """
    print("Results of KPSS Test:")
    outcome = kpss(timeseries, regression="c", nlags="auto")
    headline_labels = ["Test Statistic", "p-value", "Lags Used"]
    report = pd.Series(outcome[0:3], index=headline_labels)
    # Element 3 maps significance level -> critical value.
    critical_values = outcome[3]
    for level in critical_values:
        report["Critical Value (%s)" % level] = critical_values[level]
    print(report)

#### Archived

In [None]:
# import tensorflow as tf
# gpus = tf.config.experimental.list_physical_devices('GPU')
# for gpu in gpus:
#   tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# # #batch size
# # #available GPU memory bytes / 4 / (size of tensors + trainable parameters)
# # #16000 / 4 / (12 + 1)
# # #~256

# # # tf.compat.v1.disable_eager_execution()
# # # tf.config.run_functions_eagerly(False)
# # #tf.compat.v1.disable_eager_execution()

# import tensorflow as tf
# print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))#toggle to -1 to disengage GPU; or GPU to engage
# gpus = tf.config.list_physical_devices('GPU')
# if gpus:
#     try:
# # Currently, memory growth needs to be the same across GPUs
#         for gpu in gpus:
#             tf.config.experimental.set_memory_growth(gpu, True)#toggle to False to disengage GPU; or True to engage
#             logical_gpus = tf.config.experimental.list_logical_devices('GPU')#toggle to -1 to disengage GPU; or GPU to engage
#             print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
#     except RuntimeError as e:
# # Memory growth must be set before GPUs have been initialized
#         print(e)

In [None]:
# with tf.device('/device:GPU:0'):

# Step 2: Load In Situ Dataframe (1m)

In [None]:
# Load the in situ dataframe from a pickle file.
# NOTE(review): the path is an absolute location on one machine; running the
# notebook elsewhere requires editing it.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so only load pickles from a trusted source.
#file = os.path.join('/Users/bgay/Downloads/data/df_ak.pkl')
file = os.path.join('/Users/bgay/Desktop/Research/Manuscripts/M1/ERL/manuscript/data.pkl')
with open(file, 'rb') as f:
    df = pickle.load(f)

# file=os.path.join('/Users/bgay/Desktop/data/alt_ak.csv')
# df=pd.read_csv(file)

In [None]:
# Index the frame by its DATE column (dropping the index name) and sort rows by it.
df.index=df['DATE']
df.index.name = None
df=df.sort_index()
# Replace -9999 with NaN (presumably the dataset's missing-value sentinel — confirm).
df=df.replace(-9999,np.nan)
# Shorten the methane mixing-ratio column names.
# NOTE(review): the keys are spelled 'RATION'; rename() ignores keys that are
# not present, so verify they match the actual column names.
df.rename(columns = {'CH4_MIXING_RATION_MEAN':'CH4_MR_MEAN'}, inplace = True)
df.rename(columns = {'CH4_MIXING_RATION_STD':'CH4_MR_STD'}, inplace = True)
# Positional drops: the first five columns, then the column at position 44 of
# what remains. These depend on the exact column order of the loaded file.
df=df.drop(df.columns[0:5],axis=1)
df=df.drop(df.columns[44],axis=1)
# Remove rows in which every remaining value is missing.
df=df.dropna(axis=0,how='all')
# NOTE(review): parses the index with a year-only format ('%Y') — confirm the
# DATE values are bare years.
df.index=pd.to_datetime(df.index,format='%Y')
# nointdf is a second name for the same object as df, not a copy.
nointdf=df

In [None]:
#Imputer
# Fill gaps in every column by carrying the last valid observation forward,
# then back-filling whatever is still missing at the start of the record.
# df itself is left untouched; ffill() returns a new frame.
#
# This replaces a per-column loop that used the deprecated
# fillna(method='ffill'/'bfill') and added one '<col>_ismissing' flag per
# imputed column, only to remove the flags again by position
# (df2.iloc[:, -91:]). That positional drop removed real feature columns
# whenever fewer than 91 columns contained gaps.
df2 = df.ffill().bfill()

#### Archived

In [None]:
#plt.plot(df['ALT'].groupby(df.index).nunique().resample('Y').mean().reset_index(level=0, drop=True)#.interpolate(method='cubic_spline'))
plt.plot(df['ALT'].groupby(df.index).resample('A').mean().reset_index(level=0, drop=True))
plt.plot(df['ALT'].groupby(df.index).resample('A').median().reset_index(level=0, drop=True))
#plt.plot(df['ALT'].groupby(df.index).resample('M').mean().reset_index(level=0, drop=True))
#plt.plot(df['ALT'].groupby(df.index).resample('M').median().reset_index(level=0, drop=True))
#plt.plot(df['ALT'].groupby(df.index).resample('D').mean().reset_index(level=0, drop=True))
#plt.plot(df['ALT'].groupby(df.index).resample('D').median().reset_index(level=0, drop=True))
plt.show()

In [None]:
# plt.plot(df['ALT'].groupby(df.index).nunique().resample('Y').mean().reset_index(level=0, \
# drop=True).dropna().interpolate(method='cubicspline'))

In [None]:
# print(df['ALT'].groupby(df.index).resample('A').mean().reset_index(level=0, drop=True).var())
# print(df['ALT'].groupby(df.index).resample('A').mean().reset_index(level=0, drop=True).std())
# print(df['ALT'].groupby(df.index).resample('A').median().reset_index(level=0, drop=True).var())
# print(df['ALT'].groupby(df.index).resample('A').median().reset_index(level=0, drop=True).std())

In [None]:
# prng = pd.period_range("1969", "2022", freq="A-DEC")
#df['ALT'].to_period(freq='Y')#.to_timestamp()

In [None]:
import scipy.signal as signal
dfdet=signal.detrend(df['ALT'].dropna(), type='constant')

plt.plot(signal.detrend(df['ALT'].dropna(), type='constant'))
plt.plot(df['ALT'].dropna().values)
plt.plot(df['ALT'].dropna().rolling(window=100, win_type="gaussian").mean(std=0.1).values)
#plt.plot(df['ALT'].rolling(window=100, win_type="gaussian").mean(std=0.1).dropna().values)

In [None]:
plt.plot(df.groupby(df.index.year).resample('A').mean().reset_index(level=0, drop=True).loc['2003':'2021','CO2_1_1_1'])
plt.plot(df.groupby(df.index.year).resample('A').median().reset_index(level=0, drop=True).loc['2003':'2021','CO2_1_1_1'])

In [None]:
plt.plot(df.groupby(df.index.year).resample('A').mean().reset_index(level=0, drop=True).loc['2011':,'FCH4_1'])
plt.plot(df.groupby(df.index.year).resample('A').median().reset_index(level=0, drop=True).loc['2011':,'FCH4_1'])

In [None]:
plt.plot(df['ALT'].interpolate())
plt.plot((df['ALT'].replace(-9999,np.nan).ffill()).bfill(),alpha=0.5)#fillna(method='bfill'))

In [None]:
# df=df.replace(np.nan, -9999)
# df=df.loc[df.iloc[:,15] > -20103115.48]
# df=df.loc[df.iloc[:,15] > -19632788.15]
# df=df.loc[df.iloc[:,15] < 6.69e+19]
# df=df.loc[df.iloc[:,15] < 7.29e+16]
# df=df.loc[df.iloc[:,15] < 8817006896.0]
# df=df.loc[df.iloc[:,15] > -19632788.15]
# df=df.loc[df.iloc[:,19] < 733410.482]
# df=df.replace(-9999,np.nan)

In [None]:
# Remove the mean ('constant' detrend) from the imputed ALT series. The column
# selector was empty (`df2[]`, a syntax error); 'ALT' is the series the next
# cell plots alongside dfdet.
dfdet=signal.detrend(df2['ALT'].dropna().values, type='constant')

In [None]:
plt.plot(df2['ALT'].dropna().values)
plt.plot(dfdet)
plt.plot(df2['ALT'].dropna().rolling(window=100, win_type="gaussian").mean(std=0.1).values, alpha=0.4)
#plt.plot(df['ALT'].rolling(window=100, win_type="gaussian").mean(std=0.1).dropna().values)

In [None]:
plt.plot(df['ALT'].values)

In [None]:
#print(df.columns.values.tolist())

In [None]:
alt=pd.DataFrame(df.iloc[:,-1]).replace(-9999,np.nan).dropna().sort_index() #98753

In [None]:
plt.plot(alt.values)

In [None]:
ch4=pd.DataFrame(df.iloc[:,19]).replace(-9999,np.nan).dropna().sort_index() #212642

In [None]:
plt.plot(ch4.values)

In [None]:
co2=pd.DataFrame(df.iloc[:,7]).replace(-9999,np.nan).dropna().sort_index() #547096

In [None]:
plt.plot(co2.values)

In [None]:
sns.distplot(df.iloc[:,-1].replace(-9999,np.nan).dropna())

In [None]:
sns.distplot(df.iloc[:,-1])

In [None]:
plt.plot(df.iloc[:,-1].values)

In [None]:
#df.describe().transpose()

In [None]:
# file = os.path.join('/Users/bradleygay/Downloads/df_pre-clean.pkl')
# with open(file, 'wb') as f:
#     pickle.dump(df,f)
file = os.path.join('/Users/bgay/Downloads/df_clean.pkl')
with open(file, 'wb') as f:
    pickle.dump(df,f)

In [None]:
# Reload the cleaned frame from the location the preceding cell writes it to
# ('/Users/bgay/Downloads/df_clean.pkl'). The path used here before
# ('/Users/bgay/df_clean.pkl') pointed at a different directory.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
file = os.path.join('/Users/bgay/Downloads/df_clean.pkl')
with open(file, 'rb') as f:
    df=pickle.load(f)

In [None]:
altdf=pd.DataFrame(df['ALT'].replace(-9999,np.nan).dropna().sort_index())
altdf2=pd.DataFrame(df2['ALT'].replace(-9999,np.nan).dropna().sort_index())

In [None]:
plt.plot(altdf.values)

In [None]:
plt.plot(altdf2.values)

In [None]:
co2df=pd.DataFrame(df['CO2_1_1_1'].replace(-9999,np.nan).dropna().sort_index())
co2df2=pd.DataFrame(df2['CO2_1_1_1'].replace(-9999,np.nan).dropna().sort_index())

In [None]:
plt.plot(co2df.values)

In [None]:
plt.plot(co2df2.values)

In [None]:
ch4df=pd.DataFrame(df['FCH4_1'].replace(-9999,np.nan).dropna().sort_index())
ch4df2=pd.DataFrame(df2['FCH4_1'].replace(-9999,np.nan).dropna().sort_index())

In [None]:
plt.plot(ch4df.values)

In [None]:
plt.plot(ch4df2.values)

In [None]:
# ds=df.to_xarray()
# ds.groupby('index').min()

# Step 3: VIF

## After removing a feature, compute VIF values iteratively.

In [None]:
# Features removed one at a time, in exactly this order. After each removal
# the VIF table is recomputed and displayed.
# NOTE(review): the order was presumably chosen by inspecting the preceding
# VIF table at each step — confirm before reordering.
VIF_DROP_ORDER = [
    'GPP', 'TS_2_3_4', 'TS_2_3_6', 'TS_1_1_3', 'TS_2_3_5', 'TS_2_2_6',
    'TS_2_3_3', 'TS_2_2_3', 'NEE', 'TS_2_3_1', 'CH4_1_2_1', 'TS_2_2_4',
    'RECO', 'CO2_STD', 'H2O_MEAN', 'SWC_1_2_4', 'TS_2_2_5', 'SWC_1_1_4',
    'TS_STD', 'CH4_STD', 'TA_1_4_1', 'CO2_1_2_1', 'TS_2_2_1',
    'soil_[CO2]_25cm', 'SWC_1_2_6', 'CH4_1_1_2', 'WD_STD', 'LW_OUT',
    'SWC_1_2_2', 'WS_STD', 'CH4_1_4_1', 'LAI', 'TA_1_2_1', 'SWC_1_1_3',
    'CO2_MEAN',
]

# Start from a copy so nothing done during the VIF passes can alter df.
features = df.copy()
display(compute_vif(features).sort_values('VIF', ascending=False))

for dropped_feature in VIF_DROP_ORDER:
    features = features.drop(dropped_feature, axis=1)
    print('VIF after dropping {}'.format(dropped_feature))
    display(compute_vif(features).sort_values('VIF', ascending=False))

In [None]:
features

In [None]:
# 'CO2_MEAN', 'TA_1_2_1' and 'SWC_1_1_3' were dropped from the original dataset to address potential multicollinearity (target: VIF < 4).

In [None]:
filepath = Path('/Users/bgay/Downloads/data_ak_vif.csv')
filepath.parent.mkdir(parents=True, exist_ok=True)
features.to_csv(filepath)

In [None]:
file = os.path.join('/Users/bgay/Downloads/data_ak_vif.csv')
vifdf = pd.read_csv(file)

In [None]:
vifdf=features

In [None]:
vifdf.index=df.index
vifdf.index.name = None
vifdf=vifdf.sort_index()

In [None]:
#vifdf=vifdf.drop(vifdf.columns[-1], axis=1) #Get rid of rightmost 'intercept' column (1's) from previous VIF method

In [None]:
print(vifdf.columns.values.tolist())

In [None]:
vifdf.replace(-9999,np.nan).describe()

In [None]:
# df=df.drop(columns='SWC_DEPTH', axis=1)
# df=df.drop(columns='CH4_MEAN', axis=1)
# df=df.drop(columns='CH4_MIXING_RATIO_MEAN', axis=1)
# df=df.drop(columns='CH4_MIXING_RATIO_STD', axis=1)
# df=df.drop(columns='SOIL_PCAR', axis=1)
# df=df.drop(columns='TS_MEAN', axis=1)

## Filter dataframe with VIF array

In [None]:
vif_list=['WS_MEAN', 'WD_MEAN', 'CO2_1_1_1', 'CO2_1_3_1', 'CO2_1_4_1', 'soil_[CO2]_5cm', 'soil_[CO2]_15cm', 
'CH4_MEAN', 'CH4_1_1_1', 'CH4_1_3_1', 'FCH4_1', 'FCH4_2', 'FCH4_3', 'CH4_MR_MEAN', 
'CH4_MR_STD', 'TA_1_1_1', 'TA_1_3_1', 'TA_1_5_1', 'H', 'LE', 'SW_IN_MEAN', 'SW_IN_STD', 'SW_IN', 
'SW_OUT', 'LW_IN', 'NETRAD', 'G_1_1_1', 'G_1_1_2', 'G_1_1_3', 'G_1_1_4', 'G_1_1_5', 'TS_MEAN', 'TS_1_1_1', 
'TS_1_1_2', 'TS_2_2_2', 'TS_2_3_2', 'P', 'H2O_STD', 'H2O', 'FC', 'SWC_DEPTH', 'SWC_1_1_1', 'SWC_1_1_2', 
'SWC_1_1_5', 'SWC_1_1_6', 'SWC_1_2_1', 'SWC_1_2_3', 'SWC_1_2_5', 'WTD_1_1_1', 'WTD_2_1_1', 'WTD_3_1_1', 
'TW_1_1_1', 'TW_2_1_1', 'D_SNOW', 'SOIL_PCAR', 'ALT']

In [None]:
nointdf=nointdf[vif_list].dropna(axis=0,how='all')#.drop_duplicates()

In [None]:
df2=df2.loc[:,vif_list]
#df4=df2.loc[:,vif_list].drop_duplicates().dropna(axis=0,how='all')

In [None]:
nointdf=nointdf.replace(-9999,np.nan).dropna(axis=0,how='all')
df2=df2.replace(-9999,np.nan).dropna(axis=0,how='all')

#### Archived

In [None]:
plt.plot(nointdf_2.loc['2011':'2021','ALT'].dropna().values)

In [None]:
plt.plot(df4.loc['2011':'2021','ALT'].dropna().values)

In [None]:
plt.plot(nointdf_2.loc['2011':'2021','FCH4_1'].dropna().values)

In [None]:
plt.plot(df4.loc['2011':'2021','FCH4_1'].dropna().values)

In [None]:
plt.plot(nointdf_2.loc['2011':'2021','CO2_1_1_1'].dropna().values)

In [None]:
plt.plot(df4.loc['2011':'2021','CO2_1_1_1'].dropna().values)

In [None]:
#df3.describe().T

In [None]:
#df4.describe().T

In [None]:
#df[~((df>qdf.loc[high]) & (df<qdf.loc[high]))].dropna()

In [None]:
#df.memory_usage(deep=True).sum()
#features.memory_usage(deep=True).sum()
#1113205440/1796752640
#1-0.6195652173913043
#38.043% data compression (683.547MB)

In [None]:
# from statsmodels.stats.outliers_influence import variance_inflation_factor
  
# # the independent variables set
# X = df2.values
  
# # VIF dataframe
# vif_data = pd.DataFrame()
# vif_data["feature"] = X.columns
  
# # calculating VIF for each feature
# vif_data["VIF"] = [variance_inflation_factor(X.values, i)
#                           for i in range(len(X.columns))]
  
# print(vif_data)

# Step 4: Correlations

In [None]:
# List strongly correlated pairs (0.5 <= r < 1, or r <= -0.5) among the
# first ten columns of the non-imputed frame, strongest first.
corrDFCorr = nointdf.corr()
columns = nointdf.columns

highCorrList = [
    [corrDFCorr.iloc[row, col], row, col]
    for row in range(0, 10)
    for col in range(row + 1, 10)
    if (0.5 <= corrDFCorr.iloc[row, col] < 1) or (corrDFCorr.iloc[row, col] <= -0.5)
]

# Order by descending absolute correlation (stable for ties).
highCorrList.sort(key=lambda entry: -abs(entry[0]))

for coefficient, first, second in highCorrList:
    print("{0} , {1} : {2}".format(columns[first], columns[second], coefficient))

In [None]:
# List strongly correlated pairs (0.5 <= r < 1, or r <= -0.5) among the
# first ten columns of the imputed frame, strongest first.
corrDFCorr = df2.corr()
columns = df2.columns

highCorrList = [
    [corrDFCorr.iloc[row, col], row, col]
    for row in range(0, 10)
    for col in range(row + 1, 10)
    if (0.5 <= corrDFCorr.iloc[row, col] < 1) or (corrDFCorr.iloc[row, col] <= -0.5)
]

# Order by descending absolute correlation (stable for ties).
highCorrList.sort(key=lambda entry: -abs(entry[0]))

for coefficient, first, second in highCorrList:
    print("{0} , {1} : {2}".format(columns[first], columns[second], coefficient))

In [None]:
#vifdf.max()
#WS_MEAN,WD_MEAN,WTD_1_1_1, TS_MEAN, TS_1_1_2, SW_IN_MEAN, CH4_MIXING_RATIO_MEAN, FCH4_1,CH4_1_3_1,CO2_1_3_1

In [None]:
#pd.DataFrame(nointdf_2['ALT'].dropna()) #1969-2022
#pd.DataFrame(nointdf_2['CO2_1_1_1'].dropna()) #2003-2021
#pd.DataFrame(nointdf_2['FCH4_1'].dropna()) #2011-2022

In [None]:
#sns.color_palette("hls", 56)

In [None]:
corrdf=nointdf.iloc[:,:].corr()
plt.figure(figsize = (30, 25))
#legend=sns.color_palette("hls", 94)
sns.heatmap(corrdf, cmap='icefire', vmax=0.8, robust=True, square=True, linewidths=0.45, linecolor='black', annot=False)
#plt.savefig('dfnodups_24Sep23.svg', dpi=1000, bbox_inches='tight')

In [None]:
# Correlation heatmap of the imputed, VIF-filtered frame. `df4` is never
# assigned in this notebook (it survives only in a commented-out line), so the
# cell raised NameError on a fresh kernel; df2 now holds that frame.
corrdf=df2.corr()
plt.figure(figsize = (30, 25))
#legend=sns.color_palette("hls", 94)
sns.heatmap(corrdf, cmap='icefire', vmax=0.8, robust=True, square=True, linewidths=0.45, linecolor='black', annot=False)
#plt.savefig('dfnodups_24Sep23.svg', dpi=1000, bbox_inches='tight')

In [None]:
# sns.pairplot(dups)
# plt.show()

## Statistics

In [None]:
#df #91 features (-9999)
#df3 #56 features (-9999)
#df2 #91 features
#df4 #56 features

In [None]:
newdf=df2.copy()

In [None]:
# Per-column quartiles, taken at the nearest observed value rather than interpolated.
q1 = newdf.quantile(0.25,interpolation='nearest')
q3 = newdf.quantile(0.75,interpolation='nearest')
iqr = q3 - q1
# Tukey fences: 1.5 * IQR below Q1 and above Q3, one pair per column.
out_low = q1 - 1.5 * iqr
out_high = q3 + 1.5 * iqr
cols=newdf.columns.tolist()
# Keep only rows in which no column falls outside its fences; NaN compares
# False on both sides, so missing values never cause a row to be removed.
newdf2=newdf[~((newdf < (out_low)) | (newdf > (out_high))).any(axis=1)]

In [None]:
# #dic(dic['WS_MEAN']==True)]
# #filter(dic.get, dic)
# dic2={}
# for key, value in dic.items():
#     if value == True:
#         dic2[key] = value

# df[(df>qdf.loc[low,df.columns]) & (df < qdf.loc[high,df.columns])]
# filtdf=df.apply(lambda x: df[(df>qdf.loc[low,df.columns]) & (df < qdf.loc[high,df.columns])], axis=0)
# filtdf.dropna(inplace=True)
# print(filtdf.head())

#print(df[~((df < (q1 - 1.5 * iqr)) | (df > (q3 + 1.5 * iqr))).any(axis=1)==False].dropna())
#df[~((df < (q1 - 1.5 * iqr)) |(df > (q3 + 1.5 * iqr))).any(axis=1)]

#df[((df < out_low) | (df > out_high))==False].dropna().describe().T

# cols=df.columns.tolist()
# q=df[cols[0]].quantile([0.05, 0.95]).values
# newdf=df[((df[cols[0]] > q[0]) & (df[cols[0]] < q[1]))]
# for col in cols[1:]:
#     q=df[col].quantile([0.05, 0.95]).values
#     newdf.join(df[((df[col] > q[0]) & (df[col] < q[1]))], how='inner')

#df[((df < out_low) | (df > out_high))]#==False].dropna()
#df[((df < out_low) | (df > out_high))]
#np.sum((df < out_low) | (df > out_high))
#df[~((df < out_low) | (df > out_high)).any(axis=1)#==False].dropna().describe().T
#df[((df < out_lowb) & (df > out_highb))==False].dropna()#.describe().T
#df[~(((df >= (out_low)) & (df <= (out_high))).all(axis=1))]

#out_low
#df < out_low
#np.sum(df < out_low)

#(df < out_low) | (df > out_high)
#np.sum((df < out_low) | (df > out_high))

# dic={}
# for col in df.columns.values:
#     dic[col]=(df[col] < out_low[col]) | (df[col] > out_high[col])

#(ex[cols[0]] < out_low[cols[0]]) | (ex[cols[0]] > out_high[cols[0]])
#ex[((ex[cols[0]] == False) & (ex[cols[0]] == False))]

#ex[((ex[cols[0]] == False))]

#ex=ex[(ex['outliers_tukey'] == False)]
#ex=ex.drop(columns=['outliers_tukey'])
#ex.describe().T.head(10)

#plt.plot(ex.groupby(ex.index).transform(np.mean)['ALT'])
#plt.plot(ex.groupby(ex.index).transform(np.mean)['CO2_1_1_1'])
#plt.plot(ex.groupby(ex.index).transform(np.mean)['FCH4_1'])
#sns.boxplot(data=ex.groupby(ex.index).transform(np.mean)['ALT'], orient="v", palette='viridis')
#sns.boxplot(data=ex['ALT'], orient="v", palette='viridis')
#sns.displot(kind='kde',data=ex.groupby(ex.index).transform(np.mean)['ALT'])
#sns.displot(kind='kde',data=ex.groupby(ex.index).transform(np.mean)['CO2_1_1_1'])
#sns.displot(kind='kde',data=ex.groupby(ex.index).transform(np.mean)['FCH4_1'])
#plt.show()

# extr=ex.groupby(ex.index).transform(np.mean)
# extr.describe().T.head(10)

#ex.loc[~ex.loc[:,out[1]]]
#ex.loc[~((ex.loc[:,out[-2]] & ex.loc[:,out[-1]]))]
#ex.loc[:,(ex.iloc[:,-1]).values]
#ex[~(ex.loc[:,out[col]] == False) | (ex.loc[:,out[col]] == False)].value_counts(out[-1])

# def qdf(s,k=1.5,return_thresholds=False):
#     q25, q75 = np.percentile(s, 5), np.percentile(s, 95)
#     #q25, q75 = df.quantile(0.05,interpolation='nearest'), df.quantile(0.95,interpolation='nearest')
#     iqr = q75 - q25
#     cut_off = iqr * k
#     lower, upper = q25 - cut_off, q75 + cut_off
#     if return_thresholds:
#         return lower, upper
#     else:
#         return [True if x < lower or x > upper else False for x in s]

#iqr1 = df.apply(qdf)

In [None]:
ex=newdf2.copy()
cols=ex.columns.tolist()
for col in cols:
    ex[col+'_outliers_tukey'] = (ex[col] < out_low[col]) | (ex[col] > out_high[col])

In [None]:
out=ex.columns[-56:].tolist()

In [None]:
ex.drop(columns=out, axis=1)['ALT'].dropna()

In [None]:
pd.DataFrame(ex[((ex[cols[1]] == False)).dropna()].all(axis=1))#.dropna()

In [None]:
for col in cols:
    ex[((ex[cols] == False)).dropna()].all(axis=1)#.dropna()

In [None]:
# Select every Tukey flag column by its suffix instead of by position: one
# flag is created per feature, so a fixed slice such as [-53:] can miss some.
out=[name for name in ex.columns if name.endswith('_outliers_tukey')]

# Keep rows where no flag is set, then discard all flag columns. The earlier
# loop reassigned ex2 on every pass, so only the last flag column actually
# filtered anything, and it dropped columns in place on a slice of ex.
ex2=ex.loc[~ex[out].any(axis=1)].drop(columns=out)
ex2.describe().T.head(10)

In [None]:
cols=ex.columns.tolist()
for col in cols:
    lower_quantile, upper_quantile = df[col].quantile([.25, .75])
    df.loc[(df[col] > lower_quantile) & (df[col] < upper_quantile)].head()

In [None]:
from statsmodels.tsa.seasonal import MSTL
stl_kwargs = {"seasonal_deg": 0} 
model = MSTL(ex2.ALT, periods=(54), stl_kwargs=stl_kwargs)
res = model.fit()
seasonal = res.seasonal # contains both seasonal components
trend = res.trend
residual = res.resid

In [None]:
plt.figure(figsize=(10,6))
plt.plot(ex2.ALT.values,label='ALT')
plt.plot(seasonal.values,label='seasonal')
plt.plot(trend.values,label='trend')
plt.plot(residual.values,label='residual')
plt.legend()
plt.show()

In [None]:
len(ex.ALT.replace(0,np.nan).dropna())/11
#len(ex.CO2_1_1_1.replace(0,np.nan).dropna())/54
#len(ex.FCH4_1.replace(0,np.nan).dropna())/54

In [None]:
# 'yearly' is not a valid model: statsmodels accepts 'additive' or
# 'multiplicative' and treats anything not starting with 'm' as additive.
# Naming it explicitly keeps the result and removes the misleading label.
seasonal_decompose(ex2.ALT.replace(0,np.nan).dropna(), model='additive', extrapolate_trend='freq', period=10).plot()#period=45091).plot()
plt.show()

In [None]:
# 'yearly' is not a valid model: statsmodels accepts 'additive' or
# 'multiplicative' and treats anything not starting with 'm' as additive.
# Naming it explicitly keeps the result and removes the misleading label.
seasonal_decompose(ex2.CO2_1_1_1.replace(0,np.nan).dropna(), model='additive', extrapolate_trend='freq', period=10).plot()#period=45156).plot()
plt.show()

In [None]:
# 'yearly' is not a valid model: statsmodels accepts 'additive' or
# 'multiplicative' and treats anything not starting with 'm' as additive.
# Naming it explicitly keeps the result and removes the misleading label.
seasonal_decompose(ex2.FCH4_1.replace(0,np.nan).dropna(), model='additive', extrapolate_trend='freq', period=10).plot()#period=45091).plot()
plt.show()

In [None]:
plot_acf(ex2.ALT,lags= 30)
plt.show()

In [None]:
plot_pacf(ex2.ALT, lags=30)
plt.show()

In [None]:
#The dataset is highly non-stationary as can be seen from the ACF and PACF plots.

In [None]:
ex2

In [None]:
ex2.shape

In [None]:
#pd.DataFrame(ex.describe().T).head(50)
#pd.DataFrame(df.describe().T).tail(50)
#ex.describe().T.head(10)

In [None]:
#	        	count		mean		std			min				25%			50%			75%			max
#WS_MEAN	    2441240.0	-331.962269	1250.621935	-7999.180000	1.043000	2.730000	5.181400	359.999000
#WS_STD	    	2441240.0	887.473228	1927.005984	0.000000		1.001022	1.198322	1.198322	5784.826406
#WD_MEAN    	2441240.0	-414.366847	1695.705661	-7999.194729	59.598000	121.972700	227.479000	360.000000
#WD_STD	    	2441240.0	1141.633877	2187.161895	0.000000		9.522521	15.148940	15.148940	5974.249502
#CO2_MEAN		2441240.0	-944.158394	2724.131753	-9055.249031	400.366661	413.442133	413.442133	666.757231
#CO2_STD	   	2441240.0	948.961290	1917.391558	0.000000		2.200290	4.849716	4.849716	5527.841799
#CO2_1_1_1		2441240.0	377.720295	106.708493	-7.945502		380.989908	399.500000	417.500000	986.000000
#CO2_1_2_1		2441240.0	407.034613	27.799666	320.116000		391.019095	413.971332	414.360631	775.028400
#CO2_1_3_1		2441240.0	409.444220	25.439992	316.768000		393.998469	413.773026	418.861556	755.544795
#CO2_1_4_1		2441240.0	420.343604	48.551287	-4.663500		398.251900	412.041100	430.183700	899.182400
#soil_[CO2]_5cm	2441240.0	888.149674	186.787902	100.100000		840.000000	942.000000	942.000000	5963.000000
#soil_[CO2]_15cm2441240.0	947.248324	322.106599	100.200000		607.400000	1167.000000	1167.000000	5938.000000
#soil_[CO2]_25cm2441240.0	1162.015365	550.594556	-9.300000		562.400000	1605.000000	1605.000000	6016.000000
#CH4_MEAN	    2441240.0	8.658754	1.247081	-0.484000		8.986000	8.986000	8.986000	62.935000
#CH4_STD	   	2441240.0	0.060128	0.010965	0.001000		0.065000	0.065000	0.065000	0.150000
#CH4_1_1_1		2441240.0	1896.517305	481.218456	-25.765400		1899.480259	1974.101588	2081.330000	6783.800000
#CH4_1_2_1		2441240.0	1900.530605	55.243138	1010.941348		1885.611720	1906.542170	1920.939435	3119.338807
#CH4_1_3_1		2441240.0	1902.909639	59.669274	1000.507117		1883.295314	1902.294335	1921.933575	3124.718378
#CH4_1_4_1		2441240.0	1904.101243	55.481003	1023.379951		1892.642250	1912.788370	1921.545445	3122.626322
#CH4_1_1_2		2441240.0	2002.534057	69.560109	1519.870000		1944.000000	1992.000000	2065.000000	2969.000000
#FCH4_1	    	2441240.0	47.586074	228.989749	-2255.044730	-0.485019	1.201585	10.149789	1805.150000
#FCH4_2	    	2441240.0	0.396615	0.502794	-2.798230		0.000127	0.006140	0.952362	4.517780
#FCH4_3	    	2441240.0	0.521419	0.335826	-2.705823		0.508771	0.508771	0.673438	7.846163
#CH4_RATIO_MEAN	2441240.0	2051.293488	512.171326	-7606.679992	2098.635200	2098.635200	2098.635200	3083.678345
#CH4_RATIO_STD	2441240.0	42.017689	508.251702	0.000000		0.000000	0.000000	0.000000	7243.801479
#TA_1_1_1		2441240.0	-2.465523	14.871898	-46.610000		-12.510752	-1.766000	8.796862	49.050000
#TA_1_2_1		2441240.0	-8.196320	14.990450	-47.310905		-20.874000	-11.145001	2.817000	33.095001
#TA_1_3_1		2441240.0	-6.669106	14.556445	-48.199076		-17.393300	-9.752896	3.623632	33.622998
#TA_1_4_1		2441240.0	-8.417416	15.854576	-50.411146		-20.321560	-10.182999	2.998077	34.726747
#TA_1_5_1		2441240.0	-5.550884	5.371307	-30.904200		-8.888400	-8.888400	-0.947300	21.493200
#H	        	2441240.0	12.508008	51.296174	-490.227957		-9.814500	-0.941325	16.095887	788.152896
#LE	        	2441240.0	18.472419	36.316575	-378.790000		-0.233658	2.075250	20.405000	577.900000
#SW_IN_MEAN		2441240.0	36.829852	219.248627	-6667.313333	-0.300000	-0.300000	1.695000	931.296667
#SW_IN_STD		2441240.0	10.036924	222.459307	0.000000		1.011484	1.011484	2.184960	7585.205152
#SW_IN	    	2441240.0	109.703758	172.364710	-14.190000		0.000000	14.100000	154.621442	947.125621
#SW_OUT	    	2441240.0	33.252744	71.092313	-13.120000		0.226913	5.080000	31.240000	860.703593
#LW_IN	    	2441240.0	269.742140	56.167293	106.000000		228.000000	284.100000	314.705652	487.349048
#LW_OUT	    	2441240.0	298.601912	60.539794	135.600000		249.805618	303.213841	338.000000	539.200000
#NETRAD	    	2441240.0	33.688920	100.038253	-611.138000		-15.004500	-0.883984	37.402250	795.030000
#G_1_1_1 		2441240.0	1.990315	12.330120	-172.078500		-3.947000	-0.577845	4.574700	296.700000
#G_1_1_2 		2441240.0	0.638221	11.197455	-72.040000		-5.126286	-1.489326	4.730000	360.300000
#G_1_1_3 		2441240.0	1.763595	11.231484	-63.300600		-3.435000	-0.206000	4.553000	318.800000
#G_1_1_4 		2441240.0	1.829016	13.570547	-71.250000		-5.172555	-0.928000	4.182965	223.600000
#G_1_1_5		2441240.0	0.551955	0.712206	0.233000		0.389000	0.389000	0.389000	3.737000
#TS_MEAN	    2441240.0	-325.164645	844.372255	-9776.826378	-1.328250	-0.315711	-0.315711	18.745000
#TS_STD	    	2441240.0	601.043140	1418.530796	0.000000		0.549789	0.549789	1.935710	5063.031841
#TS_1_1_1		2441240.0	-1.092178	7.629765	-42.996000		-4.301000	-1.017000	3.493000	51.823000
#TS_1_1_2		2441240.0	-4.207983	157.221466	-7999.000000	-3.208000	-1.043000	2.531000	45.254000
#TS_1_1_3		2441240.0	-8.491679	157.268365	-7999.000000	-14.902600	-2.707700	3.506200	44.461000
#TS_2_2_1		2441240.0	-4.732272	7.098692	-26.425000		-10.180300	-6.178000	0.522000	36.236000
#TS_2_2_2		2441240.0	-6.621048	8.309621	-29.256700		-12.664900	-6.723000	-0.121000	26.702000
#TS_2_2_3		2441240.0	-5.711360	7.335875	-24.108700		-11.348400	-7.343000	-0.109000	26.738000
#TS_2_2_4		2441240.0	-5.587507	6.868196	-22.223000		-11.065000	-5.427000	-0.093000	23.229000
#TS_2_2_5		2441240.0	-5.249312	6.574343	-21.901400		-10.292900	-6.990000	-0.106000	25.419000
#TS_2_2_6		2441240.0	-11.296611	6.772314	-24.048200		-16.005600	-11.512500	-9.704000	9.701300
#TS_2_3_1		2441240.0	-9.805826	4.966600	-19.198400		-13.407100	-9.548600	-8.872500	1.755900
#TS_2_3_2		2441240.0	-11.778162	6.528402	-25.925200		-16.373600	-11.689100	-10.853700	5.141900
#TS_2_3_3		2441240.0	-10.729196	5.854499	-22.447100		-15.331100	-10.322600	-9.809800	3.081400
#TS_2_3_4		2441240.0	-10.343749	5.496603	-20.367800		-14.402500	-10.166200	-9.270900	3.202000
#TS_2_3_5		2441240.0	-10.244916	5.554786	-20.685300		-14.551900	-9.615000	-8.742800	2.911900
#TS_2_3_6		2441240.0	-10.587016	5.580946	-21.927200		-14.640200	-10.239500	-8.995200	2.947300
#P	        	2441240.0	0.022381	0.086476	0.000000		0.000000	0.000000	0.000000	5.503000
#H2O_MEAN		2441240.0	-1342.83356	2657.907842	-9089.996319	0.326297	3.684906	3.684906	21.047164
#H2O_STD    	2441240.0	908.820992	1848.977086	0.000000		0.402464	0.496981	0.496981	5277.931325
#H2O        	2441240.0	1.128872	2.800040	-0.114200		0.000000	0.000000	0.000000	34.665985
#FC	        	2441240.0	-0.347858	2.498192	-87.339926		-0.818113	0.033374	0.403652	35.034619
#SWC_1_1_1		2441240.0	24.138536	26.748149	-0.420365		0.507871	7.296000	51.976000	95.376089
#SWC_1_1_2		2441240.0	24.865874	29.855438	-0.523000		2.534696	10.232692	46.566750	100.000000
#SWC_1_1_3		2441240.0	30.714429	28.391120	0.000000		4.700000	13.401000	60.400000	100.347200
#SWC_1_1_4		2441240.0	39.748796	36.400212	3.530000		5.841000	17.200000	78.700000	91.427530
#SWC_1_1_5		2441240.0	39.622339	34.767995	3.420000		7.600000	20.655000	82.900000	87.500000
#SWC_1_1_6		2441240.0	5.944257	4.913053	3.590000		4.300000	4.780000	4.910000	52.140000
#SWC_1_2_1		2441240.0	6.249867	5.648614	3.770000		4.900000	5.310000	5.450000	43.720000
#SWC_1_2_2		2441240.0	6.148201	8.728736	0.970000		3.710000	4.610000	5.090000	52.170000
#SWC_1_2_3		2441240.0	2.016762	3.753615	0.000000		0.000000	0.000000	3.620000	52.140000
#SWC_1_2_4		2441240.0	7.456244	5.042052	3.410000		5.560000	6.700000	7.240000	39.650000
#SWC_1_2_5		2441240.0	4.783614	5.751039	2.180000		2.540000	3.470000	4.850000	52.170000
#SWC_1_2_6		2441240.0	4.823020	6.472847	0.770000		2.810000	4.180000	5.320000	52.180000
#WTD_1_1_1		2441240.0	-0.256127	0.498422	-41.663177		-0.294235	-0.251388	-0.197579	38.730000
#WTD_2_1_1		2441240.0	-0.166526	0.084358	-0.419513		-0.182408	-0.177365	-0.143416	0.101803
#WTD_3_1_1		2441240.0	-0.201460	0.053191	-0.514551		-0.232697	-0.195987	-0.195987	0.015637
#TW_1_1_1		2441240.0	0.546933	0.612445	-1.445000		0.090620	0.396000	1.062000	10.000000
#TW_2_1_1		2441240.0	0.324193	1.137814	-0.990989		-0.757230	0.194141	0.873618	9.992849
#D_SNOW	    	2441240.0	29.666221	22.233278	-5.600000		10.700000	30.461748	44.200000	161.000000
#LAI        	2441240.0	0.571146	0.287920	0.000000		0.192500	0.762990	0.762990	4.103644
#SOIL_PCAR		2441240.0	3.400164	0.081159	3.400000		3.400000	3.400000	3.400000	43.500000
#NEE        	2441240.0	-24.274624	23.817989	-123.134742		-41.103618	-41.103618	8.095337	95.498533
#GPP	       	2441240.0	-68.797773	49.251730	-308.095015		-103.168878	-103.168878	-2.287701	30.097280
#RECO    		2441240.0	44.520055	25.529208	0.000000		10.383038	62.065259	62.065259	288.973895
#ALT	        2441240.0	47.373584	15.105199	0.000000		33.000000	43.000000	57.000000	271.000000

In [None]:
sns.kdeplot(ex2.ALT)

In [None]:
# Kernel density of ALT restricted to values that are neither below 20 nor above 80
# (NaNs pass both negated comparisons and are left for seaborn to handle).
# `fill=True` replaces the deprecated `shade=True` keyword.
sns.kdeplot(ex2.ALT[~(ex2.ALT < 20) & ~(ex2.ALT > 80)], fill=True)

In [None]:
# Range-filter the rows of `ex`, one column at a time.
# Each step indexes the DataFrame itself with the boolean mask. Selecting the column
# first (e.g. ex.WS_MEAN[mask]) returns a Series, and the following step's column
# lookup (newdf.WD_MEAN) would then fail with AttributeError.
# The ~(x < lo) & ~(x > hi) form keeps rows where the tested value is NaN.
newdf=ex[~(ex.WS_MEAN < -1993) & ~(ex.WS_MEAN > 12.5)]
newdf=newdf[~(newdf.WD_MEAN < -500)]
newdf=newdf[~(newdf.FCH4_1 < 0) & ~(newdf.FCH4_1 > 100)]
newdf=newdf[~(newdf.CO2_1_1_1 < 300) & ~(newdf.CO2_1_1_1 > 550)]
newdf=newdf[~(newdf.CO2_1_4_1 < 300) & ~(newdf.CO2_1_4_1 > 650)]
newdf=newdf[~(newdf.CH4_1_1_1 < 1550) & ~(newdf.CH4_1_1_1 > 2750)]
newdf=newdf[~(newdf.ALT < 20) & ~(newdf.ALT > 100)]

#newdf=df[~(df['TS_MEAN'] < 0)]
#newdf=df[~(df['WS_MEAN'] < -1993)]
#newdf=newdf[~(newdf['WS_MEAN'] > 7.283)]
#newdf=newdf[~(newdf['WD_MEAN'] < -414)]
#newdf=newdf[~(newdf['FCH4_1'] < 0)]
#newdf=newdf[~(newdf['FCH4_1'] > 3.778)]
#newdf=newdf[~(newdf['CO2_1_1_1'] < 345)]
#newdf=newdf[~(newdf['CO2_1_1_1'] > 422)]
#newdf=newdf[~(newdf['CO2_1_4_1'] < 351)]
#newdf=newdf[~(newdf['CO2_1_4_1'] > 470)]
#newdf=newdf[~(newdf['CH4_1_1_1'] < 1904)]
#newdf=newdf[~(newdf['CH4_1_1_1'] > 2029)]
# Drop rows of `newdf` that fall outside the accepted range of each listed column.
# Entries are (column, lower bound, upper bound); None leaves that side open.
# Negated comparisons are used so rows with NaN in the tested column are kept.
_keep_ranges = [
    ('ALT', None, 94),
    ('H', -17, 25),
    ('LE', -30, 49),
    ('SW_IN_MEAN', -1.57, 0.45),
    ('TS_1_1_2', -10.46, 12.71),
    ('FC', -1.23, 1.09),
    ('WTD_1_1_1', None, -0.06),
    ('NETRAD', -124, 183),
]
for _name, _lo, _hi in _keep_ranges:
    if _lo is not None:
        newdf = newdf[~(newdf[_name] < _lo)]
    if _hi is not None:
        newdf = newdf[~(newdf[_name] > _hi)]

# newdf=newdf[~(newdf['CH4_1_1_1'] < 1688)]
# newdf=newdf[~(newdf['CH4_1_1_1'] > 2058)]
# newdf=newdf[~(newdf['CH4_1_1_1'] < 1809)]
# newdf=newdf[~(newdf['CH4_1_1_1'] > 2052)]

In [None]:
#                   count	    mean	        std	            min	            25%	            50%	        75%	        max
#WS_STD	            112494.0	1703.475873	    2.170889e+03	0.040415	    1.198322	    1.198322	4471.876575	5478.237236
#WD_STD	            112494.0	2155.259125	    2.728994e+03	0.311127	    15.148940	    15.148940	5634.911590	5634.911590
#CO2_MEAN	        112494.0	-1467.466375    2.331839e+03	-9052.221105    -4316.922520	413.442133	413.442133	436.002861
#CO2_STD	        112494.0	2074.511490	    2.638423e+03	0.000000	    4.849716	    4.849716	5440.175373	5478.957689
#soil_[CO2]_5cm	    112494.0	902.135125	    3.033223e+02	101.100000	    840.000000	    942.000000	942.000000	4642.000000
#soil_[CO2]_15cm    112494.0	1156.166513	    5.714284e+02	102.700000	    1017.000000	    1167.000000	1167.000000	5898.000000
#soil_[CO2]_25cm    112494.0	1413.499433	    6.228226e+02	-4.835000	    1172.000000	    1605.000000	1605.000000	5928.000000
#CH4_STD	        112494.0	0.064617	    3.090046e-03	0.006000	    0.065000	    0.065000	0.065000	0.065000

In [None]:
newdf.describe().T.head(10)

In [None]:
#df['H'].max()
newdf['CH4_MEAN'].max()
#newdf[~(newdf['CO2_1_1_1'] > 460)].CO2_1_1_1.max()
#newdf[~(newdf['ALT'] < 0)].ALT.min()

In [None]:
newdf.shape

In [None]:
plt.plot(newdf['CH4_MEAN'].values)

In [None]:
#sns.boxplot(df[~(df['CH4_1_1_1'] < 1688)].CH4_1_1_1)
#sns.boxplot(newdf[~(newdf['CH4_1_1_1'] > 2058)].CH4_1_1_1)
sns.boxplot(newdf['TS_MEAN'])

In [None]:
#sns.boxplot(newdf[~(newdf['CH4_STD'] < -124)].CH4_STD)
sns.boxplot(newdf[~(newdf['CH4_STD'] > 0.1)].CH4_STD)
#sns.boxplot(newdf[~(newdf['WS_MEAN'] > 7.283)].WS_MEAN)
#sns.boxplot(newdf[~(newdf['CH4_1_1_1'] > 2055)].CH4_1_1_1)

In [None]:
plt.plot(df['CO2_1_2_1'].values)

In [None]:
plt.plot(newdf.SW_IN_MEAN.values)

In [None]:
newdf[~(newdf['SW_IN_MEAN'] > 3000)].dropna().SW_IN_MEAN.max()

In [None]:
df[~(df['WD_MEAN'] < -2000)].dropna().WD_MEAN.min()

In [None]:
sns.histplot(newdf[~(newdf['SW_IN_MEAN'] > 2150)].dropna().SW_IN_MEAN)#, kde=True)

In [None]:
#sns.boxplot(df[~(df['WD_MEAN'] < -1725)].dropna().WD_MEAN.values)
#sns.boxplot(newdf[~(newdf['WS_MEAN'] > 8)].dropna().WS_MEAN.values)
#sns.boxplot(newdf[~(newdf['FCH4_1'] < 0)].dropna().FCH4_1.values)
#sns.boxplot(newdf[~(newdf['FCH4_1'] > 6)].dropna().FCH4_1.values)
#sns.boxplot(newdf[~(newdf['CO2_MEAN'] < 398)].dropna().CO2_MEAN.values)
#sns.boxplot(newdf[~(newdf['CO2_MEAN'] > 439)].dropna().CO2_MEAN.values)#, kde=True)
sns.boxplot(newdf[~(newdf['SW_IN_MEAN'] < 2050)].dropna().SW_IN_MEAN.values)
#sns.boxplot(newdf[~(newdf['CH4_MIXING_RATIO_MEAN'] > 2100)].dropna().CH4_MIXING_RATIO_MEAN.values)

SW_IN_MEAN, G_1_1_1, G_1_1_3

In [None]:
sns.boxplot(df[~(df['WS_MEAN'] > 1725)].dropna().WS_MEAN.values)

#### Archived

In [None]:
#df[df[cols].isin(df < (out_low))]
#df.query(cols[0])
#df[np.where(df.C | df.D, True, False)]

In [None]:
df[(df < (out_low))].any(axis=0)

In [None]:
# Show df with every value that is not below out_low masked to NaN.
# (The dangling "." left on this line was a SyntaxError and has been removed.)
df[df < out_low]

In [None]:
#df[~(df < (out_low))].any(axis=0)
# Keep the rows of df that lie below the 95th percentile in every column of `cols`.
# Previously df2 was reassigned from df on each pass, so only the filter for the
# last column survived the loop. The per-column masks are now combined.
# Percentiles are taken on the unfiltered df; masks are combined by position.
below_p95 = np.ones(len(df), dtype=bool)
for col in cols:
    below_p95 &= (df[col] < df[col].quantile(0.95)).to_numpy()
df2 = df[below_p95]

#for col in cols:
#    df.query('df[col] < df[col].quantile(.95)')

In [None]:
df2

In [None]:
df[~((df < (out_low))).any(axis=1)]

In [None]:
newdf=df[~((df < (out_low)) | (df > (out_high))).any(axis=1)]

In [None]:
#[True if x < out_low | x > out_high else False for x in df]
df2=df[~((df < out_low) | (df > out_high)).any(axis=1)]
#df[((df < out_low) | (df > out_high)).any(axis=1)]


In [None]:
# Set entries of each df2 column to NaN wherever the same-named column of `iqr`
# is True; all other entries keep their current value.
# NOTE(review): np.where pairs the two columns by position, not by index label, so
# this presumably expects iqr[column] and df2[column] to have equal length — TODO confirm.
for column in df2:
    df2[column] = np.where(iqr[column] == True,np.nan,df2[column])

In [None]:
cols = df2.columns
df2[cols] = df2[cols].apply(pd.to_numeric, errors='coerce')

In [None]:
# df3=df.interpolate(method='nearest', axis=0).bfill().ffill()
df2=df2.dropna()

In [None]:
df2.sort_index(inplace=True)

In [None]:
plt.figure(figsize=(20, 20),dpi=100)
sns.heatmap(df2.corr(), cmap='coolwarm')

## Significance Testing

In [None]:
#newdata.corr()

In [None]:
# Pairwise correlation matrix of newdata, flattened to a Series keyed by
# (column, column) pairs and sorted in ascending order.
rho = newdata.corr()
s = rho.unstack()
so = s.sort_values(kind="quicksort")
# Keep only the pairs whose first key is 'ALT' and preview the 20 lowest.
so2 = so.loc[['ALT']].to_frame()
so2.head(20)

In [None]:
#Top 5 (Negative)
#soil_[CO2]_15cm	-0.311942
#G_1_1_4	        -0.267481
#TW_1_1_1	        -0.219022
#TA_1_2_1	        -0.205626
#TA_1_3_1	        -0.180273

#Top 5 (Positive)
#FCH4_1	            0.334563
#FCH4_2	            0.309447
#TA_1_5_1	        0.263813
#CO2_1_1_1	        0.243649
#WTD_2_1_1	        0.237545

In [None]:
rm=df.rolling(30).mean()
rs=df.rolling(30).std()
rm.shape

In [None]:
df.sample(frac=0.7, replace=True, random_state=1)

## ADF/KPSS Stationarity Testing

In [None]:
# Run ADF (lag selection by AIC) and KPSS (trend='ct') on every column of newdata
# and store the headline fields of each result, keyed by column name:
# [stat, pvalue, lags, trend, critical_values, null_hypothesis, alternative_hypothesis].
# The result objects are bound to adf_res / kpss_res. The earlier name `sum`
# shadowed the built-in sum() for the rest of the kernel session.
adf_results = {}
kpss_results = {}
for col in newdata.columns.values:
    adf_res = ADF(newdata[col], method='aic', low_memory=True)
    adf_results[col] = [
        adf_res.stat, adf_res.pvalue, adf_res.lags, adf_res.trend,
        adf_res.critical_values, adf_res.null_hypothesis, adf_res.alternative_hypothesis,
    ]
    kpss_res = KPSS(newdata[col], trend='ct')
    kpss_results[col] = [
        kpss_res.stat, kpss_res.pvalue, kpss_res.lags, kpss_res.trend,
        kpss_res.critical_values, kpss_res.null_hypothesis, kpss_res.alternative_hypothesis,
    ]

In [None]:
dfadf=pd.DataFrame.from_dict(adf_results, orient='index')

In [None]:
dfkpss=pd.DataFrame.from_dict(kpss_results, orient='index')

In [None]:
# Column labels for the ADF summary table: one label per stored result field.
colnames = [
    'ADF_' + field
    for field in (
        'TestStatistic',
        'PValue',
        'Lags',
        'Trend',
        'CriticalValues',
        'NullHypothesis',
        'AlternativeHypothesis',
    )
]

In [None]:
# Column labels for the KPSS summary table: one label per stored result field.
colnames2 = [
    'KPSS_' + field
    for field in (
        'TestStatistic',
        'PValue',
        'Lags',
        'Trend',
        'CriticalValues',
        'NullHypothesis',
        'AlternativeHypothesis',
    )
]

In [None]:
dfadf.columns=colnames

In [None]:
dfkpss.columns=colnames2

In [None]:
dfstats=pd.concat([dfadf,dfkpss], axis=1)

In [None]:
# Write the combined ADF/KPSS table to the current user's Downloads folder.
# Path.home() replaces the hard-coded /Users/bgay prefix: under that account the
# file lands in the same place, and the cell stays runnable for other users.
dfstats.to_csv(Path.home() / 'Downloads' / 'stats.csv')

In [None]:
#dfstats.loc[:,'ADF_CriticalValues'].apply(pd.Series)
#dfstats.loc[:,'KPSS_CriticalValues'].apply(pd.Series)
#list(dfstats.loc[:,'ADF_CriticalValues'][0].values())
#list(dfstats.loc[:,'KPSS_CriticalValues'][0].values())

#dfstats.loc['ALT']
#dfstats.loc['CO2_1_1_1']
#dfstats.loc['FCH4_1']

In [None]:
dfstats['KPSS_CriticalValues']

In [None]:
#dfstats2=dfstats[:-3]
#list(dfstats2.where(dfstats2['KPSS_TestStatistic']<0.1193).dropna().index)
#['TS_1_1_2']

In [None]:
# Per-column minima (ls) and maxima (ls2) of newdata, in column order.
# Built in a single pass; growing the arrays with np.append inside the loop
# reallocated them on every iteration. dtype=float matches what np.append
# produced when starting from an empty list.
n_columns = len(newdata.columns)
ls = np.array([newdata.iloc[:, i].min() for i in range(n_columns)], dtype=float)
ls2 = np.array([newdata.iloc[:, i].max() for i in range(n_columns)], dtype=float)

In [None]:
n=[0,1,2,11,14,22,28,30]
ls[n]

In [None]:
# newdata.iloc[:,0] #WS_MEAN
# newdata.iloc[:,1] #WD_MEAN
# newdata.iloc[:,2] #CO2_MEAN
# newdata.iloc[:,11] #FCH4_1
# newdata.iloc[:,14] #CH4_MIXING_RATIO_MEAN
# newdata.iloc[:,22] #SW_IN_MEAN
# newdata.iloc[:,28] #G_1_1_1
# newdata.iloc[:,30] #G_1_1_3

In [None]:
#TODO: CHANGE LABELS AND SWITCH WS_MEAN AND WD_MEAN!!! (note originally read "WS_MEAN AND WS_MEAN"; WD_MEAN presumably intended - confirm)

In [None]:
# Successive outlier filters; each step also drops rows that contain any NaN.
newdata2=newdata[~(newdata['FCH4_1'] < -500)].dropna()
newdata3=newdata2[~(newdata2['FCH4_1'] > 500)].dropna()
newdata4=newdata3[~(newdata3['WD_MEAN'] < -1725)].dropna()
#newdata4=newdata3[~(newdata3['WD_MEAN'] < -360)].dropna()
#newdata5=newdata4[~(newdata4['WD_MEAN'] > 360)].dropna()
# newdata5 is not assigned in this cell (its line above is commented out), so the
# WS_MEAN filter continues from newdata4 rather than an undefined or stale newdata5.
newdata6=newdata4[~(newdata4['WS_MEAN'] < 0)].dropna()

In [None]:
# Mean of WS_MEAN and of WD_MEAN, shown as a tuple.
# (A stray trailing "a" made this line a SyntaxError and has been removed.)
newdata.WS_MEAN.mean(), newdata.WD_MEAN.mean()

In [None]:
#plt.hist(newdata3['WD_MEAN'].values)
#plt.hist(newdata3[~(newdata3['WD_MEAN'] < -1725)].dropna().WD_MEAN.values)
#plt.hist(newdata3[~(newdata3['WD_MEAN'] > 1500)].dropna().WD_MEAN.values)
plt.hist(newdata3['WS_MEAN'].values)
#plt.hist(newdata3[~(newdata3['WD_MEAN'] < -1725)].dropna().WD_MEAN.values)

In [None]:
# Remove rows whose WD_MEAN lies at or below -361, then rows above 360.
# The second filter was missing its negation, so it kept ONLY the rows above 360
# instead of discarding them.
newdata5=newdata4[~(newdata4['WD_MEAN'] <= -361)].dropna()
newdata6=newdata5[~(newdata5['WD_MEAN'] > 360)].dropna()

In [None]:
newdata[~(newdata['WD_MEAN'] > 360)].WD_MEAN

In [None]:
len(newdata['WD_MEAN'])

In [None]:
newdata[~(newdata['WD_MEAN'] <= -361)].WD_MEAN

In [None]:
newdata4[(newdata4['WD_MEAN'] < 360)].dropna().WD_MEAN.plot()

In [None]:
#newdata=newdata.drop(columns=['ALT_diff','CO2_diff','CH4_diff'],axis=1)
# Successive outlier filters on newdata ending in newdata10; every step except the
# last also drops rows that contain any NaN. Intermediates are deleted afterwards.
newdata2=newdata[~(newdata['WD_MEAN'] <= -361)].dropna()
newdata3=newdata2[~(newdata2['WS_MEAN'] <= -1993)].dropna()
newdata4=newdata3[~(newdata3['CO2_MEAN'] <= 0)].dropna()
# The chain previously jumped to an unassigned newdata5 here, which either raised
# NameError or silently picked up a stale frame from another cell and discarded
# the three filters above. It now continues from newdata4.
# NOTE(review): if a filter step was meant to sit between CO2_MEAN and
# CH4_MIXING_RATIO_MEAN (the missing "newdata5"), it needs to be restored.
newdata6=newdata4[~(newdata4['CH4_MIXING_RATIO_MEAN'] <= -1018)].dropna()
newdata7=newdata6[~(newdata6['SW_IN_MEAN'] <= -6)].dropna()
newdata8=newdata7[~(newdata7['G_1_1_1'] <= 0)].dropna()
newdata9=newdata8[~(newdata8['G_1_1_3'] <= 0)].dropna()
newdata10=newdata9[~(newdata9['CO2_MEAN'] > 586)]
del newdata2, newdata3, newdata4, newdata6, newdata7, newdata8, newdata9

In [None]:
newdata[~(newdata['WD_MEAN'] <= -361)]

In [None]:
newdata[~(newdata['WD_MEAN'] <= -1725)].dropna().WD_MEAN.min()

In [None]:
newdata3.max()

In [None]:
#newdata10[~(newdata10['CO2_MEAN'] < 0)].dropna().CO2_MEAN.min()
newdata[~(newdata['FCH4_1'] <= -500)].dropna().FCH4_1.min()
newdata[~(newdata['FCH4_1'] > 500)].dropna().FCH4_1.max()

In [None]:
sns.kdeplot(newdata.WD_MEAN)

In [None]:
plt.plot(newdata3.FCH4_1.values)

In [None]:
plt.hist(newdata3['FCH4_1'].values)

In [None]:
newdata3['FCH4_1'].std()

In [None]:
# Preview newdata without the CO2_MEAN column. The result is only displayed;
# newdata itself is left unchanged because nothing is assigned.
newdata.drop(columns=['CO2_MEAN'])

# Step 5: ADF/KPSS Stationarity Testing

#### Archived

In [None]:
#ex=newdata.copy()
#s = ex['ALT']
#for i in range(150): s = s.diff()
#ex['Lag 150'] = s

#sns.kdeplot(newdata9['CO2_MEAN'])

#newdata['FCH4_1'].min(),newdata['FCH4_2'].min(),newdata['FCH4_3'].min() (-2255.04473, -2.79822976, -2.705822784)
#newdata['FCH4_1'].max(),newdata['FCH4_2'].max(),newdata['FCH4_3'].max() (1805.15, 4.517780285, 7.846163338)

#sns.kdeplot(newdata[~(newdata['G_1_1_1'] < 0)].dropna().G_1_1_1.values)

#newdata[~(newdata['CO2_MEAN'] > 586)].dropna().CO2_MEAN.max()
#newdata['WS_MEAN'].where(newdata['WS_MEAN']<-799,np.nan).dropna().min()
#dfstats2['KPSS_TestStatistic']<0.1193).dropna().index)

In [None]:
#newdata['ALT_diff'] = newdata['ALT'].diff().fillna(method='bfill').fillna(method='ffill')

In [None]:
#features.corr()

In [None]:
#ALT
#plt.plot(dataframe['ALT'].replace(-9999,np.nan).dropna())

In [None]:
#dataframe.index=dataframe['DATE'].sort_values().values

In [None]:
plt.figure(figsize=(8,5))
sns.lineplot(features['ALT'])
plt.grid()
plt.show()

In [None]:
plt.figure(figsize=(8,5))
sns.lineplot(features['FCH4_1']['2011':])
plt.grid()
plt.show()

In [None]:
plt.figure(figsize=(8,5))
sns.lineplot(features['CO2_1_1_1']['2003':'2021'])
plt.grid()
plt.show()

In [None]:
# For every column of newdf, collect the index labels of the rows where that
# column is NaN.
# Previously `ls` was overwritten on each pass, so only the last column's result
# survived. All results are now kept in nan_index_by_col; `ls` is still set to the
# last column's list so any later cell that reads it sees the same value as before.
cols = newdf.columns.tolist()
nan_index_by_col = {col: newdf[newdf[col].isna()].index.to_list() for col in cols}
if cols:
    ls = nan_index_by_col[cols[-1]]

In [None]:
# Max, mean and min of the non-missing ALT values that share an index label.
alt_obs = nointdf_2['ALT'].dropna()
alt_by_index = alt_obs.groupby(alt_obs.index)
plt.plot(alt_by_index.max().to_frame())
plt.plot(alt_by_index.mean().to_frame())
plt.plot(alt_by_index.min().to_frame())

In [None]:
# Max, mean and min of the non-missing CO2_1_1_1 values that share an index label.
co2_obs = nointdf_2['CO2_1_1_1'].dropna()
co2_by_index = co2_obs.groupby(co2_obs.index)
plt.plot(co2_by_index.max().to_frame())
plt.plot(co2_by_index.mean().to_frame())
plt.plot(co2_by_index.min().to_frame())

In [None]:
# Max, mean and min of the non-missing FCH4_1 values that share an index label.
fch4_obs = nointdf_2['FCH4_1'].dropna()
fch4_by_index = fch4_obs.groupby(fch4_obs.index)
plt.plot(fch4_by_index.max().to_frame())
plt.plot(fch4_by_index.mean().to_frame())
plt.plot(fch4_by_index.min().to_frame())

In [None]:
#nointdf_2['ALT'].dropna()
plt.plot(nointdf_2['ALT'].dropna().values)
plt.plot(nointdf_2['ALT'].dropna().rolling(window=30).mean().values)

In [None]:
plt.plot(newdf['ALT'].dropna().values)
plt.plot(newdf['ALT'].dropna().rolling(window=30).mean().values)

In [None]:
plt.plot(nointdf_2['CO2_1_1_1'].dropna().values)
plt.plot(nointdf_2['CO2_1_1_1'].dropna().rolling(window=30).mean().values)

In [None]:
plt.plot(newdf['CO2_1_1_1'].dropna().values)
plt.plot(newdf['CO2_1_1_1'].dropna().rolling(window=30).mean().values)

In [None]:
plt.plot(nointdf_2['FCH4_1'].dropna().values)
plt.plot(nointdf_2['FCH4_1'].dropna().rolling(window=30).mean().values)

In [None]:
plt.plot(newdf['FCH4_1'].dropna().values)
plt.plot(newdf['FCH4_1'].dropna().rolling(window=30).mean().values)

In [None]:
#nointdf_2.index=pd.to_datetime(nointdf_2.index.values())
plt.plot(nointdf_2['ALT'].dropna())
plt.plot(nointdf_2['ALT'].dropna().rolling(window=3).mean())#.dropna().values

#### Create new dataframes for appending

In [None]:
newnoint=nointdf.copy()
newdf=df2.copy()

#### Initial ADF Testing

In [None]:
# newnoint['ALT'].hist() #left skew/kurtosis, non-Gaussian distribution (shifted lognormal)
# newnoint['CO2_1_1_1'].hist() #broken Gaussian-like distribution with gaps
# newnoint['FCH4_1'].hist() #broken Gaussian-like (constant) distribution with gaps and much of the data localized

In [None]:
# adfuller(nointdf_2['ALT'].values, autolag='AIC')
#ADF((newnoint['ALT'].dropna().values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)
# adfuller(newdf['ALT'].replace(-9999,np.nan).dropna().values, autolag='AIC')

##### Raw Dataset

In [None]:
ADF((newnoint['ALT'].dropna().values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)

In [None]:
ADF((newnoint['CO2_1_1_1'].dropna().values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)

In [None]:
ADF((newnoint['FCH4_1'].dropna().values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)

##### Gap-Filled Dataset

In [None]:
ADF((newdf['ALT'].values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)

In [None]:
ADF((newdf['CO2_1_1_1'].values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)

In [None]:
ADF((newdf['FCH4_1'].values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)

#### Initial KPSS Testing

In [None]:
#kpss((newnoint['ALT'].dropna()),regression='ct',nlags="auto")
#kpss((newnoint['ALT'].dropna()),regression='ct',nlags="auto")
#kpss((newnoint['ALT'].dropna()),regression='ct',nlags="auto")

##### Raw Dataset

In [None]:
# KPSS with regression='ct' tests H0: the series is trend-stationary.
kpss((newnoint['ALT'].dropna().values),regression='ct',nlags="auto")
# Recorded outcome: NON-STATIONARY. Under KPSS that means H0 was REJECTED
# (failing to reject would indicate stationarity) — TODO confirm against the p-value.

In [None]:
# KPSS with regression='ct' tests H0: the series is trend-stationary.
kpss((newnoint['CO2_1_1_1'].dropna().values),regression='ct',nlags="auto")
# Recorded outcome: NON-STATIONARY. Under KPSS that means H0 was REJECTED
# (failing to reject would indicate stationarity) — TODO confirm against the p-value.

In [None]:
# KPSS with regression='ct' tests H0: the series is trend-stationary.
kpss((newnoint['FCH4_1'].dropna().values),regression='ct',nlags="auto")
# Recorded outcome: NON-STATIONARY. Under KPSS that means H0 was REJECTED
# (failing to reject would indicate stationarity) — TODO confirm against the p-value.

##### Gap-Filled Dataset

In [None]:
# KPSS with regression='ct' tests H0: the series is trend-stationary.
kpss((newdf['ALT'].values),regression='ct',nlags="auto")
# Recorded outcome: NON-STATIONARY. Under KPSS that means H0 was REJECTED
# (failing to reject would indicate stationarity) — TODO confirm against the p-value.

In [None]:
# KPSS with regression='ct' tests H0: the series is trend-stationary.
kpss((newdf['CO2_1_1_1'].values),regression='ct',nlags="auto")
# Recorded outcome: NON-STATIONARY. Under KPSS that means H0 was REJECTED
# (failing to reject would indicate stationarity) — TODO confirm against the p-value.

In [None]:
# KPSS with regression='ct' tests H0: the series is trend-stationary.
kpss((newdf['FCH4_1'].values),regression='ct',nlags="auto")
# Recorded outcome: NON-STATIONARY. Under KPSS that means H0 was REJECTED
# (failing to reject would indicate stationarity) — TODO confirm against the p-value.

In [None]:
# plt.plot(np.log(newnoint['ALT'].resample('D').mean().dropna()))
# plt.plot(np.log(newnoint['ALT'].dropna()),alpha=0.4)
# plt.plot(np.log(newnoint['ALT'].dropna()).diff(),alpha=0.4)

#### Create new dataframes for difference stationarity appending

In [None]:
newnoint2=nointdf.copy()
newdf2=newdf.copy()
# newnoint2.drop_duplicates(inplace=True)
# newdf2.drop_duplicates(inplace=True)

#### Differencing, 64 time steps

##### Raw Dataset

In [None]:
#RAW DATASET (NEWNOINT): TIME SERIES IS DIFFERENCE STATIONARY
# Raw dataset: for every column currently in newnoint2, append a "<name>_diff"
# column holding the row-to-row difference of the same column in newnoint.
# Missing values are left as they are (no fill).
# NOTE(review): the section heading mentions 64 time steps, but the difference
# taken here uses a lag of one row — confirm which is intended.
cols = newnoint2.columns.tolist()
for col in cols:
    newnoint2[col + '_diff'] = newnoint[col].diff(periods=1)

##### Gap-Filled Dataset

In [None]:
#GAP-FILLED DATASET (NEWDF): TIME SERIES IS DIFFERENCE STATIONARY
# Gap-filled dataset: for every column currently in newdf2, append a "<name>_diff"
# column holding the row-to-row difference of the same column in newdf.
# The leading NaN created by diff() is back-filled, then any remaining NaN is
# forward-filled. .bfill()/.ffill() replace the deprecated fillna(method=...)
# spelling and produce the same values.
cols=newdf2.columns.tolist()
for col in cols:
    newdf2[col+'_diff'] = newdf[col].diff().bfill().ffill()

#### ADF Testing

##### Raw Dataset

In [None]:
ADF((newnoint2['ALT_diff'].dropna().values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)

In [None]:
ADF((newnoint2['CO2_1_1_1_diff'].dropna().values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)

In [None]:
ADF((newnoint2['FCH4_1_diff'].dropna().values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)

##### Gap-Filled Dataset

In [None]:
ADF((newdf2['ALT_diff'].values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)

In [None]:
ADF((newdf2['CO2_1_1_1_diff'].values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)

In [None]:
ADF((newdf2['FCH4_1_diff'].values),method='aic',low_memory=True)
#REJECT NULL (STATIONARY)

#### KPSS Testing

##### Raw Dataset

In [None]:
# KPSS with regression='ct' tests H0: the series is trend-stationary.
kpss((newnoint2['ALT_diff'].dropna().values),regression='ct',nlags="auto")
# Recorded outcome: STATIONARY. Under KPSS that means H0 was NOT rejected
# (rejecting H0 would indicate non-stationarity) — TODO confirm against the p-value.

In [None]:
# KPSS with regression='ct' tests H0: the series is trend-stationary.
kpss((newnoint2['CO2_1_1_1_diff'].dropna().values),regression='ct',nlags="auto")
# Recorded outcome: STATIONARY. Under KPSS that means H0 was NOT rejected
# (rejecting H0 would indicate non-stationarity) — TODO confirm against the p-value.

In [None]:
# KPSS with regression='ct' tests H0: the series is trend-stationary.
kpss((newnoint2['FCH4_1_diff'].dropna().values),regression='ct',nlags="auto")
# Recorded outcome: STATIONARY. Under KPSS that means H0 was NOT rejected
# (rejecting H0 would indicate non-stationarity) — TODO confirm against the p-value.

##### Gap-Filled Dataset

In [None]:
# KPSS with regression='ct' tests H0: the series is trend-stationary.
kpss((newdf2['ALT_diff'].dropna().values),regression='ct',nlags="auto")
# Recorded outcome: STATIONARY. Under KPSS that means H0 was NOT rejected
# (rejecting H0 would indicate non-stationarity) — TODO confirm against the p-value.

In [None]:
# KPSS with regression='ct' tests H0: the series is trend-stationary.
kpss((newdf2['CO2_1_1_1_diff'].dropna().values),regression='ct',nlags="auto")
# Recorded outcome: STATIONARY. Under KPSS that means H0 was NOT rejected
# (rejecting H0 would indicate non-stationarity) — TODO confirm against the p-value.

In [None]:
# KPSS with regression='ct' tests H0: the series is trend-stationary.
kpss((newdf2['FCH4_1_diff'].dropna().values),regression='ct',nlags="auto")
# Recorded outcome: STATIONARY. Under KPSS that means H0 was NOT rejected
# (rejecting H0 would indicate non-stationarity) — TODO confirm against the p-value.

In [None]:
#dfdet=signal.detrend(newdf2['ALT_diff'].dropna().values, type='constant')

In [None]:
# #TIME SERIES IS DIFFERENCE STATIONARY
# nointdf_2=nointdf.copy()
# nointdf_2['ALT_diff']=nointdf_2['ALT'].diff()
# nointdf_2['ALT_shif']=nointdf_2['ALT'].shift()

# plt.plot(nointdf_2['ALT'].dropna())
# plt.plot(nointdf_2['ALT_diff'].dropna())
# plt.plot(nointdf_2['ALT_shif'].dropna())
# #plt.plot(np.log(nointdf['ALT'].dropna()))

# dfnewdf=nointdf_2.copy()
# newdf_3=newdf.copy()
# #nointdf_2['ALT'].dropna()
# #nointdf_2['ALT'].dropna().rolling(window=30).mean().dropna()#.fillna(method='bfill').fillna(method='ffill')
# dfnewdf['ALT_diff'] = nointdf_2['ALT'].diff(30).fillna(method='bfill').fillna(method='ffill')
# newdf_3['ALT_diff'] = newdf['ALT'].diff(30).fillna(method='bfill').fillna(method='ffill')

### Archived

In [None]:
plt.plot(newdf4['ALT'].replace(0,np.nan).dropna().values)

In [None]:
plt.plot(newdf.diff(64)['ALT'].dropna().values)

In [None]:
plt.plot(newdf.diff(64)['CO2_1_1_1']['2003':'2021'].dropna().values)

In [None]:
plt.plot(newdf.diff(64)['FCH4_1']['2011':].dropna().values)

In [None]:
#ADF((newdf3.loc[:,[('ALT', 't+1')]]),method='aic',low_memory=True)

In [None]:
#ADF((nointdf3.loc[:,[('ALT', 't+1')]].replace(-9999,np.nan).dropna()),method='aic',low_memory=True)

In [None]:
ADF((dfnewdf[('ALT', 't+1')].replace(-9999,np.nan).dropna()),method='aic',low_memory=True)

In [None]:
ADF((newdf_3['ALT_diff'].replace(-9999,np.nan).dropna()),method='aic',low_memory=True)

In [None]:
kpss((dfnewdf['ALT_diff'].replace(-9999,np.nan).dropna()),regression='ct',nlags="auto")

In [None]:
kpss((newdf_3['ALT_diff'].replace(-9999,np.nan).dropna()),regression='ct',nlags="auto")

In [None]:
plt.plot(newdf_3['ALT_diff'].values)

In [None]:
ADF((dfnewdf['CO2_1_1_1'].replace(-9999,np.nan).dropna()),method='aic',low_memory=True)

In [None]:
ADF((newdf_3['CO2_1_1_1'].replace(-9999,np.nan).dropna()),method='aic',low_memory=True)

In [None]:
kpss((dfnewdf['CO2_1_1_1'].replace(-9999,np.nan).dropna()),regression='ct',nlags="auto")

In [None]:
kpss((newdf_3['CO2_1_1_1'].replace(-9999,np.nan).dropna()),regression='ct',nlags="auto")

In [None]:
# 30-row difference of CO2_1_1_1 for the raw (nointdf_2) and gap-filled (newdf)
# frames. NaNs are back-filled, then forward-filled.
# .bfill()/.ffill() replace the deprecated fillna(method=...) spelling and
# produce the same values.
dfnewdf['CO2_diff'] = nointdf_2['CO2_1_1_1'].diff(30).bfill().ffill()
newdf_3['CO2_diff'] = newdf['CO2_1_1_1'].diff(30).bfill().ffill()

In [None]:
ADF((dfnewdf['CO2_diff'].replace(-9999,np.nan).dropna()),method='aic',low_memory=True)

In [None]:
ADF((newdf_3['CO2_diff'].replace(-9999,np.nan).dropna()),method='aic',low_memory=True)

In [None]:
kpss((dfnewdf['CO2_diff'].replace(-9999,np.nan).dropna()),regression='ct',nlags="auto")

In [None]:
kpss((newdf_3['CO2_diff'].replace(-9999,np.nan).dropna()),regression='ct',nlags="auto")

In [None]:
plt.plot(dups4_2['CO2_diff'].values)

In [None]:
ADF((dfnewdf['FCH4_1'].replace(-9999,np.nan).dropna()),method='aic',low_memory=True)

In [None]:
ADF((newdf_3['FCH4_1'].replace(-9999,np.nan).dropna()),method='aic',low_memory=True)

In [None]:
kpss((dfnewdf['FCH4_1'].replace(-9999,np.nan).dropna()),regression='ct',nlags="auto")

In [None]:
kpss((newdf_3['FCH4_1'].replace(-9999,np.nan).dropna()),regression='ct',nlags="auto")

In [None]:
# 30-row difference of FCH4_1 for the raw (nointdf_2) and gap-filled (newdf)
# frames. NaNs are back-filled, then forward-filled.
# .bfill()/.ffill() replace the deprecated fillna(method=...) spelling and
# produce the same values.
dfnewdf['CH4_diff'] = nointdf_2['FCH4_1'].diff(30).bfill().ffill()
newdf_3['CH4_diff'] = newdf['FCH4_1'].diff(30).bfill().ffill()

In [None]:
ADF((dfnewdf['CH4_diff'].replace(-9999,np.nan).dropna()),method='aic',low_memory=True)

In [None]:
ADF((newdf_3['CH4_diff'].replace(-9999,np.nan).dropna()),method='aic',low_memory=True)

In [None]:
kpss((dfnewdf['CH4_diff'].replace(-9999,np.nan).dropna()),regression='ct',nlags="auto")

In [None]:
kpss((newdf_3['CH4_diff'].replace(-9999,np.nan).dropna()),regression='ct',nlags="auto")

In [None]:
plt.plot(newdf_3['CH4_diff'].values)

In [None]:
dfnew=newdf_3

##### ALT

In [None]:
# Creating a data structure with 72 timestamps and 1 output
# Build supervised-learning windows from training_set_scaled:
#   X_train[k] = the n_past rows preceding position `end`, all dataset_train columns
#   y_train[k] = column 0 of the row n_future steps after the window's last row
n_future = 5   # forecast horizon in rows (the original note describes these as days)
n_past = 49    # look-back window length in rows
feature_count = dataset_train.shape[1]
window_ends = range(n_past, len(training_set_scaled) - n_future + 1)
X_train = np.array(
    [training_set_scaled[end - n_past:end, 0:feature_count] for end in window_ends]
)
y_train = np.array(
    [training_set_scaled[end + n_future - 1:end + n_future, 0] for end in window_ends]
)
print('X_train shape == {}.'.format(X_train.shape))
print('y_train shape == {}.'.format(y_train.shape))

In [None]:
dups4_2

In [None]:
plt.plot(dups4_2['ALT'].values)

In [None]:
plt.plot(dups4_2['ALT_diff'].values)

In [None]:
inputs = keras.Input(shape=(trainXscaltref.shape[1],trainXscaltref.shape[2]))#, X_train_reframed_sup.shape[2]))#, X_train_reframed_sup.shape[3]))
inputs.shape

In [None]:
trainXscaltref.shape

In [None]:
inputs

##### CH4

In [None]:
# trainXch4arr=pd.DataFrame(trainXch4[-365:,:,:,:,0].flatten()).dropna().to_numpy().flatten() #Last year of training
# validXch4arr=pd.DataFrame(validXch4[-365:,:,:,:,0].flatten()).dropna().to_numpy().flatten() #Last year of validation
# testXch4arr=pd.DataFrame(testXch4[:,:,:,:,0].flatten()).dropna().to_numpy().flatten() #Last year of testing

In [None]:
Xch4train=trainXch4; Xch4valid=validXch4; Xch4test=testXch4

In [None]:
trainych4.shape

In [None]:
# Reduce each CH4 target array to 1-D by taking element [0, 0, 0, 0] of every
# sample, capped at 3285 / 1095 / 365 samples for train / validation / test.
# reshape(-1) replaces the hard-coded lengths: the slice is already 1-D, and the
# fixed-size reshape raised whenever a split held fewer samples than its cap.
ych4train = trainych4[0:3285, 0, 0, 0, 0].reshape(-1)
ych4valid = validych4[0:1095, 0, 0, 0, 0].reshape(-1)
ych4test = testych4[0:365, 0, 0, 0, 0].reshape(-1)

In [None]:
Xch4train.shape,ych4train.shape

In [None]:
# Keras Input takes the per-sample shape, i.e. without the leading sample axis.
# The previous tuple started at Xch4train.shape[0] (the number of samples), unlike
# the ALT and CO2 cells, which start at shape[1].
# NOTE(review): this assumes axis 0 of Xch4train is the sample axis and that all
# remaining axes belong in the model input — TODO confirm against the model definition.
inputs = keras.Input(shape=Xch4train.shape[1:])

In [None]:
inputs.shape

In [None]:
#inp = layers.Input(shape=(None, *trainXch4.shape[2:]))

In [None]:
# # We will construct 3 `ConvLSTM2D` layers with batch normalization,
# # followed by a `Conv3D` layer for the spatiotemporal outputs.
# x = layers.ConvLSTM2D(
#     filters=64,
#     kernel_size=(5, 5),
#     padding="same",
#     return_sequences=True,
#     activation="relu",
# )(inp)
# x = layers.BatchNormalization()(x)
# x = layers.ConvLSTM2D(
#     filters=64,
#     kernel_size=(3, 3),
#     padding="same",
#     return_sequences=True,
#     activation="relu",
# )(x)
# x = layers.BatchNormalization()(x)
# x = layers.ConvLSTM2D(
#     filters=64,
#     kernel_size=(1, 1),
#     padding="same",
#     return_sequences=True,
#     activation="relu",
# )(x)
# x = layers.Conv3D(
#     filters=1, kernel_size=(3, 3, 3), activation="sigmoid", padding="same"
# )(x)

# # Next, we will build the complete model and compile it.
# model = keras.models.Model(inp, x)
# model.compile(
#     loss=keras.losses.binary_crossentropy, optimizer=tf.keras.optimizers.legacy.Adam(),
# )

In [None]:
# # Define some callbacks to improve training.
# early_stopping = keras.callbacks.EarlyStopping(monitor="val_loss", patience=10)
# reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=5)

# # Define modifiable training hyperparameters.
# epochs = 20
# batch_size = 5

# # Fit the model to the training data.
# model.fit(
#     x_train,
#     y_train,
#     batch_size=batch_size,
#     epochs=epochs,
#     validation_data=(x_val, y_val),
#     callbacks=[early_stopping, reduce_lr],
# )

##### CO2

In [None]:
inputs = keras.Input(shape=(trainXscco2ref.shape[1],trainXscco2ref.shape[2]))#, X_train_reframed_sup.shape[2]))#, X_train_reframed_sup.shape[3]))
inputs.shape

In [None]:
trainXscco2ref.shape

In [None]:
inputs

##### Archived

In [None]:
#adfuller(newdata['FCH4_1']['2011':'2022'],autolag='AIC')

adf=ADF(newdata['FCH4_1']['2011':'2022'],method='aic',low_memory=True)
print(adf.summary().as_text())
print(adf)

In [None]:
# (-4.620970051848073,
#  0.0001181491350146937,
#  146,
#  2136124,
#  {'1%': -3.4303530612960005,
#   '5%': -2.861541353059084,
#   '10%': -2.566770720183526},
#  16204075.623475585)

# Reject Null (p<0.05), Stationary with significance level less than 1%

In [None]:
kpss(newdata['FCH4_1']['2011':'2022'],regression='ct',nlags="auto")

In [None]:
# (3.005297413155853,
#  0.01,
#  804,
#  {'10%': 0.119, '5%': 0.146, '2.5%': 0.176, '1%': 0.216})

# Reject Null (p<0.05), Non-Stationary

In [None]:
#CH4
#ADF Stationary, KPSS Non-Stationary --> Difference Stationary
# First difference of FCH4_1. diff() leaves a NaN in the first row; back-fill
# it, then forward-fill anything still missing. Uses .bfill()/.ffill() because
# fillna(method=...) is deprecated in recent pandas.
newdata['CH4_diff'] = newdata['FCH4_1'].diff().bfill().ffill()
#newdata['ALT_diff'].dropna().plot(figsize=(12, 8))
newdata

In [None]:
adfuller(newdata['CH4_diff']['2011':],autolag='AIC')

In [None]:
# (-136.37684055825162,
#  0.0,
#  146,
#  2136124,
#  {'1%': -3.4303530612960005,
#   '5%': -2.861541353059084,
#   '10%': -2.566770720183526},
#  16204081.553529952)

# Reject Null (p<0.05), Stationary with significance level less than 1%

In [None]:
kpss(newdata['CH4_diff']['2011':],regression='ct',nlags="auto")

In [None]:
# (0.013679439557372875,
#  0.1,
#  3857,
#  {'10%': 0.119, '5%': 0.146, '2.5%': 0.176, '1%': 0.216})

# Fail to Reject Null (p>0.05), Stationary

In [None]:
ch4diff=pd.DataFrame(newdata['CH4_diff'])#['2011':])

In [None]:
plt.plot(newdata['FCH4_1']['2011':].values)
plt.plot(newdata['CH4_diff']['2011':].values)

In [None]:
plt.figure(figsize=(8,5))
plt.plot(newdf.iloc[:,0].values, label='ALT_diff')
plt.plot(newdf.iloc[:,1].values, label='CO2_diff')
plt.plot(newdf.iloc[:,2].values, label='CH4_diff')
plt.legend(loc='lower left')
plt.axis('tight')
plt.show()

In [None]:
plt.figure(figsize=(8,5))
plt.plot(newdata['ALT'].values, label='ALT')
plt.plot(newdata['CO2_1_1_1'].values, label='CO2')
plt.plot(newdata['FCH4_1'].values, label='CH4')
plt.legend(loc='lower left')
plt.axis('tight')
plt.show()

In [None]:
# altdiff=altdiff
# co2diff=co2diff['2003':'2021']
# ch4diff=ch4diff['2011':]

In [None]:
altdiff.shape, ch4diff.shape, co2diff.shape

In [None]:
newdata.shape

In [None]:
# Chronological split of newdf2 by index label (end labels are inclusive).
# Original note: 38, 11, 5 (presumably the number of years per split — TODO confirm).
train = newdf2.loc[:'2006']
valid = newdf2.loc['2007':'2017']
test = newdf2.loc['2018':]

In [None]:
newalt=pd.DataFrame(newdf.iloc[:,0].dropna())
newco2=pd.DataFrame(newdf.iloc[:,1].dropna())
newch4=pd.DataFrame(newdf.iloc[:,2].dropna())

In [None]:
newalt.shape == newco2.shape == newch4.shape

In [None]:
# Select columns (features) to be involved in training and predictions
dataset_train = newdf2.reset_index()
cols = list(dataset_train)
# Extract dates (will be used in visualization). Read them from the original
# index: after reset_index() `dataset_train.index` is only 0..n-1.
datelist_train = list(newdf2.index)
# NOTE(review): reset_index() moves the former index into the columns, so
# `training_set` contains it alongside the features — confirm this is intended.
training_set = dataset_train.values

In [None]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
# Scaler fitted on (and applied to) the full training matrix.
sc = StandardScaler()
training_set_scaled = sc.fit_transform(training_set)
# Second scaler fitted on column 0 only. Only the fitted state is needed here,
# so call fit() rather than fit_transform(), whose output was discarded.
sc_predict = StandardScaler()
sc_predict.fit(training_set[:, 0:1])

In [None]:
#IN SITU
print(trainXscaltref.shape, type(trainXscaltref), trainXscch4ref.shape, type(trainXscch4ref), trainXscco2ref.shape, type(trainXscco2ref))
print(trainyscaltref.shape, type(trainyscaltref), trainyscch4ref.shape, type(trainyscch4ref), trainyscco2ref.shape, type(trainyscco2ref))

###### CH4

In [None]:
#adfuller(newdata['FCH4_1']['2011':'2022'],autolag='AIC')

adf=ADF(newdata['FCH4_1']['2011':],method='aic',low_memory=True)
print(adf.summary().as_text())
print(adf)

In [None]:
# (-4.620970051848073,
#  0.0001181491350146937,
#  146,
#  2136124,
#  {'1%': -3.4303530612960005,
#   '5%': -2.861541353059084,
#   '10%': -2.566770720183526},
#  16204075.623475585)

# Reject Null (p<0.05), Stationary with significance level less than 1%

In [None]:
kpss(newdata['FCH4_1']['2011':],regression='ct',nlags="auto")

In [None]:
# (3.005297413155853,
#  0.01,
#  804,
#  {'10%': 0.119, '5%': 0.146, '2.5%': 0.176, '1%': 0.216})

# Reject Null (p<0.05), Non-Stationary

In [None]:
#CH4
#ADF Stationary, KPSS Non-Stationary --> Difference Stationary
# First difference of FCH4_1. diff() leaves a NaN in the first row; back-fill
# it, then forward-fill anything still missing. Uses .bfill()/.ffill() because
# fillna(method=...) is deprecated in recent pandas.
newdata['CH4_diff'] = newdata['FCH4_1'].diff().bfill().ffill()
#newdata['ALT_diff'].dropna().plot(figsize=(12, 8))
newdata

In [None]:
adfuller(newdata['CH4_diff']['2011':],autolag='AIC')

In [None]:
# (-136.37684055825162,
#  0.0,
#  146,
#  2136124,
#  {'1%': -3.4303530612960005,
#   '5%': -2.861541353059084,
#   '10%': -2.566770720183526},
#  16204081.553529952)

# Reject Null (p<0.05), Stationary with significance level less than 1%

In [None]:
kpss(newdata['CH4_diff']['2011':],regression='ct',nlags="auto")

In [None]:
# (0.013679439557372875,
#  0.1,
#  3857,
#  {'10%': 0.119, '5%': 0.146, '2.5%': 0.176, '1%': 0.216})

# Fail to Reject Null (p>0.05), Stationary

In [None]:
plt.plot(newdata['FCH4_1']['2011':].values)
plt.plot(newdata['CH4_diff']['2011':].values)

###### ALT

In [None]:
# #adfuller(newdata['ALT'],autolag='AIC')
# adf=ADF(newdata['ALT'],method='aic',low_memory=True)
# print(adf.summary().as_text())

In [None]:
# (-11.716346804997753,
#  1.448176932250584e-21,
#  150,
#  2441089,
#  {'1%': -3.430352678848201,
#   '5%': -2.861541184021449,
#   '10%': -2.5667706302110043},
#  11885640.7931901)

# REJECT NULL HYPOTHESIS (P<0.05); LARGE NEGATIVE TEST STATISTIC FURTHER SUPPORTS CONCLUSION OF REJECTING NULL HYPOTHESIS WITH SIGNIFICANCE LEVEL LESS THAN 1% (LOW PROBABILITY
# THAT RESULT IS A STATISTICAL FLUKE)
##==>  SERIES IS STATIONARY WITHOUT UNIT ROOT; NO TIME-DEPENDENT STRUCTURE (DOES NOT NEED TO BE DIFFERENCED)

In [None]:
# #kpss(newdata['ALT'],nlags="auto")
# KPSS(newdata['ALT'], trend="ct")

In [None]:
# (22.16700015677629,
#  0.01,
#  860,
#  {'10%': 0.119, '5%': 0.146, '2.5%': 0.176, '1%': 0.216})

# Reject Null (p<0.05), Non-Stationary

In [None]:
#ALT
#ADF Stationary, KPSS Non-Stationary --> Difference Stationary
#newdata['ALT_diff'] = newdata['ALT'].diff().fillna(method='bfill').fillna(method='ffill')
#newdata['ALT_diff'].dropna().plot(figsize=(12, 8))

In [None]:
adfuller(newdata['ALT_diff'],autolag='AIC')

In [None]:
# (-167.13383560303836,
#  0.0,
#  150,
#  2441089,
#  {'1%': -3.430352678848201,
#   '5%': -2.861541184021449,
#   '10%': -2.5667706302110043},
#  11885772.46788086)

In [None]:
adf=ADF(newdata['ALT_diff'],method='aic',low_memory=True)
print(adf.summary().as_text())

In [None]:
#kpss(newdata['ALT'],nlags="auto")
#kpss(newdata['ALT_diff'],nlags="auto")
#kpss(newdata['ALT_diff'],regression='ct',nlags="auto")

#KPSS(newdata['ALT'])
#KPSS(newdata['ALT'],trend='ct')
#KPSS(newdata['ALT_diff'])
KPSS(newdata['ALT_diff'], trend="ct")
#print(kpss.summary().as_text())

In [None]:
# (0.007076816839760557,
#  0.1,
#  2285,
#  {'10%': 0.119, '5%': 0.146, '2.5%': 0.176, '1%': 0.216})

# Fail to Reject Null (p>0.05), Stationary

In [None]:
plt.plot(newdata['ALT'])
plt.plot(newdata['ALT_diff'])

###### CO2

In [None]:
#adfuller(newdata['CO2_1_1_1']['2003':'2021'].values,autolag='AIC')
#adfuller(newdata['CO2_1_1_1']['2003':'2021'],autolag='AIC',regression='ct')
#adfuller(newdata['CO2_1_1_1'].round(1),autolag='AIC')

adf=ADF(newdata['CO2_1_1_1']['2003':'2021'],method='aic',low_memory=True)
print(adf.summary().as_text())

In [None]:
#    Augmented Dickey-Fuller Results   
# =====================================
# Test Statistic                -12.567
# P-value                         0.000
# Lags                              149
# -------------------------------------

# Trend: Constant
# Critical Values: -3.43 (1%), -2.86 (5%), -2.57 (10%)
# Null Hypothesis: The process contains a unit root.
# Alternative Hypothesis: The process is weakly stationary.

# # Reject Null (p<0.05), Stationary with significance level less than 1%

In [None]:
#kpss(newdata['CO2_1_1_1']['2003':'2021'],regression='ct',nlags="auto")
KPSS(newdata['CO2_1_1_1']['2003':'2021'],trend='ct')

In [None]:
# (10.064566631105404,
#  0.01,
#  852,
#  {'10%': 0.119, '5%': 0.146, '2.5%': 0.176, '1%': 0.216})

# Reject Null (p<0.05), Non-Stationary

In [None]:
#CO2
#ADF Stationary, KPSS Non-Stationary --> Difference Stationary
# First difference of CO2_1_1_1. diff() leaves a NaN in the first row;
# back-fill it, then forward-fill anything still missing. Uses
# .bfill()/.ffill() because fillna(method=...) is deprecated in recent pandas.
newdata['CO2_diff'] = newdata['CO2_1_1_1'].diff().bfill().ffill()
#newdata['ALT_diff'].dropna().plot(figsize=(12, 8))

In [None]:
#adfuller(newdata['CO2_diff']['2003':'2021'],autolag='AIC')

adf=ADF(newdata['CO2_diff']['2003':'2021'],method='aic',low_memory=True)
print(adf.summary().as_text())

In [None]:
#    Augmented Dickey-Fuller Results   
# =====================================
# Test Statistic               -153.211
# P-value                         0.000
# Lags                              149
# -------------------------------------

# Trend: Constant
# Critical Values: -3.43 (1%), -2.86 (5%), -2.57 (10%)
# Null Hypothesis: The process contains a unit root.
# Alternative Hypothesis: The process is weakly stationary.

# Reject Null (p<0.05), Stationary with significance level less than 1%

In [None]:
#kpss(newdata['CO2_diff']['2003':'2021'],regression='ct',nlags="auto")
KPSS(newdata['CO2_diff']['2003':'2021'],trend='ct')

In [None]:
# (0.003048804712509359,
#  0.1,
#  4378,
#  {'10%': 0.119, '5%': 0.146, '2.5%': 0.176, '1%': 0.216})

# Fail to Reject Null (p>0.05), Stationary

In [None]:
plt.plot(newdata['CO2_1_1_1']['2003':'2021'].values)
plt.plot(newdata['CO2_diff']['2003':'2021'].values)

# Step 6: Framing Problem(s), Part I | Supervision

In [None]:
#newdf2, newnoint2
#plt.plot(newnoint2.ALT_diff.dropna().values)
#plt.plot(newnoint2.ALT.dropna().values)
#plt.plot(newdf2.ALT.dropna().values)
#plt.plot(newdf2.ALT_diff.dropna().values)

### Isolating the differenced columns of the raw and gap-filled datasets

In [None]:
# Keep only the last 56 columns of each frame.
# NOTE(review): presumably these are the differenced columns of the raw
# (newnoint2) and gap-filled (newdf2) data — confirm the count of 56.
nidiff=newnoint2.iloc[:,-56:]
gfdiff=newdf2.iloc[:,-56:]

### Frame problem into supervision

In [None]:
# Reframe the gap-filled differenced data as a supervised-learning table and
# order the rows by index.
# NOTE(review): the positional arguments 3 and 2 are presumably the lag count
# and the number of forecast steps — confirm against series_to_supervised.
dfsup = series_to_supervised(gfdiff, 3, 2).sort_index()

In [None]:
#dfsup

### ALT (Framing ALT with lookback and forecast)

In [None]:
# reframed_alt2=reframed_alt.drop(reframed_alt.loc[:,[('ALT_diff', 't+1')]],axis=1)
# reframed_alt2=pd.concat([reframed_alt2,reframed_alt.loc[:,[('ALT_diff', 't+1')]]],axis=1)
# reframed_alt.drop(reframed_alt.iloc[:,-91:-1],axis=1, inplace=True)

In [None]:
#dfsup.iloc[:,:-1]
#dfsup.iloc[:,-1:]
# Move the ('ALT_diff', 't+1') column to the last position so that features
# and target can be separated positionally afterwards.
alt = pd.concat(
    [
        dfsup.drop(columns=[('ALT_diff', 't+1')]),
        dfsup.loc[:, [('ALT_diff', 't+1')]],
    ],
    axis=1,
)

In [None]:
Xalt=alt.iloc[:,:-1] #features
yalt=pd.DataFrame(alt.iloc[:,-1:]) #target

In [None]:
dfsup.shape, Xalt.shape, yalt.shape

### CH4 (Framing CH4 with lookback and forecast)

In [None]:
# reframed_ch42=reframed_alt.drop(reframed_alt.loc[:,[('CH4_diff', 't+1')]],axis=1)
# reframed_ch42=pd.concat([reframed_ch42,reframed_alt.loc[:,[('CH4_diff', 't+1')]]],axis=1)

# reframed_ch4.drop(reframed_ch4.iloc[:,-91:-78],axis=1, inplace=True)
# reframed_ch4.drop(reframed_ch4.iloc[:,-66:],axis=1, inplace=True)
# reframed_ch4.drop(reframed_ch4.iloc[:,-12:-6],axis=1, inplace=True)
# reframed_ch4.drop(reframed_ch4.iloc[:,-5:],axis=1, inplace=True)

In [None]:
#dfsup.iloc[:,:-46]
#dfsup.iloc[:,-45:]
# Move the ('FCH4_1_diff', 't+1') column to the last position so that features
# and target can be separated positionally afterwards.
ch4 = pd.concat(
    [
        dfsup.drop(columns=[('FCH4_1_diff', 't+1')]),
        dfsup.loc[:, [('FCH4_1_diff', 't+1')]],
    ],
    axis=1,
)

In [None]:
Xch4=ch4.iloc[:,:-1] #features
ych4=pd.DataFrame(ch4.iloc[:,-1:]) #target

In [None]:
ch4.shape, Xch4.shape, ych4.shape

### CO2 (Framing CO2 with lookback and forecast)

In [None]:
# reframed_co22=reframed_alt.drop(reframed_alt.loc[:,[('CO2_diff', 't+1')]],axis=1)
# reframed_co22=pd.concat([reframed_co22,reframed_alt.loc[:,[('CO2_diff', 't+1')]]],axis=1)

# reframed_co2.drop(reframed_co2.iloc[:,-91:-87],axis=1,inplace=True)
# reframed_co2.drop(reframed_co2.iloc[:,-78:],axis=1,inplace=True)
# reframed_co2.drop(reframed_co2.iloc[:,-9:-6],axis=1,inplace=True)
# reframed_co2.drop(reframed_co2.iloc[:,-5:],axis=1,inplace=True)

In [None]:
#dfsup.iloc[:,:-54]
#dfsup.iloc[:,-53:]
# Move the ('CO2_1_1_1_diff', 't+1') column to the last position so that
# features and target can be separated positionally afterwards.
co2 = pd.concat(
    [
        dfsup.drop(columns=[('CO2_1_1_1_diff', 't+1')]),
        dfsup.loc[:, [('CO2_1_1_1_diff', 't+1')]],
    ],
    axis=1,
)

In [None]:
Xco2=co2.iloc[:,:-1] #features
yco2=pd.DataFrame(co2.iloc[:,-1:]) #target

In [None]:
co2.shape, Xco2.shape, yco2.shape

#### Archive (Saving sets and playground area)

In [None]:
# # df1=dfnew.iloc[:,:17];
# # df2=dfnew.iloc[:,29:];
# # df1=df.iloc[:,:15];
# # df2=df.iloc[:,16:];
# df1=reframed.iloc[:,:19];
# df2=reframed.iloc[:,20:];
# df3=pd.concat([df1,df2],axis=1);

# Xch4=df3 #features
# ych4=pd.DataFrame(reframed.loc[:,'CH4_1_1_1'])  #target

#//

# df1=df.iloc[:,:10];
# df2=df.iloc[:,11:];
# df3=pd.concat([df1,df2],axis=1);

# Xco2=df3 #features
# yco2=pd.DataFrame(df.loc[:,'CO2_1_1_1'])  #target

##### Save

In [None]:
# Persist Xalt as a pickle file.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
file = '/Users/bradleygay/Downloads/Xalt.pkl'
with open(file, mode='wb') as f:
    pickle.dump(Xalt, f)

In [None]:
# Persist yalt as a pickle file.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
file = '/Users/bradleygay/Downloads/yalt.pkl'
with open(file, mode='wb') as f:
    pickle.dump(yalt, f)

In [None]:
# Persist Xch4 as a pickle file.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
file = '/Users/bradleygay/Downloads/Xch4.pkl'
with open(file, mode='wb') as f:
    pickle.dump(Xch4, f)

In [None]:
# Persist ych4 as a pickle file.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
file = '/Users/bradleygay/Downloads/ych4.pkl'
with open(file, mode='wb') as f:
    pickle.dump(ych4, f)

In [None]:
# Persist Xco2 as a pickle file.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
file = '/Users/bradleygay/Downloads/Xco2.pkl'
with open(file, mode='wb') as f:
    pickle.dump(Xco2, f)

In [None]:
# Persist yco2 as a pickle file.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
file = '/Users/bradleygay/Downloads/yco2.pkl'
with open(file, mode='wb') as f:
    pickle.dump(yco2, f)

##### Continue

In [None]:
#Xalt.shape, Xch4.shape, Xco2.shape
#yalt.shape, ych4.shape, yco2.shape
#ych4.min()
#X, y = Xalt, yalt

In [None]:
# X=X.replace(-9999,np.nan).fillna(method='bfill').fillna(method='ffill')
# y=y.replace(-9999,np.nan).fillna(method='bfill').fillna(method='ffill')

In [None]:
#1969-2018:1772964/2441246#0.7262537245324724
#1969-2019:2043752/2441246#0.8371757700780667
#1969-2020:2226109/2441246#0.9118741003569488

# from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False,
#                                    test_size=0.20, random_state=40)
# X_train=X.loc[:'2019',:]; y_train=y.loc[:'2019'];
# X_test=X.loc['2020':,:]; y_test=y.loc['2020':];

In [None]:
# X_train=X.loc[:'2018',:]; y_train=y.loc[:'2018'];
# X_valid=X.loc['2019':'2020',:]; y_valid=y.loc['2019':'2020'];
# X_test=X.loc['2021':,:]; y_test=y.loc['2021':];

In [None]:
Xalt.shape, Xch4.shape, Xco2.shape

In [None]:
yalt.shape, ych4.shape, yco2.shape

In [None]:
ych4.min()

In [None]:
X, y = Xalt, yalt

##### Train/Test

In [None]:
#train, valid, test = reframed_alt.loc['1969':'2018',:],reframed_alt.loc['2019':'2020', :], reframed_alt.loc['2021':, :]

In [None]:
#ALT
# Chronological ALT split: 1969-2018 train, 2019-2020 validation, 2021-2022 test.
# Features are sliced from Xalt and targets from yalt, matching the CH4 and CO2
# splits. Previously X and y were both sliced from the same `reframed_alt`
# frame, so each "target" was identical to its feature matrix.
X_train, y_train = Xalt.loc['1969':'2018', :], yalt.loc['1969':'2018', :]
X_valid, y_valid = Xalt.loc['2019':'2020', :], yalt.loc['2019':'2020', :]
X_test, y_test = Xalt.loc['2021':'2022', :], yalt.loc['2021':'2022', :]
#never outputs last mentioned key 
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)#, random_state=1)
# X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.22)#, random_state=1) # 0.25 x 0.8 = 0.2

In [None]:
#CH4
# Chronological CH4 split: 2003-2018 train, 2019-2020 validation, 2021 onward test.
X_trainch4, y_trainch4 = Xch4.loc['2003':'2018', :], ych4.loc['2003':'2018', :]
X_validch4, y_validch4 = Xch4.loc['2019':'2020', :], ych4.loc['2019':'2020', :]
X_testch4, y_testch4 = Xch4.loc['2021':, :], ych4.loc['2021':, :]
# X_trainch4, X_testch4, y_trainch4, y_testch4 = train_test_split(Xch4, ych4, test_size=0.1)#, random_state=1)
# X_trainch4, X_validch4, y_trainch4, y_validch4 = train_test_split(X_trainch4, y_trainch4, test_size=0.22)#, random_state=1) # 0.25 x 0.8 = 0.2

In [None]:
#CO2
# Chronological CO2 split: 2003-2018 train, 2019-2020 validation, 2021 onward test.
X_trainco2, y_trainco2 = Xco2.loc['2003':'2018', :], yco2.loc['2003':'2018', :]
X_validco2, y_validco2 = Xco2.loc['2019':'2020', :], yco2.loc['2019':'2020', :]
X_testco2, y_testco2 = Xco2.loc['2021':, :], yco2.loc['2021':, :]
# X_trainco2, X_testco2, y_trainco2, y_testco2 = train_test_split(Xco2, yco2, test_size=0.1)#, random_state=1)
# X_trainco2, X_validco2, y_trainco2, y_validco2 = train_test_split(X_trainco2, y_trainco2, test_size=0.22)#, random_state=1) # 0.25 x 0.8 = 0.2

In [None]:
# # split the data
# from fast_ml.model_development import train_valid_test_split
# X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target = 'ALT', train_size=0.7, valid_size=0.2, test_size=0.1)
# #X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target = 'CH4_1_1_1', train_size=0.7, valid_size=0.2, test_size=0.1)
# #X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target = 'CO2_1_1_1', train_size=0.7, valid_size=0.2, test_size=0.1)

In [None]:
X_train.shape, X_valid.shape, X_test.shape

In [None]:
X_trainch4.shape, X_validch4.shape, X_testch4.shape

In [None]:
X_trainco2.shape, X_validco2.shape, X_testco2.shape

##### Save

In [None]:
# Persist the ALT test features (X_test) as a pickle file.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
file = '/Users/bradleygay/Downloads/X_testalt.pkl'
with open(file, mode='wb') as f:
    pickle.dump(X_test, f)

In [None]:
# Persist the ALT test target (y_test) as a pickle file.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
file = '/Users/bradleygay/Downloads/y_testalt.pkl'
with open(file, mode='wb') as f:
    pickle.dump(y_test, f)

In [None]:
# Persist the CH4 test features as a pickle file.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
file = '/Users/bradleygay/Downloads/X_testch4.pkl'
with open(file, mode='wb') as f:
    pickle.dump(X_testch4, f)

In [None]:
# Persist the CH4 test target as a pickle file.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
file = '/Users/bradleygay/Downloads/y_testch4.pkl'
with open(file, mode='wb') as f:
    pickle.dump(y_testch4, f)

In [None]:
# Persist the CO2 test features as a pickle file.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
file = '/Users/bradleygay/Downloads/X_testco2.pkl'
with open(file, mode='wb') as f:
    pickle.dump(X_testco2, f)

In [None]:
# Persist the CO2 test target as a pickle file.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
file = '/Users/bradleygay/Downloads/y_testco2.pkl'
with open(file, mode='wb') as f:
    pickle.dump(y_testco2, f)

##### Playground

In [None]:
# # split the data
# from fast_ml.model_development import train_valid_test_split
# X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target = 'ALT', train_size=0.7, valid_size=0.2, test_size=0.1)
# #X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target = 'CH4_1_1_1', train_size=0.7, valid_size=0.2, test_size=0.1)
# #X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target = 'CO2_1_1_1', train_size=0.7, valid_size=0.2, test_size=0.1)

In [None]:
#X=X_train.sort_index(); y=y_train.sort_index()

In [None]:
plt.plot(y_train.replace(-9999,np.nan).dropna().values)
plt.plot(y_valid.replace(-9999,np.nan).dropna().values)
plt.plot(y_test.replace(-9999,np.nan).dropna().values)
plt.grid()
plt.show()

In [None]:
plt.plot(y_train.sort_index().replace(-9999,np.nan).dropna().values)#1713754
plt.plot(y_valid.sort_index().replace(-9999,np.nan).dropna().values)#483367
plt.plot(y_test.sort_index().replace(-9999,np.nan).dropna().values)#244125
plt.grid()
plt.show()

In [None]:
#df.loc['2021':'2022']
#1772964/2441246#0.7262537245324724
#614894/2441246#0.25187711521083905
#453145/2441246#0.18562037582447652
#215137/2441246#0.08812589964305113

In [None]:
#df.loc['2021':'2021','CH4_1_1_1']
#1413747/2369279#0.5966992490120412
#1754385/2369279#0.7404721014283249
#453145/2369279#0.1912586065212244
#270788/2369279
#161749/2369279

In [None]:
#df.loc['2021':'2021','CO2_1_1_1']
#1413747/2369279#0.5966992490120412
#1754385/2369279#0.7404721014283249
#453145/2369279#0.1912586065212244
#270788/2369279
#161749/2369279

In [None]:
# Replace the -9999 missing-value sentinel with NaN in each training feature set.
# X_train is masked in place like its CH4/CO2 siblings; it was previously
# overwritten with a single column of `df` (a Series), which breaks the later
# `X_train.columns` access.
X_train = X_train.replace(-9999, np.nan)
X_trainch4 = X_trainch4.replace(-9999, np.nan)
X_trainco2 = X_trainco2.replace(-9999, np.nan)

In [None]:
from pandas import read_csv
from matplotlib import pyplot
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# Autocorrelation of CO2 with sentinel (-9999) rows removed.
# plot_acf opens its own figure unless an axes is supplied, which left the
# sized figure empty; draw on an explicit axes so figsize/dpi take effect.
fig, ax = plt.subplots(figsize=(9, 6), dpi=1000)
plot_acf(df.loc[:, 'CO2_1_1_1'].replace(-9999, np.nan).dropna(), ax=ax)
ax.grid()
plt.show()

In [None]:
# plt.plot(X_trainch4.replace(-9999,np.nan).dropna().values)
# plt.show()

In [None]:
# Partial autocorrelation (50 lags) of CO2 with sentinel (-9999) rows removed.
# plot_pacf opens its own figure unless an axes is supplied, which left the
# sized figure empty; draw on an explicit axes so figsize/dpi take effect.
fig, ax = plt.subplots(figsize=(9, 6), dpi=1000)
plot_pacf(df.loc[:, 'CO2_1_1_1'].replace(-9999, np.nan).dropna(), lags=50, ax=ax)
ax.grid()
plt.show()

In [None]:
# #Walk forward validation
# series = read_csv('sunspots.csv', header=0, index_col=0)
# X = series.values
# n_train = 500
# n_records = len(X)
# for i in range(n_train, n_records):
#  train, test = X[0:i], X[i:i+1]
#  print('train=%d, test=%d' % (len(train), len(test)))

In [None]:
# Rolling mean and standard deviation (window of 12 rows) of ALT, computed
# after removing rows holding the -9999 sentinel.
alt_valid = df.loc[:, 'ALT'].replace(-9999, np.nan).dropna()
alt_rolling = alt_valid.rolling(window=12)
rolling_mean = alt_rolling.mean().values
rolling_std = alt_rolling.std().values

plt.figure(figsize=(9, 6), dpi=1000)
for curve, colour, name in (
    (alt_valid.values, 'blue', 'Original'),
    (rolling_mean, 'red', 'Rolling Mean'),
    (rolling_std, 'black', 'Rolling Std'),
):
    plt.plot(curve, color=colour, label=name)
plt.legend(loc='best')
plt.title('Rolling Mean & Rolling Standard Deviation')
plt.grid()
plt.show()

In [None]:
plt.plot(dfnew.loc[:,'ALT'].replace(-9999,np.nan).dropna())

In [None]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey–Fuller test on CO2 with sentinel (-9999) rows removed.
co2_valid = df.loc[:, 'CO2_1_1_1'].replace(-9999, np.nan).dropna()
result = adfuller(co2_valid)
# result[0] is the test statistic, result[1] the p-value and result[4] the
# mapping of significance level -> critical value.
print('ADF Statistic: {}'.format(result[0]))
print('p-value: {}'.format(result[1]))
print('Critical Values:')
for level, threshold in result[4].items():
    print('\t{}: {}'.format(level, threshold))

In [None]:
get_stationarity()

In [None]:
# Show the second column of dfnew (the original `dfnew[iloc-:,1]` was a syntax error).
dfnew.iloc[:, 1]

In [None]:
#X_train

In [None]:
# Drop the last 92 columns of X_train.
# NOTE: X_train is reassigned from itself, so re-running this cell removes a
# further 92 columns each time.
X_train=X_train.drop(X_train.columns[-92:],axis=1)

In [None]:
#X=X_train;
X_train.shape, y_train.shape

In [None]:
nans(X_trainch4)

In [None]:
# Drop the last 92 columns of X_trainch4.
# NOTE: X_trainch4 is reassigned from itself, so re-running this cell removes
# a further 92 columns each time.
X_trainch4=X_trainch4.drop(X_trainch4.columns[-92:],axis=1)

In [None]:
#X_trainch4
X_trainch4.shape, y_trainch4.shape

In [None]:
nans(X_trainco2)

In [None]:
# Drop the last 92 columns of X_trainco2.
# NOTE: X_trainco2 is reassigned from itself, so re-running this cell removes
# a further 92 columns each time.
X_trainco2=X_trainco2.drop(X_trainco2.columns[-92:],axis=1)

In [None]:
#X_trainco2
X_trainco2.shape, y_trainco2.shape

# Step 7: Framing Problem(s), Part II | Scale and Supervision

In [None]:
#alt.groupby(alt.index).mean()

In [None]:
print(alt.shape, Xalt.shape, yalt.shape)
print(ch4.shape, Xch4.shape, ych4.shape)
print(co2.shape, Xco2.shape, yco2.shape)

### ALT

In [None]:
# trainXalt, trainyalt=reframed_alt.loc[:'2017', reframed_alt.columns != ('ALT', 't')], \
# reframed_alt.loc[:'2017', reframed_alt.columns == ('ALT', 't'):]
# validXalt, validyalt=reframed_alt.loc['2018':'2020', reframed_alt.columns != ('ALT', 't')], \
# reframed_alt.loc['2018':'2020', reframed_alt.columns == ('ALT', 't'):]
# testXalt, testyalt=reframed_alt.loc['2021':, reframed_alt.columns != ('ALT', 't')], \
# reframed_alt.loc['2021':, reframed_alt.columns == ('ALT', 't'):]


# from fast_ml.model_development import train_valid_test_split
# trainX, trainy, validX, validy, testX, testy = train_valid_test_split(reframed_alt, target = ('ALT', 't+1'), 
#                                                                             train_size=0.7, valid_size=0.2, test_size=0.1)        

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)#, random_state=1)
# X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.22)#, random_state=1) # 0.25 x 0.8 = 0.2

# # split the data
# from fast_ml.model_development import train_valid_test_split
# X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target = 'ALT', train_size=0.7, valid_size=0.2, test_size=0.1)
# #X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target = 'CH4_1_1_1', train_size=0.7, valid_size=0.2, test_size=0.1)
# #X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target = 'CO2_1_1_1', train_size=0.7, valid_size=0.2, test_size=0.1)

# #SUPERVISION
# trainXsup=series_to_supervised(trainX, lags=3, forecasting_steps=1, dropna=True)
# # X_valid_scaled_sup=series_to_supervised(X_valid_scaled, lags=3, forecasting_steps=1, dropna=True)
# # X_test_scaled_sup=series_to_supervised(X_test_scaled, lags=3, forecasting_steps=1, dropna=True)
# # X_train_scaled_sup.shape, X_valid_scaled_sup.shape, X_test_scaled_sup.shape
# validXsup=series_to_supervised(validX, lags=3, forecasting_steps=1, dropna=True)
# testXsup=series_to_supervised(testX, lags=3, forecasting_steps=1, dropna=True)
# trainXsup.shape, validXsup.shape, testXsup.shape

# #SUPERVISION
# trainysup=series_to_supervised(trainy, lags=3, forecasting_steps=1, dropna=True)
# validysup=series_to_supervised(validy, lags=3, forecasting_steps=1, dropna=True)
# testysup=series_to_supervised(testy, lags=3, forecasting_steps=1, dropna=True)
# trainysup.shape, validysup.shape, testysup.shape

In [None]:
#1969-2022 (54 years)
#yalt.loc[:'2018']
#1432318/2441236 #0.5867183672533094
#1772956/2441236 #0.7262534224466622 #Use 1969-2018 for threshold based on distribution of points, but keeping time series order consistent (training)

#yalt.loc['2019':'2020']
#614893/2441236 #0.251877737342887
#453144/2441236 #0.18562072654999354 #Use 2019-2020 for threshold based on distribution of points, but keeping time series order consistent (validation)

#yalt.loc['2021':]
#215136/2441236 #0.0881258510033442 #Use 2021-2022 for threshold based on distribution of points, but keeping time series order consistent (testing)

In [None]:
# Chronological ALT split: through 2018 for training, 2019-2020 for
# validation, 2021 onward for testing.
trainXalt, validXalt, testXalt = Xalt.loc[:'2018'], Xalt.loc['2019':'2020'], Xalt.loc['2021':]
trainyalt, validyalt, testyalt = yalt.loc[:'2018'], yalt.loc['2019':'2020'], yalt.loc['2021':]

In [None]:
print(trainXalt.shape, trainyalt.shape);
print(validXalt.shape, validyalt.shape);
print(testXalt.shape, testyalt.shape);

In [None]:
# Neural networks need scaled features, otherwise training does not converge.
# Fit the scaler on the training split only, then apply the same statistics to
# all three splits.
Xscaleralt = StandardScaler()
Xscaleralt.fit(trainXalt)
trainXscalt, validXscalt, testXscalt = (
    Xscaleralt.transform(split) for split in (trainXalt, validXalt, testXalt)
)

In [None]:
trainXscalt.shape, validXscalt.shape, testXscalt.shape

In [None]:
# Standardize the ALT target using statistics from the training split only;
# the validation and test splits reuse the fitted scaler.
# reshape(-1, 1) gives the single-column 2-D layout without hard-coding the
# row counts, so the cell keeps working when the split sizes change.
yscaleralt = StandardScaler()
trainyscalt = yscaleralt.fit_transform(pd.DataFrame(trainyalt).to_numpy().reshape(-1, 1))
validyscalt = yscaleralt.transform(pd.DataFrame(validyalt).to_numpy().reshape(-1, 1))
testyscalt = yscaleralt.transform(pd.DataFrame(testyalt).to_numpy().reshape(-1, 1))

In [None]:
trainyscalt.shape, validyscalt.shape, testyscalt.shape

In [None]:
# Reshaped for LSTM later on...renamed to reframed to allow rescaled venture through regression
# Insert a length-1 middle axis: (rows, columns) -> (rows, 1, columns).
# Sizes are taken from each array instead of being hard-coded, so the cell
# keeps working when the row or column counts change.
trainXscaltref = trainXscalt.reshape(trainXscalt.shape[0], 1, trainXscalt.shape[1])
validXscaltref = validXscalt.reshape(validXscalt.shape[0], 1, validXscalt.shape[1])
testXscaltref = testXscalt.reshape(testXscalt.shape[0], 1, testXscalt.shape[1])

In [None]:
trainXscaltref.shape, validXscaltref.shape, testXscaltref.shape

In [None]:
# Reshaped for LSTM later on...renamed to reframed to allow rescaled venture through regression
# Reshape each scaled target to (rows, 1, 1). The row count is inferred with
# -1 instead of being hard-coded, so the cell keeps working when the split
# sizes change.
trainyscaltref = trainyscalt.reshape(-1, 1, 1)
validyscaltref = validyscalt.reshape(-1, 1, 1)
testyscaltref = testyscalt.reshape(-1, 1, 1)

In [None]:
trainyscaltref.shape, validyscaltref.shape, testyscaltref.shape

### CH4

In [None]:
# #df
# #df.loc[:, df.columns != 'CH4_1_1_2']
# trainXch4, trainych4=reframed_ch4.loc[:'2017',reframed_ch4.columns != 'CH4_1_1_2'],reframed_ch4.loc[:'2017',reframed_ch4.columns == 'CH4_1_1_2']
# validXch4, validych4=reframed_ch4.loc['2018':'2020',reframed_ch4.columns != 'CH4_1_1_2'],reframed_ch4.loc['2018':'2020',reframed_ch4.columns == 'CH4_1_1_2']
# testXch4, testych4=reframed_ch4.loc['2021':,reframed_ch4.columns != 'CH4_1_1_2'],reframed_ch4.loc['2021':,reframed_ch4.columns == 'CH4_1_1_2']

# trainXch4, trainych4=reframed_ch4.iloc[:,:-1][:'2017'],reframed_ch4.iloc[:,-1:][:'2017']
# validXch4, validych4=reframed_ch4.iloc[:,:-1]['2018':'2020'],reframed_ch4.iloc[:,-1:]['2018':'2020']
# testXch4, testych4=reframed_ch4.iloc[:,:-1]['2021':],reframed_ch4.iloc[:,-1:]['2021':]

#reframed_ch4.iloc[:,-1].name
#reframed_ch4.loc[:'2017',:('CH4_1_1_2', 't+1')]

# #plt.plot(reframed_ch4.iloc[:,-1]['2011':].values)
# reframed_ch4.iloc[:,-1]['2011':].shape
# #2136270/11
# 11*.1

# trainXch4=X_trainch4; trainych4=y_trainch4; validXch4=X_validch4; validych4=y_validch4; testXch4=X_testch4; 
# testych4=y_testch4

# trainXch4, trainych4=reframed_ch4.loc[:'2017',:('CH4_1_1_2', 't+1')],reframed_ch4.loc[:'2017',('CH4_1_1_2', 't+1'):]
# validXch4, validych4=reframed_ch4.loc['2018':'2020',:('CH4_1_1_2', 't+1')],reframed_ch4.loc['2018':'2020',('CH4_1_1_2', 't+1'):]
# testXch4, testych4=reframed_ch4.loc['2021',:('CH4_1_1_2', 't+1')],reframed_ch4.loc['2021',('CH4_1_1_2', 't+1'):]

# trainXch4, trainych4=reframed_ch4.iloc[:,:-1]['2011':'2018'],reframed_ch4.iloc[:,-1:]['2011':'2018']
# validXch4, validych4=reframed_ch4.iloc[:,:-1]['2019':'2020'],reframed_ch4.iloc[:,-1:]['2019':'2020']
# testXch4, testych4=reframed_ch4.iloc[:,:-1].loc['2021'],reframed_ch4.iloc[:,-1:].loc['2021']

# trainXch4, trainych4=reframed_ch4.iloc[:,:-1][:'2017'],reframed_ch4.iloc[:,-1:][:'2017']
# validXch4, validych4=reframed_ch4.iloc[:,:-1]['2018':'2020'],reframed_ch4.iloc[:,-1:]['2018':'2020']
# testXch4, testych4=reframed_ch4.iloc[:,:-1]['2021':],reframed_ch4.iloc[:,-1:]['2021':]

# from fast_ml.model_development import train_valid_test_split
# trainX, trainy, validX, validy, testX, testy = train_valid_test_split(reframed_alt, target = ('ALT', 't+1'), 
#                                                                             train_size=0.7, valid_size=0.2, test_size=0.1)        

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)#, random_state=1)
# X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.22)#, random_state=1) # 0.25 x 0.8 = 0.2

# # split the data
# from fast_ml.model_development import train_valid_test_split
# X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target = 'ALT', train_size=0.7, valid_size=0.2, test_size=0.1)
# #X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target = 'CH4_1_1_1', train_size=0.7, valid_size=0.2, test_size=0.1)
# #X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target = 'CO2_1_1_1', train_size=0.7, valid_size=0.2, test_size=0.1)

In [None]:
#2011-2021 (11 years)
#ych4.loc['2011':'2018']
#1467990/2136270 #0.6871743740257552 #Use 2011-2018 for threshold based on distribution of points, but keeping time series order consistent (training)
#ych4.loc['2019':'2020']
#453144/2136270 #0.21211925458860537 #Use 2019-2020 for threshold based on distribution of points, but keeping time series order consistent (validation)
#ych4.loc['2021':]
#215136/2136270 #0.10070637138563945 #Use 2021 for threshold based on distribution of points, but keeping time series order consistent (testing)

In [None]:
#ych4['2011':]
#1803847
#ych4['2011':'2018']
#1167721/1803847 #0.6473503573196618
#1409922/1803847 #0.7816195054236862 #Use 2011-2018 for training
#ych4['2019':'2020']
#422779/1803847 #0.23437630796846962 #Use 2019-2020 for validation
#ych4.loc['2021':,:]
#213347/1990868 #0.1071628053693163 #Use 2021-2022 for testing

In [None]:
# Chronological CH4 split using label-based year slices on the index:
#   train 2011-2018 | validation 2019-2020 | test 2021 onward
trainXch4 = Xch4.loc['2011':'2018']
trainych4 = ych4.loc['2011':'2018']
validXch4 = Xch4.loc['2019':'2020']
validych4 = ych4.loc['2019':'2020']
testXch4 = Xch4.loc['2021':]
testych4 = ych4.loc['2021':]

In [None]:
print(trainXch4.shape, trainych4.shape);
print(validXch4.shape, validych4.shape);
print(testXch4.shape, testych4.shape);

In [None]:
# # Need to scale the features for neural networks, otherwise the training doesn't converge.
# Xscalerch4 = StandardScaler()
# Xscalerch4.fit(X_trainch4)
# X_train_scaledch4 = Xscalerch4.transform(X_trainch4)
# X_valid_scaledch4 = Xscalerch4.transform(X_validch4)
# X_test_scaledch4 = Xscalerch4.transform(X_testch4)

# yscalerch4 = StandardScaler()
# yscalerch4.fit(y_trainch4)
# y_train_scaledch4 = yscalerch4.transform(y_trainch4)
# y_valid_scaledch4 = yscalerch4.transform(y_validch4)
# y_test_scaledch4 = yscalerch4.transform(y_testch4)

In [None]:
# #SUPERVISION
# X_trainch4_scaled_sup=series_to_supervised(X_trainch4_scaled, lags=3, forecasting_steps=1, dropna=True)
# # X_validch4_scaled_sup=series_to_supervised(X_validch4_scaled, lags=3, forecasting_steps=1, dropna=True)
# # X_testch4_scaled_sup=series_to_supervised(X_testch4_scaled, lags=3, forecasting_steps=1, dropna=True)
# # X_trainch4_scaled_sup.shape, X_validch4_scaled_sup.shape, X_testch4_scaled_sup.shape
# X_validch4_sup=series_to_supervised(X_validch4, lags=3, forecasting_steps=1, dropna=True)
# X_testch4_sup=series_to_supervised(X_testch4, lags=3, forecasting_steps=1, dropna=True)
# X_trainch4_scaled_sup.shape, X_validch4_sup.shape, X_testch4_sup.shape

# #SUPERVISION
# y_trainch4_scaled_sup=series_to_supervised(y_trainch4_scaled, lags=3, forecasting_steps=1, dropna=True)
# y_validch4_scaled_sup=series_to_supervised(y_validch4_scaled, lags=3, forecasting_steps=1, dropna=True)
# y_testch4_scaled_sup=series_to_supervised(y_testch4_scaled, lags=3, forecasting_steps=1, dropna=True)
# y_trainch4_scaled_sup.shape, y_validch4_scaled_sup.shape, y_testch4_scaled_sup.shape

In [None]:
# Neural networks need standardised features, otherwise training does not converge.
# The scaler statistics are learned from the training split only and then
# reused unchanged for the validation and test splits.
Xscalerch4 = StandardScaler()
Xscalerch4.fit(trainXch4)
trainXscch4 = Xscalerch4.transform(trainXch4)
validXscch4 = Xscalerch4.transform(validXch4)
testXscch4 = Xscalerch4.transform(testXch4)

In [None]:
trainXscch4.shape, validXscch4.shape, testXscch4.shape

In [None]:
# Standardise the CH4 target with statistics learned from the training split only.
# reshape(-1, 1) produces the (n_samples, 1) column StandardScaler expects and
# lets NumPy infer the row count, so the cell keeps working if the split sizes change.
yscalerch4 = StandardScaler()
trainyscch4 = yscalerch4.fit_transform(pd.DataFrame(trainych4).to_numpy().reshape(-1, 1))
validyscch4 = yscalerch4.transform(pd.DataFrame(validych4).to_numpy().reshape(-1, 1))
testyscch4 = yscalerch4.transform(pd.DataFrame(testych4).to_numpy().reshape(-1, 1))

In [None]:
trainyscch4.shape, validyscch4.shape, testyscch4.shape

In [None]:
# Reshape for the LSTM: (samples, features) -> (samples, 1, features).
# Dimensions are read from the arrays themselves instead of being hard-coded,
# so a change in split size or feature count cannot break the reshape.
trainXscch4ref = trainXscch4.reshape(trainXscch4.shape[0], 1, trainXscch4.shape[1])
validXscch4ref = validXscch4.reshape(validXscch4.shape[0], 1, validXscch4.shape[1])
testXscch4ref = testXscch4.reshape(testXscch4.shape[0], 1, testXscch4.shape[1])

In [None]:
trainXscch4ref.shape, validXscch4ref.shape, testXscch4ref.shape

In [None]:
# Reshape the scaled target for the LSTM: (samples, 1) -> (samples, 1, 1).
# Dimensions are read from the arrays themselves instead of being hard-coded.
trainyscch4ref = trainyscch4.reshape(trainyscch4.shape[0], 1, trainyscch4.shape[1])
validyscch4ref = validyscch4.reshape(validyscch4.shape[0], 1, validyscch4.shape[1])
testyscch4ref = testyscch4.reshape(testyscch4.shape[0], 1, testyscch4.shape[1])

In [None]:
trainyscch4ref.shape, validyscch4ref.shape, testyscch4ref.shape

### CO2

In [None]:
# #df
# #df.loc[:, df.columns != 'CO2_1_2_1']
# trainXco2, trainyco2=df.loc[:'2017',df.columns != 'CO2_1_2_1'],df.loc[:'2017',df.columns == 'CO2_1_2_1']
# validXco2, validyco2=df.loc['2018':'2020',df.columns != 'CO2_1_2_1'],df.loc['2018':'2020',df.columns == 'CO2_1_2_1']
# testXco2, testyco2=df.loc['2021':,df.columns != 'CO2_1_2_1'],df.loc['2021':,df.columns == 'CO2_1_2_1']

# trainXco2=X_trainco2; trainyco2=y_trainco2; validXco2=X_validco2; validyco2=y_validco2; testXco2=X_testco2; 
# testyco2=y_testco2

# trainXco2, trainyco2=reframed_co2.loc[:'2017',:('CO2_1_2_1', 't+1')],reframed_co2.loc[:'2017',('CO2_1_2_1', 't+1'):]
# validXco2, validyco2=reframed_co2.loc['2018':'2020',:('CO2_1_2_1', 't+1')],reframed_co2.loc['2018':'2020',('CO2_1_2_1', 't+1'):]
# testXco2, testyco2=reframed_co2.loc['2021',:('CO2_1_2_1', 't+1')],reframed_co2.loc['2021',('CO2_1_2_1', 't+1'):]

# trainXco2, trainyco2=reframed_co2.iloc[:,:-1]['2006':'2016'],reframed_co2.iloc[:,-1:]['2006':'2016']
# validXco2, validyco2=reframed_co2.iloc[:,:-1]['2017':'2018'],reframed_co2.iloc[:,-1:]['2017':'2018']
# testXco2, testyco2=reframed_co2.iloc[:,:-1].loc['2019'],reframed_co2.iloc[:,-1:].loc['2019']

# trainXco2, trainyco2=reframed_co2.iloc[:,:-1][:'2017'],reframed_co2.iloc[:,-1:][:'2017']
# validXco2, validyco2=reframed_co2.iloc[:,:-1]['2018':'2020'],reframed_co2.iloc[:,-1:]['2018':'2020']
# testXco2, testyco2=reframed_co2.iloc[:,:-1]['2021':],reframed_co2.iloc[:,-1:]['2021':]

#len(reframed_co2.iloc[:,:-1]['2006':'2017'])#1019750#1354203
#len(reframed_co2.iloc[:,:-1]['2018'])#675091
#len(reframed_co2.iloc[:,:-1]['2019':])#668281
#len(reframed_co2)#2441237
#1354203/2441237

In [None]:
#2006-2019 (14 years)
#yco2.loc['2006':'2019']
#829750/1965628 #0.4221297213918402
#1019750/1965628 #0.5187909411139849
#yco2.loc['2006':'2017']
#1354203/1965628 #0.6889416512178296 #Use 2006-2017 for threshold based on distribution of points, but keeping time series order consistent (training)
#yco2.loc['2018']
#340638/1965628 #0.17329728717743134 #Use 2018 for threshold based on distribution of points, but keeping time series order consistent (validation)
#yco2.loc['2019']
#270787/1965628 #0.13776106160473905 #Use 2019 for threshold based on distribution of points, but keeping time series order consistent (testing)

In [None]:
#yco2['2003':'2021']
#1990868
#yco2['2003':'2018']
#711575/1990868
#1407684/1990868 #0.7070704838291639 #Use 2003-2018 for training
#yco2['2019':'2020']
#422779/1990868 #0.21235913179577953 #Use 2019-2020 for validation
#yco2.loc['2021',:]
#160405/1990868 #0.08057038437505651 #Use 2021 for testing

In [None]:
# Chronological CO2 split using label-based year slices on the index:
#   train 2003-2018 | validation 2019-2020 | test 2021 onward
trainXco2 = Xco2.loc['2003':'2018']
trainyco2 = yco2.loc['2003':'2018']
validXco2 = Xco2.loc['2019':'2020']
validyco2 = yco2.loc['2019':'2020']
testXco2 = Xco2.loc['2021':]
testyco2 = yco2.loc['2021':]

In [None]:
print(trainXco2.shape, trainyco2.shape);
print(validXco2.shape, validyco2.shape);
print(testXco2.shape, testyco2.shape);

In [None]:
# Neural networks need standardised features, otherwise training does not converge.
# The scaler statistics are learned from the training split only and then
# reused unchanged for the validation and test splits.
Xscalerco2 = StandardScaler()
Xscalerco2.fit(trainXco2)
trainXscco2 = Xscalerco2.transform(trainXco2)
validXscco2 = Xscalerco2.transform(validXco2)
testXscco2 = Xscalerco2.transform(testXco2)

In [None]:
trainXscco2.shape, validXscco2.shape, testXscco2.shape

In [None]:
# Standardise the CO2 target with statistics learned from the training split only.
# reshape(-1, 1) lets NumPy infer the row count of each split; the previous
# literal sizes for validation/test were the CH4 split sizes and would raise
# whenever the CO2 splits have a different number of rows.
yscalerco2 = StandardScaler()
trainyscco2 = yscalerco2.fit_transform(pd.DataFrame(trainyco2).to_numpy().reshape(-1, 1))
validyscco2 = yscalerco2.transform(pd.DataFrame(validyco2).to_numpy().reshape(-1, 1))
testyscco2 = yscalerco2.transform(pd.DataFrame(testyco2).to_numpy().reshape(-1, 1))

In [None]:
trainyscco2.shape, validyscco2.shape, testyscco2.shape

In [None]:
# # Need to scale the features for neural networks, otherwise the training doesn't converge.
# Xscalerco2 = StandardScaler()
# Xscalerco2.fit(X_trainco2)
# X_train_scaledco2 = Xscalerco2.transform(X_trainco2)
# X_valid_scaledco2 = Xscalerco2.transform(X_validco2)
# X_test_scaledco2 = Xscalerco2.transform(X_testco2)

# yscalerco2 = StandardScaler()
# yscalerco2.fit(y_trainco2)
# y_train_scaledco2 = yscalerco2.transform(y_trainco2)
# y_valid_scaledco2 = yscalerco2.transform(y_validco2)
# y_test_scaledco2 = yscalerco2.transform(y_testco2)

In [None]:
# #SUPERVISION
# X_trainco2_scaled_sup=series_to_supervised(X_trainco2_scaled, lags=3, forecasting_steps=1, dropna=True)
# # X_validco2_scaled_sup=series_to_supervised(X_validco2_scaled, lags=3, forecasting_steps=1, dropna=True)
# # X_testco2_scaled_sup=series_to_supervised(X_testco2_scaled, lags=3, forecasting_steps=1, dropna=True)
# # X_trainco2_scaled_sup.shape, X_validco2_scaled_sup.shape, X_testco2_scaled_sup.shape
# X_validco2_sup=series_to_supervised(X_validco2, lags=3, forecasting_steps=1, dropna=True)
# X_testco2_sup=series_to_supervised(X_testco2, lags=3, forecasting_steps=1, dropna=True)
# X_trainco2_scaled_sup.shape, X_validco2_sup.shape, X_testco2_sup.shape

# #SUPERVISION
# y_trainco2_scaled_sup=series_to_supervised(y_trainco2_scaled, lags=3, forecasting_steps=1, dropna=True)
# y_validco2_scaled_sup=series_to_supervised(y_validco2_scaled, lags=3, forecasting_steps=1, dropna=True)
# y_testco2_scaled_sup=series_to_supervised(y_testco2_scaled, lags=3, forecasting_steps=1, dropna=True)
# y_trainco2_scaled_sup.shape, y_validco2_scaled_sup.shape, y_testco2_scaled_sup.shape

In [None]:
# Reshape for the LSTM: (samples, features) -> (samples, 1, features).
# Dimensions are read from the arrays themselves instead of being hard-coded,
# so a change in split size or feature count cannot break the reshape.
trainXscco2ref = trainXscco2.reshape(trainXscco2.shape[0], 1, trainXscco2.shape[1])
validXscco2ref = validXscco2.reshape(validXscco2.shape[0], 1, validXscco2.shape[1])
testXscco2ref = testXscco2.reshape(testXscco2.shape[0], 1, testXscco2.shape[1])

In [None]:
trainXscco2ref.shape, validXscco2ref.shape, testXscco2ref.shape

In [None]:
# Reshape the scaled target for the LSTM: (samples, 1) -> (samples, 1, 1).
# Dimensions are read from the arrays themselves instead of being hard-coded.
trainyscco2ref = trainyscco2.reshape(trainyscco2.shape[0], 1, trainyscco2.shape[1])
validyscco2ref = validyscco2.reshape(validyscco2.shape[0], 1, validyscco2.shape[1])
testyscco2ref = testyscco2.reshape(testyscco2.shape[0], 1, testyscco2.shape[1])

In [None]:
trainyscco2ref.shape, validyscco2ref.shape, testyscco2ref.shape

### Archive

In [None]:
# scaler=StandardScaler()
# scaled_train=scaler.fit_transform(train)
# scaled_valid=scaler.transform(valid)
# scaled_test=scaler.transform(test)

In [None]:
# scaled_train=pd.DataFrame(scaled_train)
# scaled_valid=pd.DataFrame(scaled_valid)
# scaled_test=pd.DataFrame(scaled_test)

In [None]:
# scaled_train.index=train.index
# scaled_valid.index=valid.index
# scaled_test.index=test.index

In [None]:
#df
# Sort rows by index so the label-based year slices used for the splits below
# operate on a monotonically ordered index.
# NOTE(review): rebinding `df` in place makes the unsorted frame unrecoverable
# without re-running the cell that created it.
df=df.sort_index()

In [None]:
# ALT chronological split: train 1969-2017 | validation 2018-2020 | test 2021 onward
X_trainalt = Xalt.loc['1969':'2017', :]
y_trainalt = yalt.loc['1969':'2017', :]
X_validalt = Xalt.loc['2018':'2020', :]
y_validalt = yalt.loc['2018':'2020', :]
X_testalt = Xalt.loc['2021':, :]
y_testalt = yalt.loc['2021':, :]
#never outputs last mentioned key 
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)#, random_state=1)
# X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.22)#, random_state=1) # 0.25 x 0.8 = 0.2

In [None]:
# CH4 chronological split: train 2011-2018 | validation 2019-2020 | test 2021 onward
X_trainch4 = Xch4.loc['2011':'2018', :]
y_trainch4 = ych4.loc['2011':'2018', :]
X_validch4 = Xch4.loc['2019':'2020', :]
y_validch4 = ych4.loc['2019':'2020', :]
X_testch4 = Xch4.loc['2021':, :]
y_testch4 = ych4.loc['2021':, :]
# X_trainch4, X_testch4, y_trainch4, y_testch4 = train_test_split(Xch4, ych4, test_size=0.1)#, random_state=1)
# X_trainch4, X_validch4, y_trainch4, y_validch4 = train_test_split(X_trainch4, y_trainch4, test_size=0.22)#, random_state=1) # 0.25 x 0.8 = 0.2

In [None]:
# CO2 chronological split: train 2006-2016 | validation 2017-2018 | test 2019 onward
X_trainco2 = Xco2.loc['2006':'2016', :]
y_trainco2 = yco2.loc['2006':'2016', :]
X_validco2 = Xco2.loc['2017':'2018', :]
y_validco2 = yco2.loc['2017':'2018', :]
X_testco2 = Xco2.loc['2019':, :]
y_testco2 = yco2.loc['2019':, :]
# X_trainco2, X_testco2, y_trainco2, y_testco2 = train_test_split(Xco2, yco2, test_size=0.1)#, random_state=1)
# X_trainco2, X_validco2, y_trainco2, y_validco2 = train_test_split(X_trainco2, y_trainco2, test_size=0.22)#, random_state=1) # 0.25 x 0.8 = 0.2

In [None]:
#plt.plot(yalt['1969':'2022'])#54 years
#plt.plot(ych4['2011':'2021'])#11 years
#plt.plot(yco2['2006':'2019'])#14 years
#yco2['2006':'2017']#540085
#1354203/2441237

In [None]:
# Split `df` into ALT features/target by year: train up to 2017 | validation 2018-2020 | test 2021 onward.
# Boolean column masks select the 'ALT' target column and everything else.
alt_feature_cols = df.columns != 'ALT'
alt_target_cols = df.columns == 'ALT'
trainXalt = df.loc[:'2017', alt_feature_cols]
trainyalt = df.loc[:'2017', alt_target_cols]
validXalt = df.loc['2018':'2020', alt_feature_cols]
validyalt = df.loc['2018':'2020', alt_target_cols]
testXalt = df.loc['2021':, alt_feature_cols]
testyalt = df.loc['2021':, alt_target_cols]

# Step 8: Data Structure | Model Tuning and Development

In [None]:
# #IN SITU
# reframed_alt.shape, type(reframed_alt), reframed_ch4.shape, type(reframed_ch4), reframed_co2.shape, type(reframed_co2)

# #SIBBORK
# sibbork.shape, type(sibbork)

# #TCFM
# ch4df.to_numpy().shape, type(ch4df.to_numpy())
# co2df.to_numpy().shape, type(co2df.to_numpy())

# #AVIRISNG
# aviris_arr_list
# aviris_arr_list[0].shape, type(aviris_arr_list[0])
# #aviris_arr_list[0].shape, type(aviris_arr_list[0])
# #aviris_arr_list[1].shape, type(aviris_arr_list[0])
# #aviris_arr_list[2].shape, type(aviris_arr_list[0])
# #aviris_arr_list[3].shape, type(aviris_arr_list[0])
# #aviris_raster_list[0].shape, aviris_arr_list[0].shape #filtered by goodbandlist

# #UAVSAR
# #plt.imshow(uavsar_arr_list[0][0])
# uavsar_arr_list
# uavsar_arr_list[0].shape, type(uavsar_arr_list[0])

### Workflow

In [None]:
#ConvLSTM1D Input shape
# If data_format='channels_first' 4D tensor with shape: (samples, time, channels, rows)
# If data_format='channels_last' 4D tensor with shape: (samples, time, rows, channels)
#'channels_last' (FEATURES)

### Restructure Data

In [None]:
# Insert a singleton axis at position 1 of every scaled feature matrix:
# (samples, features) -> (samples, 1, features)
trainXscaltref = np.expand_dims(trainXscalt, axis=1)
validXscaltref = np.expand_dims(validXscalt, axis=1)
testXscaltref = np.expand_dims(testXscalt, axis=1)
trainXscch4ref = np.expand_dims(trainXscch4, axis=1)
validXscch4ref = np.expand_dims(validXscch4, axis=1)
testXscch4ref = np.expand_dims(testXscch4, axis=1)
trainXscco2ref = np.expand_dims(trainXscco2, axis=1)
validXscco2ref = np.expand_dims(validXscco2, axis=1)
testXscco2ref = np.expand_dims(testXscco2, axis=1)

In [None]:
# Insert a singleton axis at position 1 of every scaled target array:
# (samples, k) -> (samples, 1, k)
trainyscaltref = np.expand_dims(trainyscalt, axis=1)
validyscaltref = np.expand_dims(validyscalt, axis=1)
testyscaltref = np.expand_dims(testyscalt, axis=1)
trainyscch4ref = np.expand_dims(trainyscch4, axis=1)
validyscch4ref = np.expand_dims(validyscch4, axis=1)
testyscch4ref = np.expand_dims(testyscch4, axis=1)
trainyscco2ref = np.expand_dims(trainyscco2, axis=1)
validyscco2ref = np.expand_dims(validyscco2, axis=1)
testyscco2ref = np.expand_dims(testyscco2, axis=1)

In [None]:
print(trainXscaltref.shape, validXscaltref.shape, testXscaltref.shape)
print(trainyscaltref.shape, validyscaltref.shape, testyscaltref.shape)
print(trainXscch4ref.shape, validXscch4ref.shape, testXscch4ref.shape)
print(trainyscch4ref.shape, validyscch4ref.shape, testyscch4ref.shape)
print(trainXscco2ref.shape, validXscco2ref.shape, testXscco2ref.shape)
print(trainyscco2ref.shape, validyscco2ref.shape, testyscco2ref.shape)

In [None]:
#print(list(divisorGenerator(1426178)))

In [None]:
# inputsalt = keras.Input(shape=(trainXscaltref.shape[1],trainXscaltref.shape[2]))#, X_train_reframed_sup.shape[2]))#, X_train_reframed_sup.shape[3]))
# inputsch4 = keras.Input(shape=(trainXscch4ref.shape[0], trainXscch4ref.shape[1],trainXscch4ref.shape[2]))#, Xch4train.shape[3], Xch4train.shape[4]))
# inputsco2 = keras.Input(shape=(trainXscco2ref.shape[1],trainXscco2ref.shape[2]))#, X_train_reframed_sup.shape[2]))#, X_train_reframed_sup.shape[3]))
# inputsalt.shape, inputsch4.shape, inputsco2.shape

### Quick check on data structure and simple model

In [None]:
# Take the callback from tensorflow.keras, the same package the model, layers and
# optimizer in this notebook come from; mixing it with the standalone `keras`
# package can hand `fit` a callback built against a different Keras implementation.
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
# Multiply the learning rate by 0.75 whenever val_loss has not improved for
# 2 consecutive epochs, never going below 1e-5.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience = 2, verbose=1,factor=0.75, min_lr=0.00001)

In [None]:
# Minimal ConvLSTM1D -> Flatten -> Dense(1) regressor used as a quick check of the
# data layout; compiled with Adam, MSE loss, and MAE/MSE as reported metrics.
# NOTE(review): input_shape=(1, 1, 279) implies 4D batches (samples, time, rows, channels),
# whereas the *ref arrays built under "Restructure Data" are 3D (samples, 1, features);
# a later cell reshapes to (10000, 1, 1, 279) before predict -- confirm which layout
# the fit() calls actually receive.
# NOTE(review): 279 is hard-coded; presumably the feature count -- confirm against
# trainXscaltref.shape.
model = Sequential()
model.add(ConvLSTM1D(filters=64, kernel_size=(1), input_shape=(1, 1, 279)))
model.add(Flatten())
model.add(Dense(1))
model.compile(optimizer='adam', 
              loss='mse',
              metrics=['mae', 'mse'])

In [None]:
model.summary()

In [None]:
#model1=model; history1=history

In [None]:
# Persist the model to HDF5.
# NOTE(review): this cell sits before any model.fit call, so when the notebook is
# run top to bottom the saved file holds the untrained weights -- confirm intended.
model.save("convlstm1d.h5")

In [None]:
history = model.fit(
    trainXscaltref, trainyscaltref,
    epochs=10,
    batch_size=256,
    validation_data=(validXscaltref, validyscaltref),
    callbacks=[learning_rate_reduction]
)

In [None]:
plt.plot(history.history["loss"])
#plt.plot(history.history["val_loss"])
plt.title("Loss Function")
plt.legend(["Training Data (ALT)" , "Validation Data (ALT)"])
plt.show()

In [None]:
plt.plot(history.history["mae"])
#plt.plot(history.history["val_mae"])
plt.title("Error Metrics (MAE)")
plt.legend(["Training Data (ALT)" , "Validation Data (ALT)"])
plt.show()

In [None]:
plt.plot(history.history["mse"])
#plt.plot(history.history["val_mse"])
plt.title("Error Metrics (MSE)")
plt.legend(["Training Data (ALT)" , "Validation Data (ALT)"])
plt.show()

In [None]:
# Fit on the CH4 split (10 epochs, batch size 256, LR reduction on plateau).
# NOTE(review): this reuses the `model` object that the ALT cell above already
# called fit() on, so training presumably continues from those weights rather
# than from a fresh initialisation -- confirm intended.
history2 = model.fit(
    trainXscch4ref, trainyscch4ref,
    epochs=10,
    batch_size=256,
    validation_data=(validXscch4ref, validyscch4ref),
    callbacks=[learning_rate_reduction]
)

In [None]:
plt.plot(history2.history["loss"])
#plt.plot(history.history["val_loss"])
plt.title("Loss Function")
plt.legend(["Training Data (CH4)" , "Validation Data (CH4)"])
plt.show()

In [None]:
plt.plot(history2.history["mae"])
#plt.plot(history.history["val_mae"])
plt.title("Error Metrics (MAE)")
plt.legend(["Training Data (CH4)" , "Validation Data (CH4)"])
plt.show()

In [None]:
plt.plot(history2.history["mse"])
#plt.plot(history.history["val_mse"])
plt.title("Error Metrics (MSE)")
plt.legend(["Training Data (CH4)" , "Validation Data (CH4)"])
plt.show()

In [None]:
# Fit on the CO2 split (10 epochs, batch size 256, LR reduction on plateau).
# NOTE(review): this reuses the `model` object that earlier cells already called
# fit() on (ALT, then CH4), so training presumably continues from those weights
# rather than from a fresh initialisation -- confirm intended.
history3 = model.fit(
    trainXscco2ref, trainyscco2ref,
    epochs=10,
    batch_size=256,
    validation_data=(validXscco2ref, validyscco2ref),
    callbacks=[learning_rate_reduction]
)

In [None]:
plt.plot(history3.history["loss"])
#plt.plot(history.history["val_loss"])
plt.title("Loss Function")
plt.legend(["Training Data (CO2)" , "Validation Data (CO2)"])
plt.show()

In [None]:
# MAE curve for the CO2 run: read from history3 (the CO2 fit) and label as CO2.
# The cell previously re-plotted the CH4 history with CH4 labels.
plt.plot(history3.history["mae"])
#plt.plot(history3.history["val_mae"])
plt.title("Error Metrics (MAE)")
plt.legend(["Training Data (CO2)" , "Validation Data (CO2)"])
plt.show()

In [None]:
# MSE curve for the CO2 run: read from history3 (the CO2 fit) and label as CO2.
# The cell previously re-plotted the CH4 history with CH4 labels.
plt.plot(history3.history["mse"])
#plt.plot(history3.history["val_mse"])
plt.title("Error Metrics (MSE)")
plt.legend(["Training Data (CO2)" , "Validation Data (CO2)"])
plt.show()

In [None]:
######## HAVE TO INVERSE_SCALER BEFORE THIS ########

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from math import log

yhat = model.predict(testXscaltref)

In [None]:
# Test-set regression metrics for the ALT predictions in `yhat`.
# reshape(-1, 1) flattens the (samples, 1, 1) target to (samples, 1) and lets
# NumPy infer the row count; the previous literal row count breaks whenever the
# ALT test split has a different size.
# NOTE: both arrays are still in standardised units at this point.
y_true = testyscaltref.reshape(-1, 1)
r2 = r2_score(y_true, yhat)
mse = mean_squared_error(y_true, yhat)
mae = mean_absolute_error(y_true, yhat)
rmse = np.sqrt(mse)

print("MSE :", mse)
print("MAE :" ,mae)
print("RMSE :" ,rmse)
print("R2_score :",r2)

In [None]:
# Predict on the last `n_tail` training samples for a visual comparison with the targets.
n_tail = 10000
x = trainXscaltref[-n_tail:]
print(x.shape)
# Insert the extra singleton axis the model input expects:
# (samples, 1, features) -> (samples, 1, 1, features).
# Sizes come from the array, so this also works when fewer than n_tail samples
# exist or the feature count changes.
x = x.reshape((x.shape[0], 1, 1, x.shape[-1]))
y_pred = model.predict(x)
y = trainyscaltref[-n_tail:]
print(y.shape)

In [None]:
# Overlay predictions and targets for the tail of the training set.
# reshape(-1) flattens the target whatever its length, instead of assuming 10000 rows.
plt.plot(y_pred , "-")
plt.plot(y.reshape(-1))
plt.legend(["pred","actual"])
plt.show()

#### Exploratory Plotting

In [None]:
dups3

In [None]:
dups3_2

In [None]:
dups4

In [None]:
dups4_2

In [None]:
plt.plot(dups3_2['ALT_diff'].replace(-9999,np.nan).dropna().values)

In [None]:
plt.plot(dups4_2['ALT_diff'].replace(-9999,np.nan).dropna().values)

In [None]:
# Per-index-label mean and standard deviation of dups3_2
dups3_by_index = dups3_2.groupby(dups3_2.index)
dups3_mean = dups3_by_index.mean()
dups3_std = dups3_by_index.std()

In [None]:
plt.plot(dups3_mean['ALT'])

In [None]:
plt.plot(dups3_mean['ALT_diff'])

In [None]:
# Per-index-label mean and standard deviation of dups4_2
dups4_by_index = dups4_2.groupby(dups4_2.index)
dups4_mean = dups4_by_index.mean()
dups4_std = dups4_by_index.std()

In [None]:
plt.plot(dups4_mean['ALT'])

In [None]:
plt.plot(dups4_mean['ALT_diff'])

##### Continue

In [None]:
# Standardise ex_mean (neural networks need scaled features to converge) and
# wrap the result in a DataFrame carrying ex_mean's column labels and index.
scale = StandardScaler()
exsc = scale.fit_transform(ex_mean)
exscdf = pd.DataFrame(exsc, index=ex_mean.index, columns=ex_mean.columns)

In [None]:
altdf=pd.DataFrame(ex['ALT'])

In [None]:
plt.plot(ex_mean['ALT']['2002':], label='ALT')
plt.legend()
plt.show()

#ex_mean['ALT'] #1969-2022

In [None]:
plt.plot(ex['ALT']['2002':], label='ALT')
plt.legend()
plt.show()

In [None]:
# Pool the four CH4 series of `ex` into one long single-column frame named 'CH4'.
# The columns are concatenated end-to-end so every value keeps its own timestamp.
# The previous stack()-based version emitted values row by row while the
# hand-built index was laid out column by column, so values were paired with the
# wrong timestamps before sorting.
ch4_cols = ['CH4_1_1_2', 'CH4_1_2_1', 'CH4_1_3_1', 'CH4_1_4_1']
ch4df = pd.concat([ex[col] for col in ch4_cols]).sort_index().to_frame('CH4')

In [None]:
# Pool the four standardised CH4 series of `exscdf` into one long 'CH4' column.
# The columns are concatenated end-to-end so every value keeps its own timestamp.
# The previous stack()-based version emitted values row by row while the
# hand-built index was laid out column by column, so values were paired with the
# wrong timestamps before sorting.
ch4_cols = ['CH4_1_1_2', 'CH4_1_2_1', 'CH4_1_3_1', 'CH4_1_4_1']
ch4dfmean = pd.concat([exscdf[col] for col in ch4_cols]).sort_index().to_frame('CH4')

In [None]:
# Overlay every CH4 series of ex_mean from 2002 onward, one labelled line per column.
for ch4_col in ('CH4_1_1_1', 'CH4_1_1_2', 'CH4_1_2_1', 'CH4_1_3_1', 'CH4_1_4_1'):
    plt.plot(ex_mean[ch4_col]['2002':], label=ch4_col)
plt.legend()
plt.show()

####ex_mean['CH4_1_1_1'] #2002-2022 
#ex_mean['CH4_1_1_2'] #2010-2022 
#ex_mean['CH4_1_2_1'] #2005-2020 
#ex_mean['CH4_1_3_1'] #2002-2020 
#ex_mean['CH4_1_4_1'] #2005-2019 

In [None]:
# Overlay every CH4 series of ex from 2002 onward, one labelled line per column.
for ch4_col in ('CH4_1_1_1', 'CH4_1_1_2', 'CH4_1_2_1', 'CH4_1_3_1', 'CH4_1_4_1'):
    plt.plot(ex[ch4_col]['2002':], label=ch4_col)
plt.legend()
plt.show()

In [None]:
# Pool the three CO2 series of `ex` into one long single-column frame named 'CO2'.
# The columns are concatenated end-to-end so every value keeps its own timestamp.
# The previous stack()-based version emitted values row by row while the
# hand-built index was laid out column by column, so values were paired with the
# wrong timestamps before sorting.
co2_cols = ['CO2_1_2_1', 'CO2_1_3_1', 'CO2_1_4_1']
co2df = pd.concat([ex[col] for col in co2_cols]).sort_index().to_frame('CO2')

In [None]:
# Pool the three standardised CO2 series of `exscdf` into one long 'CO2' column.
# The columns are concatenated end-to-end so every value keeps its own timestamp.
# The previous stack()-based version emitted values row by row while the
# hand-built index was laid out column by column, so values were paired with the
# wrong timestamps before sorting.
co2_cols = ['CO2_1_2_1', 'CO2_1_3_1', 'CO2_1_4_1']
co2dfmean = pd.concat([exscdf[col] for col in co2_cols]).sort_index().to_frame('CO2')

In [None]:
# Overlay every CO2 series of ex_mean from 2002 onward, one labelled line per column.
for co2_col in ('CO2_1_1_1', 'CO2_1_2_1', 'CO2_1_3_1', 'CO2_1_4_1'):
    plt.plot(ex_mean[co2_col]['2002':], label=co2_col)
plt.legend()
plt.show()

####ex_mean['CO2_1_1_1'] #2002-2022 
#ex_mean['CO2_1_2_1'] #2005-2020 
#ex_mean['CO2_1_3_1'] #2002-2020 
#ex_mean['CO2_1_4_1'] #2005-2022

In [None]:
# Overlay every CO2 series of ex from 2002 onward, one labelled line per column.
for co2_col in ('CO2_1_1_1', 'CO2_1_2_1', 'CO2_1_3_1', 'CO2_1_4_1'):
    plt.plot(ex[co2_col]['2002':], label=co2_col)
plt.legend()
plt.show()

In [None]:
# Overlay the standardised ALT, CH4 and CO2 series.
# altdfmean is built here because the cell that otherwise defines it comes after
# this one, which raises NameError on a fresh top-to-bottom run.
altdfmean = pd.DataFrame(exscdf['ALT'])
plt.plot(altdfmean['ALT'], label='ALT')
plt.plot(ch4dfmean['CH4'], label='CH4')
plt.plot(co2dfmean['CO2'], label='CO2')
plt.legend()
plt.show()

In [None]:
# Combine the standardised ALT column with the per-timestamp means of the
# pooled CH4 and CO2 frames, side by side.
altdfmean = exscdf['ALT'].to_frame()
ch4dfmean2 = ch4dfmean.groupby(level=0).mean()
co2dfmean2 = co2dfmean.groupby(level=0).mean()
newdf = pd.concat([altdfmean, ch4dfmean2, co2dfmean2], axis=1)

In [None]:
plt.plot(ch4df['2002':].values) #2002-2022
plt.show()

In [None]:
plt.plot(co2df['2002':].values) #2002-2022
plt.show()

In [None]:
plt.plot(altdf['2002':].values) #2002-2022
plt.show()

In [None]:
#dataf=pd.concat([altdf,ch4df,co2df],axis=1)
# Row-wise concatenation of the ALT, CH4 and CO2 frames. The index is moved into
# an 'index' column by reset_index(), re-assigned as the index, the helper column
# is dropped, and the index name is cleared.
# NOTE(review): dataf['index'] only exists if the concatenated index is unnamed -- confirm.
dataf=pd.concat([altdf,ch4df,co2df]).reset_index()
dataf.index=dataf['index']
dataf=dataf.drop(['index'], axis=1)
dataf.index.name=None

In [None]:
# Replace the datetime index of dataf with its integer calendar year.
# Re-running this cell fails: the integer index has no strftime.
newind = dataf.index.strftime('%Y').astype(int).values
dataf.index = newind

In [None]:
dataf

In [None]:
pd.DataFrame(ex['ALT'])

In [None]:
# Drop the block of columns at positions -87..-67 and the last column of `ex`.
# The second drop is applied to `newex`; it previously restarted from `ex` and so
# silently discarded the first drop.
newex = ex.drop(columns=ex.columns[-87:-66])
newex = newex.drop(columns=ex.columns[-1:])

In [None]:
#plt.plot(ex['FCH4_1'])
#plt.plot(ex['FCH4_2'])
#THOSE BELOW WERE ELIMINATED VIA VIF
#plt.plot(exscdf['FCH4_3'])
#plt.plot(exscdf['CH4_1_1_1'])
#plt.plot(exscdf['CH4_1_3_1'])

In [None]:
# ex['ALT'] #1969-2022 (54 years)
# ex['FCH4_1']['2011':] #2011-2022 (12 years)
# ex['CO2_1_1_1']['2003':'2021'] #2003-2021 (19 years)

In [None]:
# X=newdata['CO2_1_1_1']['2003':'2021'].values
# X1=pd.DataFrame(X[0:round(len(X)/2)]).dropna().values
# X2=pd.DataFrame(X[round(len(X)/2):]).dropna().values
# #X1=X[0:round(len(X)/2)]

# Xl=np.log(newdata['CO2_1_1_1']['2003':'2021']).replace([np.inf,-np.inf], np.nan).dropna().values
# X1l=pd.DataFrame(Xl[:round(len(Xl)/2)])
# X2l=pd.DataFrame(Xl[round(len(Xl)/2):])

#X.shape, Xl.shape
#((2369273,), (2337947,))
#X1.shape, X2.shape
#((1184636, 1), (1184637, 1))
#X1.max(), X1.min(), X2.max(), X2.min()
#(796.6, -7.945502, 986.0, -3.083879976)
#X1l.shape, X2l.shape
#((1168974, 1), (1168973, 1))
#X1l.max(), X1l.min(), X2l.max(), X2l.min()
#(6.680352670747544, -6.437751649736401, 6.893656354602635, -4.000854219134761)

#plt.plot(X)

#plt.plot(Xl)

#adfuller(Xl, autolag='AIC')

#adf=ADF(X,method='aic',low_memory=True)
#adf=ADF(Xl,method='aic',low_memory=True)
# adf=ADF(X,method='aic',low_memory=True)
# print(adf.summary().as_text())
# adf

In [None]:
newdata.iloc[:,-57:-51]

In [None]:
newdata.iloc[:,-57:-51]['CO2_1_1_1'].plot()
#newdata.iloc[:,-57:-51]['CO2_1_3_1'].plot()
#newdata.iloc[:,-57:-51]['CO2_1_4_1'].plot()

In [None]:
newdata.iloc[:,-51:-43]

In [None]:
#newdata.iloc[:,-51:-43]['CH4_1_1_1'].plot()
#newdata.iloc[:,-51:-43]['CH4_1_3_1'].plot()

newdata.iloc[:,-51:-43]['FCH4_1'].plot()
#newdata.iloc[:,-51:-43]['FCH4_2'].plot()
#newdata.iloc[:,-51:-43]['FCH4_3'].plot()

In [None]:
#newdata.iloc[:,-51:-43]['FCH4_1'].plot()
#newdata.iloc[:,-51:-43]['FCH4_2'].plot()
#newdata.iloc[:,-51:-43]['FCH4_3'].plot()

In [None]:
newdata.iloc[:,-1:]

In [None]:
newdata.iloc[:,-1:]['ALT'].plot()

In [None]:
#newdata[['CH4_1_1_1','CH4_1_3_1','FCH4_1','FCH4_2','FCH4_3','CH4_MEAN','CH4_MIXING_RATIO_MEAN','CH4_MIXING_RATIO_STD','CO2_MEAN','CO2_1_1_1','CO2_1_3_1','CO2_1_4_1','soil_[CO2]_5cm','soil_[CO2]_15cm','ALT']].corr()
#rho=newdata[['CH4_1_1_1','CH4_1_3_1','FCH4_1','FCH4_2','FCH4_3','CO2_MEAN','CO2_1_1_1','CO2_1_3_1','CO2_1_4_1','ALT']].corr()
#newdata[['CH4_1_1_1','CH4_1_3_1','FCH4_1','FCH4_2','FCH4_3','CO2_MEAN','CO2_1_1_1','CO2_1_3_1','CO2_1_4_1','ALT']].corr(method=lambda x, y: pearsonr(x, y)[1]) - np.eye(*rho.shape)
#pearsonr(newdata['FCH4_1'],newdata['ALT'])

In [None]:
# Per-column minimum (ls) and maximum (ls2) of newdata, collected in column order.
# np.append onto an empty list keeps the same float promotion as appending one
# value at a time.
n_cols = len(newdata.columns)
ls = np.append([], [newdata.iloc[:, i].min() for i in range(n_cols)])
ls2 = np.append([], [newdata.iloc[:, i].max() for i in range(n_cols)])

In [None]:
n=[0,1,2,11,14,22,28,30]
ls[n]

In [None]:
# newdata.iloc[:,0] #WS_MEAN
# newdata.iloc[:,1] #WD_MEAN
# newdata.iloc[:,2] #CO2_MEAN
# newdata.iloc[:,11] #FCH4_1
# newdata.iloc[:,14] #CH4_MIXING_RATIO_MEAN
# newdata.iloc[:,22] #SW_IN_MEAN
# newdata.iloc[:,28] #G_1_1_1
# newdata.iloc[:,30] #G_1_1_3

In [None]:
#CHANGE LABELS AND SWITCH WS_MEAN AND WD_MEAN!!!

In [None]:
# Successively discard rows with out-of-range values, dropping NaN rows after each step:
# FCH4_1 outside [-500, 500], WD_MEAN below -1725, and negative WS_MEAN.
newdata2=newdata[~(newdata['FCH4_1'] < -500)].dropna()
newdata3=newdata2[~(newdata2['FCH4_1'] > 500)].dropna()
newdata4=newdata3[~(newdata3['WD_MEAN'] < -1725)].dropna()
#newdata4=newdata3[~(newdata3['WD_MEAN'] < -360)].dropna()
#newdata5=newdata4[~(newdata4['WD_MEAN'] > 360)].dropna()
# Continue from newdata4: the newdata5 step above is commented out, so reading
# newdata5 here raised NameError on a fresh kernel.
newdata6=newdata4[~(newdata4['WS_MEAN'] < 0)].dropna()

In [None]:
# Mean wind speed and mean wind direction (a stray trailing character made this a syntax error)
newdata.WS_MEAN.mean(), newdata.WD_MEAN.mean()

In [None]:
#plt.hist(newdata3['WD_MEAN'].values)
#plt.hist(newdata3[~(newdata3['WD_MEAN'] < -1725)].dropna().WD_MEAN.values)
#plt.hist(newdata3[~(newdata3['WD_MEAN'] > 1500)].dropna().WD_MEAN.values)
plt.hist(newdata3['WS_MEAN'].values)
#plt.hist(newdata3[~(newdata3['WD_MEAN'] < -1725)].dropna().WD_MEAN.values)

In [None]:
# Keep only rows whose WD_MEAN lies in (-361, 360]; drop NaN rows after each step.
newdata5=newdata4[~(newdata4['WD_MEAN'] <= -361)].dropna()
# The negation was missing here, which kept only the rows above 360 -- the
# opposite of the filter used on the line above and in the cells that follow.
newdata6=newdata5[~(newdata5['WD_MEAN'] > 360)].dropna()

In [None]:
newdata[~(newdata['WD_MEAN'] > 360)].WD_MEAN

In [None]:
len(newdata['WD_MEAN'])

In [None]:
newdata[~(newdata['WD_MEAN'] <= -361)].WD_MEAN

In [None]:
newdata4[(newdata4['WD_MEAN'] < 360)].dropna().WD_MEAN.plot()

In [None]:
# Successively discard rows holding out-of-range values for each variable,
# dropping NaN rows after every step; the final result is newdata10.
#newdata=newdata.drop(columns=['ALT_diff','CO2_diff','CH4_diff'],axis=1)
newdata2=newdata[~(newdata['WD_MEAN'] <= -361)].dropna()
newdata3=newdata2[~(newdata2['WS_MEAN'] <= -1993)].dropna()
newdata4=newdata3[~(newdata3['CO2_MEAN'] <= 0)].dropna()
# Continue from newdata4: the chain has no newdata5 step, so reading newdata5
# here picked up a stale frame from an earlier cell (or raised NameError).
newdata6=newdata4[~(newdata4['CH4_MIXING_RATIO_MEAN'] <= -1018)].dropna()
newdata7=newdata6[~(newdata6['SW_IN_MEAN'] <= -6)].dropna()
newdata8=newdata7[~(newdata7['G_1_1_1'] <= 0)].dropna()
newdata9=newdata8[~(newdata8['G_1_1_3'] <= 0)].dropna()
newdata10=newdata9[~(newdata9['CO2_MEAN'] > 586)]
# Free the intermediates created by this cell.
# NOTE(review): later cells still read newdata3, which is deleted here -- confirm
# the intended execution order.
del newdata2, newdata3, newdata4, newdata6, newdata7, newdata8, newdata9

In [None]:
newdata[~(newdata['WD_MEAN'] <= -361)]

In [None]:
newdata[~(newdata['WD_MEAN'] <= -1725)].dropna().WD_MEAN.min()

In [None]:
newdata3.max()

In [None]:
#newdata10[~(newdata10['CO2_MEAN'] < 0)].dropna().CO2_MEAN.min()
# Show both bounds: smallest FCH4_1 above -500 and largest FCH4_1 not above 500.
# Returned as one tuple because a notebook cell only displays its last expression,
# so the minimum was previously computed and discarded.
(
    newdata[~(newdata['FCH4_1'] <= -500)].dropna().FCH4_1.min(),
    newdata[~(newdata['FCH4_1'] > 500)].dropna().FCH4_1.max(),
)

In [None]:
sns.kdeplot(newdata.WD_MEAN)

In [None]:
plt.plot(newdata3.FCH4_1.values)

In [None]:
plt.hist(newdata3['FCH4_1'].values)

In [None]:
newdata3['FCH4_1'].std()

In [None]:
newdata.drop(columns=['CO2_MEAN'],axis=1)

## Model Tuning

### Archived

#### Archived

In [None]:
Xalt.shape

In [None]:
# Build symbolic Keras inputs from the reframed ALT, CH4 and CO2 training arrays.
# `inputs` is rebound three times, so only the CO2 version survives, and only the
# final `inputs.shape` is displayed by the notebook.
inputs = keras.Input(shape=(trainXscaltref.shape[1],trainXscaltref.shape[2]))#, X_train_reframed_sup.shape[2]))#, X_train_reframed_sup.shape[3]))
inputs.shape

# NOTE(review): unlike the ALT and CO2 lines, the CH4 shape also includes
# shape[0] (the number of samples) -- confirm whether that is intended.
inputs = keras.Input(shape=(trainXscch4ref.shape[0], trainXscch4ref.shape[1],trainXscch4ref.shape[2]))#, Xch4train.shape[3], Xch4train.shape[4]))
inputs.shape

inputs = keras.Input(shape=(trainXscco2ref.shape[1],trainXscco2ref.shape[2]))#, X_train_reframed_sup.shape[2]))#, X_train_reframed_sup.shape[3]))
inputs.shape

In [None]:
validx.shape, validy.shape

In [None]:
inputs = keras.Input(shape=(trainx.shape[0], trainx.shape[1],trainx.shape[4], trainx.shape[2], trainx.shape[3]))
inputs.shape

In [None]:
targets = keras.Input(shape=(trainy.shape[0],trainy.shape[1],trainy.shape[4], trainy.shape[2],trainy.shape[3]))
targets.shape

In [None]:
inputs2 = keras.Input(shape=(validx.shape[0],validx.shape[1],validx.shape[4], validx.shape[2],validx.shape[3]))
inputs2.shape

In [None]:
targets2 = keras.Input(shape=(validy.shape[0],validy.shape[1],validy.shape[4], validy.shape[2],validy.shape[3]))
targets2.shape

In [None]:
inputs3 = keras.Input(shape=(testx.shape[0],testx.shape[1],testx.shape[4], testx.shape[2],testx.shape[3]))
inputs3.shape

In [None]:
targets3 = keras.Input(shape=(testy.shape[0],testy.shape[1],testy.shape[4], testy.shape[2],testy.shape[3]))
targets3.shape

In [None]:
inputs.shape, trainx.shape, targets.shape, trainy.shape

In [None]:
validx.shape, validy.shape

In [None]:
trainx=tf.reshape(trainx,[1,trainx.shape[0], trainx.shape[1],trainx.shape[4],trainx.shape[2], trainx.shape[3]])

In [None]:
validx=tf.reshape(validx,[1,validx.shape[0], validx.shape[1],validx.shape[4], validx.shape[2], validx.shape[3]])

In [None]:
testx=tf.reshape(testx,[1,testx.shape[0], testx.shape[1],testx.shape[4], testx.shape[2], testx.shape[3]])

In [None]:
trainy=tf.reshape(trainy,[1,trainy.shape[0], trainy.shape[1],trainy.shape[4], trainy.shape[2], trainy.shape[3]])

In [None]:
# Prepend a batch axis to validy, using validy's own dimensions throughout
# (the second entry previously read trainy.shape[0], a copy-paste slip that makes
# the target shape inconsistent with validy's element count).
validy=tf.reshape(validy,[1,validy.shape[0], validy.shape[1],validy.shape[4], validy.shape[2], validy.shape[3]])

In [None]:
testy=tf.reshape(testy,[1,testy.shape[0], testy.shape[1],testy.shape[4], testy.shape[2], testy.shape[3]])

In [None]:
trainXscrefr.shape

#### Start

In [None]:
print(list(divisorGenerator(132*8*8291)))

In [None]:
initinputs=Xtrainsc_sup.values.reshape(8291, 53, 94, 20, 1)#15,30,7129,154,1)

In [None]:
initinputs.shape

In [None]:
#keras.Input(shape=(inputs.shape[0],inputs.shape[1], inputs.shape[2],inputs.shape[3], inputs.shape[4]))#, df2.shape[5]))
#inputs

In [None]:
# model.layers[0].input_shape

In [None]:
# If data_format='channels_first': 6D tensor with shape (samples, time, channels, rows, cols, depth)
# Input shape:
# If data_format='channels_last': 6D tensor with shape (samples <keras.Input supplies this batch axis as None>,
# time, rows, cols, depth, channels)

None, 382, None, 552, 4, 6, 128 <br>
timestep, width, height, number of spectral bands, kernel size, and depth

#### Start here

In [None]:
# Load the pickled object stored at a fixed local path into `df`.
# NOTE(review): pickle.load can execute code embedded in the file, so only use
# it on pickles from a trusted source. The absolute /Users/... path also ties
# this cell to one machine; os.path.join with a single argument returns it
# unchanged.
file = os.path.join('/Users/bgay/Downloads/tcfmch4.pkl')
with open(file, 'rb') as f:
    df=pickle.load(f)

In [None]:
df

In [None]:
file = os.path.join('/Users/bgay/Downloads/tcfmch4.pkl')
with open(file, 'rb') as f:
    df2=pickle.load(f)

In [None]:
df2

In [None]:
df2=df.resample('D').mean()

In [None]:
plt.plot(df2.ch4)

In [None]:
df

In [None]:
df2

In [None]:
ch4=series_to_supervised(df2,30,30)

In [None]:
ch4=ch4.sort_index()

In [None]:
df2

In [None]:
ch4

In [None]:
df3=df2.values

In [None]:
X, y = split_sequence(df3, 365)

In [None]:
X.shape, y.shape

In [None]:
# reshape from [samples, timesteps] into [samples, timesteps, features]
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))

In [None]:
X.shape, y.shape

In [None]:
ch4

In [None]:
ch4.iloc[:,-30:]

#### Prepare

In [None]:
df2

In [None]:
# Ordered (shuffle=False) split: the last 10% of rows become the test set, then
# the last 20% of the remainder becomes the validation set.
# NOTE(review): the second argument is list(range(df.shape[0])), so the
# trainy/validy/testy outputs hold row positions rather than values taken from
# the data - confirm this is the intended target.
trainX,testX,trainy,testy = train_test_split(df, list(range(df.shape[0])), test_size=0.1, shuffle=False)
trainX, validX, trainy, validy = train_test_split(trainX, trainy, test_size=0.2, shuffle=False)

In [None]:
# Turn each target sequence into a column vector of shape (n_samples, 1).
# The row count is inferred with -1 so the cell keeps working when the size of
# the input frame (and therefore of each split) changes.
trainy=np.array(trainy).reshape(-1,1)
validy=np.array(validy).reshape(-1,1)
testy=np.array(testy).reshape(-1,1)

In [None]:
# Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.1, random_state=1)
# Xtrain, Xvalid, ytrain, yvalid = train_test_split(Xtrain, ytrain, test_size=0.05, random_state=1) # 0.25 x 0.8 = 0.2

In [None]:
# trainX, testX, trainy, testy = train_test_split(df2.iloc[:,:-365], df2.iloc[:,-365:], test_size=0.1)
# trainX, validX, trainy, validy = train_test_split(trainX, trainy, test_size=0.2)

In [None]:
# trainX, trainy=df.T.iloc[:,:3285],df.T.iloc[:,3285:]
# validX, validy=df.T.iloc[:,3285:4380],df.T.iloc[:,3285:4380]
# testX, testy=df.T.iloc[:,:-365],df.T.iloc[:,-365:]

In [None]:
# Report the shape of every split, one per line, in train / valid / test order.
print(trainX.shape)
print(trainy.shape)
print(validX.shape)
print(validy.shape)
print(testX.shape)
print(testy.shape)

In [None]:
# Need to scale the features for neural networks, otherwise the training doesn't converge.
scaler = StandardScaler()
trainXsc = scaler.fit_transform(trainX)

In [None]:
validXsc = scaler.transform(validX)

In [None]:
testXsc = scaler.transform(testX)

In [None]:
# Report the shape of every scaled feature matrix next to its target array.
print(trainXsc.shape)
print(trainy.shape)
print(validXsc.shape)
print(validy.shape)
print(testXsc.shape)
print(testy.shape)

In [None]:
file = os.path.join('/Users/bgay/tcfm_ch4_2.pkl')
with open(file, 'rb') as f:
    ch4=pickle.load(f)

In [None]:
ch4_2=ch4.unstack()

In [None]:
del ch4

In [None]:
ch4_2

In [None]:
del ch4_2

In [None]:
ch4_2.y.shape, ch4_2.x.shape

In [None]:
2188*871

In [None]:
# Append two singleton axes so every split becomes 4-D: (rows, columns, 1, 1).
# The row count is read from each array and the second axis is inferred with
# -1, so the cell no longer depends on the literal sizes of one particular
# dataset. For the shapes that were previously hard-coded the result is the same.
trainXscref=trainXsc.reshape(trainXsc.shape[0], -1, 1, 1);
trainyref=trainy.reshape(trainy.shape[0], -1, 1, 1);
validXscref=validXsc.reshape(validXsc.shape[0], -1, 1, 1);
validyref=validy.reshape(validy.shape[0], -1, 1, 1);
testXscref=testXsc.reshape(testXsc.shape[0], -1, 1, 1);
testyref=testy.reshape(testy.shape[0], -1, 1, 1);

In [None]:
trainXscref.shape, trainyref.shape, validXscref.shape, validyref.shape, testXscref.shape, testyref.shape

In [None]:
trainXscref=np.expand_dims(trainXscref,axis=-1)
trainyref=np.expand_dims(trainyref,axis=-1)
validXscref=np.expand_dims(validXscref,axis=-1)
validyref=np.expand_dims(validyref,axis=-1)
testXscref=np.expand_dims(testXscref,axis=-1)
testyref=np.expand_dims(testyref,axis=-1)

In [None]:
# trainXscrefr=np.expand_dims(trainXscref,2)
# trainyrefr=np.expand_dims(trainyref,2)
# validXscrefr=np.expand_dims(validXscref,2)
# validyrefr=np.expand_dims(validyref,2)
# testXscrefr=np.expand_dims(testXscref,2)
# testyrefr=np.expand_dims(testyref,2)

In [None]:
trainXscref.shape

In [None]:
trainyref.shape

#### Windowing

In [None]:
train_df=pd.DataFrame(trainXch4.reshape(3285,1856*35*35))
train_num_features = train_df.shape[1]
valid_df=pd.DataFrame(validXch4.reshape(730,1856*35*35))
valid_num_features = valid_df.shape[1]
test_df=pd.DataFrame(testXch4.reshape(730,1856*35*35))
test_num_features = test_df.shape[1]

In [None]:
class WindowGenerator():
  """Describe how a run of consecutive time steps is cut into inputs and labels.

  A window spans `input_width + shift` steps. The inputs are the first
  `input_width` steps; the labels are the last `label_width` steps.

  Args:
    input_width: number of leading steps used as model input.
    label_width: number of trailing steps used as labels.
    shift: number of steps added after the inputs to reach the window end.
    train_df, val_df, test_df: frames stored on the instance; they default to
      the module-level `train_df`, `valid_df` and `test_df` as they exist when
      this class statement runs.
    label_columns: optional iterable of column names to keep as labels.
  """

  def __init__(self, input_width, label_width, shift,
               train_df=train_df, val_df=valid_df, test_df=test_df,
               label_columns=None):
    # Raw data, kept by reference.
    self.train_df, self.val_df, self.test_df = train_df, val_df, test_df

    # Name -> position lookups. `label_columns_indices` only exists when
    # label columns were requested.
    self.label_columns = label_columns
    if label_columns is not None:
      self.label_columns_indices = dict(
          (name, position) for position, name in enumerate(label_columns))
    self.column_indices = dict(
        (name, position) for position, name in enumerate(train_df.columns))

    # Window geometry.
    self.input_width, self.label_width, self.shift = input_width, label_width, shift
    self.total_window_size = input_width + shift

    # Inputs: the leading `input_width` positions of the window.
    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.total_window_size)[self.input_slice]

    # Labels: everything from `label_start` to the end of the window.
    self.label_start = self.total_window_size - self.label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

  def __repr__(self):
    """Return a four-line summary of the window layout."""
    summary_lines = (
        f'Total window size: {self.total_window_size}',
        f'Input indices: {self.input_indices}',
        f'Label indices: {self.label_indices}',
        f'Label column name(s): {self.label_columns}',
    )
    return '\n'.join(summary_lines)

In [None]:
#predict next day
w1 = WindowGenerator(input_width=3284, label_width=1, shift=1)
w1

In [None]:
w1.input_width

In [None]:
train_df.shape, trainXch4.shape

In [None]:
def split_window(self, features):
  """Cut a batch of windows into its input part and its label part.

  Args:
    features: 3-D tensor indexed as [batch, time, feature].

  Returns:
    `(inputs, labels)`: `features` sliced along the time axis with
    `self.input_slice` and `self.labels_slice`. When `self.label_columns` is
    set, only those feature columns are kept in `labels`.
  """
  inputs, labels = (features[:, self.input_slice, :],
                    features[:, self.labels_slice, :])

  if self.label_columns is not None:
    # Pick each requested column by position and re-stack them on the last axis.
    picked = []
    for name in self.label_columns:
      picked.append(labels[:, :, self.column_indices[name]])
    labels = tf.stack(picked, axis=-1)

  # Slicing drops the static shape, so restore the known time-axis lengths to
  # keep the resulting `tf.data.Datasets` easy to inspect.
  inputs.set_shape([None, self.input_width, None])
  labels.set_shape([None, self.label_width, None])

  return inputs, labels

WindowGenerator.split_window = split_window

In [None]:
# Build one example tensor laid out as (batch, time, features) and split it
# into inputs and labels with the single-step window `w1`.
# The middle axis is inferred with -1: train_df holds 3285 rows of 1856*35*35
# values, which cannot be reshaped to a literal [1856, 3321, 1225].
# NOTE(review): a plain reshape reinterprets the buffer without moving axes;
# confirm this ordering really is (batch, time, features) for this data.
example_window = tf.reshape(train_df, [1856, -1, 1225])

example_inputs, example_labels = w1.split_window(example_window)

print('All shapes are: (batch, time, features)')
print(f'Window shape: {example_window.shape}')
print(f'Inputs shape: {example_inputs.shape}')
print(f'Labels shape: {example_labels.shape}')


# # Stack three slices, the length of the total window.
# example_window = tf.stack([np.array(trainXch4df[-w1.total_window_size:]),
#                            np.array(trainXch4df[2-w1.total_window_size:]),
#                            np.array(trainXch4df[3-w1.total_window_size:])])

# example_inputs, example_labels = w1.split_window(example_window)

# print('All shapes are: (batch, time, features)')
# print(f'Window shape: {example_window.shape}')
# print(f'Inputs shape: {example_inputs.shape}')
# print(f'Labels shape: {example_labels.shape}')

In [None]:
# Cache the example batch under `_example`, the attribute that the `example`
# property reads. Assigning to `w1.example` directly stops working once that
# read-only property is attached to WindowGenerator (re-running this cell
# raises AttributeError, and the value stored earlier is shadowed).
w1._example = example_inputs, example_labels

In [None]:
def make_dataset(self, data):
  """Turn an array-like of time steps into a dataset of (inputs, labels) windows.

  `data` is converted to float32, cut into consecutive windows of
  `self.total_window_size` steps (stride 1, no shuffling, one window per
  batch), and each batch is passed through `self.split_window`.
  """
  windows = tf.keras.utils.timeseries_dataset_from_array(
      data=np.array(data, dtype=np.float32),
      targets=None,
      sequence_length=self.total_window_size,
      sequence_stride=1,
      shuffle=False,
      batch_size=1,)

  return windows.map(self.split_window)

WindowGenerator.make_dataset = make_dataset

In [None]:
@property
def train(self):
  """Windowed dataset built from `self.train_df`; rebuilt on every access."""
  return self.make_dataset(self.train_df)

@property
def val(self):
  """Windowed dataset built from `self.val_df`; rebuilt on every access."""
  return self.make_dataset(self.val_df)

@property
def test(self):
  """Windowed dataset built from `self.test_df`; rebuilt on every access."""
  return self.make_dataset(self.test_df)

@property
def example(self):
  """Return one `(inputs, labels)` batch for plotting, cached after first use."""
  cached = getattr(self, '_example', None)
  if cached is not None:
    return cached
  # Nothing cached yet: take the first batch of the `.train` dataset and
  # remember it for later calls.
  first_batch = next(iter(self.train))
  self._example = first_batch
  return first_batch

WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test
WindowGenerator.example = example

In [None]:
# Pull one batch from the training windows and report the shapes the pipeline
# actually yields, rather than printing fixed literal lists.
for example_inputs, example_labels in w1.train.take(1):
    print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
    print(f'Labels shape (batch, time, features): {example_labels.shape}')

In [None]:
single_step_window = WindowGenerator(
    input_width=3284, label_width=1, shift=1)
single_step_window

In [None]:
for example_inputs, example_labels in single_step_window.train.take(1):
  print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
  print(f'Labels shape (batch, time, features): {example_labels.shape}')

In [None]:
class Baseline(tf.keras.Model):
  """Model with no trainable logic that returns its input as the prediction.

  Args:
    label_index: optional index on the last axis. When given, only that
      feature is returned, with a trailing axis of length 1 added back.
  """

  def __init__(self, label_index=None):
    super().__init__()
    self.label_index = label_index

  def call(self, inputs):
    """Return `inputs`, or the selected feature as a 3-D tensor."""
    if self.label_index is not None:
      selected = inputs[:, :, self.label_index]
      return selected[:, :, tf.newaxis]
    return inputs

In [None]:
# Instantiate the pass-through baseline and score it on the validation and
# test windows; results are stored under the 'Baseline' key of each dict.
# NOTE(review): `column_indices` is not defined at module level anywhere in
# this section (it only appears as a WindowGenerator attribute), and
# Baseline.call uses label_index directly as an index on the last axis -
# confirm which index was intended here.
baseline = Baseline(label_index=column_indices)

baseline.compile(loss=tf.keras.losses.MeanSquaredError(),
                 metrics=[tf.keras.metrics.MeanAbsoluteError()])

val_performance = {}
performance = {}
val_performance['Baseline'] = baseline.evaluate(single_step_window.val)
performance['Baseline'] = baseline.evaluate(single_step_window.test, verbose=0)

In [None]:
wide_window = WindowGenerator(
    input_width=2920, label_width=24, shift=1)

wide_window

In [None]:
print('Input shape:', wide_window.example[0].shape)
print('Output shape:', baseline(wide_window.example[0]).shape)

### Hyperparameters

#### In Situ Modeling

In [None]:
# Search space for the in-situ model. Every entry is registered on one shared
# `HyperParameters` container, in a fixed order; the module-level names keep
# whatever each registration call returns.
hp = HyperParameters()

# Option lists shared by several entries.
_ACTIVATIONS = ['relu', 'sigmoid', 'hard_sigmoid', 'tanh', 'exponential', 'gelu',
                'elu', 'linear', 'selu', 'softmax', 'swish']
_DROPOUT_RATES = [0.1, 0.2, 0.3, 0.4]


def _conv_space(prefix):
    """Register `<prefix>_filters`, `<prefix>_kernel_size`, `<prefix>_activation` (in that order)."""
    return (
        hp.Int(f"{prefix}_filters", min_value=hp['units'], max_value=2*hp['units']),
        hp.Int(f"{prefix}_kernel_size", min_value=1, max_value=5, step=1),
        hp.Choice(f"{prefix}_activation", _ACTIVATIONS),
    )


def _recurrent_space(prefix):
    """Register units, activation, recurrent activation, dropout and recurrent dropout for `<prefix>`."""
    return (
        hp.Int(f"{prefix}_units", min_value=hp['units'], max_value=2*hp['units']),
        hp.Choice(f"{prefix}_activation", _ACTIVATIONS),
        hp.Choice(f"{prefix}_rec_activation", _ACTIVATIONS),
        hp.Choice(f"{prefix}_dropout", _DROPOUT_RATES),
        hp.Choice(f"{prefix}_rec_dropout", _DROPOUT_RATES),
    )


def _bias_space(prefix):
    """Register the two boolean switches `<prefix>_use_bias` and `<prefix>_forgot_bias`."""
    return hp.Boolean(f"{prefix}_use_bias"), hp.Boolean(f"{prefix}_forgot_bias")


# Global entries. The upper bound for `units` is read from the ALT training
# tensor; the alternatives used previously were trainXscch4ref.shape[2] and
# trainXscco2ref.shape[2].
units = hp.Int("units", min_value=32, max_value=trainXscaltref.shape[2], step=32)
batch_size = hp.Int("batch_size", min_value=32, max_value=512, step=32)
learning_rate = hp.Choice("learning_rate", [1e-1, 1e-2, 1e-3, 1e-4])

# First convolution.
conv1d_filters, conv1d_kernel_size, conv1d_activation = _conv_space("conv1d")

# Bidirectional LSTM layers 1 and 2.
(bilstm_units, bilstm_activation, bilstm_rec_activation,
 bilstm_dropout, bilstm_rec_dropout) = _recurrent_space("bilstm")
bilstm_bias, bilstm_f_bias = _bias_space("bilstm")

(bilstm2_units, bilstm2_activation, bilstm2_rec_activation,
 bilstm2_dropout, bilstm2_rec_dropout) = _recurrent_space("bilstm2")
bilstm2_bias, bilstm2_f_bias = _bias_space("bilstm2")

# Plain LSTM layers 1 and 2 (no bias switches).
(lstm_units, lstm_activation, lstm_rec_activation,
 lstm_dropout, lstm_rec_dropout) = _recurrent_space("lstm")

(lstm2_units, lstm2_activation, lstm2_rec_activation,
 lstm2_dropout, lstm2_rec_dropout) = _recurrent_space("lstm2")

# Bidirectional LSTM layers 3 and 4.
(bilstm3_units, bilstm3_activation, bilstm3_rec_activation,
 bilstm3_dropout, bilstm3_rec_dropout) = _recurrent_space("bilstm3")
bilstm3_bias, bilstm3_f_bias = _bias_space("bilstm3")

(bilstm4_units, bilstm4_activation, bilstm4_rec_activation,
 bilstm4_dropout, bilstm4_rec_dropout) = _recurrent_space("bilstm4")
bilstm4_bias, bilstm4_f_bias = _bias_space("bilstm4")

# Second convolution.
conv1d2_filters, conv1d2_kernel_size, conv1d2_activation = _conv_space("conv1d2")

#### Archive

In [None]:
# 1-D convolutional autoencoder assembled with the Keras functional API.
# NOTE(review): `input_vec`, Conv1D, MaxPooling1D, UpSampling1D and Model are
# not defined or imported in this cell - confirm they exist before it runs.
# Encoder: 16-filter conv -> pool by 2 -> 8-filter conv.
x = Conv1D(16, 3, activation='relu', padding='same')(input_vec)
x = MaxPooling1D(2, padding='same')(x)
encoded = Conv1D(8, 3, activation='relu', padding='same')(x)

# Decoder: 8-filter conv -> upsample by 2 -> single-filter sigmoid conv.
x = Conv1D(8, 3, activation='relu', padding='same')(encoded)
x = UpSampling1D(2)(x)
decoded = Conv1D(1, 3, activation='sigmoid', padding='same')(x)

# Map the input to its reconstruction and train with Adam and binary cross-entropy.
autoencoder = Model(input_vec, decoded)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
# conv1d_filters=hp.Int("conv1d_filters",min_value=hp['units'],max_value=2*hp['units'])
# conv1d_kernel_size=hp.Int("conv1d_kernel_size", min_value = 3, max_value = 6, step=1)
# conv1d_activation=hp.Choice("conv1d_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

# conv1d2_filters=hp.Int("conv1d2_filters", min_value = hp['units'], max_value=2*hp['units'])
# conv1d2_kernel_size=hp.Int("conv1d2_kernel_size", min_value = 3, max_value = 6, step=1)
# conv1d2_activation=hp.Choice("conv1d2_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

# conv2d3_filters=hp.Int("conv2d3_filters",min_value=hp['units'],max_value=2*hp['units'])
# conv2d3_kernel_size=hp.Int("conv2d3_kernel_size", min_value = 3, max_value = 6, step=1)
# conv2d3_activation=hp.Choice("conv2d3_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

# conv2d4_filters=hp.Int("conv2d4_filters", min_value = 64, max_value=2*hp['units'])
# conv2d4_kernel_size=hp.Int("conv2d4_kernel_size", min_value = 3, max_value = 6, step=1)
# conv2d4_activation=hp.Choice("conv2d4_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

# conv2d5_filters=hp.Int("conv2d5_filters", min_value = 64, max_value=2*hp['units'])
# conv2d5_kernel_size=hp.Int("conv2d5_kernel_size", min_value = 3, max_value = 6, step=1)
# conv2d5_activation=hp.Choice("conv2d5_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

# bilstm_units = hp.Int("bilstm_units", min_value = hp['units'], max_value = 2*hp['units'])
# bilstm_activation=hp.Choice("bilstm_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
# bilstm_rec_activation=hp.Choice("bilstm_rec_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
# bilstm_dropout=hp.Choice("bilstm_dropout", [0.1, 0.2, 0.3, 0.4])
# bilstm_rec_dropout=hp.Choice("bilstm_rec_dropout", [0.1, 0.2, 0.3, 0.4])
# bilstm_bias=hp.Boolean("bilstm_use_bias")
# bilstm_f_bias=hp.Boolean("bilstm_forgot_bias")

# bilstm2_units = hp.Int("bilstm2_units", min_value = hp['units'], max_value = 2*hp['units'])
# bilstm2_activation=hp.Choice("bilstm2_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
# bilstm2_rec_activation=hp.Choice("bilstm2_rec_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
# bilstm2_dropout=hp.Choice("bilstm2_dropout", [0.1, 0.2, 0.3, 0.4])
# bilstm2_rec_dropout=hp.Choice("bilstm2_rec_dropout", [0.1, 0.2, 0.3, 0.4])
# bilstm2_bias=hp.Boolean("bilstm2_use_bias")
# bilstm2_f_bias=hp.Boolean("bilstm2_forgot_bias")

# bilstm3_units = hp.Int("bilstm3_units", min_value = hp['units'], max_value = 2*hp['units'])
# bilstm3_activation=hp.Choice("bilstm3_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
# bilstm3_rec_activation=hp.Choice("bilstm3_rec_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
# bilstm3_dropout=hp.Choice("bilstm3_dropout", [0.1, 0.2, 0.3, 0.4])
# bilstm3_rec_dropout=hp.Choice("bilstm3_rec_dropout", [0.1, 0.2, 0.3, 0.4])
# bilstm3_bias=hp.Boolean("bilstm3_use_bias")
# bilstm3_f_bias=hp.Boolean("bilstm3_forgot_bias")

# bilstm4_units = hp.Int("bilstm4_units", min_value = hp['units'], max_value = 2*hp['units'])
# bilstm4_activation=hp.Choice("bilstm4_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
# bilstm4_rec_activation=hp.Choice("bilstm4_rec_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
# bilstm4_dropout=hp.Choice("bilstm4_dropout", [0.1, 0.2, 0.3, 0.4])
# bilstm4_rec_dropout=hp.Choice("bilstm4_rec_dropout", [0.1, 0.2, 0.3, 0.4])
# bilstm4_bias=hp.Boolean("bilstm4_use_bias")
# bilstm4_f_bias=hp.Boolean("bilstm4_forgot_bias")

# lstm_units = hp.Int("lstm_units", min_value = hp['units'], max_value = 2*hp['units'])
# lstm_activation=hp.Choice("lstm_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
# lstm_rec_activation=hp.Choice("lstm_rec_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
# lstm_dropout=hp.Choice("lstm_dropout", [0.1, 0.2, 0.3, 0.4])
# lstm_rec_dropout=hp.Choice("lstm_rec_dropout", [0.1, 0.2, 0.3, 0.4])

# lstm2_units = hp.Int("lstm2_units", min_value = hp['units'], max_value = 2*hp['units'])
# lstm2_activation=hp.Choice("lstm2_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
# lstm2_rec_activation=hp.Choice("lstm2_rec_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
# lstm2_dropout=hp.Choice("lstm2_dropout", [0.1, 0.2, 0.3, 0.4])
# lstm2_rec_dropout=hp.Choice("lstm2_rec_dropout", [0.1, 0.2, 0.3, 0.4])

In [None]:
# inputs = keras.Input(shape=(trainXscch4ref.shape[1], trainXscch4ref.shape[2]))
hp = HyperParameters()
units=hp.Int("units", min_value=32, max_value = 256, step=32)#inputs.shape[2], step=32)
batch_size=hp.Int("batch_size", min_value = 1, max_value = 256, step = 32)
learning_rate=hp.Choice("learning_rate", [1e-1, 1e-2, 1e-3, 1e-4, 1e-5])
#inputs2 = keras.Input(shape=(alt_Xtrainsc.shape[1], alt_Xtrainsc.shape[2]))
#units2=hp.Int("units2", min_value=64, max_value = inputs2.shape[2], step=64)
#padding=hp.Choice("padding", ['valid','same','causal'])
#n_layers=hp.Int("n_layers", min_value = 1, max_value = 9, step=3)
#n_layers=hp.Int("n_layers", min_value = 5, max_value = 15) 
#batch_size=hp.Int("batch_size", min_value = 32, max_value = 256, step = 32)

conv3d_filters=hp.Int("conv3d_filters",min_value=hp['units'],max_value=2*hp['units'])
conv3d_kernel_size=hp.Int("conv3d_kernel_size", min_value = 5, max_value = 6, step=1)
conv3d_activation=hp.Choice("conv3d_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

conv3d2_filters=hp.Int("conv3d2_filters", min_value = hp['units'], max_value=2*hp['units'])
conv3d2_kernel_size=hp.Int("conv3d2_kernel_size", min_value = 3, max_value = 6, step=1)
conv3d2_activation=hp.Choice("conv3d2_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

conv3d3_filters=hp.Int("conv3d3_filters", min_value = hp['units'], max_value=2*hp['units'])
conv3d3_kernel_size=hp.Int("conv3d3_kernel_size", min_value = 1, max_value = 6, step=1)
conv3d3_activation=hp.Choice("conv3d3_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

conv3d4_filters=hp.Int("conv3d4_filters",min_value=hp['units'],max_value=2*hp['units'])
conv3d4_kernel_size=hp.Int("conv3d4_kernel_size", min_value = 3, max_value = 6, step=1)
conv3d4_activation=hp.Choice("conv3d4_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

In [None]:
# inputs = keras.Input(shape=(trainXscch4ref.shape[1], trainXscch4ref.shape[2]))
hp = HyperParameters()
units=hp.Int("units", min_value=32, max_value = inputs.shape[2], step=16)
batch_size=hp.Int("batch_size", min_value = 32, max_value = 256, step = 32)
learning_rate=hp.Choice("learning_rate", [1e-1, 1e-2, 1e-3, 1e-4, 1e-5])
#inputs2 = keras.Input(shape=(alt_Xtrainsc.shape[1], alt_Xtrainsc.shape[2]))
#units2=hp.Int("units2", min_value=64, max_value = inputs2.shape[2], step=64)
#padding=hp.Choice("padding", ['valid','same','causal'])
#n_layers=hp.Int("n_layers", min_value = 1, max_value = 9, step=3)
#n_layers=hp.Int("n_layers", min_value = 5, max_value = 15) 
#batch_size=hp.Int("batch_size", min_value = 32, max_value = 256, step = 32)

conv1d_filters=hp.Int("conv1d_filters",min_value=hp['units'],max_value=2*hp['units'])
conv1d_kernel_size=hp.Int("conv1d_kernel_size", min_value = 3, max_value = 6, step=1)
conv1d_activation=hp.Choice("conv1d_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

conv1d2_filters=hp.Int("conv1d2_filters", min_value = hp['units'], max_value=2*hp['units'])
conv1d2_kernel_size=hp.Int("conv1d2_kernel_size", min_value = 3, max_value = 6, step=1)
conv1d2_activation=hp.Choice("conv1d2_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

# conv2d3_filters=hp.Int("conv2d3_filters",min_value=hp['units'],max_value=2*hp['units'])
# conv2d3_kernel_size=hp.Int("conv2d3_kernel_size", min_value = 3, max_value = 6, step=1)
# conv2d3_activation=hp.Choice("conv2d3_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

# conv2d4_filters=hp.Int("conv2d4_filters", min_value = 64, max_value=2*hp['units'])
# conv2d4_kernel_size=hp.Int("conv2d4_kernel_size", min_value = 3, max_value = 6, step=1)
# conv2d4_activation=hp.Choice("conv2d4_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

# conv2d5_filters=hp.Int("conv2d5_filters", min_value = 64, max_value=2*hp['units'])
# conv2d5_kernel_size=hp.Int("conv2d5_kernel_size", min_value = 3, max_value = 6, step=1)
# conv2d5_activation=hp.Choice("conv2d5_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])

# The `hp` container created at the top of this cell is reused for the
# recurrent-layer entries below. Creating a second HyperParameters() here
# would drop the conv1d_* / conv1d2_* entries registered just above; `units`,
# `batch_size` and `learning_rate` are already registered with these same
# settings at the top of the cell.

bilstm_units = hp.Int("bilstm_units", min_value = hp['units'], max_value = 2*hp['units'])
bilstm_activation=hp.Choice("bilstm_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
bilstm_rec_activation=hp.Choice("bilstm_rec_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
bilstm_dropout=hp.Choice("bilstm_dropout", [0.1, 0.2, 0.3, 0.4])
bilstm_rec_dropout=hp.Choice("bilstm_rec_dropout", [0.1, 0.2, 0.3, 0.4])
bilstm_bias=hp.Boolean("bilstm_use_bias")
bilstm_f_bias=hp.Boolean("bilstm_forgot_bias")

bilstm2_units = hp.Int("bilstm2_units", min_value = hp['units'], max_value = 2*hp['units'])
bilstm2_activation=hp.Choice("bilstm2_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
bilstm2_rec_activation=hp.Choice("bilstm2_rec_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
bilstm2_dropout=hp.Choice("bilstm2_dropout", [0.1, 0.2, 0.3, 0.4])
bilstm2_rec_dropout=hp.Choice("bilstm2_rec_dropout", [0.1, 0.2, 0.3, 0.4])
bilstm2_bias=hp.Boolean("bilstm2_use_bias")
bilstm2_f_bias=hp.Boolean("bilstm2_forgot_bias")

bilstm3_units = hp.Int("bilstm3_units", min_value = hp['units'], max_value = 2*hp['units'])
bilstm3_activation=hp.Choice("bilstm3_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
bilstm3_rec_activation=hp.Choice("bilstm3_rec_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
bilstm3_dropout=hp.Choice("bilstm3_dropout", [0.1, 0.2, 0.3, 0.4])
bilstm3_rec_dropout=hp.Choice("bilstm3_rec_dropout", [0.1, 0.2, 0.3, 0.4])
bilstm3_bias=hp.Boolean("bilstm3_use_bias")
bilstm3_f_bias=hp.Boolean("bilstm3_forgot_bias")

bilstm4_units = hp.Int("bilstm4_units", min_value = hp['units'], max_value = 2*hp['units'])
bilstm4_activation=hp.Choice("bilstm4_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
bilstm4_rec_activation=hp.Choice("bilstm4_rec_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
bilstm4_dropout=hp.Choice("bilstm4_dropout", [0.1, 0.2, 0.3, 0.4])
bilstm4_rec_dropout=hp.Choice("bilstm4_rec_dropout", [0.1, 0.2, 0.3, 0.4])
bilstm4_bias=hp.Boolean("bilstm4_use_bias")
bilstm4_f_bias=hp.Boolean("bilstm4_forgot_bias")

lstm_units = hp.Int("lstm_units", min_value = hp['units'], max_value = 2*hp['units'])
lstm_activation=hp.Choice("lstm_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
lstm_rec_activation=hp.Choice("lstm_rec_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
lstm_dropout=hp.Choice("lstm_dropout", [0.1, 0.2, 0.3, 0.4])
lstm_rec_dropout=hp.Choice("lstm_rec_dropout", [0.1, 0.2, 0.3, 0.4])

lstm2_units = hp.Int("lstm2_units", min_value = hp['units'], max_value = 2*hp['units'])
lstm2_activation=hp.Choice("lstm2_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
lstm2_rec_activation=hp.Choice("lstm2_rec_activation", ['relu','sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish'])
lstm2_dropout=hp.Choice("lstm2_dropout", [0.1, 0.2, 0.3, 0.4])
lstm2_rec_dropout=hp.Choice("lstm2_rec_dropout", [0.1, 0.2, 0.3, 0.4])

In [None]:
# # We will construct 3 `ConvLSTM2D` layers with batch normalization,
# # followed by a `Conv3D` layer for the spatiotemporal outputs.
# x = layers.ConvLSTM2D(
#     filters=conv2d_filters,
#     kernel_size=(conv2d_kernel_size,conv2d_kernel_size),
#     padding="same",
#     return_sequences=True,
#     activation=conv2d_activation,
# )(inp)
# x = layers.BatchNormalization()(x)
# x = layers.ConvLSTM2D(
#     filters=conv2d2_filters,
#     kernel_size=(conv2d2_kernel_size, conv2d2_kernel_size),
#     padding="same",
#     return_sequences=True,
#     activation=conv2d2_activation,
# )(x)
# x = layers.BatchNormalization()(x)
# x = layers.ConvLSTM2D(
#     filters=conv2d3_filters,
#     kernel_size=(conv2d3_kernel_size, conv2d3_kernel_size),
#     padding="same",
#     return_sequences=True,
#     activation=conv2d3_activation,
# )(x)
# x = layers.Conv3D(
#     filters=conv3d_filters, kernel_size=(conv3d_kernel_size, conv3d_kernel_size, conv3d_kernel_size), activation=conv3d_activation, padding="same"
# )(x)

# # model.compile(loss='categorical_crossentropy',
# #               optimizer=keras.optimizers.Adam(learning_rate=0.0001),
# #               metrics=['accuracy'])
# # model.summary()

In [None]:
# # Next, we will build the complete model and compile it.
# model = keras.models.Model(inp, x)
# model.compile(
#     loss=keras.losses.binary_crossentropy, optimizer=tf.keras.optimizers.legacy.Adam(),
# )

# # Define some callbacks to improve training.
# early_stopping = keras.callbacks.EarlyStopping(monitor="val_loss", patience=10)
# reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=5)

# # Define modifiable training hyperparameters.
# epochs = 20
# batch_size = 5

# # Fit the model to the training data.
# model.fit(
#     trainXch4,
#     trainych4,
#     batch_size=batch_size,
#     epochs=epochs,
#     validation_data=(x_val, y_val),
#     callbacks=[early_stopping, reduce_lr],
# )


In [None]:
# # Normalize the inputs
# inputs=tf.keras.Input(shape=((1, 279)))
# normalization_layer = tf.keras.layers.experimental.preprocessing.Normalization()
# normalization_layer.adapt(trainXscaltref)

# b=tf.keras.layers.Input([5])

# z=tf.keras.layers.BatchNormalization(trainXscaltref.flatten().tolist())

# tf.keras.Model(inputs=input, outputs=z)

#### Model

In [None]:
trainXscaltref.shape
#trainXscch4ref.shape
#trainXscco4ref.shape

In [None]:
class geocryoai(HyperModel):
    """Tunable Conv1D -> (Bi)LSTM -> RepeatVector -> (Bi)LSTM -> Conv1DTranspose stack.

    ``build`` takes every layer setting (filters, kernel sizes, units,
    activations, dropouts, bias flags) and the Adam learning rate from ``hp``;
    ``fit`` takes the batch size from ``hp``.

    NOTE(review): ``build`` also reads the notebook globals ``trainXscaltref``
    and ``trainyscaltref`` to size RepeatVector and the Dense heads, so the
    class is tied to the ALT arrays unless those references are edited.
    """
    def build(self, hp):
        """Assemble and compile the Sequential model described by ``hp``.

        Parameters
        ----------
        hp : hyperparameter container indexed by name (``hp['conv1d_filters']``,
            ``hp.get('learning_rate')``, ...). Every key used below must already
            be registered on it.

        Returns
        -------
        The compiled model (Adam optimizer, ``'mse'`` loss, ``'mae'``/``'mse'`` metrics).
        """
        # Only used as a carrier for the (1, 279) input shape handed to the layers below.
        # NOTE(review): hard-coded; presumably meant to match trainXscaltref.shape[1:] — confirm.
        inputs=tf.keras.Input(shape=((1, 279)))
        model = Sequential([
            # --- convolution front end ---
            #tf.keras.layers.Conv1D(filters=256,kernel_size=(1),activation = hp['conv1d_activation'],padding='same',input_shape=(inputs.shape[1], inputs.shape[2])),
            tf.keras.layers.Conv1D(filters=hp['conv1d_filters'], kernel_size=(hp['conv1d_kernel_size']),activation = hp['conv1d_activation'], 
                                   padding='same',input_shape=(inputs.shape[1], inputs.shape[2])),
            # --- two sequence-returning bidirectional LSTMs ---
            #tf.keras.layers.Bidirectional(LSTM(input_shape=(inputs.shape[1], inputs.shape[2]), return_sequences = True, units = 128, activation = hp['bilstm_activation'], 
            tf.keras.layers.Bidirectional(LSTM(input_shape=(inputs.shape[1], inputs.shape[2]), return_sequences = True, units = hp['bilstm_units'], 
                                               activation = hp['bilstm_activation'], recurrent_activation = hp['bilstm_rec_activation'], 
                                               dropout=hp['bilstm_dropout'], use_bias = hp['bilstm_use_bias'], 
                                               unit_forget_bias = hp['bilstm_forgot_bias'], recurrent_dropout = hp['bilstm_rec_dropout'])),
            #tf.keras.layers.Bidirectional(LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = True, units = 32,activation = hp['bilstm2_activation'], 
            tf.keras.layers.Bidirectional(LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = True,  units = hp['bilstm2_units'], 
                                               activation = hp['bilstm2_activation'],recurrent_activation = hp['bilstm2_rec_activation'], 
                                               dropout=hp['bilstm2_dropout'], use_bias = hp['bilstm2_use_bias'], 
                                               unit_forget_bias = hp['bilstm2_forgot_bias'], recurrent_dropout = hp['bilstm2_rec_dropout'])),
            # --- bottleneck: return_sequences=False collapses the sequence to a single vector ---
            #tf.keras.layers.LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = False, units = 8,activation = hp['lstm_activation'], 
            tf.keras.layers.LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = False, units=hp['lstm_units'], 
                                 activation = hp['lstm_activation'],recurrent_activation=hp['lstm_rec_activation'],dropout=hp['lstm_dropout'], 
                                 recurrent_dropout = hp['lstm_rec_dropout']),
            # Repeat the bottleneck vector trainXscaltref.shape[1] times to feed the sequence-returning layers below.
            tf.keras.layers.RepeatVector(trainXscaltref.shape[1]),
            #model.add(RepeatVector(trainXscaltref.shape[1]))   #TUNE THIS (LAYERS) WHEN ADDING SATELLITE AND MODELING DATA
            # --- LSTM followed by two bidirectional LSTMs, all returning sequences ---
            #tf.keras.layers.LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = True, units = 8,activation = hp['lstm2_activation'], 
            tf.keras.layers.LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = True, units=hp['lstm2_units'], 
                                 activation = hp['lstm2_activation'],recurrent_activation= hp['lstm2_rec_activation'], dropout=hp['lstm2_dropout'], 
                                 recurrent_dropout = hp['lstm2_rec_dropout']),
            #tf.keras.layers.Bidirectional(LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = True, units = 32,activation = hp['bilstm3_activation'], 
            tf.keras.layers.Bidirectional(LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = True, units = hp['bilstm3_units'], 
                                               activation = hp['bilstm3_activation'], recurrent_activation = hp['bilstm3_rec_activation'], 
                                               dropout=hp['bilstm3_dropout'], use_bias = hp['bilstm3_use_bias'], 
                                               unit_forget_bias = hp['bilstm3_forgot_bias'], recurrent_dropout = hp['bilstm3_rec_dropout'])),
            #tf.keras.layers.Bidirectional(LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = True, units = 128,activation = hp['bilstm4_activation'], 
            tf.keras.layers.Bidirectional(LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = True, units = hp['bilstm4_units'], 
                                               activation = hp['bilstm4_activation'], recurrent_activation = hp['bilstm4_rec_activation'], 
                                               dropout=hp['bilstm4_dropout'], use_bias = hp['bilstm4_use_bias'], 
                                               unit_forget_bias = hp['bilstm4_forgot_bias'], recurrent_dropout = hp['bilstm4_rec_dropout'])),
            # --- transposed convolution and Dense heads ---
            #tf.keras.layers.Conv1DTranspose(filters=256, kernel_size=(1), activation = hp['conv1d2_activation'], padding='same', input_shape=(inputs.shape[1], inputs.shape[2])),
            tf.keras.layers.Conv1DTranspose(filters=hp['conv1d2_filters'], kernel_size=hp['conv1d2_kernel_size'], activation = hp['conv1d2_activation'], 
                                            padding='same', input_shape=(inputs.shape[1], inputs.shape[2])),
            # Head widths come from the ALT target/input arrays; the commented lines are the CH4 / CO2 equivalents.
            tf.keras.layers.TimeDistributed(Dense(trainyscaltref.shape[1])),
            #tf.keras.layers.TimeDistributed(Dense(trainyscch4ref.shape[1])),
            #tf.keras.layers.TimeDistributed(Dense(trainyscco2ref.shape[1])),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dense(trainXscaltref.shape[1])
            #tf.keras.layers.Dense(trainXscch4ref.shape[1])
            #tf.keras.layers.Dense(trainXscco2ref.shape[1])
        ])
        # Earlier compile variants (RMSprop with clipvalue, RSquare metric, fixed Adam rate) kept for reference.
        #metrics=['mean_squared_error', 'mean_absolute_error', RSquare().build(input_shape = (trainyscaltref.shape[1],))]
        #loss_function = 'mean_squared_error'
        #model.compile(optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate = hp.get('learning_rate'), **{"clipvalue" : 1000}),loss = loss_function, metrics = metrics)
        #model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001), loss = 'mse', metrics = ['mae','mse'], )
        model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=hp.get('learning_rate')), loss = 'mse', metrics = ['mae','mse'], )
        return model

    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` with the batch size stored in ``hp``; all other arguments are forwarded to ``model.fit``."""
        #return model.fit(*args, batch_size = 512, **kwargs) #hp['batch_size'], **kwargs)
        return model.fit(*args, batch_size = hp.get('batch_size'), **kwargs) #hp['batch_size'], **kwargs)

In [None]:
# Build the tunable architecture once, outside the tuner, and print its layer summary.
# `build` is an instance method, so it is called on a `geocryoai()` instance. The
# earlier form `geocryoai.build(trainXscaltref, hp)` bound the training array to
# `self`, which only worked because `build` never touches `self`.
# The data set is not an argument of `build`: the class sizes its layers from the
# `trainXscaltref` / `trainyscaltref` globals, so switching to CH4 or CO2 means
# editing those references inside the class, not changing this call.
geocryoai().build(hp).summary()

In [None]:
# Bayesian hyperparameter search over the geocryoai hypermodel, ranked by validation loss.
bayesian_tuner = BayesianOptimization(
    hypermodel=geocryoai(),
    hyperparameters=hp,
    objective="val_loss",
    max_trials=100,
    # Number of initial points evaluated before the Bayesian model takes over
    # (author's note: the library default is 3x the dimensionality of the search space).
    num_initial_points=64,
    # Expected amount of noise in the observed performances (author tried 0.01 and 0.0001).
    alpha=0.01,
    # Exploration/exploitation balance; larger is more explorative (author tried 10 and 2.6).
    beta=10,
    # Identifiers used to label this search run.
    tuner_id="BayesianOptimization_093023_ALT",
    project_name="Bayesian_optimization_093023_ALT",
)

In [None]:
# Multiply the learning rate by 0.75 when val_loss has not improved for 2 epochs,
# never going below 1e-4; verbose=1 logs each reduction.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.75,
    patience=2,
    min_lr=0.0001,
    verbose=1,
)

In [None]:
# Stop once val_loss has failed to improve by at least 1e-4 for 2 epochs and
# roll the model back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-4,
    patience=2,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
trainXscaltref.shape, trainyscaltref.shape
#trainXscch4ref.shape, trainyscch4ref.shape
#trainXscco2ref.shape, trainyscco2ref.shape

In [None]:
#ALT
# ALT hyperparameter search.
# Positional arguments are the training inputs/targets; everything else is passed
# on as fit keyword arguments. To search on CH4 or CO2 instead, swap in the
# matching trainXsc*/trainysc* and validXsc*/validysc* arrays.
bayesian_tuner.search(
    trainXscaltref,
    trainyscaltref,
    validation_data=(validXscaltref, validyscaltref),
    # Only the learning-rate scheduler is active; earlier runs also tried
    # early_stopping, History(), TerminateOnNaN() and a TensorBoard callback.
    callbacks=[learning_rate_reduction],
    shuffle=False,
    verbose=1,  # 1 = progress bar, 2 = one line per epoch, 0 = silent
    use_multiprocessing=True,
)

In [None]:
bayesian_tuner.results_summary()

In [None]:
#HP ALT
# BEST MODEL: TRIAL24
# Trial 27 Complete [01h 42m 02s]
# val_loss: 0.10911799222230911

# Best val_loss So Far: 0.10056757181882858
# Total elapsed time: 16h 43m 38s

# Search: Running Trial #28

# Value             |Best Value So Far |Hyperparameter
# 0.0001            |0.0001            |learning_rate
# 64                |64                |batch_size
# 64                |64                |units
# 91                |96                |conv1d_filters
# 3                 |9                 |conv1d_kernel_size
# relu              |swish             |conv1d_activation
# 107               |97                |bilstm_units
# swish             |relu              |bilstm_activation
# relu              |relu              |bilstm_rec_activation
# 0.1               |0.4               |bilstm_dropout
# 0.1               |0.1               |bilstm_rec_dropout
# False             |False             |bilstm_use_bias
# True              |True              |bilstm_forgot_bias
# 88                |64                |lstm_units
# gelu              |relu              |lstm_activation
# softmax           |relu              |lstm_rec_activation
# 0.1               |0.1               |lstm_dropout
# 0.1               |0.1               |lstm_rec_dropout
# 128               |128               |lstm2_units
# relu              |relu              |lstm2_activation
# relu              |relu              |lstm2_rec_activation
# 0.1               |0.1               |lstm2_dropout
# 0.4               |0.4               |lstm2_rec_dropout
# 128               |128               |bilstm2_units
# relu              |relu              |bilstm2_activation
# swish             |sigmoid           |bilstm2_rec_activation
# 0.4               |0.4               |bilstm2_dropout
# 0.4               |0.4               |bilstm2_rec_dropout
# True              |True              |bilstm2_use_bias
# False             |False             |bilstm2_forgot_bias
# 91                |88                |conv1d2_filters
# 9                 |9                 |conv1d2_kernel_size
# swish             |swish             |conv1d2_activation

In [None]:
# Trial 80 Complete [00h 01m 45s]
# val_loss: 1.0023637507876304

# Best val_loss So Far: 0.19112859968453239
# Total elapsed time: 02h 04m 01s

# Search: Running Trial #81

# Value             |Best Value So Far |Hyperparameter
# 128               |32                |units
# 1                 |33                |batch_size
# 1e-05             |0.01              |learning_rate
# same              |causal            |padding
# 38                |48                |conv3d_filters
# 5                 |6                 |conv3d_kernel_size
# gelu              |gelu              |conv3d_activation
# 50                |44                |conv3d2_filters
# 3                 |4                 |conv3d2_kernel_size
# swish             |gelu              |conv3d2_activation
# 57                |36                |conv3d3_filters
# 4                 |4                 |conv3d3_kernel_size
# relu              |linear            |conv3d3_activation
# 42                |40                |conv3d4_filters
# 4                 |4                 |conv3d4_kernel_size
# tanh              |elu               |conv3d4_activation

In [None]:
# Hyperparameters:
# units: 32
# batch_size: 97
# learning_rate: 0.001
# conv3d_filters: 62
# conv3d_kernel_size: 5
# conv3d_activation: exponential
# conv3d2_filters: 33
# conv3d2_kernel_size: 5
# conv3d2_activation: relu
# conv3d3_filters: 33
# conv3d3_kernel_size: 6
# conv3d3_activation: linear
# conv3d4_filters: 61
# conv3d4_kernel_size: 3
# conv3d4_activation: sigmoid
# Score: 0.9776239395141602

In [None]:
####ALT
inputs=tf.keras.Input(shape=((1, 279)))
model = Sequential([
    tf.keras.layers.Conv1D(filters=256,kernel_size=(1),activation = 'elu',padding='same', input_shape=(inputs.shape[1], inputs.shape[2])),
    tf.keras.layers.Bidirectional(LSTM(input_shape=(inputs.shape[1], inputs.shape[2]), return_sequences = True, units = 128, activation = 'swish', 
                                       dropout=0.3)),
    tf.keras.layers.Bidirectional(LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = True, units = 32, activation = 'tanh', 
                                       dropout=0.3)),                                               
    tf.keras.layers.LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = False, units = 8, activation = 'softmax', dropout=0.3),
    tf.keras.layers.RepeatVector(trainXscaltref.shape[1]),
    tf.keras.layers.LSTM(input_shape=(inputs.shape[1], inputs.shape[2]), return_sequences = True, units = 8, activation = 'selu', dropout=0.2),
    tf.keras.layers.Bidirectional(LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = True, units = 32, activation = 'selu', 
                                       dropout=0.2)),
    tf.keras.layers.Bidirectional(LSTM(input_shape=(inputs.shape[1], inputs.shape[2]),return_sequences = True, units = 128, activation = 'linear', 
                                       dropout=0.1)),
    tf.keras.layers.Conv1DTranspose(filters=256, kernel_size=(1), activation = 'swish', padding='same', input_shape=(inputs.shape[1], inputs.shape[2])),
    tf.keras.layers.Dense(inputs.shape[1]),#trainXscaltref.shape[1]),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(inputs.shape[1])#trainyscaltref.shape[1])
])
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.1), loss = 'mse', metrics = ['mae','mse'])
model.summary()

In [None]:
# Render the current `model` as a layer diagram (with tensor shapes and layer names)
# and write it to `img_file`, relative to the working directory.
# The commented names are the CH4 / CO2 variants of the output file.
img_file = 'GeoCryoAI_Arch_093023_insituALT.png'
#img_file = 'GeoCryoAI_Arch_093023_insituCH4.png'
#img_file = 'GeoCryoAI_Arch_093023_insituCO2.png'
# The trailing semicolon suppresses the cell's return-value display.
tf.keras.utils.plot_model(model, to_file=img_file, show_shapes=True, show_layer_names=True, dpi=1000);

In [None]:
# model serialization
model_json = model.to_json()
with open("model_093023_insituALT.json", "w") as json_file:
#with open("model_093023_insituCH4.json", "w") as json_file:
#with open("model_093023_insituCO2.json", "w") as json_file:
    json_file.write(model_json)
# weight serialization
model.save_weights("model_093023_insituALT.h5")
#model.save_weights("model_093023_insituCH4.h5")
#model.save_weights("model_093023_insituCO2.h5")
print("Saved model to disk")

In [None]:
import tensorboard
tensorboard.__version__
# Clear any logs from previous runs
#rm -rf ./logs/
# Load the TensorBoard notebook extension.
#%load_ext tensorboard
%reload_ext tensorboard

import os

# Every run directory is created under ./logs, relative to the working directory.
root_logdir = os.path.join(os.curdir, 'logs')


def get_run_logdir():
    """Return a path under ``root_logdir`` named after the current local time.

    The final path component has the form ``run_YYYY_MM_DD-HH_MM_SS``.
    Nothing is created on disk; only the path string is built.
    """
    from time import strftime

    return os.path.join(root_logdir, strftime('run_%Y_%m_%d-%H_%M_%S'))


run_logdir = get_run_logdir()

# Timestamped directory for this run's TensorBoard event files; histogram_freq=1
# asks for weight histograms every epoch.
# `datetime` must be the class here (the notebook header does
# `from datetime import datetime`), not the module.
# NOTE(review): `run_logdir`, built earlier in this cell, is not used; the callback
# writes to `log_dir` instead. Confirm which of the two locations is intended.
log_dir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
# Callback objects for training: TensorBoard logging, best-weights checkpointing,
# early stopping, and learning-rate reduction on a val_loss plateau.
# NOTE(review): the checkpoint file name says "ch4" / 091123 although the fit cells
# that follow start with the ALT arrays; confirm the intended target.
# NOTE(review): the three `model.fit` cells below pass only `learning_rate_reduction`
# as a callback, so none of the objects created here is used by them.
filepath="weights_geocryoai.best_091123_ch4.hdf5"
tensorboard_cb = keras.callbacks.TensorBoard('/tmp/tb_logs')

# Keep only the weights with the lowest val_loss seen so far.
checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
# Rebinds `early_stopping` (previously created with patience=2) to a patience-10 version.
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', verbose = 1, patience = 10, min_delta = 1e-4, restore_best_weights = True)
reduce_loss = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=5)

In [None]:
# Train the fixed network on the ALT arrays and report the wall-clock duration.
start_time = time.time()
historyalt = model.fit(
    x=trainXscaltref,
    y=trainyscaltref,
    validation_data=(validXscaltref, validyscaltref),
    batch_size=256,
    epochs=100,
    shuffle=False,  # keep the samples in their original order
    callbacks=[learning_rate_reduction],
)
elapsed_time = time.time() - start_time
print("\nThe first network took {} s to complete training.".format(round(elapsed_time)))

In [None]:
plt.plot(historyalt.history['loss'])

In [None]:
# Train on the CH4 arrays and report the wall-clock duration.
# NOTE(review): this fits the same `model` object that the ALT cell already trained,
# without rebuilding or recompiling it, so training presumably resumes from the
# ALT-trained weights and optimizer state. Rebuild the model first if an
# independent CH4 network is intended.
start_time = time.time()
historych4 = model.fit(
    trainXscch4ref, trainyscch4ref,
    epochs=100,
    batch_size=256,
    validation_data=(validXscch4ref, validyscch4ref),
    shuffle=False,  # keep the samples in their original order
    callbacks=[learning_rate_reduction]
)
elapsed_time = time.time() - start_time
# The message used to say "first network" (copied from the ALT cell); name the run it times.
print("\nThe CH4 network took {} s to complete training.".format(round(elapsed_time)))

In [None]:
plt.plot(historych4.history['loss'])

In [None]:
# Train on the CO2 arrays and report the wall-clock duration.
# NOTE(review): this fits the same `model` object used by the preceding fit cells,
# without rebuilding or recompiling it, so training presumably resumes from the
# previously trained weights and optimizer state. Rebuild the model first if an
# independent CO2 network is intended.
start_time = time.time()
historyco2 = model.fit(
    trainXscco2ref, trainyscco2ref,
    epochs=100,
    batch_size=256,
    validation_data=(validXscco2ref, validyscco2ref),
    shuffle=False,  # keep the samples in their original order
    callbacks=[learning_rate_reduction]
)
elapsed_time = time.time() - start_time
# The message used to say "first network" (copied from the ALT cell); name the run it times.
print("\nThe CO2 network took {} s to complete training.".format(round(elapsed_time)))

In [None]:
plt.plot(historyco2.history['loss'])

In [None]:
# Persist the per-epoch metrics of the ALT training run as a pickle.
# The ALT fit above stores its result in `historyalt`; the previous reference to
# `history` is not defined at this point on a fresh top-to-bottom run.
# For the CH4 / CO2 runs, dump `historych4.history` / `historyco2.history` to a
# correspondingly named file instead.
# NOTE(review): the destination is an absolute, machine-specific path; consider a
# configurable output directory.
with open('/Users/bgay/Downloads/trainHistoryALT-091723', 'wb') as file_pi:
    pickle.dump(historyalt.history, file_pi)

In [None]:
# Convert the ALT training history dict to a pandas DataFrame (one row per epoch).
# The ALT fit above stores its result in `historyalt`; the previous reference to
# `history` is not defined at this point on a fresh top-to-bottom run.
hist_df = pd.DataFrame(historyalt.history)

# NOTE(review): both destinations are absolute, machine-specific paths; consider a
# configurable output directory.

# Save to JSON (pandas opens and closes the file itself when given a path).
hist_json_file = '/Users/bgay/Downloads/historyalt_091723.json'
hist_df.to_json(hist_json_file)

# Also save to CSV.
hist_csv_file = '/Users/bgay/Downloads/historyalt_091723.csv'
hist_df.to_csv(hist_csv_file)

In [None]:
# Plot two per-epoch curves from `history2` on one axis: the 'mean_squared_error'
# metric (labelled as training loss) and 'val_loss'.
# NOTE(review): `history2` is not created in the cells around this one, and the
# fixed model above is compiled with metrics ['mae', 'mse'], which presumably does
# not log a 'mean_squared_error' key. Confirm which run this figure belongs to.
fig,ax = plt.subplots(figsize=(6,4), dpi=100)
l1=ax.plot(history2.history['mean_squared_error'], color='dodgerblue', linestyle='solid', label='CH4 Flux Training Loss (mgCH4m-2d-1)')
l2=ax.plot(history2.history['val_loss'], color='magenta', linestyle='solid', label='CH4 Flux Validation Loss (mgCH4m-2d-1)')
# Optional secondary axis for overlaying predictions (disabled).
#ax2=ax.twinx();
#ln4=ax2.plot(validPredict, color='coral', linestyle='dotted')

# Build the legend from the line handles so that extra series can be appended to `lines`.
lines = l1 + l2#+ ln2 + ln3 #+ ln4 #+ ln5# ln4 + ln5 + ln6# + ln7 + ln8
labs = [line.get_label() for line in lines];
ax.legend(lines, labs, loc='best', fontsize=8)#'lower left', fontsize=8)

ax.grid(linewidth=0.3);
ax.set_xlabel('Epochs', labelpad=8, fontsize=8);
# NOTE(review): the y-label says 'Training Loss' although a validation curve is plotted too.
ax.set_ylabel('Training Loss', labelpad=8, fontsize=8)
#ax.set(xticklabels=[])  # remove the tick labels
#ax.tick_params(left=False)  # remove the ticks
plt.title('GeoCryoAI TCFM-Arctic Module | ConvLSTM2D Autoencoder Loss Function \n CH4 Flux Simulations (2003-2015)', pad=10, fontsize=10)

#plt.xlabel('Year')
#plt.axis([0, 6, 0, 60])
#plt.legend(loc='best')
plt.show()
# If the figure should be saved, call savefig before plt.show().
#plt.savefig('/Users/bradleygay/Downloads/bilstmae_insitu_CO2_loss.png',dpi=1000)

In [None]:
# class geocryoai(HyperModel):
#     def build(self, hp):
#         #backend.clear_session()
#         #inputs = keras.Input(shape=(X_train_reframed.shape[1], X_train_reframed.shape[2]))
#         model = tf.keras.Sequential()
#         #for i in range(n_layers):
#         model.add(Conv1D(
#             filters=hp['conv1d_filters'], 
#             kernel_size=hp['conv1d_kernel_size'], 
#             activation = hp['conv1d_activation'],
#             padding='same', 
#             input_shape=(inputs.shape[1], inputs.shape[2])))
#         # model.add(MaxPool1D(pool_size=1))
#         #for i in range(hp['n_layers']): #TUNE THIS (LAYERS) WHEN ADDING SATELLITE AND MODELING DATA
#         model.add(Bidirectional(LSTM(
#             input_shape=(inputs.shape[1], inputs.shape[2]),
#             return_sequences = True, 
#             #kernel_initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.00001),
#             #name = f"1BiLSTM_layer_{i+1}",
#             units = hp['bilstm_units'],
#             activation = hp['bilstm_activation'],
#             #recurrent_activation = hp['bilstm_rec_activation'],
#             use_bias = hp['bilstm_use_bias'],
#             unit_forget_bias = hp['bilstm_forgot_bias'],
#             dropout=hp['bilstm_dropout'],
#             recurrent_dropout = hp['bilstm_rec_dropout'])))
#         model.add(Bidirectional(LSTM(
#             input_shape=(inputs.shape[1], inputs.shape[2]),
#             return_sequences = True, 
#             #kernel_initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.00001),
#             #name = f"1BiLSTM_layer_{i+1}",
#             units = hp['bilstm2_units'],
#             activation = hp['bilstm2_activation'],
#             #recurrent_activation = hp['bilstm2_rec_activation'],
#             use_bias = hp['bilstm2_use_bias'],
#             unit_forget_bias = hp['bilstm2_forgot_bias'],
#             dropout=hp['bilstm2_dropout'],
#             recurrent_dropout = hp['bilstm2_rec_dropout'])))
#         #for i in range(hp['n_layers']):
#         model.add(LSTM(
#             units=hp['lstm_units'],
#             activation = hp['lstm_activation'],
#             #recurrent_activation=hp['lstm_rec_activation'],
#             return_sequences=False, 
#             dropout=hp['lstm_dropout'],
#             recurrent_dropout=hp['lstm_rec_dropout'],
#             input_shape=(inputs.shape[1], inputs.shape[2])))
#         #model.add(Bidirectional(LSTM(inputs.shape[-1], activation='relu', return_sequences = False, dropout=0, input_shape=(inputs.shape[1], inputs.shape[2]))))
#         model.add(RepeatVector(inputs.shape[1]))   #TUNE THIS (LAYERS) WHEN ADDING SATELLITE AND MODELING DATA
#         model.add(LSTM(
#             units=hp['lstm2_units'], 
#             activation = hp['lstm2_activation'],
#             #recurrent_activation= hp['lstm2_rec_activation'],
#             return_sequences=True,
#             dropout=hp['lstm2_dropout'],
#             recurrent_dropout=hp['lstm2_rec_dropout'],
#             input_shape=(inputs.shape[1], inputs.shape[2])))
#         #model.add(Bidirectional(LSTM(inputs.shape[-1], activation='relu', return_sequences = True, dropout=0, input_shape=(inputs.shape[1], inputs.shape[2])))) 
#         model.add(Bidirectional(LSTM(
#             input_shape=(inputs.shape[1], inputs.shape[2]),
#             return_sequences = True, 
#             #kernel_initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.00001),
#             #name = f"2BiLSTM_layer_{i+1}",
#             units = hp['bilstm3_units'],
#             activation = hp['bilstm3_activation'],
#             #recurrent_activation = hp['bilstm3_rec_activation'],
#             use_bias = hp['bilstm3_use_bias'],
#             unit_forget_bias = hp['bilstm3_forgot_bias'],
#             dropout=hp['bilstm3_dropout'],
#             recurrent_dropout = hp['bilstm3_rec_dropout'])))
#         model.add(Bidirectional(LSTM(
#             input_shape=(inputs.shape[1], inputs.shape[2]),
#             return_sequences = hp['bilstm4_units'], 
#             #kernel_initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.00001),
#             #name = f"2BiLSTM_layer_{i+1}",
#             units = 32,
#             activation = hp['bilstm4_activation'],
#             #recurrent_activation = hp['bilstm4_rec_activation'],
#             use_bias = hp['bilstm4_use_bias'],
#             unit_forget_bias = hp['bilstm4_forgot_bias'],
#             dropout=hp['bilstm4_dropout'],
#             recurrent_dropout = hp['bilstm4_rec_dropout'])))
#         model.add(Conv1DTranspose(
#            filters=hp['conv1d2_filters'], 
#            kernel_size=hp['conv1d2_kernel_size'], 
#            activation = hp['conv1d2_activation'],
#            padding='same', 
#            input_shape=(inputs.shape[1], inputs.shape[2])))
#         model.add(TimeDistributed(Dense(trainyscalt.shape[1])))
#         #model.add(TimeDistributed(Dense(trainyscch4ref.shape[1])))
#         #model.add(TimeDistributed(Dense(trainyscco2ref.shape[1])))
#         model.add(Dense(trainXscaltref.shape[1]))
#         #model.add(Dense(trainXscch4ref.shape[1]))
#         #model.add(Dense(trainXscco2ref.shape[1]))
#         metrics=['mean_squared_error', 'mean_absolute_error', RSquare().build(input_shape = (trainyscalt.shape[1],))]
#         #metrics=['mean_squared_error', 'mean_absolute_error', RSquare().build(input_shape = (trainyscch4ref.shape[1],))]
#         #metrics=['mean_squared_error', 'mean_absolute_error', RSquare().build(input_shape = (trainyscco2ref.shape[1],))]
#         loss_function = 'mean_squared_error'
#         model.compile(optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate = hp.get('learning_rate'), **{"clipvalue" : 1000}),loss = loss_function, metrics = metrics)
#         return model
    
#     def fit(self, hp, model, *args, **kwargs):
#         return model.fit(*args, batch_size = hp.get('batch_size'), **kwargs) #hp['batch_size'], **kwargs)

In [None]:
inputs=tf.keras.Input(shape=((1, 279)))

In [None]:
# Single-unit Dense layer applied to `inputs`.
# NOTE(review): softmax normalises across the units of the layer, so with a single
# unit every output equals 1.0; confirm whether 'sigmoid' or 'linear' was intended.
# `outputs` is not referenced by the cells that follow.
outputs=tf.keras.layers.Dense(1, activation='softmax')(inputs)

In [None]:
#ALT
class geocryoai(HyperModel):
    """Fixed-value version of the Conv1D / (Bi)LSTM stack, wrapped as a HyperModel.

    Every layer setting, the learning rate and the batch size are literals;
    ``hp`` is accepted by ``build`` and ``fit`` but never read.

    NOTE(review): this class has the same name as the tunable ``geocryoai``
    defined earlier in the notebook and replaces it once this cell runs.
    NOTE(review): ``build`` uses ``RSquare``, whose import is commented out in
    the notebook header, and reads ``trainyscalt`` (not ``trainyscaltref``);
    confirm both names are available before calling it.
    """
    def build(self, hp):
        """Assemble and compile the Sequential model; returns the compiled model."""
        #backend.clear_session()
        # Only used as a carrier for the input shape taken from the ALT training array.
        inputs = keras.Input(shape=(trainXscaltref.shape[1], trainXscaltref.shape[2]))
        #inputs = keras.Input(shape=(trainXscch4ref.shape[1], trainXscch4ref.shape[2]))        
        model = tf.keras.Sequential()
        #for i in range(n_layers):
        # Convolution front end.
        model.add(Conv1D(
            filters=96, 
            kernel_size=9, 
            activation = 'swish',
            padding='same', 
            input_shape=(inputs.shape[1], inputs.shape[2])))
        # model.add(MaxPool1D(pool_size=1))
        #for i in range(hp['n_layers']): #TUNE THIS (LAYERS) WHEN ADDING SATELLITE AND MODELING DATA
        # Sequence-returning bidirectional LSTM.
        model.add(Bidirectional(LSTM(
            input_shape=(inputs.shape[1], inputs.shape[2]),
            return_sequences = True, 
            #kernel_initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.00001),
            #name = f"1BiLSTM_layer_{i+1}",
            units = 97,
            activation = 'relu',
            #recurrent_activation = 'relu',
            use_bias = False,
            unit_forget_bias = True,
            dropout=0.1,
            recurrent_dropout = 0.1)))
        # Bottleneck: return_sequences=False collapses the sequence to one 64-unit vector.
        model.add(LSTM(
            units=64,
            activation = 'relu',
            #recurrent_activation = 'relu',
            return_sequences=False, 
            dropout=0.1,
            recurrent_dropout=0.1,
            input_shape=(inputs.shape[1], inputs.shape[2])))
        #model.add(Bidirectional(LSTM(inputs.shape[-1], activation='relu', return_sequences = False, dropout=0, input_shape=(inputs.shape[1], inputs.shape[2]))))
        # Repeat the bottleneck vector once per input step.
        model.add(RepeatVector(inputs.shape[1]))   #TUNE THIS (LAYERS) WHEN ADDING SATELLITE AND MODELING DATA
        model.add(LSTM(
            units=128, 
            activation = 'relu',
            #recurrent_activation='relu',
            return_sequences=True,
            dropout=0.1,
            recurrent_dropout=0.1,
            input_shape=(inputs.shape[1], inputs.shape[2])))
        #model.add(Bidirectional(LSTM(inputs.shape[-1], activation='relu', return_sequences = True, dropout=0, input_shape=(inputs.shape[1], inputs.shape[2])))) 
        model.add(Bidirectional(LSTM(
            input_shape=(inputs.shape[1], inputs.shape[2]),
            return_sequences = True, 
            #kernel_initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.00001),
            #name = f"2BiLSTM_layer_{i+1}",
            units = 128,
            activation = 'relu',
            #recurrent_activation = 'sigmoid',
            use_bias = True,
            unit_forget_bias = False,
            dropout=0.1,
            recurrent_dropout = 0.1)))
        model.add(Conv1DTranspose(
           filters=88, 
           kernel_size=9, 
           activation = 'swish',
           padding='same', 
           input_shape=(inputs.shape[1], inputs.shape[2])))
        # Output head: one Dense applied per time step, as wide as trainyscalt.shape[1].
        model.add(TimeDistributed(Dense(trainyscalt.shape[1])))
        #model.add(TimeDistributed(Dense(trainyscch4ref.shape[1])))
        #model.add(Dense(trainXscaltref.shape[1]))
        #model.add(Dense(trainXscch4ref.shape[1]))
        # NOTE(review): the third metric is the return value of RSquare().build(...), not the
        # RSquare instance itself; confirm that build() returns something usable as a metric.
        metrics=['mean_squared_error', 'mean_absolute_error', RSquare().build(input_shape = (trainyscalt.shape[1],))]
        #metrics=['mean_squared_error', 'mean_absolute_error', RSquare().build(input_shape = (trainyscch4ref.shape[1],))]
        loss_function = 'mean_squared_error'
        # RMSprop with a fixed 1e-4 learning rate and gradient values clipped at 1000.
        model.compile(optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate = 0.0001, **{"clipvalue" : 1000}),loss = loss_function, metrics = metrics)
        return model
    
    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` with a fixed batch size of 64; all other arguments are forwarded to ``model.fit``."""
        return model.fit(*args, batch_size = 64, **kwargs)

In [None]:
# Add early-stopping criteria to stop training if the validation score does not
# improve; this cuts down on computational load and run time.
# NOTE(review): `model` here is whatever `model` currently exists in the kernel; the
# `geocryoai` class defined just above is not instantiated in this cell.
# NOTE(review): `checkpoint` and `tensorboard_cb` are created but not passed to
# `fit` (a second TensorBoard callback is constructed inline), so the best-weights
# file named by `filepath` is never written by this run.
# NOTE(review): patience (10) equals the number of epochs (10), so early stopping
# presumably cannot trigger before training ends; confirm.
filepath="weights_geocryoai.best_alt_091723.hdf5"
tensorboard_cb = keras.callbacks.TensorBoard('/tmp/tb_logs')
checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_loss', verbose = 1, patience = 10, min_delta = 1e-4, restore_best_weights = True)
# Fit the network and time the run. Targets here are `trainyscalt` / `validyscalt`
# (without the `ref` suffix used by the other ALT cells).
start_time = time.time()
history = model.fit(trainXscaltref,
                    trainyscalt,  
                    epochs=10, 
                    batch_size=64,#128,#512, 
                    validation_data=(validXscaltref,validyscalt),
                    steps_per_epoch = None,
                    shuffle=False, 
                    callbacks=[early_stopping, TerminateOnNaN(),  keras.callbacks.TensorBoard("/tmp/tb_logs")],
                    use_multiprocessing = True)

elapsed_time = time.time() - start_time
print("\nThe first network took {} s to complete training.".format(round(elapsed_time)))

In [None]:
score=model.evaluate(testXscaltref, testyscalt, verbose = 1)

In [None]:
#score_2b = model2.evaluate(testXscaltref, testyscaltref, verbose = 1) 
testXscref.shape

In [None]:
testXscrefr=np.expand_dims(testXscref,2)
testyscrefr=np.expand_dims(testyscref,2)

In [None]:
score = model.evaluate(testXscrefr, testyscrefr, verbose = 1) 
#score_3 = model3.evaluate(testXscch4ref, testyscch4ref, verbose = 1)
#score_4 = model4.evaluate(testXscco2ref, testyscco2ref, verbose = 1)
#score_3b = model3b.evaluate(testXscch4ref, testyscch4ref, verbose = 1)

In [None]:
print('Test MAE:', score[1])
print('Test MSE:', score[2])
print('Test RMSE:', np.sqrt(score[2]))

In [None]:
print(testXscref.shape, testyscref.shape)
print(testXscrefr.shape, testyscrefr.shape)

In [None]:
yhat=model2.predict(trainXscrefr[-32:].reshape(32,32,1,1,1))

In [None]:
yhatinv = yscaler.inverse_transform(yhat.reshape(32,32))

In [None]:
yhatinv=yhatinv.reshape(32*32,1)

In [None]:
plt.plot(yhatinv)

In [None]:
X_new_scaled = X_train[0]
print (X_new_scaled)
y_new = clf_sgd.predict(X_new_scaled)
print (y_new)

In [None]:
# make a forecast
def forecast(model, history, n_input):
    """Predict from the last ``n_input`` observations of the first feature.

    Parameters
    ----------
    model : object with a ``predict(x, verbose=0)`` method.
    history : 3-D array-like; its first two axes are flattened into one
        row axis before the window is taken.
    n_input : number of trailing rows to feed to the model.

    Returns
    -------
    The first element of ``model.predict`` for a batch of shape
    ``(1, n_input, 1)``.
    """
    series = array(history)
    # Merge the first two axes so rows run continuously; columns stay as they are.
    n_rows = series.shape[0] * series.shape[1]
    flat = series.reshape((n_rows, series.shape[2]))
    # Trailing window of column 0, shaped as a single-sample batch [1, n_input, 1].
    window = flat[-n_input:, 0]
    batch = window.reshape((1, len(window), 1))
    # The model returns one prediction per sample; only the single sample's vector is wanted.
    return model.predict(batch, verbose=0)[0]

In [None]:
plt.plot(history.history['loss'])
#plt.plot(history3.history['loss'])
#plt.plot(history4.history['loss'])
#plt.plot(history5.history['loss'])

In [None]:
testyscrefrpred = model.predict(testXscrefr)

In [None]:
testyscrefrpred.shape

In [None]:
# Flatten the predictions to a single column before undoing the target scaling.
# -1 lets NumPy infer the row count instead of assuming exactly 365 predicted values.
testypred=yscaler.inverse_transform(testyscrefrpred.reshape(-1,1))

In [None]:
testyscref2=testyscrefr.reshape(len(testyscrefr),1)
testyref2=yscaler.inverse_transform(testyscref2)

In [None]:
# Overlay the inverse-scaled test targets and the inverse-scaled predictions on one
# axis. The bare `plt.plot()` call drew nothing and is dropped; the series are
# labelled so the two lines can be told apart.
fig, ax = plt.subplots()
ax.plot(testyref2[:,0], label='Observed')
ax.plot(testypred[:,0], label='Predicted')
ax.legend();

In [None]:
rmse = np.sqrt(mean_squared_error(testyref2, testypred))
print('Test RMSE: %.3f' % rmse)

#### In Situ, RS, and Model Output Modeling

In [None]:
# Hyperparameter search space: training-level settings first, then filters, kernel
# size and activation for each of the four convolutional stages.
hp = HyperParameters()
units = hp.Int("units", min_value=32, max_value=256, step=16)
batch_size = hp.Int("batch_size", min_value=32, max_value=256, step=32)
learning_rate = hp.Choice("learning_rate", [1e-1, 1e-2, 1e-3, 1e-4, 1e-5])

# Activation candidates offered to every stage; each Choice receives its own list.
activation_options = ('relu', 'sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish')

# Filter counts span [hp['units'], 2 * hp['units']], using the value `hp` currently
# holds for "units".
filters_floor = hp['units']
filters_ceiling = 2 * filters_floor

# Stage 1
conv3d_filters = hp.Int("conv3d_filters", min_value=filters_floor, max_value=filters_ceiling)
conv3d_kernel_size = hp.Int("conv3d_kernel_size", min_value=3, max_value=6, step=1)
conv3d_activation = hp.Choice("conv3d_activation", list(activation_options))

# Stage 2
conv3d2_filters = hp.Int("conv3d2_filters", min_value=filters_floor, max_value=filters_ceiling)
conv3d2_kernel_size = hp.Int("conv3d2_kernel_size", min_value=3, max_value=6, step=1)
conv3d2_activation = hp.Choice("conv3d2_activation", list(activation_options))

# Stage 3 (its kernel size range starts at 1 rather than 3)
conv3d3_filters = hp.Int("conv3d3_filters", min_value=filters_floor, max_value=filters_ceiling)
conv3d3_kernel_size = hp.Int("conv3d3_kernel_size", min_value=1, max_value=6, step=1)
conv3d3_activation = hp.Choice("conv3d3_activation", list(activation_options))

# Stage 4
conv3d4_filters = hp.Int("conv3d4_filters", min_value=filters_floor, max_value=filters_ceiling)
conv3d4_kernel_size = hp.Int("conv3d4_kernel_size", min_value=3, max_value=6, step=1)
conv3d4_activation = hp.Choice("conv3d4_activation", list(activation_options))

In [None]:
# inputs = keras.Input(shape=(trainXscch4ref.shape[1], trainXscch4ref.shape[2]))
#hp = HyperParameters()
#units=hp.Int("units", min_value=32, max_value = 256, step=16)#inputs.shape[2], step=32)
#batch_size=hp.Int("batch_size", min_value = 1, max_value = 256, step = 32)
#learning_rate=hp.Choice("learning_rate", [1e-1, 1e-2, 1e-3, 1e-4, 1e-5])
#inputs2 = keras.Input(shape=(alt_Xtrainsc.shape[1], alt_Xtrainsc.shape[2]))
#units2=hp.Int("units2", min_value=64, max_value = inputs2.shape[2], step=64)
#padding=hp.Choice("padding", ['valid','same','causal'])
#n_layers=hp.Int("n_layers", min_value = 1, max_value = 9, step=3)
#n_layers=hp.Int("n_layers", min_value = 5, max_value = 15) 
#batch_size=hp.Int("batch_size", min_value = 32, max_value = 256, step = 32)

# Fresh search space that replaces any earlier `hp`: training-level settings plus
# filters and activation for each of the four convolutional stages.
hp = HyperParameters()
units = hp.Int("units", min_value=32, max_value=256, step=16)
batch_size = hp.Int("batch_size", min_value=1, max_value=256, step=32)
learning_rate = hp.Choice("learning_rate", [1e-1, 1e-2, 1e-3, 1e-4])

# Activation candidates offered to every stage; each Choice receives its own list.
activation_options = ('relu', 'sigmoid', 'tanh', 'exponential', 'gelu', 'elu', 'linear', 'selu', 'softmax', 'swish')

# Kernel sizes are deliberately not registered in this space (the disabled variant
# searched 1..6 in steps of 1).
# NOTE(review): a build() that reads hp['conv3d*_kernel_size'] will not find those
# names after this cell has run - confirm which hypermodel this space is meant for.

# Stage 1
conv3d_filters = hp.Int("conv3d_filters", min_value=32, max_value=256, step=16)
conv3d_activation = hp.Choice("conv3d_activation", list(activation_options))

# Stage 2
conv3d2_filters = hp.Int("conv3d2_filters", min_value=32, max_value=256, step=16)
conv3d2_activation = hp.Choice("conv3d2_activation", list(activation_options))

# Stage 3
conv3d3_filters = hp.Int("conv3d3_filters", min_value=32, max_value=256, step=16)
conv3d3_activation = hp.Choice("conv3d3_activation", list(activation_options))

# Stage 4
conv3d4_filters = hp.Int("conv3d4_filters", min_value=32, max_value=256, step=16)
conv3d4_activation = hp.Choice("conv3d4_activation", list(activation_options))

In [None]:
# Symbolic model input whose per-sample shape is made of the first five axis lengths
# of `trainXscref`; Keras adds the batch axis in front, so `inputs.shape` has six
# entries. An earlier variant took these lengths from `trainXscrefr`; it was replaced
# because the last axis has to match the validation data.
train_dims = tuple(trainXscref.shape[axis] for axis in range(5))
inputs = keras.Input(shape=train_dims)
inputs

In [None]:
trainyref.shape

In [None]:
inputs.shape

In [None]:
from keras.layers import Masking
class geocryoai(HyperModel):
    """Tunable ConvLSTM2D encoder with a Conv3DTranspose decoder for Keras Tuner.

    `build` stacks four ConvLSTM2D stages, each followed by Dropout(0.2) and a
    per-timestep Dense(1), then four Conv3DTranspose layers that reuse the stage
    hyperparameters in reverse order, and ends with a per-timestep Dense(1).
    Filters, kernel sizes, activations and the learning rate are read from `hp`
    under the `conv3d*` / `learning_rate` names; the input shape is taken from the
    notebook-level `inputs` tensor.
    """

    def build(self, hp):
        """Assemble and compile the network for one hyperparameter assignment.

        Args:
            hp: name-indexed hyperparameter container providing
                `<stage>_filters`, `<stage>_kernel_size` and `<stage>_activation`
                for the stages `conv3d`, `conv3d2`, `conv3d3`, `conv3d4`, plus
                `learning_rate`.

        Returns:
            The compiled `tf.keras.Sequential` model (MSE loss, legacy Adam,
            MSE and MAE metrics).
        """
        # Kept local (not on `self`) so the method does not depend on instance state.
        stage_prefixes = ('conv3d', 'conv3d2', 'conv3d3', 'conv3d4')
        model = tf.keras.Sequential()

        # Encoder: one ConvLSTM2D per stage with a square kernel.
        for position, prefix in enumerate(stage_prefixes):
            side = hp[prefix + '_kernel_size']
            stage_options = {
                'filters': hp[prefix + '_filters'],
                'kernel_size': (side, side),
                'strides': (1, 1),
                'activation': hp[prefix + '_activation'],
                'return_sequences': True,
                'padding': 'same',
            }
            if position == 0:
                # Only the first layer declares the data layout and the input shape,
                # which skips the first two entries of `inputs.shape`.
                stage_options['data_format'] = 'channels_last'
                stage_options['input_shape'] = (inputs.shape[2], inputs.shape[3],
                                                inputs.shape[4], inputs.shape[5])
            model.add(ConvLSTM2D(**stage_options))
            model.add(Dropout(0.2))
            model.add(tf.keras.layers.TimeDistributed(Dense(1)))

        # Decoder: transposed 3-D convolutions, last stage's settings first.
        for prefix in reversed(stage_prefixes):
            side = hp[prefix + '_kernel_size']
            model.add(Conv3DTranspose(hp[prefix + '_filters'], (side, side, side),
                                      strides=(1, 1, 1), padding='same'))
        model.add(TimeDistributed(Dense(1)))

        model.compile(loss='mean_squared_error',
                      optimizer=tf.keras.optimizers.legacy.Adam(hp['learning_rate']),
                      metrics=['mean_squared_error', 'mean_absolute_error'])
        return model

    def fit(self, hp, model, *args, **kwargs):
        """Train `model`, passing the `batch_size` hyperparameter to `model.fit`.

        All other positional and keyword arguments are forwarded unchanged; the
        return value is whatever `model.fit` returns.
        """
        tuned_batch_size = hp['batch_size']
        return model.fit(*args, batch_size=tuned_batch_size, **kwargs)

In [None]:
# from keras.layers import Masking
# class geocryoai(HyperModel):
#     def build(self, hp):
#         model = tf.keras.Sequential()
#         #model.add(Masking(mask_value=-1, input_shape=(2, 1)))
#         model.add(ConvLSTM3D(filters=hp['conv3d_filters'], 
#                              kernel_size=(hp['conv3d_kernel_size'], hp['conv3d_kernel_size'], hp['conv3d_kernel_size']), 
#                              strides=(1,1,1),
#                              activation=hp['conv3d_activation'],#'relu', 
#                              return_sequences=True,
#                              padding='same',
#                              data_format='channels_last',
#                              input_shape=(inputs.shape[1], inputs.shape[2], inputs.shape[3], inputs.shape[4], inputs.shape[5])))
#         #model.add(MaxPooling3D(pool_size=(2, 2, 2)))
#         #model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(2,2,2))))
#         model.add(BatchNormalization())
#         model.add(Dropout(0.2))
#         model.add(ConvLSTM3D(filters=hp['conv3d2_filters'], 
#                              kernel_size=(hp['conv3d2_kernel_size'], hp['conv3d2_kernel_size'], hp['conv3d2_kernel_size']), 
#                              strides=(1,1,1),
#                              activation=hp['conv3d2_activation'],#'relu', 
#                              return_sequences=True,
#                              padding='same'))
#                              #data_format='channels_last'))
#                              #input_shape=(inputs.shape[0], inputs.shape[2], inputs.shape[3], inputs.shape[4], inputs.shape[5])))
#         #model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(2,2,2))))
#         model.add(BatchNormalization())
#         model.add(Dropout(0.2))
#         model.add(ConvLSTM3D(filters=hp['conv3d3_filters'], 
#                              kernel_size=(hp['conv3d3_kernel_size'], hp['conv3d3_kernel_size'], hp['conv3d3_kernel_size']), 
#                              strides=(1,1,1),
#                              activation=hp['conv3d3_activation'],#'relu', 
#                              return_sequences=True,
#                              padding='same'))
#                              #data_format='channels_last'))
#         #model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(2,2,2))))
#         model.add(BatchNormalization())
#         model.add(Dropout(0.2))
#         model.add(ConvLSTM3D(filters=hp['conv3d4_filters'], 
#                              kernel_size=(hp['conv3d4_kernel_size'], hp['conv3d4_kernel_size'], hp['conv3d4_kernel_size']), 
#                              strides=(1,1,1),
#                              activation=hp['conv3d4_activation'],#'relu', 
#                              return_sequences=True,
#                              padding='same'))
#                              #data_format='channels_last'))
#         #model.add(MaxPooling3D(pool_size=(2, 2, 2)))
#         model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(1,1,1))))
#         model.add(BatchNormalization())
#         model.add(Dropout(0.2))
#         model.add(Reshape((3638, 1, 96, 1), input_shape=(3638, 1, 1, 3, 32)))
#         #model.add(Conv3DTranspose(16, (1,1,1), strides=(1,1,1), padding='same'))
#         #model.add(Conv3DTranspose(16, (1,1,1), strides=(1,1,2), padding='same'))
#         model.add(Conv3DTranspose(16, (1,1,1), strides=(1,2,2), padding='same'))
#         model.add(Conv3DTranspose(32, (1,1,1), strides=(1,2,2), padding='same'))
#         model.add(Conv3DTranspose(64, (1,1,1), strides=(1,2,2), padding='same'))
#         #model.add(Conv3DTranspose(64, (1,1,1), strides=(1,1,2), padding='same'))
#         #model.add(Conv3DTranspose(64, (1,1,1), strides=(1,2,1), padding='same'))
#         #model.add(Reshape((3285, 928, 16, 4, 2), input_shape=(3285, 928, 128, 1)))
#         #model.add(Conv3DTranspose(32, (4, 4, 4), strides=(2,2,2), padding='same'))
#         #model.add(Flatten())
#         #model.add(TimeDistributed(Dense(y_train[0])))
#         #model.add(TimeDistributed(layers.MaxPooling3D(pool_size=(2,2,2))))
#         #model.add(Dense(trainXch4.shape[1]))
#         model.add(TimeDistributed(Dense(1)))
#         model.compile(loss='mean_squared_error',
#                       optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.0001),
#                       metrics=['mean_squared_error', 'mean_absolute_error', 'mean_squared_logarithmic_error', 'cosine_similarity'])
#         return model
    
#     #def fit(self, hp, model, *args, **kwargs):
#     #    return model.fit(*args, **kwargs)
            

In [None]:
# Build one model from the values currently held in `hp`. Use a hypermodel instance
# (the tuner cell instantiates the class the same way) rather than calling `build`
# on the class with the `inputs` tensor standing in for `self`.
model=geocryoai().build(hp)

In [None]:
model.summary()

In [None]:
model

In [None]:
import keras_tuner as kt
# Bayesian search over the `hp` space, minimising validation loss across 30 trials.
# num_initial_points is left at the library default. Earlier runs used tuner ids /
# project names suffixed 071423_ALT, 071223_CH4 and 071223_CO2.
tuner = kt.BayesianOptimization(
    hypermodel=geocryoai(),
    objective='val_loss',
    max_trials=30,
    # Expected amount of noise in the observed performances (0.0001 is the default).
    alpha=0.0001,
    # Exploration/exploitation balance; larger values explore more (2.6 is the default).
    beta=2.6,
    hyperparameters=hp,
    tuner_id="BayesianOptimization_090223_tcfmCH4",
    project_name="bayesian_optimization_090223_tcfmCH4",
)

In [None]:
from keras.layers import Masking
# Fixed-hyperparameter prototype: a Masking layer, four ConvLSTM3D stages (each followed
# by time-distributed max pooling, batch normalisation and dropout), a Reshape, four
# stride-2 Conv3DTranspose layers, a per-timestep Dense(1) and a final Flatten.
# This rebinds the notebook-level `model`.
model = tf.keras.Sequential()
# NOTE(review): Masking declares input_shape=(2, 1) while the next layer declares a
# five-axis input_shape taken from `inputs` - these look incompatible; confirm this
# cell still builds.
model.add(Masking(mask_value=-1, input_shape=(2, 1)))
model.add(ConvLSTM3D(filters=30, 
                     kernel_size=(3, 3, 3), 
                     strides=(1,1,1),
                     activation='relu', 
                     return_sequences=True,
                     padding='same',
                     data_format='channels_last',
                     input_shape=(inputs.shape[1], inputs.shape[2], inputs.shape[3], inputs.shape[4], inputs.shape[5])))
#model.add(MaxPooling3D(pool_size=(2, 2, 2)))
model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(2,2,2))))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(ConvLSTM3D(filters=16, 
                     kernel_size=(3, 3, 3), 
                     strides=(1,1,1),
                     activation='relu', 
                     return_sequences=True,
                     padding='same'))
                     #data_format='channels_last'))
                     #input_shape=(inputs.shape[0], inputs.shape[2], inputs.shape[3], inputs.shape[4], inputs.shape[5])))
model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(2,2,2))))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(ConvLSTM3D(filters=16, 
                     kernel_size=(3, 3, 3), 
                     strides=(1,1,1),
                     activation='relu', 
                     return_sequences=True,
                     padding='same'))
                     #data_format='channels_last'))
model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(2,2,2))))
model.add(BatchNormalization())
model.add(Dropout(0.2))
# Last recurrent stage: 8 filters with a (1, 1, 128) kernel, followed by a size-1
# pooling window.
model.add(ConvLSTM3D(filters=8, 
                     kernel_size=(1, 1, 128), 
                     strides=(1,1,1),
                     activation='relu', 
                     return_sequences=True,
                     padding='same'))
                     #data_format='channels_last'))
#model.add(MaxPooling3D(pool_size=(2, 2, 2)))
model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(1,1,1))))
model.add(BatchNormalization())
model.add(Dropout(0.2))
# NOTE(review): the target (45, 8291, 19, 8) and the stated input (8291, 6, 11, 2, 8)
# hold different element counts (45*19 = 855 vs 6*11*2 = 132 per 8291*8) - confirm
# these hard-coded sizes against the actual tensor reaching this layer.
model.add(Reshape((45, 8291, 19, 8), input_shape=(8291, 6, 11, 2, 8)))
model.add(Conv3DTranspose(8, (4, 4, 4), strides=(2,2,2), padding='same'))
model.add(Conv3DTranspose(16, (4, 4, 4), strides=(2,2,2), padding='same'))
model.add(Conv3DTranspose(16, (4, 4, 4), strides=(2,2,2), padding='same'))
model.add(Conv3DTranspose(32, (4, 4, 4), strides=(2,2,2), padding='same'))
model.add(TimeDistributed(Dense(1)))
model.add(Flatten())
# model.add(TimeDistributed(Dense(y_train[0])))
# model.add(Dense(x_train.shape[1]))
# NOTE(review): categorical cross-entropy with an accuracy metric differs from the
# MSE/MAE setup used by the `geocryoai` hypermodels in this notebook - confirm intended.
model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adam(learning_rate=0.0001),
              metrics=['accuracy'])
model.summary()


In [None]:
trainXscref.shape, trainXscrefr.shape, inputs.shape#, trainy.shape

In [None]:
from keras.layers import Masking
class geocryoai(HyperModel):
    """Keras Tuner hypermodel: four ConvLSTM3D stages plus a Conv3DTranspose stack.

    Layer widths, kernel sizes and the learning rate are fixed in code; only the
    ConvLSTM3D activations (`conv3d_activation` ... `conv3d4_activation`) and the fit
    batch size (`batch_size`) are read from `hp`. The input shape comes from the
    notebook-level `inputs` tensor.

    NOTE: this redefines the `geocryoai` class declared earlier in the notebook.
    """
    def build(self,hp):
        """Assemble and compile the network for one hyperparameter assignment.

        Args:
            hp: name-indexed hyperparameter container providing the four
                `conv3d*_activation` entries.

        Returns:
            The compiled `tf.keras.Sequential` model (MSE loss, legacy Adam with
            learning rate 1e-4, MSE and MAE metrics). The model summary is printed
            as a side effect.
        """
        model = tf.keras.Sequential()
        model.add(layers.Input(shape=(inputs.shape[1], inputs.shape[2],inputs.shape[3], inputs.shape[4], inputs.shape[5])))
        #model.add(Masking(mask_value=-1, input_shape=(2, 1)))
        # Stage 1
        model.add(ConvLSTM3D(filters=64, 
                             kernel_size=(3, 3, 3), 
                             strides=(1,1,1),
                             activation=hp['conv3d_activation'],
                             return_sequences=True,
                             padding='same',
                             data_format='channels_last',
                             input_shape=(inputs.shape[1], inputs.shape[2], inputs.shape[3], inputs.shape[4], inputs.shape[5])))
        model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(2,1,2))))
        model.add(BatchNormalization())
        model.add(Dropout(0.2))
        # Stage 2
        model.add(ConvLSTM3D(filters=32, 
                             kernel_size=(3, 3, 3), 
                             strides=(1,1,1),
                             activation=hp['conv3d2_activation'],
                             return_sequences=True,
                             padding='same'))
        model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(2,1,2))))
        model.add(BatchNormalization())
        model.add(Dropout(0.2))
        # Stage 3
        model.add(ConvLSTM3D(filters=16, 
                             kernel_size=(3, 3, 3), 
                             strides=(1,1,1),
                             activation=hp['conv3d3_activation'],
                             return_sequences=True,
                             padding='same'))
        model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(2,1,2))))
        model.add(BatchNormalization())
        model.add(Dropout(0.2))
        # Stage 4: uses its own activation hyperparameter. It previously reused
        # 'conv3d3_activation', which left 'conv3d4_activation' in the search space
        # without any effect on the model.
        model.add(ConvLSTM3D(filters=8, 
                             kernel_size=(1, 1, 1), 
                             strides=(1,1,1),
                             activation=hp['conv3d4_activation'],
                             return_sequences=True,
                             padding='same'))
        model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(1,1,1))))
        model.add(BatchNormalization())
        model.add(Dropout(0.2))
        # NOTE(review): the Reshape sizes below (276, 3285, 2208, 32) are hard-coded
        # for one dataset - confirm them against the tensor that reaches each layer.
        model.add(Reshape((-1, 276,3285, 32), input_shape=(-1, 276, 3285, 4, 8)))
        # Decoder: 1x1x1 transposed convolutions; the strides do the upsampling.
        model.add(Conv3DTranspose(4, (1,1,1), strides=(1,1,2), padding='same'))
        model.add(Conv3DTranspose(8, (1,1,1), strides=(1,1,1), padding='same'))
        model.add(Conv3DTranspose(8, (1,1,1), strides=(1,2,2), padding='same'))
        model.add(Conv3DTranspose(16, (1,1,1), strides=(1,1,1), padding='same'))
        model.add(Conv3DTranspose(16, (1,1,1), strides=(1,2,2), padding='same'))
        model.add(Conv3DTranspose(32, (1,1,1), strides=(1,1,1), padding='same'))
        model.add(Conv3DTranspose(32, (1,1,1), strides=(1,2,2), padding='same'))
        model.add(Conv3DTranspose(32, (1,1,1), strides=(1,1,2), padding='same'))
        model.add(layers.TimeDistributed(layers.MaxPooling2D(pool_size=(1,1))))
        model.add(Reshape((1, 2208, 3285, 32, 32), input_shape=(-1, 2208, 105120, 32)))
        model.add(TimeDistributed(Dense(1)))
        
        model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.0001), 
                      metrics=['mean_squared_error', 'mean_absolute_error'])
        model.summary()
        
        return model
    
    def fit(self, hp, model, *args, **kwargs):
        """Train `model`, passing the `batch_size` hyperparameter to `model.fit`.

        All other positional and keyword arguments are forwarded unchanged; the
        return value is whatever `model.fit` returns.
        """
        return model.fit(*args, batch_size = hp['batch_size'], **kwargs)

In [None]:
105120/3285

In [None]:
13120/3280

In [None]:

        # model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.0001),
        #               metrics=['mean_squared_error', 'mean_absolute_error', 'mean_squared_logarithmic_error', 'cosine_similarity'])
    #     return model
    
    # def fit(self, hp, model, *args, **kwargs):
    #     return model.fit(*args, batch_size = 256, **kwargs)
        
#     #model.add(Conv3DTranspose(64, (1,1,1), strides=(1,1,2), padding='same'))
#     #model.add(Conv3DTranspose(64, (1,1,1), strides=(1,2,1), padding='same'))
#     #model.add(Reshape((3285, 928, 16, 4, 2), input_shape=(3285, 928, 128, 1)))
#     #model.add(Conv3DTranspose(32, (4, 4, 4), strides=(2,2,2), padding='same'))
#     #model.add(Flatten())
#     #model.add(TimeDistributed(Dense(y_train[0])))
#     #model.add(TimeDistributed(layers.MaxPooling3D(pool_size=(2,2,2))))
#     #model.add(Dense(trainXch4.shape[1]))
#     model.add(TimeDistributed(Dense(1)))
#     model.compile(loss='mean_squared_error',
#                   optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.0001),
#                   metrics=['mean_squared_error', 'mean_absolute_error', 'mean_squared_logarithmic_error', 'cosine_similarity'])
#     return model

# #def fit(self, hp, model, *args, **kwargs):
# #    return model.fit(*args, **kwargs)

In [None]:
inputs.shape, inputs2.shape, targets.shape, targets2.shape

In [None]:
trainds.element_spec

In [None]:
trainx.shape, trainy.shape

In [None]:
ds = trainds.take(int(2208)).batch(3285)#.map(lambda x: x)

In [None]:
inputs.shape, trainx.shape

In [None]:
# Quick batch-size sweep: fit for 3 epochs at each batch size in turn.
# NOTE(review): every pass calls fit on the same `model` object, so training continues
# from the previous pass's weights and the runs are not independent comparisons.
# After the loop, `history` holds only the last (512) run and the loop variable has
# overwritten the notebook-level `batch_size`.
for batch_size in [32,64,128,256,512]:  
    print('\nbs ', batch_size)
    history = model.fit(trainx, trainy, epochs=3,
                        batch_size=batch_size,
                        verbose=1)

In [None]:
trainx.shape, inputs.shape

In [None]:
trainxnew=tf.reshape(trainx,[trainx.shape[0],trainx.shape[1],trainx.shape[2],trainx.shape[3],trainx.shape[4],1])

In [None]:
validxnew=tf.reshape(validx,[validx.shape[0],validx.shape[1],validx.shape[2],validx.shape[3],validx.shape[4],1])

In [None]:
# Reshape `testx` to six axes by putting a length-1 axis in front of its first five.
# NOTE(review): the train/valid counterparts append the length-1 axis at the end
# instead - confirm this asymmetry is intended.
testxnew=tf.reshape(testx,[1,testx.shape[0],testx.shape[1],testx.shape[2],testx.shape[3],testx.shape[4]])

In [None]:
trainynew=tf.reshape(trainy,[trainy.shape[0],trainy.shape[1],trainy.shape[2],trainy.shape[3],trainy.shape[4],1])

In [None]:
validynew=tf.reshape(validy,[validy.shape[0],validy.shape[1],validy.shape[2],validy.shape[3],validy.shape[4],1])

In [None]:
# Reshape `testy` to six axes with a leading length-1 axis.
# NOTE(review): the target shape lists the lengths of axes 4 and 0 in swapped
# positions. tf.reshape only reinterprets the element order, it does not transpose -
# confirm this is intended (the `testxnew` reshape keeps the original axis order).
testynew=tf.reshape(testy,[1,testy.shape[4],testy.shape[1],testy.shape[2],testy.shape[3],testy.shape[0]])

In [None]:
inputs.shape,trainxnew.shape,targets.shape,trainynew.shape

In [None]:
# Display the training dataset object. The original line ended in a dangling
# attribute access (`trainds.`), which is a syntax error.
trainds

In [None]:
trainx.shape, trainy.shape, validx.shape, validy.shape

In [None]:
import tensorflow as tf
tf.keras.backend.clear_session()

In [None]:
#CH4
# Early stopping ends training when the validation loss stops improving, which cuts
# down on computational load.
# NOTE(review): patience (10) equals the epoch budget (10), so early stopping is
# unlikely to trigger within this run - confirm the intended values.
#filepath="weights_geocryoai2.best_071123.hdf5"
filepath="weights_geocryoai2.best_081423_ch4.hdf5"
#filepath="weights_geocryoai2.best_071223_co2.hdf5"
tensorboard_cb = keras.callbacks.TensorBoard('/tmp/tb_logs')
# Writes the model to `filepath` whenever the validation loss reaches a new minimum.
checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_loss', verbose = 1, patience = 10, min_delta = 1e-4, restore_best_weights = True)
# fit network
start_time = time.time()
# `checkpoint` and `tensorboard_cb` are now actually registered: previously the
# checkpoint callback was created but never passed to fit (so no best-weights file
# was written) and a second TensorBoard callback was built inline.
history = model.fit(trainx,
                    trainy,
                    epochs=10, 
                    batch_size=1,
                    validation_data=(validx,validy),
                    steps_per_epoch = None,
                    shuffle=False, 
                    callbacks=[early_stopping, checkpoint, TerminateOnNaN(), tensorboard_cb],
                    use_multiprocessing = True)

elapsed_time = time.time() - start_time
print("\nThe first network took {} s to complete training.".format(round(elapsed_time)))

In [None]:
# train_data = tf.data.Dataset.from_tensor_slices((trainXch4, trainych4))
# valid_data = tf.data.Dataset.from_tensor_slices((validXch4, validych4))

In [None]:
#model.fit(train_data, epochs=10, validation_data=valid_data)

### Reference Old

In [None]:
# Reference (old) ALT evaluation: predict on the train/valid/test splits, invert the
# scaling of forecast and target to compute an RMSE, then plot observations against
# predictions and save the figure as SVG and PNG.
# NOTE(review): this cell relies on names it does not define (`alt_model`, `model2`,
# `scaler`, `yscaleralt`, `test_y`, `arr`, `arr2`, `concatenate`, `sqrt`,
# `mean_squared_error`) - confirm they exist in the session before running.
trainyscaltpred = alt_model.predict(trainXscaltref)
validyscaltpred = alt_model.predict(validXscaltref)
testyscaltpred = alt_model.predict(testXscaltref)
# make a prediction
yhat = model2.predict(testXscalt)
# NOTE(review): `test_X` is read on the right-hand side before this cell assigns it,
# so the line depends on a value left over from elsewhere (possibly
# `testXscalt.shape` was meant) - confirm.
test_X = testXscalt.reshape((test_X.shape[0], test_X.shape[2]))
# invert scaling for forecast
# The prediction is joined with all but the first column of test_X before the inverse
# transform; only column 0 of the result is kept.
inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]
# calculate RMSE
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)


# Observations (`arr2`) and inverse-scaled predictions (`arr`) drawn on one axis; both
# are reshaped to a hard-coded 2,441,237 rows.
fig,ax=plt.subplots(figsize=(10,6), dpi=1000);
lns1=ax.plot(arr2.reshape(2441237,1), color='dodgerblue', linestyle='solid', label='Thaw Depth Observations, ALT (cm)');
#ax2=ax.twinx();
lns2=ax.plot(yscaleralt.inverse_transform(arr.reshape(2441237,1)), color='tomato', alpha=0.5, linestyle='solid', label='Thaw Depth Predictions, ALT (cm)');

# Merge the line handles so a single legend covers both series.
lns = lns1+lns2#+lns3+lns4;
labs = [l.get_label() for l in lns];
ax.legend(lns, labs, loc='best', fontsize=12);

ax.grid(linewidth=0.3);
#ax2.grid(linewidth=0.3);
# NOTE(review): the x axis is the row index of the plotted arrays, yet the label says
# epochs - confirm the label.
ax.set_xlabel('Full Iterations (epochs)', labelpad=12, fontsize=10);
ax.set_ylabel('Active Layer Thickness (cm)', labelpad=12, fontsize=10);
#ax.tick_params(axis='y', labelcolor='springgreen')
#ax2.set_ylabel('Validation MSE, Scaled CO2 Flux (µolCO2m-2s-1)', labelpad=12, fontsize=10)
#ax2.tick_params(axis='y', labelcolor='yellowgreen')
ax.tick_params(left=False)  # remove the ticks
#ax2.tick_params(right=False, labelright=False)  # remove the ticks
plt.title('GeoCryoAI Modeling, ALT Observations v. Predictions | Alaska [1969-2022] \n Number of ALT Samples/Replicates: 2.441M', pad=15, fontsize=14);
ax.grid(linewidth=0.3);
# Axis limits: x spans all rows, y is limited to 0-300.
plt.axis([0, 2441237, 0, 300])
#plt.axis([2226100, 2441237, 0, 200])
plt.tight_layout()
plt.savefig('ALT_ObsVPred_1969-2022_071323.svg', dpi=1000)
plt.savefig('ALT_ObsVPred_1969-2022_071323.png', dpi=1000)

In [None]:
# Next, we will build the complete model and compile it.
# Build the complete model from its input/output tensors and compile it.
# NOTE(review): `inp` and `x` are not defined in this cell; they must already exist
# in the kernel.
# NOTE(review): binary cross-entropy is normally a classification loss - confirm it
# is intended for the CH4 targets.
model = keras.models.Model(inp, x)
model.compile(
    loss=keras.losses.binary_crossentropy, optimizer=tf.keras.optimizers.legacy.Adam(),
)

# Define some callbacks to improve training.
early_stopping = keras.callbacks.EarlyStopping(monitor="val_loss", patience=10)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=5)

# Define modifiable training hyperparameters.
epochs = 20
batch_size = 5

# Fit the model to the training data.
# Validation now uses the CH4 validation split that pairs with trainXch4/trainych4;
# the previous `(x_val, y_val)` pair is not defined alongside the CH4 arrays.
model.fit(
    trainXch4,
    trainych4,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(validXch4, validych4),
    callbacks=[early_stopping, reduce_lr],
)


In [None]:
class geocryoai(HyperModel):
    """Tunable Conv1D / (Bi)LSTM encoder-decoder used for the hyperparameter search.

    ``build`` stacks: Conv1D -> BiLSTM x2 -> LSTM -> RepeatVector -> LSTM -> BiLSTM x2
    -> Conv1DTranspose -> TimeDistributed(Dense) -> Dense, compiled with RMSprop and
    mean-squared-error loss.

    NOTE(review): the input shape is read from a notebook-level ``inputs`` variable and
    the output sizes from ``trainyscaltref`` / ``trainXscaltref``; all of them must
    exist in the kernel before ``build`` is called.
    """

    def _bilstm_layer(self, hp, prefix, input_shape):
        """Return a sequence-returning Bidirectional LSTM set up from the ``<prefix>_*`` hyperparameters."""
        return Bidirectional(LSTM(
            input_shape=input_shape,
            return_sequences=True,
            units=hp[prefix + '_units'],
            activation=hp[prefix + '_activation'],
            use_bias=hp[prefix + '_use_bias'],
            unit_forget_bias=hp[prefix + '_forgot_bias'],
            dropout=hp[prefix + '_dropout'],
            recurrent_dropout=hp[prefix + '_rec_dropout']))

    def _lstm_layer(self, hp, prefix, input_shape, return_sequences):
        """Return a unidirectional LSTM set up from the ``<prefix>_*`` hyperparameters."""
        return LSTM(
            units=hp[prefix + '_units'],
            activation=hp[prefix + '_activation'],
            return_sequences=return_sequences,
            dropout=hp[prefix + '_dropout'],
            recurrent_dropout=hp[prefix + '_rec_dropout'],
            input_shape=input_shape)

    def build(self, hp):
        """Assemble and compile one candidate model from the hyperparameters in ``hp``.

        Args:
            hp: hyperparameter container; values are looked up by name.

        Returns:
            The compiled ``tf.keras.Sequential`` model.
        """
        seq_shape = (inputs.shape[1], inputs.shape[2])
        model = tf.keras.Sequential()

        # Encoder: convolution, two stacked BiLSTMs, then an LSTM that collapses the
        # sequence to a single vector.
        model.add(Conv1D(
            filters=hp['conv1d_filters'],
            kernel_size=hp['conv1d_kernel_size'],
            activation=hp['conv1d_activation'],
            padding='same',
            input_shape=seq_shape))
        model.add(self._bilstm_layer(hp, 'bilstm', seq_shape))
        model.add(self._bilstm_layer(hp, 'bilstm2', seq_shape))
        model.add(self._lstm_layer(hp, 'lstm', seq_shape, return_sequences=False))

        # Decoder: repeat the encoded vector once per input time step, then unroll it.
        model.add(RepeatVector(inputs.shape[1]))   #TUNE THIS (LAYERS) WHEN ADDING SATELLITE AND MODELING DATA
        model.add(self._lstm_layer(hp, 'lstm2', seq_shape, return_sequences=True))
        model.add(self._bilstm_layer(hp, 'bilstm3', seq_shape))
        # Fix: this layer previously passed hp['bilstm4_units'] as `return_sequences`
        # and hard-coded units=32, so the tuned unit count was never applied.
        model.add(self._bilstm_layer(hp, 'bilstm4', seq_shape))
        model.add(Conv1DTranspose(
            filters=hp['conv1d2_filters'],
            kernel_size=hp['conv1d2_kernel_size'],
            activation=hp['conv1d2_activation'],
            padding='same',
            input_shape=seq_shape))

        # Output head sized from the ALT arrays; for CH4 / CO2 runs swap in
        # trainyscch4ref / trainXscch4ref or trainyscco2ref / trainXscco2ref.
        model.add(TimeDistributed(Dense(trainyscaltref.shape[1])))
        model.add(Dense(trainXscaltref.shape[1]))

        # NOTE(review): the third entry is the return value of `.build(...)`, not the
        # RSquare object itself - confirm that the R^2 metric is actually tracked.
        metrics = ['mean_squared_error', 'mean_absolute_error', RSquare().build(input_shape = (trainyscaltref.shape[1],))]
        loss_function = 'mean_squared_error'
        model.compile(
            optimizer=tf.keras.optimizers.legacy.RMSprop(learning_rate=hp.get('learning_rate'), clipvalue=1000),
            loss=loss_function,
            metrics=metrics)
        return model

    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` with a fixed batch size of 128; other arguments pass through to ``model.fit``."""
        return model.fit(*args, batch_size = 128, **kwargs) #hp['batch_size'], **kwargs)

In [None]:
# Callbacks keyed on validation loss: stop after 10 epochs without a 1e-4 improvement
# (restoring the best weights), and reduce the learning rate after 5 stalled epochs.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=1e-4,
    patience=10,
    restore_best_weights=True,
    verbose=1,
)
reduce_loss = keras.callbacks.ReduceLROnPlateau(patience=5, monitor="val_loss")

In [None]:
#trainXscaltref.shape, trainyscaltref.shape
#trainXscch4ref.shape, trainyscch4ref.shape
#trainXscco2ref.shape, trainyscco2ref.shape

#trainych4=trainych4.reshape(3285,1)
# Display the shapes of `inputs`, `targets` and the CH4 training arrays together
# (all four must already exist in the kernel).
inputs.shape, targets.shape, trainXch4.shape, trainych4.shape

In [None]:
# #ALT
# bayesian_tuner.search(trainXscaltref, 
#                      trainyscaltref, 
#                      steps_per_epoch = None, 
#                      shuffle = False, 
#                      validation_data = (validXscaltref, validyscaltref),
#                      #validation_split = 0.15,#0.2,
#                      verbose = 1, #2, #epoch, #1, #progress bar #0, #nothing
#                      callbacks = [early_stopping, History(), TerminateOnNaN(),keras.callbacks.TensorBoard("/tmp/tb_logs")], 
#                      use_multiprocessing = True)

In [None]:
import itertools
import pandas as pd

# View the CH4 inputs as (3285, 419, 5625), i.e. each 75x75x1 grid flattened to one row,
# then stack the first six entries along the leading axis into one long frame.
new_matrix = trainXch4.reshape([3285, 419, 75 * 75 * 1])
dfs = []
for i in range(6):
    iterator = itertools.product(range(75), range(75), range(1))
    # NOTE(review): `columns` is computed but never attached to the frame below.
    columns = [f'var{i}_' + '_'.join(str(idx) for idx in x) for x in iterator]
    dfs.append(pd.DataFrame(new_matrix[i]))

result = pd.concat(dfs)

In [None]:
# Inspect the dimensions of the concatenated frame.
result.shape#.iloc[3285,5625]#.dropna()

In [None]:
# Rebinds trainXch4 to the reshaped array, so later cells see the new layout.
# NOTE(review): 1856*35*35 is not the same element count as the 419*75*75 layout
# used by the reshape in the frame-building cell - confirm which layout is current.
trainXch4=trainXch4.reshape(3285,1856,35,35)

In [None]:
# Adds a leading length-1 axis to `trainx`; the result is displayed, not stored.
# NOTE(review): `trainx` is only assigned in a later cell of this notebook.
tf.expand_dims(trainx, axis=0)

In [None]:
# One dataset element per row of the frame's underlying array.
# NOTE(review): `trainXch4df` must already exist in the kernel.
trainXch4tf=tf.data.Dataset.from_tensor_slices(trainXch4df.values)

In [None]:
# Window the rows (window length 1, step 1) and split each window into (all-but-last, last).
# With a window of length 1, `window[:-1]` is empty, so the first half of each pair has no rows.
trainXch4tf2=trainXch4tf.window(1, shift=1, drop_remainder=True).flat_map(lambda window: window.batch(1))
trainXch4tf2=trainXch4tf2.map(lambda window: (window[:-1], window[-1:]))

In [None]:
# Batch the pairs one at a time and prefetch one batch ahead.
trainXch4tf2=trainXch4tf2.batch(1).prefetch(1)

In [None]:
# Print the shapes of the first (x, y) pair only.
for idx,(x,y) in enumerate(trainXch4tf2):
    print("x = ", x.numpy().shape)
    print("y = ", y.numpy().shape)
    break

In [None]:
# Self-contained demo of the same windowing on the integers 0..9: windows of five,
# where the first four values form the input and the last value is the target.
dataset=tf.data.Dataset.from_tensor_slices(tf.range(10)).window(5, shift=1, drop_remainder=True).flat_map(lambda window: window.batch(5))
dataset=dataset.map(lambda window: (window[:-1], window[-1:]))
for X, y in dataset:
    print("Input:", X.numpy(), "Target:", y.numpy())

In [None]:
# Build sliding-window datasets from the CH4 train/validation/test arrays, keeping
# the original sample order (shuffle=False).
# NOTE(review): the training dataset uses sequence_length=1856 while the validation
# and test datasets use 3285 - confirm the difference is intentional.
dataset_train = keras.preprocessing.timeseries_dataset_from_array(
    trainXch4,
    trainych4,
    sequence_length=1856,
    shuffle=False,
)

In [None]:
dataset_valid = keras.preprocessing.timeseries_dataset_from_array(
    validXch4,
    validych4,
    sequence_length=3285,
    shuffle=False,
)

In [None]:
dataset_test = keras.preprocessing.timeseries_dataset_from_array(
    testXch4,
    testych4,
    sequence_length=3285,
    shuffle=False,
)

In [None]:
# Display the three dataset objects.
dataset_train, dataset_valid, dataset_test

In [None]:
# Iterate over every batch; the loop body discards its result, so the only lasting
# effect is that `inputs` / `targets` hold the final batch afterwards.
for inputs, targets in dataset_train:
    inputs.numpy(), targets.numpy()

In [None]:
# Same for the validation dataset (leaves the final batch in inputs2 / targets2).
for inputs2, targets2 in dataset_valid:
    inputs2.numpy(),targets2.numpy()

In [None]:
# Same for the test dataset (leaves the final batch in inputs3 / targets3).
for inputs3, targets3 in dataset_test:
    inputs3.numpy(),targets3.numpy()

In [None]:
# Rebinds `inputs` to a 5-D tensor with a trailing length-1 axis.
# NOTE(review): this only succeeds if the batch held in `inputs` has exactly
# 3285*1856*35*35 elements - confirm.
inputs=tf.reshape(inputs,[3285,1856,35,35,1])

In [None]:
# Display the targets of the last batch read above.
targets

In [None]:
from tensorflow.keras.layers import InputLayer
from keras.layers import Input

# Check the first test batch against a reference array `data`.
# NOTE(review): `data` is not defined in this part of the notebook, and the 10-step
# slices do not match the sequence_length=3285 used to build dataset_test - this
# looks like an unadapted documentation example; confirm before relying on it.
for batch in dataset_test:
  inputs, targets = batch
  assert np.array_equal(inputs[0], data[:10])  # First sequence: steps [0-9]
  assert np.array_equal(targets[0], data[10])  # Corresponding target: step 10
  break

In [None]:
# Print the first five (inputs --> target) batches of the test dataset.
for present_values, next_value in dataset_test.take(5):
    print(tf.squeeze(present_values).numpy(), '-->', next_value.numpy())

In [None]:
# Walk the whole training dataset; afterwards `inputs` / `targets` hold the final batch.
for batch in dataset_train:
    inputs, targets = batch

In [None]:
#ds = tf.keras.preprocessing.timeseries_dataset_from_array(features[:-365], labels[365:], 1, batch_size=1)

# NOTE(review): the only definition of `ds` in this cell is commented out above, so
# this loop needs a `ds` left over in the kernel.
for inputs, targets in ds:
  print("Input:", inputs.numpy(), "Target:", targets.numpy())

In [None]:
# NOTE(review): `batch_dataset` is not defined in this part of the notebook - confirm it exists.
batch_dataset(dataset_valid)

In [None]:
# Report the shapes of the most recent batch. `inputs` / `targets` are TensorFlow
# tensors here, which expose `.numpy()` rather than `.to_numpy()`; the labels say
# "shape", so the shapes are printed rather than the full arrays.
print("Input shape:", inputs.numpy().shape, "Target shape:", targets.numpy().shape)

In [None]:
# Pull a single batch from the test dataset and report its shapes.
for batch in dataset_test.take(1):
    inputs, targets = batch

print("Input shape:", inputs.numpy().shape)
print("Target shape:", targets.numpy().shape)

In [None]:
# NOTE(review): without parentheses this only displays the bound method; it does
# not cache the dataset.
dataset_train.cache

In [None]:
# Flatten every CH4 training sample to one row of 2,356,875 values (419*75*75).
trainXdf=pd.DataFrame(trainXch4.reshape(3285,2356875))

In [None]:
# NOTE(review): `.columns.values` is the array of column labels, not the data, and
# it replaces the frame held in `trainXdf` - confirm `.values` was not intended.
trainXdf=trainXdf.columns.values

In [None]:
trainX=trainXdf

In [None]:
# Same flattening for the CH4 training targets.
trainydf=pd.DataFrame(trainych4.reshape(3285,2356875))

In [None]:
# NOTE(review): as above, this keeps only the column labels.
trainydf=trainydf.columns.values

In [None]:
# Shapes of the last batch read from each dataset.
inputs.shape, targets.shape

In [None]:
inputs2.shape, targets2.shape

In [None]:
inputs3.shape, targets3.shape

In [None]:
# Filled-contour view of one 75x75 slice of the CH4 targets (indices 100 and 80).
plt.contourf(trainych4[100][80].reshape(75,75))

In [None]:
# Print the train and validation array shapes for comparison.
print(trainXch4.shape, trainych4.shape)
print(validXch4.shape, validych4.shape)

In [None]:
# Tiny 2x3 feature and label fixtures, kept both as nested lists and as NumPy arrays.
train_data = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
train_label = [[1, 2, 3], [4, 5, 6]]
train_data_np, train_label_np = map(np.asarray, (train_data, train_label))

In [None]:
# Shapes of the toy fixture arrays.
train_data_np.shape, train_label_np.shape

In [None]:
inputs.shape

In [None]:
inputs.shape, targets.shape

In [None]:
# Symbolic Keras inputs sized from the CH4 training arrays.
# NOTE(review): the leading (sample-count) dimension of the array is included in
# `shape` here - confirm that is intended.
# NOTE(review): `(trainych4.shape[0])` is a parenthesised integer, not a one-element tuple.
traininputs = keras.Input(shape=(trainXch4.shape[0], trainXch4.shape[1],trainXch4.shape[2], trainXch4.shape[3], trainXch4.shape[4]))
traintargets = keras.Input(shape=(trainych4.shape[0]))

In [None]:
# Same construction for the validation arrays.
validinputs = keras.Input(shape=(validXch4.shape[0], validXch4.shape[1],validXch4.shape[2], validXch4.shape[3], validXch4.shape[4]))
validtargets = keras.Input(shape=(validych4.shape[0]))

In [None]:
# Same construction for the test arrays.
testinputs = keras.Input(shape=(testXch4.shape[0], testXch4.shape[1],testXch4.shape[2], testXch4.shape[3], testXch4.shape[4]))
testtargets = keras.Input(shape=(testych4.shape[0]))

In [None]:
inputs.shape, targets.shape, trainXch4.shape, trainych4.shape, validXch4.shape, validych4.shape

In [None]:
# NOTE(review): `trainy` is built from the inputs array trainXch4, not from
# trainych4 - confirm this is intended.
trainy=trainXch4.reshape(3285,1856,35,35,)

In [None]:
trainy.shape

In [None]:
# Reshaped view of the CH4 inputs.
trainx=trainXch4.reshape(3285,1856,35,35)

In [None]:
trainx.shape, trainy.shape

In [None]:
# NOTE(review): the first part is sliced from validXch4 but the second part from
# trainXch4 - confirm the second source should not also be validXch4.
validXch4_x,validXch4_y=validXch4[:2920],trainXch4[2920:]

In [None]:
# Arrange the image arrays to match the backend's channel ordering.
# NOTE(review): `x_train`, `x_test`, `img_rows` and `img_cols` are not defined in
# this cell; they must already exist in the kernel.
if K.image_data_format() == 'channels_first':
    # Fix: the earlier line `tf.reshape(trainx[-1, 1, 32, 32)` had unbalanced
    # brackets and stopped the whole cell from parsing. The tensor and the target
    # shape are now passed as two separate arguments.
    # NOTE(review): `trainx` is reshaped to 35x35 grids elsewhere - confirm 32x32.
    x_train = tf.reshape(trainx, [-1, 1, 32, 32])
    x_test = x_test.reshape(-1, 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(-1, img_rows, img_cols, 1)
    x_test = x_test.reshape(-1, img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

In [None]:
# Compare the search input array's shape with the current `inputs` shape.
# NOTE(review): `trainxnew` is not defined in this part of the notebook.
trainxnew.shape, inputs.shape

In [None]:
#CH4
# Run the hyperparameter search on the CH4 arrays in their original order, with an
# explicit validation set. Callbacks: early stopping, history capture, abort on NaN
# loss, and TensorBoard logging to /tmp/tb_logs.
tuner.search(
    trainxnew,
    trainynew,
    validation_data=(validxnew, validynew),
    shuffle=False,
    steps_per_epoch=None,
    verbose=1,
    callbacks=[
        early_stopping,
        History(),
        TerminateOnNaN(),
        keras.callbacks.TensorBoard("/tmp/tb_logs"),
    ],
    use_multiprocessing=True,
)

In [None]:
# #CO2
# bayesian_tuner.search(trainXscco2ref, 
#                      trainyscco2ref, 
#                      steps_per_epoch = None, 
#                      shuffle = False, 
#                      validation_data = (validXscco2ref, validyscco2ref),
#                      #validation_split = 0.15,#0.2,
#                      verbose = 1,
#                      callbacks = [early_stopping, History(), TerminateOnNaN(),keras.callbacks.TensorBoard("/tmp/tb_logs")], 
#                      use_multiprocessing = True)

In [None]:
# Print the summary of the search trials.
# NOTE(review): the active search cell calls `tuner.search`, while this reads from
# `bayesian_tuner` - confirm both names refer to the same tuner.
bayesian_tuner.results_summary()

In [None]:
#HP ALT
# BEST MODEL: TRIAL24
# Trial 27 Complete [01h 42m 02s]
# val_loss: 0.10911799222230911

# Best val_loss So Far: 0.10056757181882858
# Total elapsed time: 16h 43m 38s

# Search: Running Trial #28

# Value             |Best Value So Far |Hyperparameter
# 0.0001            |0.0001            |learning_rate
# 64                |64                |batch_size
# 64                |64                |units
# 91                |96                |conv1d_filters
# 3                 |9                 |conv1d_kernel_size
# relu              |swish             |conv1d_activation
# 107               |97                |bilstm_units
# swish             |relu              |bilstm_activation
# relu              |relu              |bilstm_rec_activation
# 0.1               |0.4               |bilstm_dropout
# 0.1               |0.1               |bilstm_rec_dropout
# False             |False             |bilstm_use_bias
# True              |True              |bilstm_forgot_bias
# 88                |64                |lstm_units
# gelu              |relu              |lstm_activation
# softmax           |relu              |lstm_rec_activation
# 0.1               |0.1               |lstm_dropout
# 0.1               |0.1               |lstm_rec_dropout
# 128               |128               |lstm2_units
# relu              |relu              |lstm2_activation
# relu              |relu              |lstm2_rec_activation
# 0.1               |0.1               |lstm2_dropout
# 0.4               |0.4               |lstm2_rec_dropout
# 128               |128               |bilstm2_units
# relu              |relu              |bilstm2_activation
# swish             |sigmoid           |bilstm2_rec_activation
# 0.4               |0.4               |bilstm2_dropout
# 0.4               |0.4               |bilstm2_rec_dropout
# True              |True              |bilstm2_use_bias
# False             |False             |bilstm2_forgot_bias
# 91                |88                |conv1d2_filters
# 9                 |9                 |conv1d2_kernel_size
# swish             |swish             |conv1d2_activation

In [None]:
# #ALT Results
# Results summary
# Results in ./Bayesian_optimization_071423_ALT
# Showing 10 best trials
# Objective(name="val_loss", direction="min")

# Trial 09 summary
# Hyperparameters:
# units: 224
# learning_rate: 0.01
# conv1d_filters: 38
# conv1d_kernel_size: 6
# conv1d_activation: linear
# conv1d2_filters: 48
# conv1d2_kernel_size: 5
# conv1d2_activation: exponential
# bilstm_units: 63
# bilstm_activation: sigmoid
# bilstm_rec_activation: tanh
# bilstm_dropout: 0.3
# bilstm_rec_dropout: 0.1
# bilstm_use_bias: False
# bilstm_forgot_bias: True
# bilstm2_units: 45
# bilstm2_activation: sigmoid
# bilstm2_rec_activation: exponential
# bilstm2_dropout: 0.1
# bilstm2_rec_dropout: 0.1
# bilstm2_use_bias: True
# bilstm2_forgot_bias: False
# bilstm3_units: 46
# bilstm3_activation: selu
# bilstm3_rec_activation: relu
# bilstm3_dropout: 0.4
# bilstm3_rec_dropout: 0.1
# bilstm3_use_bias: True
# bilstm3_forgot_bias: False
# bilstm4_units: 47
# bilstm4_activation: sigmoid
# bilstm4_rec_activation: gelu
# bilstm4_dropout: 0.3
# bilstm4_rec_dropout: 0.1
# bilstm4_use_bias: False
# bilstm4_forgot_bias: False
# lstm_units: 63
# lstm_activation: sigmoid
# lstm_rec_activation: gelu
# lstm_dropout: 0.3
# lstm_rec_dropout: 0.1
# lstm2_units: 64
# lstm2_activation: exponential
# lstm2_rec_activation: selu
# lstm2_dropout: 0.3
# lstm2_rec_dropout: 0.3
# Score: 2.8679637908935547

In [None]:
#ALT
class geocryoai2(HyperModel):
    """Fixed-hyperparameter ALT encoder-decoder.

    Layer order: Conv1D -> BiLSTM -> LSTM -> RepeatVector -> LSTM -> BiLSTM
    -> Conv1DTranspose -> TimeDistributed(Dense), compiled with RMSprop and MSE loss.

    NOTE(review): this notebook defines ``geocryoai2`` more than once; the definition
    in whichever cell ran last is the one in effect.
    """

    def build(self, hp):
        """Build and compile the ALT network.

        Args:
            hp: accepted for the HyperModel interface; not read by this method.

        Returns:
            The compiled ``tf.keras.Sequential`` model.
        """
        # Fix: the input shape now comes from the ALT array. It was previously read
        # from the CH4 array (trainXscch4ref), although the output layer and the
        # metric of this ALT model are sized from trainyscaltref.
        inputs = keras.Input(shape=(trainXscaltref.shape[1], trainXscaltref.shape[2]))
        seq_shape = (inputs.shape[1], inputs.shape[2])

        model = tf.keras.Sequential()
        # Encoder
        model.add(Conv1D(
            filters=38,
            kernel_size=6,
            activation='swish',
            padding='same',
            input_shape=seq_shape))
        model.add(Bidirectional(LSTM(
            input_shape=seq_shape,
            return_sequences=True,
            units=97,
            activation='relu',
            use_bias=False,
            unit_forget_bias=True,
            dropout=0.1,
            recurrent_dropout=0.1)))
        model.add(LSTM(
            units=64,
            activation='relu',
            return_sequences=False,
            dropout=0.1,
            recurrent_dropout=0.1,
            input_shape=seq_shape))
        # Decoder: repeat the encoded vector once per input time step.
        model.add(RepeatVector(inputs.shape[1]))   #TUNE THIS (LAYERS) WHEN ADDING SATELLITE AND MODELING DATA
        model.add(LSTM(
            units=128,
            activation='relu',
            return_sequences=True,
            dropout=0.1,
            recurrent_dropout=0.1,
            input_shape=seq_shape))
        model.add(Bidirectional(LSTM(
            input_shape=seq_shape,
            return_sequences=True,
            units=128,
            activation='relu',
            use_bias=True,
            unit_forget_bias=False,
            dropout=0.1,
            recurrent_dropout=0.1)))
        model.add(Conv1DTranspose(
            filters=88,
            kernel_size=9,
            activation='swish',
            padding='same',
            input_shape=seq_shape))
        model.add(TimeDistributed(Dense(trainyscaltref.shape[1])))

        # NOTE(review): the third entry is the return value of `.build(...)`, not the
        # RSquare object itself - confirm that the R^2 metric is actually tracked.
        metrics = ['mean_squared_error', 'mean_absolute_error', RSquare().build(input_shape = (trainyscaltref.shape[1],))]
        loss_function = 'mean_squared_error'
        model.compile(
            optimizer=tf.keras.optimizers.legacy.RMSprop(learning_rate=0.0001, clipvalue=1000),
            loss=loss_function,
            metrics=metrics)
        return model

    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` with a fixed batch size of 256; other arguments pass through to ``model.fit``."""
        return model.fit(*args, batch_size = 256, **kwargs)

In [None]:
#ALT
class geocryoai2(HyperModel):
    """ALT encoder-decoder with fixed hyperparameters.

    Conv1D -> BiLSTM -> LSTM -> RepeatVector -> LSTM -> BiLSTM -> Conv1DTranspose
    -> TimeDistributed(Dense); RMSprop optimizer, mean-squared-error loss.

    NOTE(review): ``geocryoai2`` is defined more than once in this notebook; the cell
    run last determines which definition is active.
    """

    def build(self, hp):
        """Return the compiled ALT model; ``hp`` is part of the interface but is not read."""
        input_spec = keras.Input(shape=(trainXscaltref.shape[1], trainXscaltref.shape[2]))
        n_steps = input_spec.shape[1]
        seq_shape = (n_steps, input_spec.shape[2])

        model = tf.keras.Sequential([
            # --- encoder ---
            Conv1D(
                96, 9,
                activation='swish',
                padding='same',
                input_shape=seq_shape,
            ),
            Bidirectional(LSTM(
                97,
                activation='relu',
                use_bias=False,
                unit_forget_bias=True,
                dropout=0.1,
                recurrent_dropout=0.1,
                return_sequences=True,
                input_shape=seq_shape,
            )),
            LSTM(
                64,
                activation='relu',
                dropout=0.1,
                recurrent_dropout=0.1,
                return_sequences=False,
                input_shape=seq_shape,
            ),
            # --- decoder: repeat the encoded vector once per input time step ---
            RepeatVector(n_steps),   #TUNE THIS (LAYERS) WHEN ADDING SATELLITE AND MODELING DATA
            LSTM(
                128,
                activation='relu',
                dropout=0.1,
                recurrent_dropout=0.1,
                return_sequences=True,
                input_shape=seq_shape,
            ),
            Bidirectional(LSTM(
                128,
                activation='relu',
                use_bias=True,
                unit_forget_bias=False,
                dropout=0.1,
                recurrent_dropout=0.1,
                return_sequences=True,
                input_shape=seq_shape,
            )),
            Conv1DTranspose(
                88, 9,
                activation='swish',
                padding='same',
                input_shape=seq_shape,
            ),
            TimeDistributed(Dense(trainyscaltref.shape[1])),
        ])

        # NOTE(review): `.build(...)` is called on the metric and its return value is
        # what lands in the list - confirm R^2 is actually reported.
        r_square = RSquare().build(input_shape = (trainyscaltref.shape[1],))
        optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate=0.0001, clipvalue=1000)
        model.compile(
            optimizer=optimizer,
            loss='mean_squared_error',
            metrics=['mean_squared_error', 'mean_absolute_error', r_square],
        )
        return model

    def fit(self, hp, model, *args, **kwargs):
        """Delegate to ``model.fit`` with the batch size pinned to 256."""
        return model.fit(*args, batch_size=256, **kwargs)

In [None]:
#HP CH4
#BEST MODEL: TRIAL22
# Trial 26 Complete [01h 35m 54s]
# val_loss: 0.4272751808166504

# Best val_loss So Far: 0.004293057601898909
# Total elapsed time: 14h 38m 44s

# Search: Running Trial #27

# Value             |Best Value So Far |Hyperparameter
# 0.001             |0.0001            |learning_rate
# 64                |64                |batch_size
# 320               |128               |units
# 64                |71                |conv1d_filters
# 9                 |9                 |conv1d_kernel_size
# relu              |relu              |conv1d_activation
# 116               |92                |bilstm_units
# relu              |relu              |bilstm_activation
# relu              |relu              |bilstm_rec_activation
# 0.4               |0.3               |bilstm_dropout
# 0.4               |0.4               |bilstm_rec_dropout
# False             |False             |bilstm_use_bias
# True              |True              |bilstm_forgot_bias
# 100               |128               |lstm_units
# linear            |tanh              |lstm_activation
# swish             |elu               |lstm_rec_activation
# 0.4               |0.4               |lstm_dropout
# 0.4               |0.4               |lstm_rec_dropout
# 128               |128               |lstm2_units
# relu              |softmax           |lstm2_activation
# swish             |swish             |lstm2_rec_activation
# 0.4               |0.4               |lstm2_dropout
# 0.4               |0.1               |lstm2_rec_dropout
# 123               |78                |bilstm2_units
# relu              |relu              |bilstm2_activation
# swish             |relu              |bilstm2_rec_activation
# 0.1               |0.1               |bilstm2_dropout
# 0.1               |0.1               |bilstm2_rec_dropout
# False             |False             |bilstm2_use_bias
# True              |False             |bilstm2_forgot_bias
# 128               |117               |conv1d2_filters
# 3                 |3                 |conv1d2_kernel_size
# sigmoid           |swish             |conv1d2_activation

In [None]:
#CH4
class geocryoai2(HyperModel):
    """Fixed-hyperparameter CH4 encoder-decoder.

    Layer order: Conv1D -> BiLSTM -> LSTM -> RepeatVector -> LSTM -> BiLSTM
    -> Conv1DTranspose -> TimeDistributed(Dense) -> Dense, compiled with RMSprop and
    MSE loss.

    NOTE(review): this notebook defines ``geocryoai2`` more than once; the definition
    in whichever cell ran last is the one in effect.
    """

    def build(self, hp):
        """Build and compile the CH4 network.

        Args:
            hp: accepted for the HyperModel interface; not read by this method.

        Returns:
            The compiled ``tf.keras.Sequential`` model.
        """
        # Fix: define the input spec locally from the CH4 array, as the ALT
        # definitions of this class do from theirs. It was previously commented out,
        # so the layer shapes depended on whatever a notebook-level `inputs`
        # variable happened to hold when the cell ran.
        inputs = keras.Input(shape=(trainXscch4ref.shape[1], trainXscch4ref.shape[2]))
        seq_shape = (inputs.shape[1], inputs.shape[2])

        model = tf.keras.Sequential()
        # Encoder
        model.add(Conv1D(
            filters=71,
            kernel_size=9,
            activation='swish',
            padding='same',
            input_shape=seq_shape))
        model.add(Bidirectional(LSTM(
            input_shape=seq_shape,
            return_sequences=True,
            units=92,
            activation='relu',
            use_bias=False,
            unit_forget_bias=True,
            dropout=0.1,
            recurrent_dropout=0.1)))
        model.add(LSTM(
            units=128,
            activation='tanh',
            return_sequences=False,
            dropout=0.1,
            recurrent_dropout=0.1,
            input_shape=seq_shape))
        # Decoder: repeat the encoded vector once per input time step.
        model.add(RepeatVector(inputs.shape[1]))   #TUNE THIS (LAYERS) WHEN ADDING SATELLITE AND MODELING DATA
        model.add(LSTM(
            units=128,
            activation='softmax',
            return_sequences=True,
            dropout=0.1,
            recurrent_dropout=0.1,
            input_shape=seq_shape))
        model.add(Bidirectional(LSTM(
            input_shape=seq_shape,
            return_sequences=True,
            units=78,
            activation='relu',
            use_bias=False,
            unit_forget_bias=False,
            dropout=0.1,
            recurrent_dropout=0.1)))
        model.add(Conv1DTranspose(
            filters=117,
            kernel_size=3,
            activation='swish',
            padding='same',
            input_shape=seq_shape))
        # Output head sized from the CH4 arrays.
        model.add(TimeDistributed(Dense(trainyscch4ref.shape[1])))
        model.add(Dense(trainXscch4ref.shape[1]))

        # NOTE(review): the third entry is the return value of `.build(...)`, not the
        # RSquare object itself - confirm that the R^2 metric is actually tracked.
        metrics = ['mean_squared_error', 'mean_absolute_error', RSquare().build(input_shape = (trainyscch4ref.shape[1],))]
        loss_function = 'mean_squared_error'
        # NOTE(review): the CH4 search log in this notebook lists 0.0001 as the best
        # learning rate so far, while 0.001 is used here - confirm.
        model.compile(
            optimizer=tf.keras.optimizers.legacy.RMSprop(learning_rate=0.001, clipvalue=1000),
            loss=loss_function,
            metrics=metrics)
        return model

    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` with a fixed batch size of 256; other arguments pass through to ``model.fit``."""
        return model.fit(*args, batch_size = 256, **kwargs)

In [None]:
#HP CO2
#BEST MODEL: TRIAL22
# Trial 31 Complete [01h 26m 33s]
# val_loss: 0.09103024005889893

# Best val_loss So Far: 0.014815938659012318
# Total elapsed time: 13h 56m 11s

# Search: Running Trial #32

# Value             |Best Value So Far |Hyperparameter
# 0.0001            |0.0001            |learning_rate
# 64                |64                |batch_size
# 128               |192               |units
# 128               |128               |conv1d_filters
# 9                 |6                 |conv1d_kernel_size
# relu              |elu               |conv1d_activation
# 111               |100               |bilstm_units
# relu              |relu              |bilstm_activation
# relu              |relu              |bilstm_rec_activation
# 0.1               |0.1               |bilstm_dropout
# 0.3               |0.3               |bilstm_rec_dropout
# False             |True              |bilstm_use_bias
# True              |True              |bilstm_forgot_bias
# 64                |64                |lstm_units
# sigmoid           |tanh              |lstm_activation
# sigmoid           |tanh              |lstm_rec_activation
# 0.1               |0.1               |lstm_dropout
# 0.4               |0.4               |lstm_rec_dropout
# 80                |83                |lstm2_units
# relu              |relu              |lstm2_activation
# relu              |relu              |lstm2_rec_activation
# 0.1               |0.1               |lstm2_dropout
# 0.3               |0.3               |lstm2_rec_dropout
# 64                |64                |bilstm2_units
# relu              |relu              |bilstm2_activation
# relu              |sigmoid           |bilstm2_rec_activation
# 0.4               |0.3               |bilstm2_dropout
# 0.4               |0.3               |bilstm2_rec_dropout
# False             |False             |bilstm2_use_bias
# False             |True              |bilstm2_forgot_bias
# 128               |95                |conv1d2_filters
# 6                 |6                 |conv1d2_kernel_size
# swish             |swish             |conv1d2_activation

In [None]:
#CO2
class geocryoai2(HyperModel):
    """Fixed-hyperparameter Conv1D / (Bi)LSTM encoder-decoder for the CO2 target.

    Layer settings are hard-coded rather than sampled from ``hp``. Output
    widths are read from the notebook-level arrays ``trainyscco2ref`` and
    ``trainXscco2ref``; to retarget ALT or CH4, swap in the matching
    ``*alt*`` / ``*ch4*`` arrays in the two Dense layers and in the R-squared
    metric.
    """

    def build(self, hp):
        """Assemble and compile the Sequential network, then return it.

        ``hp`` is not read. The window shape comes from a variable named
        ``inputs`` that must already exist in the enclosing (notebook) scope.
        """
        # (timesteps, features) of one sample, shared by every layer below.
        window_shape = (inputs.shape[1], inputs.shape[2])

        model = tf.keras.Sequential()
        layer_stack = [
            # Convolutional front end.
            Conv1D(
                filters=128,
                kernel_size=6,
                activation='elu',
                padding='same',
                input_shape=window_shape),
            # Encoder. TUNE THIS (LAYERS) WHEN ADDING SATELLITE AND MODELING DATA
            Bidirectional(LSTM(
                units=120,
                activation='relu',
                use_bias=True,
                unit_forget_bias=True,
                return_sequences=True,
                dropout=0.1,
                recurrent_dropout=0.1,
                input_shape=window_shape)),
            # Collapses the sequence to a single vector (return_sequences=False).
            LSTM(
                units=64,
                activation='tanh',
                return_sequences=False,
                dropout=0.1,
                recurrent_dropout=0.1,
                input_shape=window_shape),
            # Repeats that vector once per input timestep.
            # TUNE THIS (LAYERS) WHEN ADDING SATELLITE AND MODELING DATA
            RepeatVector(window_shape[0]),
            # Decoder.
            LSTM(
                units=83,
                activation='relu',
                return_sequences=True,
                dropout=0.1,
                recurrent_dropout=0.1,
                input_shape=window_shape),
            Bidirectional(LSTM(
                units=64,
                activation='relu',
                use_bias=False,
                unit_forget_bias=True,
                return_sequences=True,
                dropout=0.1,
                recurrent_dropout=0.1,
                input_shape=window_shape)),
            Conv1DTranspose(
                filters=95,
                kernel_size=6,
                activation='swish',
                padding='same',
                input_shape=window_shape),
            # Output heads sized from the CO2 target / input arrays.
            TimeDistributed(Dense(trainyscco2ref.shape[1])),
            Dense(trainXscco2ref.shape[1]),
        ]
        for layer in layer_stack:
            model.add(layer)

        r_squared = RSquare().build(input_shape=(trainyscco2ref.shape[1],))
        tracked_metrics = ['mean_squared_error', 'mean_absolute_error', r_squared]
        optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate=0.0001, clipvalue=1000)
        model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=tracked_metrics)
        return model

    def fit(self, hp, model, *args, **kwargs):
        """Forward everything to ``model.fit`` with the batch size pinned to 64."""
        return model.fit(*args, batch_size=64, **kwargs)

In [None]:
# Display the shapes of the ALT training inputs and targets (swap in the CH4 / CO2 lines as needed).
trainXscaltref.shape, trainyscaltref.shape
#trainXscch4ref.shape, trainyscch4ref.shape
#trainXscco2ref.shape, trainyscco2ref.shape

In [None]:
# Build the model used for ALT (model2). `hp` must already exist in the session.
# NOTE(review): build() is called on the class, not an instance, so `trainXscaltref`
# is bound to `self` and is never read; inside build() the input shape comes from
# the notebook-level name `inputs` — confirm `inputs` matches this array.
# NOTE(review): as written, geocryoai2.build() sizes its Dense layers from the CO2
# arrays (trainyscco2ref / trainXscco2ref) — confirm that is intended for ALT.
model2=geocryoai2.build(trainXscaltref, hp)
#model3=geocryoai2.build(trainXscch4ref, hp)
#model4=geocryoai2.build(trainXscco2ref, hp)
#model3b=geocryoai2.build(trainXscch4ref, hp)

In [None]:
# Print the layer-by-layer summary of model2.
model2.summary()

In [None]:
# NOTE(review): model3 is only assigned in a commented-out line of the build cell; enable it before running this.
model3.summary()

In [None]:
# NOTE(review): model4 is only assigned in a commented-out line of the build cell; enable it before running this.
model4.summary()

In [None]:
# NOTE(review): model3b is only assigned in a commented-out line of the build cell; enable it before running this.
model3b.summary()

In [None]:
# Render the model2 architecture diagram (with layer names and shapes) to a PNG file.
# NOTE(review): the output path is absolute and user-specific; dpi=1000 produces a very large image.
#geocryoai(X_train)
#img_file = '/Users/bradleygay/Downloads/model_arch.jpeg'
img_file = '/Users/bradleygay/Downloads/GeoCryoAI_Arch_071523_insituALT.png'
tf.keras.utils.plot_model(model2, to_file=img_file, show_shapes=True, show_layer_names=True, dpi=1000);
#img_file = '/Users/bradleygay/Downloads/GeoCryoAI_Arch_071423_insituCH4.png'
#tf.keras.utils.plot_model(model3, to_file=img_file, show_shapes=True, show_layer_names=True, dpi=1000);
#img_file = '/Users/bradleygay/Downloads/GeoCryoAI_Arch_071423_insituCO2.png'
#tf.keras.utils.plot_model(model4, to_file=img_file, show_shapes=True, show_layer_names=True, dpi=1000);
#img_file = '/Users/bradleygay/Downloads/GeoCryoAI_Arch_071423_insituCH4model3b.png'
#tf.keras.utils.plot_model(model3b, to_file=img_file, show_shapes=True, show_layer_names=True, dpi=1000);

In [None]:
# Persist `model`: architecture as JSON, weights as HDF5 (both written to the current working directory).
# NOTE(review): `model` is not assigned in the neighbouring cells (they use model2/model3/model4);
# confirm which model object this refers to before running.
# serialize model to JSON
model_json = model.to_json()
#with open("model_070923_insituALT.json", "w") as json_file:
with open("model_081023_tcfmCH4.json", "w") as json_file:
#with open("model_070923_insituCO2.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
#model.save_weights("model_070923_insituALT_experimental.h5")
model.save_weights("model_081023_tcfmCH4.h5")
#model.save_weights("model_070923_insituCO2.h5")
print("Saved model to disk")

In [None]:
import os
import time

# Every run directory is created beneath ./logs.
root_logdir = os.path.join(os.curdir, 'logs')


def get_run_logdir():
    """Return a timestamped run-directory path beneath ``root_logdir``.

    The directory name has the form ``run_YYYY_MM_DD-HH_MM_SS`` (local time).
    Only the path is computed; nothing is created on disk.
    """
    stamp = time.strftime('run_%Y_%m_%d-%H_%M_%S')
    return os.path.join(root_logdir, stamp)


run_logdir = get_run_logdir()

# Timestamped TensorBoard log directory and a callback writing to it (histogram_freq=1).
# NOTE(review): the training cells below construct their own TensorBoard("/tmp/tb_logs")
# callback and do not pass `tensorboard_callback`; confirm which log location is wanted.
log_dir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
#ALT
# Early stopping ends training when the validation loss stops improving, which
# cuts down on computational load.
# NOTE(review): patience (10) equals epochs (10), so early stopping cannot
# trigger within this run — confirm the intended values.
filepath="weights_geocryoai2.alt_071523.hdf5"
#filepath="weights_geocryoai2.best_ch4_071423.hdf5"
#filepath="weights_geocryoai2.best__co2_071423.hdf5"
#filepath="weights_geocryoai2.best_ch4_model3b_071423.hdf5"
tensorboard_cb = keras.callbacks.TensorBoard('/tmp/tb_logs')
# Writes the model to `filepath` whenever val_loss reaches a new minimum.
checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_loss', verbose = 1, patience = 10, min_delta = 1e-4, restore_best_weights = True)
# fit network
# (For the model3b / history5 variant, fit model3b on the CH4 arrays the same way.)
start_time = time.time()
history2b = model2.fit(
    trainXscaltref,
    trainyscaltref,
    epochs=10,
    batch_size=256,  # also tried: 128, 512
    validation_data=(validXscaltref, validyscaltref),
    steps_per_epoch=None,
    shuffle=False,
    # `checkpoint` and `tensorboard_cb` are passed explicitly; without them the
    # best weights are never written to `filepath`.
    callbacks=[early_stopping, checkpoint, TerminateOnNaN(), tensorboard_cb],
    use_multiprocessing=True)

elapsed_time = time.time() - start_time
print("\nThe first network took {} s to complete training.".format(round(elapsed_time)))

In [None]:
#CH4
# Early stopping ends training when the validation loss stops improving, which
# cuts down on computational load.
# NOTE(review): patience (10) equals epochs (10), so early stopping cannot
# trigger within this run — confirm the intended values.
#filepath="weights_geocryoai2.best_071123.hdf5"
filepath="weights_geocryoai2.best_071223_ch4.hdf5"
#filepath="weights_geocryoai2.best_071223_co2.hdf5"
tensorboard_cb = keras.callbacks.TensorBoard('/tmp/tb_logs')
# Writes the model to `filepath` whenever val_loss reaches a new minimum.
checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_loss', verbose = 1, patience = 10, min_delta = 1e-4, restore_best_weights = True)
# fit network
start_time = time.time()
history3 = model3.fit(
    trainXscch4ref,
    trainyscch4ref,
    epochs=10,
    batch_size=256,  # also tried: 512
    validation_data=(validXscch4ref, validyscch4ref),
    steps_per_epoch=None,
    shuffle=False,
    # `checkpoint` and `tensorboard_cb` are passed explicitly; without them the
    # best weights are never written to `filepath`.
    callbacks=[early_stopping, checkpoint, TerminateOnNaN(), tensorboard_cb],
    use_multiprocessing=True)

elapsed_time = time.time() - start_time
print("\nThe first network took {} s to complete training.".format(round(elapsed_time)))

In [None]:
#CO2
# Early stopping ends training when the validation loss stops improving, which
# cuts down on computational load.
# NOTE(review): patience (10) equals epochs (10), so early stopping cannot
# trigger within this run — confirm the intended values.
#filepath="weights_geocryoai2.best_071123.hdf5"
#filepath="weights_geocryoai2.best_071223_ch4.hdf5"
filepath="weights_geocryoai2.best_071223_co2.hdf5"
tensorboard_cb = keras.callbacks.TensorBoard('/tmp/tb_logs')
# Writes the model to `filepath` whenever val_loss reaches a new minimum.
checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_loss', verbose = 1, patience = 10, min_delta = 1e-4, restore_best_weights = True)
# fit network
start_time = time.time()
history4 = model4.fit(
    trainXscco2ref,
    trainyscco2ref,
    epochs=10,
    batch_size=256,  # also tried: 512
    validation_data=(validXscco2ref, validyscco2ref),
    steps_per_epoch=None,
    shuffle=False,
    # `checkpoint` and `tensorboard_cb` are passed explicitly; without them the
    # best weights are never written to `filepath`.
    callbacks=[early_stopping, checkpoint, TerminateOnNaN(), tensorboard_cb],
    use_multiprocessing=True)

elapsed_time = time.time() - start_time
print("\nThe first network took {} s to complete training.".format(round(elapsed_time)))

In [None]:
# Persist the trained ALT model: architecture as JSON, weights as HDF5.
# The architecture is taken from model2, the same object whose weights are
# saved below, so the two files always describe one model.
# serialize model to JSON
model_json = model2.to_json()
#model_json = model3.to_json()
#model_json = model4.to_json()
#model_json = model3b.to_json()
with open("model2_071623_insituALT.json", "w") as json_file:
#with open("model3_071523_insituCH4_2.json", "w") as json_file:
#with open("model4_071523_insituCO2.json", "w") as json_file:
#with open("model3_071523_insituCH4_model3b.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model2.save_weights("model2_071623_insituALT.h5")
#model3.save_weights("model3_071523_insituCH4.h5")
#model4.save_weights("model4_071523_insituCO2.h5")
#model3b.save_weights("model3_071523_insituCH4_model3b.h5")
print("Saved model to disk")

In [None]:
# Tabulate the per-epoch metrics recorded by the ALT fit (one column per metric).
hist_df = pd.DataFrame(history2b.history)
#hist_df = pd.DataFrame(history3.history)
#hist_df = pd.DataFrame(history4.history)
#hist_df = pd.DataFrame(history5.history)

# Destination for the JSON copy.
hist_json_file = 'historyALT-071623.json'
#hist_json_file = 'historyCH4-071523.json'
#hist_json_file = 'historyCO2-071523.json'
#hist_json_file = 'historyCH4-071523_model3b.json'

# Destination for the CSV copy.
hist_csv_file = 'historyALT-071623.csv'
#hist_csv_file = 'historyCH4-071523.csv'
#hist_csv_file = 'historyCO2-071523.csv'
#hist_csv_file = 'historyCH4-071523_model3b.csv'

# Write the same table in both formats, JSON first and then CSV.
for target, writer in ((hist_json_file, hist_df.to_json), (hist_csv_file, hist_df.to_csv)):
    with open(target, mode='w') as f:
        writer(f)

In [None]:
# Pickle the raw history dict of the ALT fit to a file in the current working directory.
with open('trainHistoryALT-071623', 'wb') as file_pi:
#with open('trainHistoryCH4-071523', 'wb') as file_pi:
#with open('trainHistoryCO2-071523', 'wb') as file_pi:
#with open('trainHistoryCH4-071523_model3b', 'wb') as file_pi:
    pickle.dump(history2b.history, file_pi)
    #pickle.dump(history3.history, file_pi)
    #pickle.dump(history4.history, file_pi)
    #pickle.dump(history5.history, file_pi)

In [None]:
# Display the shape of the ALT test inputs.
testXscaltref.shape
#testXscch4ref.shape
#testXscco2ref.shape

In [None]:
#testXscaltrefres=testXscaltref.reshape(215136,1,456)
#testXscch4refres=testXscch4ref.reshape(161749,1,456)
#testXscco2refres=testXscco2ref.reshape(161749,1,456)

In [None]:
# Evaluate model2 on the held-out ALT test set.
# evaluate() returns [loss, *metrics] in the order the metrics were passed to compile();
# geocryoai2.build() compiles with ['mean_squared_error', 'mean_absolute_error', ...],
# so index 1 is MSE and index 2 is MAE.
score_2b = model2.evaluate(testXscaltref, testyscaltref, verbose = 1) 
#score_3 = model3.evaluate(testXscch4ref, testyscch4ref, verbose = 1)
#score_4 = model4.evaluate(testXscco2ref, testyscco2ref, verbose = 1)
#score_3b = model3b.evaluate(testXscch4ref, testyscch4ref, verbose = 1)

In [None]:


# evaluate() returns [loss, mean_squared_error, mean_absolute_error, ...] — the
# order of the metrics list given to compile() — so MSE is index 1, MAE is
# index 2, and RMSE is the square root of the MSE entry.
# NOTE(review): the evaluate cell assigns `score_2b`, not `score_2`; confirm
# `score_2` exists in the session.
print('Test MAE:', score_2[2])
print('Test MSE:', score_2[1])
print('Test RMSE:', np.sqrt(score_2[1]))

In [None]:
# evaluate() returns [loss, mean_squared_error, mean_absolute_error, ...] — the
# order of the metrics list given to compile() — so MSE is index 1, MAE is
# index 2, and RMSE is the square root of the MSE entry.
print('Test MAE:', score_2b[2])
print('Test MSE:', score_2b[1])
print('Test RMSE:', np.sqrt(score_2b[1]))

In [None]:
# evaluate() returns [loss, mean_squared_error, mean_absolute_error, ...] — the
# order of the metrics list given to compile() — so MSE is index 1, MAE is
# index 2, and RMSE is the square root of the MSE entry.
# NOTE(review): `score_3` is only assigned in a commented-out line of the evaluate cell.
print('Test MAE:', score_3[2])
print('Test MSE:', score_3[1])
print('Test RMSE:', np.sqrt(score_3[1]))

In [None]:
# evaluate() returns [loss, mean_squared_error, mean_absolute_error, ...] — the
# order of the metrics list given to compile() — so MSE is index 1, MAE is
# index 2, and RMSE is the square root of the MSE entry.
# NOTE(review): `score_4` is only assigned in a commented-out line of the evaluate cell.
print('Test MAE:', score_4[2])
print('Test MSE:', score_4[1])
print('Test RMSE:', np.sqrt(score_4[1]))

In [None]:
# evaluate() returns [loss, mean_squared_error, mean_absolute_error, ...] — the
# order of the metrics list given to compile() — so MSE is index 1, MAE is
# index 2, and RMSE is the square root of the MSE entry.
# NOTE(review): `score_3b` is only assigned in a commented-out line of the evaluate cell.
print('Test MAE:', score_3b[2])
print('Test MSE:', score_3b[1])
print('Test RMSE:', np.sqrt(score_3b[1]))

In [None]:
# Quick look at the per-epoch training loss of the ALT fit (no labels or title).
plt.plot(history2b.history['loss'])
#plt.plot(history3.history['loss'])
#plt.plot(history4.history['loss'])
#plt.plot(history5.history['loss'])

#

# <>

# Load

In [None]:
# Reload previously saved training histories for ALT, CH4 and CO2 with pickle.
# NOTE(review): the ALT file is named '.json.json' but is read with pickle.load — confirm
# it really is a pickle and not a JSON export (pickle.load would fail on JSON text).
# NOTE(review): pickle.load executes arbitrary code from the file; only load trusted files.
# NOTE(review): absolute paths on an external volume; not portable across machines.
#import json
#althist_json_file = json.load(open('trainHistoryDictALT_experimental.json', 'r'))
with open('/Volumes/TIR/17sep23/research/code/historyALT.json.json', 'rb') as file:
    althistory=pickle.load(file)
with open('/Volumes/TIR/17sep23/research/code/trainHistoryDictCH4_experimental', 'rb') as file2:
    ch4history=pickle.load(file2)
with open('/Volumes/TIR/17sep23/research/code/trainHistoryDictCO2_experimental', 'rb') as file3:
    co2history=pickle.load(file3)

In [None]:
# Switch the working directory so the relative model/weight file names used below resolve there.
# NOTE(review): absolute, machine-specific path.
import os
os.chdir('/Volumes/TIR/17sep23/research/code/')

In [None]:
# Load Models
from keras.models import model_from_json
from keras.models import load_model


def _load_insitu_model(stem):
    """Rebuild a saved model and round-trip it through a single HDF5 file.

    Reads the architecture from ``<stem>.json`` and the weights from
    ``<stem>.h5``, saves the combined model to ``<stem>.hdf5``, and returns
    the model reloaded from that file. All paths are relative to the current
    working directory.
    """
    # The context manager closes the JSON file even if reading fails.
    with open(stem + '.json', 'r') as json_file:
        architecture = json_file.read()
    rebuilt = model_from_json(architecture)
    # load weights for new model
    rebuilt.load_weights(stem + '.h5')
    print("Loaded model from disk")
    # save and reload
    rebuilt.save(stem + '.hdf5')
    return load_model(stem + '.hdf5')


# Despite the `_json` suffix, these names hold the reloaded model objects.
#ALT
alt_loaded_model_json = _load_insitu_model('model2_070923_insituALT')

#CH4
ch4_loaded_model_json = _load_insitu_model('model2_070923_insituCH4')

#CO2
co2_loaded_model_json = _load_insitu_model('model2_070923_insituCO2')

In [None]:
#FROM OTHER FILE
########################################################
########################################################
########################################################

In [None]:
# Validation-loss curve for the ALT model, read from `history`.
fig,ax = plt.subplots(figsize=(6,4), dpi=100)
# l1=ax.plot(history2.history['loss'], color='dodgerblue', linestyle='solid', label='ALT Training Loss (cm)')
# l2=ax.plot(history3.history['loss'], color='magenta', linestyle='solid', label='CH4 Flux Training Loss (cm)')
# l3=ax.plot(history4.history['loss'], color='springgreen', linestyle='solid', label='CO2 Flux Training Loss (nmolCH4m-2s-1)')
#l1=ax.plot(history.history['mean_squared_error'], color='dodgerblue', linestyle='solid', label='ALT Training Loss (cm)')#CH4 Flux Training Loss (mgCH4m-2d-1)')
# The plotted series is 'val_loss', so the legend entry says "Validation".
l2=ax.plot(history.history['val_loss'], color='magenta', linestyle='solid', label='ALT Validation Loss (cm)')#CH4 Flux Validation Loss (mgCH4m-2d-1)')
#l3=ax.plot(history4.history['val_loss'], color='springgreen', linestyle='solid', label='CO2 Flux Validation Loss (µmolCO2m-2s-1)')
#ax2=ax.twinx();
#ln4=ax2.plot(validPredict, color='coral', linestyle='dotted')

# Only l2 is drawn (every l1 variant above is commented out), so the legend is
# built from l2 alone; use `l1 + l2` again if an l1 line is re-enabled.
lines = l2
labs = [line.get_label() for line in lines];
ax.legend(lines, labs, loc='best', fontsize=8)#'lower left', fontsize=8)

ax.grid(linewidth=0.3);
ax.set_xlabel('Epochs', labelpad=8, fontsize=8);
ax.set_ylabel('Training Loss', labelpad=8, fontsize=8)
#ax.set(xticklabels=[])  # remove the tick labels
ax.tick_params(left=False)  # remove the ticks
# NOTE(review): this replaces the y-label text set two lines above.
plt.ylabel('Active Layer Thickness (cm)')
plt.title('GeoCryoAI In Situ Module | Bidirectional Conv1DLSTM Autoencoder Loss Functions \n ALT Simulations (1969-2022)', pad=10)
#plt.title('GeoCryoAI TCFM-Arctic Module | ConvLSTM2D Autoencoder Loss Function \n CH4 Flux Simulations (2003-2015)', pad=10, fontsize=10)

#plt.xlabel('Year')
#plt.axis([0, 6, 0, 60])
#plt.legend(loc='best')
#plt.show()
#plt.savefig('/Users/bradleygay/Downloads/bilstmae_insitu_CO2_loss.png',dpi=1000)

In [None]:
#reframed_alt.iloc[:,-1]

In [None]:
# Training MSE (left axis) against validation MSE (right axis) for the ALT model,
# read from `history2` and written to a PNG in the working directory.
fig, ax = plt.subplots(figsize=(10, 6), dpi=1000)
lns1 = ax.plot(history2.history['mean_squared_error'],
               color='dodgerblue', linestyle='solid', label='Training, ALT (cm)')
ax2 = ax.twinx()
lns2 = ax2.plot(history2.history['val_mean_squared_error'],
                color='tomato', linestyle='solid', label='Validation, ALT (cm)')

# One legend covering the curves from both axes.
lns = lns1 + lns2
labs = [handle.get_label() for handle in lns]
ax2.legend(lns, labs, loc='best', fontsize=12)

# Left (training) axis.
ax.set_xlabel('Full Iterations (epochs)', labelpad=12, fontsize=10)
ax.set_ylabel('Training MSE, Scaled ALT (cm)', labelpad=12, fontsize=10)
ax.tick_params(axis='y', labelcolor='dodgerblue')
ax.tick_params(left=False)  # remove the ticks
ax.grid(linewidth=0.3)

# Right (validation) axis.
ax2.set_ylabel('Validation MSE, Scaled ALT (cm)', labelpad=12, fontsize=10)
ax2.tick_params(axis='y', labelcolor='tomato')
ax2.tick_params(right=False)  # remove the ticks
ax2.grid(linewidth=0.3)

plt.title('GeoCryoAI Modeling, Cost Function and Validation Loss of ALT | Alaska [1969-2022] \n Number of Thaw Depth Samples/Replicates: 2.441M', pad=15, fontsize=14)
plt.tight_layout()
plt.savefig('ALTstats_1969-2022_071323.png', dpi=1000)

In [None]:
# Training MSE (left axis) against validation MSE (right axis) for the CH4 model,
# read from `history3`.
fig,ax=plt.subplots(figsize=(10,6), dpi=1000);
lns1=ax.plot(history3.history['mean_squared_error'], color='magenta', linestyle='solid', label='Training, CH4 Flux (nmolCH4m-2s-1)');
ax2=ax.twinx();
lns2=ax2.plot(history3.history['val_mean_squared_error'], color='slateblue', linestyle='solid', label='Validation, CH4 Flux (nmolCH4m-2s-1)');

# One legend covering the curves from both axes.
lns = lns1+lns2#+lns3+lns4;
labs = [l.get_label() for l in lns];
ax2.legend(lns, labs, loc='best', fontsize=12);

ax.grid(linewidth=0.3);
ax2.grid(linewidth=0.3);
ax.set_xlabel('Full Iterations (epochs)', labelpad=12, fontsize=10);
# "CH4" spelled consistently with the right-hand axis label and the legend.
ax.set_ylabel('Training MSE, Scaled CH4 Flux (nmolCH4m-2s-1)', labelpad=12, fontsize=10);
ax.tick_params(axis='y', labelcolor='magenta')
ax2.set_ylabel('Validation MSE, Scaled CH4 Flux (nmolCH4m-2s-1)', labelpad=12, fontsize=10)
ax2.tick_params(axis='y', labelcolor='slateblue')
ax.tick_params(left=False)  # remove the ticks
ax2.tick_params(right=False)  # remove the ticks
plt.title('GeoCryoAI Modeling, Cost Function and Validation Loss of CH4 Flux | Alaska [2011-2021] \n Number of CH4 Flux Samples/Replicates: 2.083M', pad=15, fontsize=14);
plt.tight_layout()
#plt.savefig('ALTstats_CNNLSTMSAEmetrics_1969-2022_021323.png', dpi=1000)

In [None]:
# Training MSE (left axis) against validation MSE (right axis) for the CH4 run
# stored in `history5`, written to a PNG in the working directory.
# NOTE(review): the output file name says 1969-2022 while the title says 2011-2021.
fig,ax=plt.subplots(figsize=(10,6), dpi=1000);
lns1=ax.plot(history5.history['mean_squared_error'], color='midnightblue', linestyle='solid', label='Training, CH4 Flux (nmolCH4m-2s-1)');
ax2=ax.twinx();
lns2=ax2.plot(history5.history['val_mean_squared_error'], color='magenta', linestyle='solid', label='Validation, CH4 Flux (nmolCH4m-2s-1)');

# One legend covering the curves from both axes.
lns = lns1+lns2#+lns3+lns4;
labs = [l.get_label() for l in lns];
ax2.legend(lns, labs, loc='best', fontsize=12);

ax.grid(linewidth=0.3);
ax2.grid(linewidth=0.3);
ax.set_xlabel('Full Iterations (epochs)', labelpad=12, fontsize=10);
# "CH4" spelled consistently with the right-hand axis label and the legend.
ax.set_ylabel('Training MSE, Scaled CH4 Flux (nmolCH4m-2s-1)', labelpad=12, fontsize=10);
ax.tick_params(axis='y', labelcolor='midnightblue')
ax2.set_ylabel('Validation MSE, Scaled CH4 Flux (nmolCH4m-2s-1)', labelpad=12, fontsize=10)
ax2.tick_params(axis='y', labelcolor='magenta')
ax.tick_params(left=False)  # remove the ticks
ax2.tick_params(right=False)  # remove the ticks
plt.title('GeoCryoAI Modeling, Cost Function and Validation Loss of CH4 Flux | Alaska [2011-2021] \n Number of CH4 Flux Samples/Replicates: 2.083M', pad=15, fontsize=14);
plt.tight_layout()
plt.savefig('CH4stats_1969-2022_071323.png', dpi=1000)

In [None]:
# Display the last column of reframed_co2, label-sliced from '2006' through '2019'.
# NOTE(review): presumably the index is a DatetimeIndex so the string slice selects years — confirm.
#plt.plot(reframed_co2.iloc[:,-1]['2006':'2019'].values)
reframed_co2.iloc[:,-1]['2006':'2019']

In [None]:
# Training MSE (left axis) against validation MSE (right axis) for the CO2 model,
# read from `history4` and written to a PNG in the working directory.
# NOTE(review): the output file name says 1969-2022 while the title says 2006-2019.
fig,ax=plt.subplots(figsize=(10,6), dpi=1000);
# Unit is micromoles: "µmol" (the labels previously read "µol").
lns1=ax.plot(history4.history['mean_squared_error'], color='indigo', linestyle='solid', label='Training, CO2 Flux (µmolCO2m-2s-1)');
ax2=ax.twinx();
lns2=ax2.plot(history4.history['val_mean_squared_error'], color='lime', linestyle='solid', label='Validation, CO2 Flux (µmolCO2m-2s-1)');

# One legend covering the curves from both axes.
lns = lns1+lns2#+lns3+lns4;
labs = [l.get_label() for l in lns];
ax2.legend(lns, labs, loc='best', fontsize=12);

ax.grid(linewidth=0.3);
ax2.grid(linewidth=0.3);
ax.set_xlabel('Full Iterations (epochs)', labelpad=12, fontsize=10);
ax.set_ylabel('Training MSE, Scaled CO2 Flux (µmolCO2m-2s-1)', labelpad=12, fontsize=10);
ax.tick_params(axis='y', labelcolor='indigo')
ax2.set_ylabel('Validation MSE, Scaled CO2 Flux (µmolCO2m-2s-1)', labelpad=12, fontsize=10)
ax2.tick_params(axis='y', labelcolor='lime')
ax.tick_params(left=False)  # remove the ticks
ax2.tick_params(right=False)  # remove the ticks
plt.title('GeoCryoAI Modeling, Cost Function and Validation Loss of CO2 Flux | Alaska [2006-2019] \n Number of CO2 Flux Samples/Replicates: 1.966M', pad=15, fontsize=14);
plt.tight_layout()
plt.savefig('CO2stats_1969-2022_071323.png', dpi=1000)

In [None]:
# fig,ax=plt.subplots(figsize=(10,7));
# #lns1=ax.plot(history2.history['loss'], color='dodgerblue', label='Loss, ALT (cm)');
# lns2=ax.plot(history2.history['mean_squared_error'], color='dodgerblue', linestyle='solid', label='RMSE, ALT (cm)');
# ax2=ax.twinx();
# #lns3=ax2.plot(history2.history['val_loss'], color='gold', label='Validation Loss, ALT (cm)');
# lns4=ax2.plot(history2.history['val_mean_squared_error'], color='gold', linestyle='solid', label='Validation RMSE, ALT (cm)');
          
# lns = lns2+lns4; #lns1+lns2+lns3+lns4;
# labs = [l.get_label() for l in lns];
# ax2.legend(lns, labs, loc='best', fontsize=8);

# ax.grid(linewidth=0.3);
# ax.set_xlabel('Full Iterations (epochs)', labelpad=12, fontsize=10);
# ax.set_ylabel('Scaled Depth to Refusal (cm)', labelpad=12, fontsize=10);
# #ax2.set_ylabel('Scaled Depth to Refusal (cm)', labelpad=6, fontsize=9)
# plt.title('Number of Samples/Replicates: 95653', pad=15, fontsize=12, fontweight='ultralight');
# plt.suptitle('Cost Function and Validation Loss from Thaw Depth Modeling, GeoCryoAI Framework in Alaska [1969-2022]', fontsize=14);
# plt.grid(linewidth=0.3);
# #plt.show()
# #plt.savefig('ALTstats_CNNLSTMSAEmetrics_1969-2022_021323.png', dpi=1000)

In [None]:
# Evaluate `model` on the reframed test set; `score` is [loss, *metrics] in compile() order.
# NOTE(review): `model`, X_test_reframed and y_test_reframed are not assigned in the nearby cells — confirm their origin.
score = model.evaluate(X_test_reframed, y_test_reframed, verbose = 1) 

In [None]:
# NOTE(review): these labels assume `model` was compiled with metrics in the order
# [MAE, MSE, RMSE]; the compile call for `model` is not visible here. The models compiled
# elsewhere in this notebook use ['mean_squared_error', 'mean_absolute_error', ...], for
# which index 1 is MSE, index 2 is MAE, and index 3 is not RMSE — confirm via model.metrics_names.
print('Test MAE:', score[1])
print('Test MSE:', score[2])
print('Test RMSE:', score[3])

In [None]:
# Run `model` on the reframed test inputs and keep the predictions in `predict`.
predict = model.predict(X_test_reframed, verbose = 1)

In [None]:
# fig,ax = plt.subplots(figsize=(10,6), dpi=1000)
# ln1=ax.plot(y_test_reframed.reshape(215137,1), color='magenta', linestyle='solid', label='Observation, Test Set')
# ln2=ax.plot(predict.reshape(215137,1), color='dodgerblue', linestyle='solid', label='Prediction, Test Set')
# #ln3=ax.plot(history.history['mean_absolute_error'], color='springgreen', linestyle='dotted', label='MAE')
# #ln4=ax.plot(history.history['mean_squared_error'], color='springgreen', linestyle='dashed', label='Seward Peninsula')
# #ln4=ax.plot(history.history['root_mean_squared_error'], color='springgreen', linestyle='dashed', label='RMSE')
# #ln5=ax.plot(history.history['val_mean_absolute_error'], color='red', linestyle='dotted', label='Val MAE')
# #ln7=ax.plot(history.history['val_mean_squared_error'], color='red', linestyle='dashed', label='Seward Peninsula')
# #ln6=ax.plot(history.history['val_root_mean_squared_error'], color='red', linestyle='dashed', label='Val RMSE')
# #ln2=ax.plot(sib.iloc[2:,7].replace(-9999,np.nan).dropna()color='springgreen', linestyle='dashed', label='Interior')
# #ln3=ax.plot(sib.iloc[2:,16].replace(-9999,np.nan).dropna(), color='magenta', linestyle='dotted', label='Seward Peninsula')
# #ln4=ax.plot(sib.iloc[2:,39].replace(-9999,np.nan).dropna(), color='dodgerblue', linestyle='dotted', label='Yukon-Kuskokwim Delta')
# #ax2=ax.twinx();
# #ln4=ax2.plot(validPredict, color='coral', linestyle='dotted')

# lines = ln1 + ln2 #+ ln3 + ln4 + ln5 + ln6# + ln7 + ln8
# labs = [line.get_label() for line in lines];
# plt.legend(lines, labs, loc='lower left', fontsize=8)

# ax.grid(linewidth=0.3);
# ax.set_xlabel('Samples, 2003-2021', labelpad=10, fontsize=16);
# ax.set_ylabel('Carbon Dioxide Flux (umolCm2s-1)', labelpad=10, fontsize=16)
# #ax.set(xticklabels=[])  # remove the tick labels
# ax.tick_params(left=False)  # remove the ticks
# #plt.ylabel('Active Layer Thickness (cm)')
# plt.title('GeoCryoAI Modeling, Cost Function and Validation Loss of ALT | Alaska [1969-2022]', pad=15, fontsize=14);
# plt.suptitle('Number of Samples/Replicates, ALT: 2.441M', fontsize=12, fontweight='ultralight');
# #plt.title('GeoCryoAI In Situ Module | Bidirectional LSTM Autoencoder Loss Function \n In Situ Carbon Dioxide Flux Simulations (2003-2021)', pad=10)
# #plt.xlabel('Year')
# #plt.axis([0, 6, 0, 60])
# #plt.legend(loc='best')
# #plt.show()
# plt.savefig('/Users/bradleygay/Downloads/bilstmae_insitu_CO2_loss_predictions_experimental.png',dpi=1000);

In [None]:
# Overlay the test observations and the model predictions as two line series.
# reshape(-1, 1) flattens each array to a single column without hard-coding
# the number of test samples.
# NOTE(review): both series are presumably still in scaled units — inverse-scale before reporting.
plt.plot(y_testco2_reframed.reshape(-1, 1))
plt.plot(predict.reshape(-1, 1))
#plt.axis([0, 130000, -2, 5])
plt.show()

In [None]:
#May need to inverse scale

In [None]:
########################################################
########################################################
########################################################

In [None]:
# fig,ax = plt.subplots(figsize=(10,6), dpi=1000)
# #l1=ax.plot(history2.history['loss'], color='dodgerblue', linestyle='solid', label='ALT Training Loss (cm)')
# #l2=ax.plot(history3.history['loss'], color='magenta', linestyle='solid', label='CH4 Flux Training Loss (cm)')
# #l3=ax.plot(history4.history['loss'], color='springgreen', linestyle='solid', label='CO2 Flux Training Loss (nmolCH4m-2s-1)')
# l1=ax.plot(history2.history['val_loss'], color='dodgerblue', linestyle='solid', label='ALT Validation Loss (cm)')
# ax2=ax.twinx();
# l2=ax2.plot(history3.history['val_loss'], color='magenta', linestyle='solid', label='CH4 Flux Validation Loss (nmolCH4m-2s-1)')
# l3=ax2.plot(history4.history['val_loss'], color='springgreen', linestyle='solid', label='CO2 Flux Validation Loss (µmolCO2m-2s-1)')
# #l3=ax.plot(history4.history['loss'], color='springgreen', linestyle='solid', label='CO2 Flux Loss (µmolCO2m-2s-1)')
# #l1=ax.plot(history2.history['val_mean_squared_error'], color='dodgerblue', linestyle='solid', label='ALT Loss (cm)')
# #l2=ax.plot(history3.history['val_mean_squared_error'], color='magenta', linestyle='solid', label='CH4 Flux Loss (nmolCH4m-2s-1)')
# #l3=ax.plot(history4.history['val_mean_squared_error'], color='springgreen', linestyle='solid', label='CO2 Flux Loss (µmolCO2m-2s-1)')
# #ax2=ax.twinx();
# #l2=ax2.plot(althistory['val_mean_squared_error'], color='dodgerblue', linestyle='solid', label='Validation Loss (MSE)')
# #l2=ax2.plot(ch4history['val_mean_squared_error'], color='coral', linestyle='solid', label='Validation Loss (MSE)')
# #l2=ax2.plot(althistory['val_mean_squared_error'], color='coral', linestyle='solid', label='Validation Loss (MSE)')

# lns = l1+l2 +l3
# labs = [l.get_label() for l in lns];
# #ax2.legend(lns, labs, loc='best', fontsize=8);
# ax.legend(lns, labs, loc='best', fontsize=12);


# ax.set_xlabel('Full Iterations (epochs)', labelpad=15, fontsize=12);
# #ax.set_ylabel('Training Loss (units)', labelpad=15, fontsize=12)
# ax.set_ylabel('Validation Loss (units)', labelpad=15, fontsize=12)
# ax2.set_ylabel('Validation Loss (units)', labelpad=15, fontsize=10)
# #ax2.set_ylabel('Validation Loss, MSE (cm)', labelpad=15, fontsize=10)
# #ax.set_ylabel('Training Loss (units)', labelpad=15, fontsize=12)
# #ax2.set_ylabel('Validation Loss, MSE (nmolCH4m2s-1)', labelpad=15, fontsize=10)
# #ax.set_ylabel('Loss, MSE (µmolCO2m2s-1)', labelpad=15, fontsize=10)
# #ax2.set_ylabel('Validation Loss, MSE (µmolCO2m2s-1)', labelpad=15, fontsize=10)
# #ax.set(xticklabels=[])  # remove the tick labels
# ax.tick_params(left=False)  # remove the ticks
# ax2.tick_params(left=False)  # remove the ticks
# #plt.ylabel('Active Layer Thickness (cm)')
# ax.grid(linewidth=0.3);
# plt.title('GeoCryoAI Modeling, Cost Function and Validation Loss of ALT | Alaska [1969-2022] \n Number of Samples/Replicates, ALT: 2.441M', pad=15, fontsize=14);
# #plt.suptitle('Number of Samples/Replicates, ALT: 2.441M', fontsize=12, fontweight='ultralight');
# #plt.title('GeoCryoAI Training and Validation Loss | In Situ Thaw Depth Simulations [1969-2022]', pad=15, fontsize=14)
# #plt.title('GeoCryoAI Model Simulations | Cost Functions \n ALT, CH4 Flux, and CO2 Flux [1969-2022]', pad=15, fontsize=14)
# #plt.title('GeoCryoAI Training and Validation Loss | In Situ CO2 Flux Simulations [2006-2019]', pad=15, fontsize=14)
# #plt.xlabel('Year')
# #plt.axis([0, 6, 0, 60])
# #plt.grid(linewidth=0.3);
# #plt.show()
# #plt.savefig('/Users/bradleygay/Downloads/bilstmae_insitu_ALT_loss.png',dpi=1000)

In [None]:
# Shorter aliases for the three reloaded models (same objects, no copy is made).
alt_model=alt_loaded_model_json
ch4_model=ch4_loaded_model_json
co2_model=co2_loaded_model_json

In [None]:
# Compile the three reloaded models with MSE loss and a legacy RMSprop optimizer
# (clipvalue=1000; learning rate 1e-4 for ALT and CO2, 1e-3 for CH4).
# Metric order is [mean_squared_error, mean_absolute_error, <R-squared entry>], so
# evaluate() returns [loss, MSE, MAE, ...].
# NOTE(review): the third metric is the *return value* of RSquare().build(...), not the
# RSquare object itself; if build() returns None no R-squared metric is tracked — confirm.
loss_function = 'mean_squared_error'
alt_metrics=['mean_squared_error', 'mean_absolute_error', RSquare().build(input_shape = (trainyscaltref.shape[1],))]
alt_model.compile(optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate = 0.0001, **{"clipvalue" : 1000}),
                  loss = loss_function, metrics = alt_metrics)
ch4_metrics=['mean_squared_error', 'mean_absolute_error', RSquare().build(input_shape = (trainyscch4ref.shape[1],))]
ch4_model.compile(optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate = 0.001, **{"clipvalue" : 1000}),
                  loss = loss_function, metrics = ch4_metrics)
co2_metrics=['mean_squared_error', 'mean_absolute_error', RSquare().build(input_shape = (trainyscco2ref.shape[1],))]
co2_model.compile(optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate = 0.0001, **{"clipvalue" : 1000}),
                  loss = loss_function, metrics = co2_metrics)

In [None]:
# Evaluate the compiled models on their held-out test splits. Only the CH4
# evaluation is active; the ALT and CO2 calls are commented out.
# NOTE(review): later cells print score1 and score2 -- re-enable these two lines
# (or confirm the scores are assigned elsewhere) before a Restart & Run All.
#score1=alt_model.evaluate(testXscaltref,testyscaltref,verbose=1)
#score2=co2_model.evaluate(testXscco2ref,testyscco2ref,verbose=1)
score3=ch4_model.evaluate(testXscch4ref,testyscch4ref,verbose=1)

In [None]:
# Display three evaluation results as a tuple.
# NOTE(review): score_2 / score_3b / score_4 are not assigned in the surrounding
# cells (which use score1..score3) -- presumably left over from an earlier
# session; confirm they exist on a fresh kernel.
score_2, score_3b, score_4#score_3

In [None]:
#ALT
# Model.evaluate returns [loss, *metrics] in compile order. The models are
# compiled with metrics=['mean_squared_error', 'mean_absolute_error', ...],
# so index 1 is the MSE and index 2 is the MAE; RMSE is the root of the MSE.
print('Test MAE:', score1[2])
print('Test MSE:', score1[1])
print('Test RMSE:', np.sqrt(score1[1]))

In [None]:
#CO2
# Model.evaluate returns [loss, *metrics] in compile order. The models are
# compiled with metrics=['mean_squared_error', 'mean_absolute_error', ...],
# so index 1 is the MSE and index 2 is the MAE; RMSE is the root of the MSE.
print('Test MAE:', score2[2])
print('Test MSE:', score2[1])
print('Test RMSE:', np.sqrt(score2[1]))

In [None]:
#CH4
# Model.evaluate returns [loss, *metrics] in compile order. The models are
# compiled with metrics=['mean_squared_error', 'mean_absolute_error', ...],
# so index 1 is the MSE and index 2 is the MAE; RMSE is the root of the MSE.
print('Test MAE:', score3[2])
print('Test MSE:', score3[1])
print('Test RMSE:', np.sqrt(score3[1]))

In [None]:
# Run the ALT model over the train / validation / test inputs, in that order.
trainyscaltpred, validyscaltpred, testyscaltpred = [
    alt_model.predict(alt_inputs)
    for alt_inputs in (trainXscaltref, validXscaltref, testXscaltref)
]

In [None]:
# Persist the ALT predictions and read them back as a round-trip check.
# The original wrote to the current working directory but re-loaded from a
# hard-coded absolute path, so the reload could silently pick up stale files.
# Both directions now use one directory: the original location by default,
# overridable through the GEOCRYOAI_PRED_DIR environment variable.
alt_pred_dir = Path(os.environ.get('GEOCRYOAI_PRED_DIR', '/Users/bradleygay/code'))

with open(alt_pred_dir / 'trainyscaltpred', 'wb') as file_theta:
    pickle.dump(trainyscaltpred, file_theta)
with open(alt_pred_dir / 'validyscaltpred', 'wb') as file_alpha:
    pickle.dump(validyscaltpred, file_alpha)
with open(alt_pred_dir / 'testyscaltpred', 'wb') as file_zeta:
    pickle.dump(testyscaltpred, file_zeta)

# NOTE: pickle.load can execute arbitrary code -- only load files written by this notebook.
with open(alt_pred_dir / 'trainyscaltpred', 'rb') as file_theta:
    trainyscaltpred=pickle.load(file_theta)
with open(alt_pred_dir / 'validyscaltpred', 'rb') as file_alpha:
    validyscaltpred=pickle.load(file_alpha)
with open(alt_pred_dir / 'testyscaltpred', 'rb') as file_zeta:
    testyscaltpred=pickle.load(file_zeta)

In [None]:
# Run the CO2 model over the train / validation / test inputs, in that order.
trainyscco2pred, validyscco2pred, testyscco2pred = [
    co2_model.predict(co2_inputs)
    for co2_inputs in (trainXscco2ref, validXscco2ref, testXscco2ref)
]

In [None]:
# Persist the CO2 predictions and read them back as a round-trip check.
# The original wrote to the current working directory but re-loaded from a
# hard-coded absolute path, so the reload could silently pick up stale files.
# Both directions now use one directory: the original location by default,
# overridable through the GEOCRYOAI_PRED_DIR environment variable.
co2_pred_dir = Path(os.environ.get('GEOCRYOAI_PRED_DIR', '/Users/bradleygay/code'))

with open(co2_pred_dir / 'trainyscco2pred', 'wb') as file_a:
    pickle.dump(trainyscco2pred, file_a)
with open(co2_pred_dir / 'validyscco2pred', 'wb') as file_b:
    pickle.dump(validyscco2pred, file_b)
with open(co2_pred_dir / 'testyscco2pred', 'wb') as file_c:
    pickle.dump(testyscco2pred, file_c)

# NOTE: pickle.load can execute arbitrary code -- only load files written by this notebook.
with open(co2_pred_dir / 'trainyscco2pred', 'rb') as file_a:
    trainyscco2pred=pickle.load(file_a)
with open(co2_pred_dir / 'validyscco2pred', 'rb') as file_b:
    validyscco2pred=pickle.load(file_b)
with open(co2_pred_dir / 'testyscco2pred', 'rb') as file_c:
    testyscco2pred=pickle.load(file_c)

In [None]:
# Run the CH4 model over the train / validation / test inputs, in that order.
trainyscch4pred, validyscch4pred, testyscch4pred = [
    ch4_model.predict(ch4_inputs)
    for ch4_inputs in (trainXscch4ref, validXscch4ref, testXscch4ref)
]

In [None]:
# Persist the CH4 predictions and read them back as a round-trip check.
# The original wrote to the current working directory but re-loaded from a
# hard-coded absolute path, so the reload could silently pick up stale files.
# Both directions now use one directory: the original location by default,
# overridable through the GEOCRYOAI_PRED_DIR environment variable.
ch4_pred_dir = Path(os.environ.get('GEOCRYOAI_PRED_DIR', '/Users/bradleygay/code'))

with open(ch4_pred_dir / 'trainyscch4pred', 'wb') as file_i:
    pickle.dump(trainyscch4pred, file_i)
with open(ch4_pred_dir / 'validyscch4pred', 'wb') as file_ii:
    pickle.dump(validyscch4pred, file_ii)
with open(ch4_pred_dir / 'testyscch4pred', 'wb') as file_iii:
    pickle.dump(testyscch4pred, file_iii)

# NOTE: pickle.load can execute arbitrary code -- only load files written by this notebook.
with open(ch4_pred_dir / 'trainyscch4pred', 'rb') as file_i:
    trainyscch4pred=pickle.load(file_i)
with open(ch4_pred_dir / 'validyscch4pred', 'rb') as file_ii:
    validyscch4pred=pickle.load(file_ii)
with open(ch4_pred_dir / 'testyscch4pred', 'rb') as file_iii:
    testyscch4pred=pickle.load(file_iii)

In [None]:
#model2 == ALT (history2)
#model3b == CH4 (history5)
#model4 == CO2 (history4)

### Archive

In [None]:
# Append the flattened `ppp` predictions to `altlist`.
# NOTE(review): the initialisation and the earlier appends are commented out, so
# this cell only works when `altlist` already exists in the kernel, and each
# re-run appends `ppp` again -- confirm before relying on it.
#altlist=[]
#altlist=np.append(altlist,p)
#altlist=np.append(altlist,pp)
altlist=np.append(altlist,ppp)

In [None]:
plt.plot(yscaleralt.inverse_transform(altlist.reshape(-1,1)))

In [None]:
plt.plot(yscaleralt.inverse_transform(p.reshape(1432318,1)))

In [None]:
# Scale the last 48 rows of df, add a leading batch axis, predict, and map the
# prediction back through Y_scaler. (The original lines were indented
# inconsistently, which is an IndentationError.)
# NOTE(review): fit_transform re-fits X_scaler on these 48 rows instead of
# reusing the fit from training -- confirm that .transform() is not intended.
data_val = X_scaler.fit_transform(df.tail(48))
val_rescaled = data_val.reshape(1, data_val.shape[0], data_val.shape[1])
pred = lstm_model.predict(val_rescaled)
pred_Inverse = Y_scaler.inverse_transform(pred)
pred_Inverse

In [None]:
type(p.reshape(1432318, 1))

In [None]:
type(testXscaltref.reshape((testXscaltref.shape[0], testXscaltref.shape[2])))

In [None]:
# Archive cell: predict on the scaled ALT test inputs, rebuild full feature rows
# so the scaler can be inverted, and report the RMSE in original units.
# NOTE(review): `concatenate`, `sqrt`, `mean_squared_error`, `scaler` and `test_y`
# are not defined in the visible part of the notebook -- confirm they exist.
# make a prediction
yhat = model2.predict(testXscalt)
# NOTE(review): the right-hand side reads test_X.shape before test_X is assigned
# here; presumably testXscalt.shape was intended -- confirm.
test_X = testXscalt.reshape((test_X.shape[0], test_X.shape[2]))
# invert scaling for forecast
inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]
# calculate RMSE
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)

In [None]:
trainXaltdf=pd.DataFrame(trainXalt).to_numpy().reshape(1432318, 1, 273)

In [None]:
#y_pred = model2.predict(trainXaltdfres)
y_pred_scaled = model2.predict(testXscaltref)

In [None]:
y_pred.shape

In [None]:
y_pred_scaled.shape

In [None]:
testXalt.shape

In [None]:
testyalt.iloc[:,0]

In [None]:
# Wrap the scaled test targets in a DataFrame that carries the index of the
# unscaled test targets, parsed as datetimes with a '%Y' format and left unnamed.
testyscaltpd = pd.DataFrame(testyscalt)
testyscaltpd.index = pd.to_datetime(testyalt.iloc[:, 0].index, format='%Y')
testyscaltpd.index.name = None

In [None]:
#testyscaltpd.to_numpy().reshape(215137,1,1)

In [None]:
testyscaltpd.columns=[testyalt.iloc[:,-1].name]

In [None]:
testyscaltpd

In [None]:
# Wrap the scaled validation targets in a DataFrame that carries the index of the
# unscaled validation targets, parsed as datetimes with a '%Y' format and left unnamed.
validyscaltpd = pd.DataFrame(validyscalt)
validyscaltpd.index = pd.to_datetime(validyalt.iloc[:, 0].index, format='%Y')
validyscaltpd.index.name = None

In [None]:
#validyscaltpd.to_numpy().reshape(793782,1,1)

In [None]:
validyscaltpd.columns=[validyalt.iloc[:,-1].name]

In [None]:
validyscaltpd

In [None]:
# Wrap the scaled training targets in a DataFrame that carries the index of the
# unscaled training targets, parsed as datetimes with a '%Y' format and left unnamed.
trainyscaltpd = pd.DataFrame(trainyscalt)
trainyscaltpd.index = pd.to_datetime(trainyalt.iloc[:, 0].index, format='%Y')
trainyscaltpd.index.name = None

In [None]:
trainyscaltpd.columns=[trainyalt.iloc[:,-1].name]

In [None]:
trainyscaltpd

In [None]:
plt.plot(trainyscaltpd)
plt.plot(validyscaltpd)
plt.plot(testyscaltpd)

In [None]:
plt.plot(trainyalt)
plt.plot(validyalt)
plt.plot(testyalt)

In [None]:
#trainXalt

In [None]:
#testyscaltpd.to_numpy().reshape(215137,1,1)
#testyscalt.reshape(215137,1,1)
t=trainXalt.iloc[:,-1].resample('Y').mean()
v=validXalt.iloc[:,-1].resample('Y').mean()
r=testXalt.iloc[:,-1].resample('Y').mean()#.reshape(215137,1, 273)

In [None]:
trainXalt.iloc[:,-183]

In [None]:
t2=trainXalt.iloc[:,-92].resample('Y').mean()
v2=validXalt.iloc[:,-92].resample('Y').mean()
r2=testXalt.iloc[:,-92].resample('Y').mean()#.reshape(215137,1, 273)

In [None]:
t3=trainXalt.iloc[:,-183].resample('Y').mean()
v3=validXalt.iloc[:,-183].resample('Y').mean()
r3=testXalt.iloc[:,-183].resample('Y').mean()#.reshape(215137,1, 273)

In [None]:
yup=[]
yup=np.append(yup,t.values)
yup=np.append(yup,v.values)
yup=np.append(yup,r.values)

In [None]:
yup2=[]
yup2=np.append(yup2,t2.values)
yup2=np.append(yup2,v2.values)
yup2=np.append(yup2,r2.values)

In [None]:
yup3=[]
yup3=np.append(yup3,t3.values)
yup3=np.append(yup3,v3.values)
yup3=np.append(yup3,r3.values)

In [None]:
plt.plot(reframed_alt.iloc[:,-1].resample('Y').mean().values)

In [None]:
plt.plot(df['ALT'].resample('Y').mean().values)
#plt.plot(reframed_alt.iloc[:,-1].resample('Y').mean().values)
plt.plot(yup, linestyle='dotted')
plt.plot(yup2, linestyle='dotted')
plt.plot(yup3, linestyle='dotted')
plt.show()

In [None]:
plt.plot(df['ALT'].resample('Y').mean().values)

In [None]:
plt.plot(yup, linestyle='dotted')
plt.plot(yup2, linestyle='dotted')
plt.plot(yup3, linestyle='dotted')

In [None]:
#trainXaltdfres=pd.DataFrame(trainXalt).to_numpy().reshape(1432318, 1, 273)

### ALT

In [None]:
trainXscalt.shape

In [None]:
trainXscaltdfres=pd.DataFrame(trainXscalt).to_numpy().reshape(1432318, 1, 273)

In [None]:
trainXscaltdfres.shape

In [None]:
#p=model2.predict(trainXaltdfres)

In [None]:
p=model2.predict(trainXscaltdfres)

In [None]:
p.shape

In [None]:
# plt.plot(trainXalt.to_numpy().reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXalt.to_numpy().reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXalt.to_numpy().reshape(215137, 1, 273)[:,-1,-1])
# plt.plot(trainXscalt.reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXscalt.reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXscalt.reshape(215137, 1, 273)[:,-1,-1])

In [None]:
plt.plot(p.reshape(1432318,1))
plt.show()

In [None]:
validXscalt.shape

In [None]:
validXscaltdfres=pd.DataFrame(validXscalt).to_numpy().reshape(793782, 1, 273)

In [None]:
validXscaltdfres.shape

In [None]:
pp=model2.predict(validXscaltdfres)

In [None]:
pp.shape

In [None]:
# plt.plot(trainXalt.to_numpy().reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXalt.to_numpy().reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXalt.to_numpy().reshape(215137, 1, 273)[:,-1,-1])
# plt.plot(trainXscalt.reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXscalt.reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXscalt.reshape(215137, 1, 273)[:,-1,-1])

In [None]:
plt.plot(pp.reshape(793782,1))
plt.show()

In [None]:
testXscalt.shape

In [None]:
testXscaltdfres=pd.DataFrame(testXscalt).to_numpy().reshape(215137, 1, 273)

In [None]:
testXscaltdfres.shape

In [None]:
ppp=model2.predict(testXscaltdfres)

In [None]:
ppp.shape

In [None]:
# plt.plot(trainXalt.to_numpy().reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXalt.to_numpy().reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXalt.to_numpy().reshape(215137, 1, 273)[:,-1,-1])
# plt.plot(trainXscalt.reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXscalt.reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXscalt.reshape(215137, 1, 273)[:,-1,-1])

In [None]:
plt.plot(ppp.reshape(215137,1))
plt.show()

In [None]:
# Stitch the scaled ALT predictions of the three splits (train, validation,
# test) into one 1-D float64 series. ravel() replaces the hard-coded split sizes
# so the cell keeps working if the split sizes change.
arr = np.concatenate([np.ravel(split_pred) for split_pred in (p, pp, ppp)]).astype(np.float64)

In [None]:
plt.plot(arr)

In [None]:
# Stitch the unscaled ALT targets of the three splits (train, validation, test)
# into one 1-D float64 series. ravel() replaces the hard-coded split sizes so
# the cell keeps working if the split sizes change.
arr2 = np.concatenate(
    [np.ravel(split_obs.values) for split_obs in (trainyalt, validyalt, testyalt)]
).astype(np.float64)

In [None]:
plt.plot(arr2)

In [None]:
plt.plot(yscaleralt.inverse_transform(arr.reshape(2441237,1)))

In [None]:
2441237-215137
#testXscalt.shape

In [None]:
arr2.shape

In [None]:
# Observed vs. predicted active layer thickness over the concatenated
# train | validation | test samples. `arr2` holds the observations and `arr` the
# scaled predictions, which are mapped back through yscaleralt before plotting.
fig,ax=plt.subplots(figsize=(10,6), dpi=1000)
lns1=ax.plot(arr2.reshape(-1,1), color='dodgerblue', linestyle='solid', label='Thaw Depth Observations, ALT (cm)')
lns2=ax.plot(yscaleralt.inverse_transform(arr.reshape(-1,1)), color='tomato', alpha=0.5, linestyle='solid', label='Thaw Depth Predictions, ALT (cm)')

lns = lns1+lns2
labs = [l.get_label() for l in lns]
ax.legend(lns, labs, loc='best', fontsize=12)

ax.grid(linewidth=0.3)
# The x axis is the position in the concatenated sample series, not training epochs.
ax.set_xlabel('Sample Index (train | validation | test)', labelpad=12, fontsize=10)
ax.set_ylabel('Active Layer Thickness (cm)', labelpad=12, fontsize=10)
ax.tick_params(left=False)  # remove the ticks
plt.title('GeoCryoAI Modeling, ALT Observations v. Predictions | Alaska [1969-2022] \n Number of ALT Samples/Replicates: 2.441M', pad=15, fontsize=14)
# The x range follows the data length instead of a hard-coded sample count.
plt.axis([0, arr2.size, 0, 300])
#plt.axis([2226100, 2441237, 0, 200])
plt.tight_layout()
plt.savefig('ALT_ObsVPred_1969-2022_071323.svg', dpi=1000)
plt.savefig('ALT_ObsVPred_1969-2022_071323.png', dpi=1000)

### CO2

In [None]:
trainXscco2.shape

In [None]:
trainXscco2dfres=pd.DataFrame(trainXscco2).to_numpy().reshape(1432318, 1, 273)

In [None]:
trainXscco2dfres.shape

In [None]:
#p=model2.predict(trainXaltdfres)

In [None]:
q=model4.predict(trainXscco2dfres)

In [None]:
q.shape

In [None]:
# plt.plot(trainXalt.to_numpy().reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXalt.to_numpy().reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXalt.to_numpy().reshape(215137, 1, 273)[:,-1,-1])
# plt.plot(trainXscalt.reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXscalt.reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXscalt.reshape(215137, 1, 273)[:,-1,-1])

In [None]:
plt.plot(q.reshape(1432318,1))
plt.show()

In [None]:
validXscco2.shape

In [None]:
validXscco2dfres=pd.DataFrame(validXscco2).to_numpy().reshape(793782, 1, 273)

In [None]:
validXscco2dfres.shape

In [None]:
qq=model4.predict(validXscco2dfres)

In [None]:
qq.shape

In [None]:
# plt.plot(trainXalt.to_numpy().reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXalt.to_numpy().reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXalt.to_numpy().reshape(215137, 1, 273)[:,-1,-1])
# plt.plot(trainXscalt.reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXscalt.reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXscalt.reshape(215137, 1, 273)[:,-1,-1])

In [None]:
plt.plot(qq.reshape(793782,1))
plt.show()

In [None]:
testXscco2.shape

In [None]:
testXscco2dfres=pd.DataFrame(testXscco2).to_numpy().reshape(215137, 1, 273)

In [None]:
testXscco2dfres.shape

In [None]:
qqq=model4.predict(testXscco2dfres)

In [None]:
qqq.shape

In [None]:
# plt.plot(trainXalt.to_numpy().reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXalt.to_numpy().reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXalt.to_numpy().reshape(215137, 1, 273)[:,-1,-1])
# plt.plot(trainXscalt.reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXscalt.reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXscalt.reshape(215137, 1, 273)[:,-1,-1])

In [None]:
plt.plot(qqq.reshape(215137,1))
plt.show()

In [None]:
# Stitch the scaled CO2 predictions of the three splits (train, validation,
# test) into one 1-D float64 series. ravel() replaces the hard-coded split sizes
# so the cell keeps working if the split sizes change.
arr3 = np.concatenate([np.ravel(split_pred) for split_pred in (q, qq, qqq)]).astype(np.float64)

In [None]:
plt.plot(arr3)

In [None]:
# Stitch the unscaled CO2 targets of the three splits (train, validation, test)
# into one 1-D float64 series. ravel() replaces the hard-coded split sizes so
# the cell keeps working if the split sizes change.
arr4 = np.concatenate(
    [np.ravel(split_obs.values) for split_obs in (trainyco2, validyco2, testyco2)]
).astype(np.float64)

In [None]:
plt.plot(arr4)

In [None]:
plt.plot(yscalerco2.inverse_transform(arr3.reshape(2441237,1)))

In [None]:
#reframed_co2.iloc[:,-1].name
#('CO2_1_2_1', 't')
#reframed_co2.iloc[:,-1]['2006':'2019']
#reframed_co2.shape
#2441237-1965628
#78115:2043743
plt.plot(reframed_co2.iloc[78115:2043743,-1].values)#['2005':'2019']
#reframed_co2.iloc[:,-1][:'2019']

In [None]:
# Observed vs. predicted CO2 flux over the concatenated train | validation |
# test samples. `arr4` holds the observations and `arr3` the scaled predictions,
# which are mapped back through yscalerco2 before plotting.
# NOTE(review): predictions are drawn on a twin y axis whose tick labels are
# hidden, so the two curves do not share a visible scale -- confirm this is intended.
fig,ax=plt.subplots(figsize=(10,6), dpi=1000)
lns1=ax.plot(arr4.reshape(-1,1), color='indigo', linestyle='solid', label='Flux Observations, CO2 (µmolCO2m-2s-1)')
ax2=ax.twinx()
lns2=ax2.plot(yscalerco2.inverse_transform(arr3.reshape(-1,1)), alpha=0.5, color='lime', linestyle='solid', label='Flux Predictions, CO2 (µmolCO2m-2s-1)')

lns = lns1+lns2
labs = [l.get_label() for l in lns]
ax.legend(lns, labs, loc='upper right', fontsize=12)

ax.grid(linewidth=0.3)
# The x axis is the position in the concatenated sample series, not training epochs.
ax.set_xlabel('Sample Index (train | validation | test)', labelpad=12, fontsize=10)
ax.set_ylabel('Carbon Dioxide Flux (µmolCO2m-2s-1)', labelpad=12, fontsize=10)
ax.tick_params(left=False, labelright=False)  # remove the ticks
ax2.tick_params(right=False, labelright=False)  # remove the ticks
plt.title('GeoCryoAI Modeling, CO2 Flux Observations v. Predictions | Alaska [2006-2019] \n Number of CO2 Flux \
Samples/Replicates: 1.966M', pad=15, fontsize=14)
#plt.axis([78115, 2043743, 0, 800])
#plt.axis([2226100, 2441237, 0, 200])
plt.tight_layout()
plt.savefig('CO2_ObsVPred_2006-2019_071323.svg', dpi=1000)
plt.savefig('CO2_ObsVPred_2006-2019_071323.png', dpi=1000)

### CH4

In [None]:
trainXscch4.shape

In [None]:
trainXscch4dfres=pd.DataFrame(trainXscch4).to_numpy().reshape(1432318, 1, 273)

In [None]:
trainXscch4dfres.shape

In [None]:
#p=model2.predict(trainXaltdfres)

In [None]:
o=model3b.predict(trainXscch4dfres)

In [None]:
o.shape

In [None]:
# plt.plot(trainXalt.to_numpy().reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXalt.to_numpy().reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXalt.to_numpy().reshape(215137, 1, 273)[:,-1,-1])
# plt.plot(trainXscalt.reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXscalt.reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXscalt.reshape(215137, 1, 273)[:,-1,-1])

In [None]:
plt.plot(o.reshape(1432318,1))
plt.show()

In [None]:
validXscch4.shape

In [None]:
validXscch4dfres=pd.DataFrame(validXscch4).to_numpy().reshape(793782, 1, 273)

In [None]:
validXscch4dfres.shape

In [None]:
oo=model3b.predict(validXscch4dfres)

In [None]:
oo.shape

In [None]:
# plt.plot(trainXalt.to_numpy().reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXalt.to_numpy().reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXalt.to_numpy().reshape(215137, 1, 273)[:,-1,-1])
# plt.plot(trainXscalt.reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXscalt.reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXscalt.reshape(215137, 1, 273)[:,-1,-1])

In [None]:
plt.plot(oo.reshape(793782,1))
plt.show()

In [None]:
testXscch4.shape

In [None]:
testXscch4dfres=pd.DataFrame(testXscch4).to_numpy().reshape(215137, 1, 273)

In [None]:
testXscch4dfres.shape

In [None]:
ooo=model3b.predict(testXscch4dfres)

In [None]:
ooo.shape

In [None]:
# plt.plot(trainXalt.to_numpy().reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXalt.to_numpy().reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXalt.to_numpy().reshape(215137, 1, 273)[:,-1,-1])
# plt.plot(trainXscalt.reshape(1432318, 1, 273)[:,-1,-1])
# plt.plot(validXscalt.reshape(793782, 1, 273)[:,-1,-1])
# plt.plot(testXscalt.reshape(215137, 1, 273)[:,-1,-1])

In [None]:
plt.plot(ooo.reshape(215137,1))
plt.show()

In [None]:
# Stitch the scaled CH4 predictions of the three splits (train, validation,
# test) into one 1-D float64 series. ravel() replaces the hard-coded split sizes
# so the cell keeps working if the split sizes change.
arr5 = np.concatenate([np.ravel(split_pred) for split_pred in (o, oo, ooo)]).astype(np.float64)

In [None]:
arr5.shape

In [None]:
plt.plot(yscalerch4.inverse_transform(arr5.reshape(2441237,1)))

In [None]:
# Stitch the unscaled CH4 targets of the three splits (train, validation, test)
# into one 1-D float64 series. ravel() replaces the hard-coded split sizes so
# the cell keeps working if the split sizes change.
arr6 = np.concatenate(
    [np.ravel(split_obs.values) for split_obs in (trainych4, validych4, testych4)]
).astype(np.float64)

In [None]:
plt.plot(arr6)

In [None]:
plt.plot(yscalerch4.inverse_transform(arr5.reshape(2441237,1)))

In [None]:
#reframed_ch4.iloc[:,-1].name
#('CH4_1_1_2', 't')
#reframed_ch4.iloc[:,-1]['2011':'2021']#2.083M
#reframed_ch4.iloc[:,-1][:'2021']
#reframed_ch4.iloc[:,-1][:'2021']
#reframed_ch4.iloc[:304966,-1]
reframed_ch4.iloc[304966:2387849,-1]

In [None]:
#arr6.reshape(2441237,1)

In [None]:
# Observed vs. predicted CH4 flux over the concatenated train | validation |
# test samples. `arr6` holds the observations and `arr5` the scaled predictions,
# which are mapped back through yscalerch4 before plotting.
# Fixes relative to the original cell: the CH4 series were labelled with CO2
# units, the x axis was labelled as epochs, and the output files carried the CO2
# period (2006-2019) although the title states 2011-2021.
# NOTE(review): predictions are drawn on a twin y axis whose tick labels are
# hidden, so the two curves do not share a visible scale -- confirm this is intended.
fig,ax=plt.subplots(figsize=(10,6), dpi=1000)
lns1=ax.plot(arr6.reshape(-1,1), color='midnightblue', linestyle='solid', label='Flux Observations, CH4 (nmolCH4m-2s-1)')
ax2=ax.twinx()
lns2=ax2.plot(yscalerch4.inverse_transform(arr5.reshape(-1,1)), color='magenta', alpha=0.5, linestyle='solid', label='Flux Predictions, CH4 (nmolCH4m-2s-1)')

lns = lns1+lns2
labs = [l.get_label() for l in lns]
ax2.legend(lns, labs, loc='best', fontsize=12)

ax.grid(linewidth=0.3)
ax.set_xlabel('Sample Index (train | validation | test)', labelpad=12, fontsize=10)
ax.set_ylabel('Methane Flux (nmolCH4m-2s-1)', labelpad=12, fontsize=10)
ax.tick_params(left=False)  # remove the ticks
ax2.tick_params(right=False, labelright=False)  # remove the ticks
plt.title('GeoCryoAI Modeling, CH4 Flux Observations v. Predictions | Alaska [2011-2021] \n Number of CH4 Flux Samples/Replicates: 2.083M', pad=15, fontsize=14)
#plt.axis([304966, 2387849, 0, 2060])
#plt.axis([0, 2441237, 0, 2060])
plt.tight_layout()
plt.savefig('CH4_ObsVPred_2011-2021_071323.svg', dpi=1000)
plt.savefig('CH4_ObsVPred_2011-2021_071323.png', dpi=1000)

### Archive

In [None]:
validXaltdfres=pd.DataFrame(validXalt).to_numpy().reshape(793782, 1, 273)

In [None]:
pp=model2.predict(validXaltdfres)

In [None]:
plt.plot(pp.reshape(793782,1))
plt.show()

In [None]:
testXaltdfres=pd.DataFrame(testXalt).to_numpy().reshape(215137, 1, 273)

In [None]:
ppp=model2.predict(testXaltdfres)

In [None]:
plt.plot(ppp.reshape(215137,1))
plt.show()

In [None]:
plt.plot(yscaleralt.inverse_transform(p.reshape(1432318,1)))

In [None]:
plt.plot(yscaleralt.inverse_transform(pp.reshape(793782,1)))

In [None]:
plt.plot(yscaleralt.inverse_transform(ppp.reshape(215137,1)))

In [None]:
#pd.DataFrame(yscaleralt.inverse_transform(p.reshape(1432318,1)))
plt.plot(trainyalt.values)
plt.plot(yscaleralt.inverse_transform(p.reshape(1432318,1)))

In [None]:
#pd.DataFrame(yscaleralt.inverse_transform(p.reshape(1432318,1)))
plt.plot(validyalt.values)
plt.plot(yscaleralt.inverse_transform(pp.reshape(793782,1)))

In [None]:
plt.plot(p.reshape(1432318,1))
plt.plot(pp.reshape(793782,1))
plt.plot(ppp.reshape(215137,1))
plt.show()

In [None]:
newp=p.reshape(1432318,1)
newpp=pp.reshape(793782,1)
newppp=ppp.reshape(215137,1)

In [None]:
# invert predictions
# Map the scaled predictions back to original units with yscaleralt, the scaler
# the neighbouring cells already use for this purpose. The original fitted a
# brand-new StandardScaler on the predictions themselves, and that scaler knows
# nothing about how the targets were scaled.
newTrain = yscaleralt.inverse_transform(newp)
newValid = yscaleralt.inverse_transform(newpp)
newTest = yscaleralt.inverse_transform(newppp)

In [None]:
newTrain.shape == newp.shape, newValid.shape == newpp.shape, newTest.shape == newppp.shape

In [None]:
trainXaltdfres.shape, newp.shape, newTrain.shape

In [None]:
testyalt.to_numpy().reshape(215137,)

In [None]:
# calculate root mean squared error
# `newp` holds predictions in scaled target space (other cells pass it through
# yscaleralt.inverse_transform before comparing with trainyalt), so it is
# un-scaled here before being compared with the unscaled observations.
# ravel() replaces the hard-coded sample count.
trainScore = np.sqrt(keras.losses.mean_squared_error(
    np.ravel(trainyalt.to_numpy()), yscaleralt.inverse_transform(newp)[:, 0]))
print('Train Score: %.6f RMSE' % (trainScore))

In [None]:
# Validation RMSE in original units: `newpp` holds predictions in scaled target
# space, so it is un-scaled with yscaleralt before being compared with the
# unscaled observations. ravel() replaces the hard-coded sample count.
validScore = np.sqrt(keras.losses.mean_squared_error(
    np.ravel(validyalt.to_numpy()), yscaleralt.inverse_transform(newpp)[:, 0]))
print('Valid Score: %.6f RMSE' % (validScore))

In [None]:
# Test RMSE in original units: `newppp` holds predictions in scaled target
# space, so it is un-scaled with yscaleralt before being compared with the
# unscaled observations. ravel() replaces the hard-coded sample count.
testScore = np.sqrt(keras.losses.mean_squared_error(
    np.ravel(testyalt.to_numpy()), yscaleralt.inverse_transform(newppp)[:, 0]))
print('Test Score: %.6f RMSE' % (testScore))

In [None]:
# RMSE between the reshaped test predictions and their inverse-transformed copy.
# NOTE(review): newppp[0] is only the first row of the (n, 1) prediction array,
# so a single value is compared against the whole newTest column; `newppp[:,0]`
# or the observed test targets were presumably intended -- confirm.
testScore2 = np.sqrt(keras.losses.mean_squared_error(newppp[0], newTest[:,0]))
print('Test Score: %.6f RMSE' % (testScore2))

In [None]:
# Archive figure: three target series on the left axis and the test predictions
# on a twin right axis, with one combined legend.
# NOTE(review): y_train, y_test, y_valid and testPredict are not assigned in the
# visible part of the notebook -- confirm they exist on a fresh kernel.
fig, ax = plt.subplots(figsize=(10,5))
ln1=ax.plot(y_train, color='royalblue', linestyle='solid', label='Observed ALT')
ln2=ax.plot(y_test, color='springgreen', linestyle='dashed', label='Tested ALT')
ln3=ax.plot(y_valid, color='magenta', linestyle='dotted', label='Validated ALT')
ax2=ax.twinx();
ln4=ax2.plot(testPredict, color='yellow', linestyle='dotted', label='Test_Predicted ALT')

# Merge the line handles from both axes so a single legend covers all four series.
lines = ln1 + ln2 + ln3 + ln4
labs = [line.get_label() for line in lines];
ax2.legend(lines, labs, loc='best')

ax.grid(linewidth=0.3);
# NOTE(review): the left axis is labelled 'Epochs' / 'Loss' although the plotted
# series are ALT values -- presumably copied from a loss plot; confirm.
ax.set_xlabel('Epochs', labelpad=6, fontsize=9);
ax.set_ylabel('Loss', labelpad=6, fontsize=9)
ax2.set_ylabel('Forecasted ALT')
plt.title('LSTM+CNN+VAE Model (GeoCryoAI): \n Observed v. Forecasted Active Layer Thickness (cm)')
# NOTE(review): the pyplot calls below act on the current axes, presumably ax2
# after twinx(), so plt.ylabel would replace the 'Forecasted ALT' label -- confirm.
plt.ylabel('Scaled Active Layer Thickness (cm)')
plt.xlabel('Epoch')
plt.axis([0, 12000, -0.1, 1])
#plt.legend(loc='best')
plt.show()



# fig,ax=plt.subplots(figsize=(10,5));
# lns1=ax.plot(history.history['loss'], color='dodgerblue', label='Loss, Active Layer Thickness (cm)');
# lns2=ax.plot(history.history['root_mean_squared_error'], color='dodgerblue', linestyle='dotted', label='RMSE, Active Layer Thickness (cm)');
# ax2=ax.twinx();
# lns3=ax2.plot(history.history['val_loss'], color='gold', label='Validation Loss, Active Layer Thickness (cm)');
# lns4=ax2.plot(history.history['val_root_mean_squared_error'], color='gold', linestyle='dotted', label='Validation RMSE, Active Layer Thickness (cm)');
          
# lns = lns1+lns2+lns3+lns4;
# labs = [l.get_label() for l in lns];
# ax2.legend(lns, labs, loc='best', fontsize=8);

# ax.grid(linewidth=0.3);
# ax.set_xlabel('Epochs', labelpad=6, fontsize=9);
# #ax.set_ylabel('Loss', labelpad=6, fontsize=9)
# #ax2.set_ylabel('Scaled Depth to Refusal (cm)', labelpad=6, fontsize=9)
# plt.title('Number of Samples/Replicates: 95653', pad=12, fontsize=8, fontweight='ultralight');
# plt.suptitle('Cost Function and Validation Loss from Thaw Depth Modeling in LSTM-AE Framework, Alaska [1969-2022]', fontsize=11);
# plt.grid(linewidth=0.3);
# #plt.show()
# #plt.savefig('ALTstats_LSTM-AEmetrics_1969-2022.png', dpi=1000)


In [None]:
# Get the predicted values (scaled target space) for the test inputs
yup_pred_scaled = model2.predict(testXscaltref)

# Unscale the predicted values and the test targets with the ALT target scaler.
# reshape(-1, 1) is used for both so the cell does not depend on a hard-coded
# test-set size.
yup_pred = yscaleralt.inverse_transform(yup_pred_scaled.reshape(-1, 1))
yup_test_unscaled = yscaleralt.inverse_transform(testyscaltref.reshape(-1, 1))


In [None]:
plt.plot(yup_pred_scaled.reshape(215137,1))

In [None]:
plt.plot(yup_pred)

In [None]:
plt.plot(yup_test_unscaled)

In [None]:
plt.plot(trainyalt.values)

In [None]:
plt.plot(y_testco2_reframed.reshape(215137,1))
plt.plot(predict.reshape(215137,1))
#plt.axis([0, 130000, -2, 5])
plt.show()

In [None]:
train, test = X[0:-144], X[-144:]
# walk-forward validation
history = [x for x in train]

In [None]:
from sklearn.metrics import accuracy_score, r2_score
# accuracy_score is a classification metric and raises ValueError on continuous
# targets, so the regression fit is reported as R^2 on flattened arrays instead.
# NOTE(review): testyaltdfres is first assigned in a later cell -- confirm the
# intended execution order.
print("Test R^2 for the unscaled ALT data")
print(f"{r2_score(np.ravel(testyaltdfres), np.ravel(y_pred)):.2%}\n")

In [None]:
testyaltdfres=pd.DataFrame(testyalt).to_numpy().reshape(215137, 1, 1)

In [None]:
from sklearn.metrics import accuracy_score, r2_score
# accuracy_score is a classification metric and raises ValueError on continuous
# targets, so the regression fit is reported as R^2 on flattened arrays instead.
print("Test R^2 for the unscaled ALT data")
print(f"{r2_score(np.ravel(testyaltdfres), np.ravel(y_pred)):.2%}\n")
print("Test R^2 for the standardized ALT data")
print(f"{r2_score(np.ravel(testyscalt), np.ravel(y_pred_scaled)):.2%}\n")

In [None]:
trainXaltdfres=pd.DataFrame(trainXalt).to_numpy().reshape(1432318, 1, 273)

In [None]:
y_pred = model2.predict(trainXaltdfres)
y_pred_scaled = model2.predict(testXscalt)

In [None]:
testyaltdfres=pd.DataFrame(testyalt).to_numpy().reshape(215137, 1, 1)

In [None]:
from sklearn.metrics import accuracy_score, r2_score
# accuracy_score is a classification metric and raises ValueError on continuous
# targets, so the regression fit is reported as R^2 on flattened arrays instead.
# NOTE(review): the preceding cell builds y_pred from the *training* inputs, so
# its length presumably differs from the test targets -- confirm which split is meant.
print("Test R^2 for the unscaled ALT data")
print(f"{r2_score(np.ravel(testyaltdfres), np.ravel(y_pred)):.2%}\n")
print("Test R^2 for the standardized ALT data")
print(f"{r2_score(np.ravel(testyscalt), np.ravel(y_pred_scaled)):.2%}\n")

In [None]:
Y_predicted_reframed = model2.predict(testXaltdfres, verbose = 1, use_multiprocessing = True)

In [None]:
plt.plot(testyscalt)
plt.plot(Y_predicted_reframed.reshape(215137,1))

In [None]:
Y_predicted_reframed = bayesian_best_model.predict(X_test_reframed,  batch_size = 384, verbose = 1, use_multiprocessing = True)

Y_predicted_scaled = Y_predicted_reframed.reshape(Y_predicted_reframed.shape[0], Y_predicted_reframed.shape[1])

Y_predicted = scaler_Y.inverse_transform(Y_predicted_scaled)

In [None]:
def plot_results(Y_test, Y_predicted, title = "Test Data and Predictions", index = None):
    """Draw test targets and predictions as step plots, one subplot per target column.

    Parameters
    ----------
    Y_test : 2-D array, indexed as ``Y_test[:, target]``
        Observed values; one subplot is created per column.
    Y_predicted : 2-D array, indexed as ``Y_predicted[:, target]``
        Predicted values. It may have fewer rows than ``Y_test``; the predictions
        are then aligned with the last ``Y_predicted.shape[0]`` x positions.
    title : str
        Figure-level title.
    index : sequence, optional
        x values, one per row of ``Y_test``. Defaults to ``0 .. n_rows - 1``.

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    if index is None:
        index = range(0, Y_test.shape[0])
    df_index = pd.DataFrame(data = index, index = range(0, Y_test.shape[0]))
    df_index.columns = ["user_index"]

    # Number of leading test rows that have no matching prediction.
    shift = Y_test.shape[0] - Y_predicted.shape[0]

    fig, axes = plt.subplots(figsize = (9, 6), sharex = True, nrows = Y_test.shape[1], squeeze = False)

    for target, ax in enumerate(axes.flat):
        ax.step(df_index["user_index"].values, Y_test[:,target], where = "post", label = "Testing Set",
                color = "blue")
        ax.step(df_index.loc[shift:, "user_index"].values, Y_predicted[:,target], where = "post",
                label = "Predictions", color = "red")
        # One legend per subplot; a single plt.legend() only labels the last axes.
        ax.legend()

    fig.suptitle(title)

In [None]:
# Plot unscaled test targets against the predictions. The title previously
# contained "/n", which renders literally; "\n" gives the intended line break.
plot_results(scaler_Y.inverse_transform(Y_test.values), Y_predicted, index = Y_test.index,
             title = "Active Layer Thickness \n Predictions v. Test Data \n via 8-layer C1DLSTMSAE Network")

In [None]:
Y_predicted=Y_predicted.reshape(397492,)#;Y_test=Y_test.reshape(668168,)

In [None]:
def qq_plot(Y_test, Y_predicted, title = "Test Data and Predictions", index = None):
    """Scatter predictions against test targets, one subplot per target column.

    Despite the name, no quantiles are computed: each subplot shows predicted
    versus observed values in red, plus the observed values against themselves
    in blue as an identity reference.

    Parameters
    ----------
    Y_test : 2-D array, indexed as ``Y_test[:, target]``
        Observed values; one subplot is created per column.
    Y_predicted : 2-D array, indexed as ``Y_predicted[:, target]``
        Predicted values. It may have fewer rows than ``Y_test``; it is then
        paired with the last ``Y_predicted.shape[0]`` test rows.
    title : str
        Figure-level title.
    index : sequence, optional
        Not used by the plot; accepted so existing callers keep working.

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    # Number of leading test rows that have no matching prediction.
    shift = Y_test.shape[0] - Y_predicted.shape[0]

    fig, axes = plt.subplots(figsize = (9, 6), sharex = True, nrows = Y_test.shape[1], squeeze = False)

    for target, ax in enumerate(axes.flat):
        ax.scatter(Y_test[shift:,target], Y_predicted[:,target], label = "Predictions", color = "red", s = 5,
                   alpha = 0.5)
        ax.scatter(Y_test[:,target], Y_test[:,target], label = "Testing Set", color = "blue", s = 5)
        # One legend per subplot; a single plt.legend() only labels the last axes.
        ax.legend()

    fig.suptitle(title)

In [None]:
# Scatter scaled predictions against the test targets. The title previously
# contained "/n", which renders literally; "\n" gives the intended line break.
qq_plot(Y_test.values, Y_predicted_scaled, index = Y_test.index,
        title = "Active Layer Thickness \n Predictions v. Test Data via \n 7-layer Sequential Time-Distributed C1DLSTMSAE Network")

In [None]:
Y_predicted_reframed = model.predict(X_test_reframed)
Y_predicted = Y_predicted_reframed.reshape(Y_predicted_reframed.shape[0], Y_predicted_reframed.shape[1])

In [None]:
import pandas as pd
testim=pd.read_csv(r'/Users/bradleygay/test_store_ALT_2022.csv')

In [None]:
testim.index=testim.iloc[:,0]

In [None]:
testim=testim.drop(testim.columns[0],axis=1)

In [None]:
testim.index.name = None

In [None]:
#testim.index=pd.to_datetime(testim.index, format='%Y')

In [None]:
testim=testim.sort_index()

In [None]:
testim.index = pd.to_datetime(testim.index)

In [None]:
testim

In [None]:
plt.plot(scaler_Y.inverse_transform(testim))

In [None]:
# Plot unscaled test targets against unscaled predictions (title typo fixed).
plot_results(scaler_Y.inverse_transform(Y_test.values), scaler_Y.inverse_transform(Y_predicted),
             index = Y_test.index, title = "Active Layer Thickness - Predicted v. Test Data")

In [None]:
test_scores = model.evaluate(X_test_reframed, Y_test[backward_steps:],
                                           batch_size=hp["batch_size"], use_multiprocessing=True,)

In [None]:
def metrics_print(test_data,test_predict):
    """Print RMSE, R^2 and MAPE for a set of predictions.

    R^2 and MAPE are multiplied by 100 and shown as percentages; every value is
    rounded to two decimals. Nothing is returned.
    """
    rmse = np.sqrt(sklearn.metrics.mean_squared_error(test_data, test_predict))
    print('Test RMSE: ', round(rmse, 2))

    r2_percent = sklearn.metrics.r2_score(test_data, test_predict) * 100
    print('Test R^2 : ', round(r2_percent, 2) ,"%")

    mape_percent = sklearn.metrics.mean_absolute_percentage_error(test_data, test_predict) * 100
    print('Test MAPE: ', round(mape_percent, 2), '%')

In [None]:
print("##************** Linear Regression Results **************##")
metrics_print(prediction_df['Observed'], prediction_df['LR'])
print(" ")
print(" ")

print("##************** Deep Learning Results **************##")
metrics_print(prediction_df['Observed'], prediction_df['DNN'])
print(" ")
print(" ")

In [None]:
fa = plt.figure(figsize=(16,5))
plt.subplot(1,2,1)
plt.scatter(prediction_df['Observed'],prediction_df['LR'])
plt.xlabel('True Values [snow_depth]', fontsize=15)
plt.ylabel('Predictions [snow_depth]', fontsize=15)
plt.title("Linear Regression")


plt.subplot(1,2,2)
plt.scatter(prediction_df['Observed'],prediction_df['DNN'])
plt.xlabel('True Values [snow_depth]', fontsize=15)
plt.ylabel('Predictions [snow_depth]', fontsize=15)
plt.title("Deep Neural Network")

In [None]:
LR_error = prediction_df['Observed'] - prediction_df['LR']
DNN_error = prediction_df['Observed'] - prediction_df['DNN']

fa = plt.figure(figsize=(16,5))

plt.subplot(1,2,1)
LR_error.hist()
plt.xlabel('Error', fontsize=15)
plt.ylabel('Frequency', fontsize=15)
plt.title("Linear Regression")

plt.subplot(1,2,2)
DNN_error.hist()
plt.xlabel('Error', fontsize=15)
plt.ylabel('Frequency', fontsize=15)
plt.title("Deep Neural Network")

In [None]:
trainXaltdfres=pd.DataFrame(trainXalt).to_numpy().reshape(1432318, 1, 273)

In [None]:
y_pred = model2.predict(trainXaltdfres)
y_pred_scaled = model2.predict(testXscalt)

In [None]:
testyaltdfres=pd.DataFrame(testyalt).to_numpy().reshape(215137, 1, 1)

In [None]:
from sklearn.metrics import accuracy_score, r2_score
# accuracy_score is a classification metric and raises ValueError on continuous
# targets, so the regression fit is reported as R^2 on flattened arrays instead.
# NOTE(review): the preceding cell builds y_pred from the *training* inputs, so
# its length presumably differs from the test targets -- confirm which split is meant.
print("Test R^2 for the unscaled ALT data")
print(f"{r2_score(np.ravel(testyaltdfres), np.ravel(y_pred)):.2%}\n")
print("Test R^2 for the standardized ALT data")
print(f"{r2_score(np.ravel(testyscalt), np.ravel(y_pred_scaled)):.2%}\n")

In [None]:
#ALT
plt.plot(yscaleralt.inverse_transform(testyscalt))

In [None]:
#CH4

In [None]:
#CO2

In [None]:
#alt_model.predict(testXscaltref, testyscaltref, verbose=1)

In [None]:
one=np.concatenate((trainyalt.resample('Y').mean(), validyalt.resample('Y').mean()), axis=0)
two=np.concatenate((one, testyalt.resample('Y').mean()), axis=0)

In [None]:
abc=pd.DataFrame(yscaler.inverse_transform(testyscaltpredres))
#yscaler.inverse_transform(testyscaltpredres)

In [None]:
plt.plot(two)

In [None]:
trainyalt.index

In [None]:
three=pd.DataFrame(trainyscaltpredinv)
#np.concatenate(trainyscaltpredinv, validyscaltpredinv)

In [None]:
three.index=trainyalt.index

In [None]:
plt.plot(three)

In [None]:
# Append the yearly-mean validation targets to `one` along axis 0.
# np.concatenate takes a sequence of arrays as its first argument; the original call
# was missing the opening parenthesis of that tuple and did not parse.
two=np.concatenate((one, validyalt.resample('Y').mean()), axis=0)

In [None]:
one=np.concatenate((trainyalt.resample('Y').mean(), validyalt.resample('Y').mean()), axis=0)

In [None]:
plt.plot(one)

In [None]:
plt.plot(trainyalt.resample('Y').mean())
plt.plot(validyalt.resample('Y').mean())
plt.plot(testyalt.resample('Y').mean())

In [None]:
# Predict on the train/test windows, undo the scaling, report RMSE, and overlay the
# predictions on the original series.
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
# invert predictions
# NOTE: trainY / testY are rebound to their inverse-transformed, (1, n)-shaped versions,
# so re-running this cell without re-creating them would invert them a second time.
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
#-----Visualize----------
# The notebook imports numpy as `np`; the bare name `numpy` is not bound by that import,
# so the NaN-filled plotting buffers below are built through `np`.
# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
# shift test predictions for plotting
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
# plot baseline and predictions
plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()

In [None]:
#df['ALT']['2021':].shape
#1432321/2441240#0.5867186347921548
#793782/2441240#0.325155248971834
#215137/2441240#0.0881261162360112

In [None]:
# plt.plot(df.ALT['1970':'2017'][1:].to_numpy().reshape(1432317,1), trainyalt)
# plt.plot(df.ALT['2018':'2020'].values, validyalt)
# plt.plot(df.ALT['2021':][1:].to_numpy().reshape(215136,1), testyalt)
# plt.show()

In [None]:
# plt.figure(figsize=(10,6))
# plt.plot(trainyscalt, label='train')
# plt.plot(trainyscaltpredres, label='trainpred')
# plt.plot(validyscalt, label='valid')
# plt.plot(validyscaltpredres, label='validpred')
# plt.plot(testyscalt, label='test')
# plt.plot(testyscaltpredres, label='testpred')
# plt.legend()
# #plt.axis(xmin=0, xmax=250000)

In [None]:
testXscalt

In [None]:
from sklearn import metrics
metrics.r2_score(testyscalt, testyscaltpred)

In [None]:
#invyhat=np.concatenate((testyscaltpredres,testXscalt[:,-1:]),axis=1)

In [None]:
#invyhat=yscaler.inverse_transform(invyhat)[:,0]

In [None]:
#invy=np.concatenate((testyscalt.reshape((len(testyscalt), 1)),testXscalt[:,1:]),axis=1)

In [None]:
#invy=yscaler.inverse_transform(invy)[:,0]

In [None]:
#rmse = np.sqrt(mean_squared_error(invy, invyhat))
#print('Test RMSE: %.3f' % rmse)

In [None]:
# yhat = model3.predict(test_X)
# test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
# # invert scaling for forecast
# inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
# inv_yhat = scaler.inverse_transform(inv_yhat)
# inv_yhat = inv_yhat[:,0]
# # invert scaling for actual
# test_y = test_y.reshape((len(test_y), 1))
# inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
# inv_y = scaler.inverse_transform(inv_y)
# inv_y = inv_y[:,0]
# # calculate RMSE
# rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
# print('Test RMSE: %.3f' % rmse)

In [None]:
trainXalt.shape, trainXscalt.shape, trainXscaltref.shape

In [None]:
trainyscaltpredres=trainyscaltpred.reshape(1432317,1)
validyscaltpredres=validyscaltpred.reshape(793782,1)
testyscaltpredres=testyscaltpred.reshape(215136,1)

In [None]:
#print(list(trainXalt.columns))

In [None]:
#ind=testyalt.index.values
#ind=trainyalt.index.values
ind=validyalt.index.values

In [None]:
# Plot the inverse-scaled training targets and training predictions against `ind`.
# NOTE(review): `ind` is assigned from validyalt.index.values in the preceding cell;
# plt.plot needs x and y of equal length, so `ind` presumably has to be switched to
# the training index before running this cell -- confirm.
plt.figure(figsize=(10,6))
plt.plot(ind,yscaler.inverse_transform(trainyscalt))
plt.plot(ind,yscaler.inverse_transform(trainyscaltpredres))
plt.show()

In [None]:
plt.figure(figsize=(10,6))
plt.plot(ind,yscaler.inverse_transform(validyscalt))
plt.plot(ind,yscaler.inverse_transform(validyscaltpredres))
plt.show()

In [None]:
# Plot the inverse-scaled test targets and test predictions against `ind`.
# NOTE(review): `ind` is assigned from validyalt.index.values a few cells earlier;
# plt.plot needs x and y of equal length, so `ind` presumably has to be switched to
# the test index before running this cell -- confirm.
plt.figure(figsize=(10,6))
plt.plot(ind,yscaler.inverse_transform(testyscalt))
plt.plot(ind,yscaler.inverse_transform(testyscaltpredres))
plt.show()

In [None]:
trainyscaltpredinv=yscaler.inverse_transform(trainyscaltpredres)
validyscaltpredinv=yscaler.inverse_transform(validyscaltpredres)

In [None]:
trainyscaltpredinv.shape, validyscaltpredinv.shape

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
print(mean_squared_error(trainyalt, trainyscaltpredinv))

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
print(mean_squared_error(train, y_train_pred))

## Exploratory Plots (Active Layer Thickness, Carbon Dioxide, Methane)

In [None]:
# fig,ax=plt.subplots(figsize=(10,7));
# #lns1=ax.plot(history2.history['loss'], color='dodgerblue', label='Loss, ALT (cm)');
# lns2=ax.plot(history2.history['mean_squared_error'], color='dodgerblue', linestyle='solid', label='RMSE, ALT (cm)');
# ax2=ax.twinx();
# #lns3=ax2.plot(history2.history['val_loss'], color='gold', label='Validation Loss, ALT (cm)');
# lns4=ax2.plot(history2.history['val_mean_squared_error'], color='gold', linestyle='solid', label='Validation RMSE, ALT (cm)');
          
# lns = lns2+lns4; #lns1+lns2+lns3+lns4;
# labs = [l.get_label() for l in lns];
# ax2.legend(lns, labs, loc='best', fontsize=8);

# ax.grid(linewidth=0.3);
# ax.set_xlabel('Epochs', labelpad=12, fontsize=10);
# ax.set_ylabel('Scaled Depth to Refusal (cm)', labelpad=12, fontsize=10);
# #ax2.set_ylabel('Scaled Depth to Refusal (cm)', labelpad=6, fontsize=9)
# plt.title('Number of Samples/Replicates: 95653', pad=15, fontsize=12, fontweight='ultralight');
# plt.suptitle('Cost Function and Validation Loss from Thaw Depth Modeling, GeoCryoAI Framework in Alaska [1969-2022]', fontsize=14);
# plt.grid(linewidth=0.3);
# #plt.show()
# #plt.savefig('ALTstats_CNNLSTMSAEmetrics_1969-2022_021323.png', dpi=1000)


In [None]:
# fig,ax=plt.subplots(figsize=(10,7));
# lns1=ax.plot(history.history['loss'], color='dodgerblue', label='Loss, Carbon Dioxide Mole Fraction (µmolCO2mol-1ms-1)');
# lns2=ax.plot(history.history['root_mean_squared_error'], color='dodgerblue', linestyle='dotted', label='RMSE, Carbon Dioxide Mole Fraction (µmolCO2mol-1m)');
# ax2=ax.twinx();
# lns3=ax2.plot(history.history['val_loss'], color='gold', label='Validation Loss, Active Layer Thickness (cm)');
# lns4=ax2.plot(history.history['val_root_mean_squared_error'], color='gold', linestyle='dotted', label='Validation RMSE, Carbon Dioxide Mole Fraction (µmolCO2mol-1m)');
          
# lns = lns1+lns2+lns3+lns4;
# labs = [l.get_label() for l in lns];
# ax2.legend(lns, labs, loc='best', fontsize=8);

# ax.grid(linewidth=0.3);
# ax.set_xlabel('Epochs', labelpad=6, fontsize=9);
# #ax.set_ylabel('Loss', labelpad=6, fon#tsize=9)
# #ax2.set_ylabel('Scaled Depth to Refusal (cm)', labelpad=6, fontsize=9)
# plt.title('Number of Samples/Replicates: 95653', pad=12, fontsize=8, fontweight='ultralight');
# plt.suptitle('Cost Function and Validation Loss from Carbon Dioxide Mole Fraction Modeling in LSTM-AE Framework, Alaska [1969-2022]', fontsize=11);
# plt.grid(linewidth=0.3);
# #plt.show()
# #plt.savefig('ALTstats_LSTM-AEmetrics_1969-2022-x2.png', dpi=1000)


In [None]:
# fig,ax=plt.subplots(figsize=(10,5));
# lns1=ax.plot(history.history['loss'], color='dodgerblue', label='Loss, Methane Flux (nmolCO2m-2s)');
# lns2=ax.plot(history.history['root_mean_squared_error'], color='dodgerblue', linestyle='dotted', label='RMSE, Methane Flux (nmolCO2m-2s)');
# ax2=ax.twinx();
# lns3=ax2.plot(history.history['val_loss'], color='gold', label='Validation Loss, Active Layer Thickness (cm)');
# lns4=ax2.plot(history.history['val_root_mean_squared_error'], color='gold', linestyle='dotted', label='Validation RMSE, Methane Flux (nmolCO2m-2s)');
          
# lns = lns1+lns2+lns3+lns4;
# labs = [l.get_label() for l in lns];
# ax2.legend(lns, labs, loc='best', fontsize=8);

# ax.grid(linewidth=0.3);
# ax.set_xlabel('Epochs', labelpad=6, fontsize=9);
# #ax.set_ylabel('Loss', labelpad=6, fon#tsize=9)
# #ax2.set_ylabel('Scaled Depth to Refusal (cm)', labelpad=6, fontsize=9)
# plt.title('Number of Samples/Replicates: 120539', pad=12, fontsize=8, fontweight='ultralight');
# plt.suptitle('Cost Function and Validation Loss from Methane Flux Modeling in LSTM-AE Framework, Alaska [2015-2018]', fontsize=11);
# plt.grid(linewidth=0.3);
# #plt.show()
# #plt.savefig('ALTstats_LSTM-AEmetrics_1969-2022-x2.png', dpi=1000)


In [None]:
test=alt.loc["2022"].replace(-9999,np.nan).dropna().values

In [None]:
test=np.reshape(test, (test.shape[0], test.shape[1], 1))

In [None]:
testX=test

In [None]:
testy=np.reshape(test, (test.shape[0],))

In [None]:
#X_test.shape, y_test.shape

In [None]:
testX.shape, testy.shape

In [None]:
model.evaluate(X_testsc,y_testsc)
#Not an accurate depiction due to scaling; must invert prior to quantifying error

In [None]:
# train_Xt=np.array(trainX).reshape(1772962,1)
# train_yt=np.array(trainY).reshape(1772962,)
# valid_Xt=np.array(validX).reshape(453143,1)
# valid_yt=np.array(validY).reshape(453143,)
# test_Xt=np.array(testX).reshape(215135,1)
# test_yt=np.array(testY).reshape(215135,)

In [None]:
# lin_reg = linear_model.LinearRegression()
# # train model
# lin_reg.fit(train_Xt,train_yt)
# # predict
# y_train_pred = lin_reg.predict(train_Xt)
# y_valid_pred = lin_reg.predict(valid_Xt)
# y_test_pred = lin_reg.predict(test_Xt)
# # Plot predictions
# fig=plt.figure()
# plt.scatter(y_train_pred, train_yt, c = "blue", marker = "s", label = "Training data")
# plt.scatter(y_valid_pred, valid_yt, c = "magenta", marker = "s", label = "Validation data")
# plt.scatter(y_test_pred, test_yt, c = "lightgreen", marker = "s", label = "Testing data")
# plt.legend()
# plt.show()

In [None]:
# from sklearn.metrics import mean_squared_error
# error =np.sqrt(mean_squared_error(valid_yt, y_valid_pred))
# print(error)

In [None]:
# prediction=lin_reg.predict(valid_Xt)

In [None]:
# plt.plot(valid_yt, linestyle='dotted');
# plt.plot(prediction, linestyle='dotted');

In [None]:
plt.figure()
plt.ylabel('loss'); plt.xlabel('epoch')
plt.semilogy(history.history['loss'])

In [None]:
axes=plt.axes()
#axes.plot(pd.DataFrame(history.history)['loss'], label='Loss')
axes.plot(pd.DataFrame(history.history)['val_loss'], label='Validation Loss')
axes.legend(loc=0)
axes.set_title('Model fitting performance')

In [None]:
#1,#3
axes=plt.axes()
axes.plot(pd.DataFrame(history.history)['loss'], label='Loss')
axes.plot(pd.DataFrame(history.history)['val_loss'], label='Validation Loss')
axes.legend(loc=0)
axes.set_title('Model fitting performance')

In [None]:
# Fit the standardizer on the full ALT frame, then scale the 2020-2022 slice.
sc=StandardScaler()
# fit() is enough here: the transformed copy returned by fit_transform() was discarded.
sc.fit(alt)
# Flatten the slice to one column for the scaler, then append a trailing axis so the
# array is 3-D (rows, 1, 1) before it is handed to model.predict in a later cell.
# (The earlier unscaled assignment to `newtest` was overwritten immediately and is dropped.)
newtest=sc.transform(np.reshape(alt.loc["2020":"2022":,:].values, (-1, 1)))
newtest=np.reshape(newtest, (newtest.shape[0],newtest.shape[1],1))

In [None]:
newtest.shape

In [None]:
preds=model.predict(newtest)

In [None]:
preds=preds.reshape(11050,1)
unspreds=sc.inverse_transform(preds)

In [None]:
plt.plot(unspreds, color = '#135485', linestyle='solid', label = "Predictions")
plt.plot(alt.loc["2020":"2022":,:].values, color = 'pink', linestyle='dotted', label = "Real Data")

In [None]:
# Score the inverse-transformed predictions against the observed 2020-2022 ALT values.
newtest=alt.loc["2020":"2022":,:].values
# Compare column 0 of both arrays element-wise. `newtest[0]` selected only the first
# row of the (rows, columns) array, which broadcast a single observation against
# every prediction instead of pairing each observation with its prediction.
testScore = np.sqrt(keras.losses.mean_squared_error(newtest[:,0], unspreds[:,0]))
print('Test Score: %.6f RMSE' % (testScore))

In [None]:
sc=StandardScaler()
sc.fit_transform(alt)
#make predictions
trainPredict = model.predict(trainX)
validPredict = model.predict(validX)
#invert predictions
trainPredict=trainPredict.reshape(68801,1)
validPredict=validPredict.reshape(18902,1)
trainy=trainy.reshape(68801,)
validy=validy.reshape(18902,)
trainPredict = sc.inverse_transform(trainPredict)
trainy = sc.inverse_transform([trainy])
validPredict = sc.inverse_transform(validPredict)
validy = sc.inverse_transform([validy])

In [None]:
# calculate root mean squared error
trainScore = np.sqrt(keras.losses.mean_squared_error(trainy[0], trainPredict[:,0]))
print('Train Score: %.6f RMSE' % (trainScore))
validScore = np.sqrt(keras.losses.mean_squared_error(validy[0], validPredict[:,0]))
print('Valid Score: %.6f RMSE' % (validScore))

In [None]:
test=altsc.loc["2020":"2022"]

In [None]:
test.values

In [None]:
testPredict=model.predict(test.values.reshape(3363,1,1))
testPredict=testPredict.reshape(3363,1)
testy=test.values.reshape(3363,)
testPredict=sc.inverse_transform(testPredict)
testy=sc.inverse_transform([testy])
testScore = np.sqrt(keras.losses.mean_squared_error(testy[0], testPredict[:,0]))
print('Test Score: %.6f RMSE' % (testScore))

In [None]:
trainPredict.shape, validPredict.shape, testPredict.shape

In [None]:
#plt.plot(trainX.reshape(68801,1))
#plt.plot(validX.reshape(18902,1))
plt.plot(trainPredict.reshape(68801,1))
plt.plot(validPredict.reshape(18902,1))
plt.plot(testPredict.reshape(3363,1))

In [None]:
testPredict=testPredict.reshape(11050,1)

In [None]:
testScore = np.sqrt(keras.losses.mean_squared_error(testX[0], testPredict[:,0]))
print('Test Score: %.6f RMSE' % (testScore))

In [None]:
# make predictions
trainPredict = model.predict(trainX)
validPredict = model.predict(validX)
testPredict = model.predict(testX)

In [None]:
trainPredict.shape, validPredict.shape, testPredict.shape

In [None]:
# # make predictions
# #test_Xt=np.array(test_X)
# #test_yt=np.array(test_y)
# trainPredict.shape, testPredict.shape
# trainPredict=trainPredict.reshape(68801,1)
# validPredict=validPredict.reshape(18902,1)
# testPredict=testPredict.reshape(11050,1)
# trainPredict.shape, testPredict.shape, validPredict.shape

In [None]:
# # make predictions
# #test_Xt=np.array(test_X)
# #test_yt=np.array(test_y)
# trainPredict.shape, testPredict.shape
# trainPredict=trainPredict.reshape(298498,1)
# validPredict=validPredict.reshape(144987,1)
# testPredict=testPredict.reshape(103616,1)
# trainPredict.shape, testPredict.shape, validPredict.shape

In [None]:
# make predictions
#test_Xt=np.array(test_X)
#test_yt=np.array(test_y)
trainPredict=trainPredict.reshape(68801,1)
validPredict=validPredict.reshape(18902,1)
testPredict=testPredict.reshape(11050,1)
trainPredict.shape, validPredict.shape, testPredict.shape

In [None]:
# invert prediction
# Each split gets its own StandardScaler, fitted on that split's predictions and then
# used to inverse-transform the same predictions; test predictions pass through as-is.
# NOTE(review): the scalers are fitted on model outputs rather than on the data the
# model was trained with -- confirm this is the intended inversion.
newTrain = StandardScaler().fit(trainPredict).inverse_transform(trainPredict)
# `sc` is left bound to the validation-fitted scaler, as before.
sc = StandardScaler().fit(validPredict)
newValid = sc.inverse_transform(validPredict)
#sc=StandardScaler().fit_transform(testPredict)
newTest = testPredict

In [None]:
newTrain.shape == trainPredict.shape, newValid.shape == validPredict.shape, newTest.shape == testPredict.shape

In [None]:
trainX.shape, trainPredict.shape, newTrain.shape

In [None]:
#plt.plot(X_train)
#plt.plot(trainPredict)
#plt.plot(newTrain)

In [None]:
# Print each split's size multiplied by 91 (presumably rows x feature columns -- confirm).
# The original `print("Train": ...)` used a colon between arguments and did not parse.
print("Train:", 1772964*91)
print("Valid:", 614894*91)
print("Test:", 53388*91)

In [None]:
# calculate root mean squared error
# Index column 0 on both sides so values are paired element-wise; the previous
# `new*[0]` picked only the first row and broadcast it over all predictions.
# NOTE(review): newTrain/newValid/newTest are derived from the predictions themselves
# in the "invert prediction" cell, not from observed targets, and newTest is
# testPredict unchanged -- confirm the intended reference series for these scores.
trainScore = np.sqrt(keras.losses.mean_squared_error(newTrain[:,0], trainPredict[:,0]))
print('Train Score: %.6f RMSE' % (trainScore))
validScore = np.sqrt(keras.losses.mean_squared_error(newValid[:,0], validPredict[:,0]))
print('Valid Score: %.6f RMSE' % (validScore))
testScore = np.sqrt(keras.losses.mean_squared_error(newTest[:,0], testPredict[:,0]))
print('Test Score: %.6f RMSE' % (testScore))

# Forecast

In [None]:
#X_test_reframed_sup.shape
#Xscaler.inverse_transform(X_test_reframed_sup)
#X_test_reframed_sup.reshape(
#215134*376#80890384
#80890384/94#860536.0
#Xscaler.inverse_transform(X_test_reframed_sup.reshape(860536,94))

In [None]:
#X_train_reframed_sup.shape
#1772957*376#666631832
#666631832/94#7091828.0
#Xscaler.inverse_transform(X_train_reframed_sup.reshape(7091828,94))

In [None]:
#X_valid_reframed_sup.shape
#453141*376#170381016
#170381016/94#1812564.0
#Xscaler.inverse_transform(X_valid_reframed_sup.reshape(1812564,94))

In [None]:
98102016/32/456

In [None]:
6723*32*456

In [None]:
215136*455

In [None]:
97886880-98102016

In [None]:
testXscch4ref.shape

In [None]:
#X_test_reframed.reshape(215136,1,456).shape
testXscch4refres=testXscch4ref.reshape(161749,1,456)

In [None]:
score = model2.evaluate(testXscch4ref, testyscch4ref, verbose = 1) 

In [None]:
score

In [None]:
print('Test MAE:', score[1])
print('Test MSE:', score[2])
#print('Test RMSE:', score_experimental[3])

In [None]:
X_test_reframed.shape

In [None]:
predict_experimental = model2.predict(X_test_reframed, verbose = 1)

In [None]:
predict_experimental.shape, X_test_reframed.shape

In [None]:
predict_experimental=yscaler.inverse_transform(predict_experimental.reshape(215136,1))

In [None]:
X_t_experimental=Xscaler.inverse_transform(X_test_reframed.reshape(215136,286))

In [None]:
yhat_experimental=np.concatenate((X_t_experimental,predict_experimental),axis=1)

In [None]:
#np.concatenate(X_train_reframed.reshape(7574,299))
plt.plot(yscaler.inverse_transform(y_test_reframed.reshape(215136,1)))
plt.plot(predict_experimental)
#plt.plot(yhat.reshape(423000,1))
#plt.axis([0,1450,0,2])
plt.show()

In [None]:
#y_test_reframed.shape
# RMSE between the reshaped test targets and predict_experimental.
# NOTE(review): predict_experimental is passed through yscaler.inverse_transform in an
# earlier cell, whereas y_test_reframed is used here without inversion -- confirm both
# arguments are on the same scale before trusting this number.
np.sqrt(mean_squared_error(y_test_reframed.reshape(215136,1),predict_experimental))

In [None]:
mean_absolute_percentage_error(y_test_reframed.reshape(215136,1),predict_experimental)

In [None]:
r2_score(y_test_reframed.reshape(215136,1),predict_experimental)

In [None]:
plt.plot(y_test_reframed.reshape(215136,1))
plt.plot(predict_experimental.reshape(215136,1))
#plt.axis([0, 80000, -5, 5])
plt.show()

In [None]:
fig,ax = plt.subplots(figsize=(10,6), dpi=1000)
ln1=ax.plot(history.history['loss'], color='magenta', linestyle='solid', label='Loss (MSE)')
ln2=ax.plot(history.history['val_loss'], color='dodgerblue', linestyle='solid', label='Val Loss (MSE)')
ln3=ax.plot(history.history['mean_absolute_error'], color='springgreen', linestyle='dotted', label='MAE')
#ln4=ax.plot(history.history['mean_squared_error'], color='springgreen', linestyle='dashed', label='Seward Peninsula')
ln4=ax.plot(history.history['root_mean_squared_error'], color='springgreen', linestyle='dashed', label='RMSE')
ln5=ax.plot(history.history['val_mean_absolute_error'], color='red', linestyle='dotted', label='Val MAE')
#ln7=ax.plot(history.history['val_mean_squared_error'], color='red', linestyle='dashed', label='Seward Peninsula')
ln6=ax.plot(history.history['val_root_mean_squared_error'], color='red', linestyle='dashed', label='Val RMSE')
#ln2=ax.plot(sib.iloc[2:,7].replace(-9999,np.nan).dropna()color='springgreen', linestyle='dashed', label='Interior')
#ln3=ax.plot(sib.iloc[2:,16].replace(-9999,np.nan).dropna(), color='magenta', linestyle='dotted', label='Seward Peninsula')
#ln4=ax.plot(sib.iloc[2:,39].replace(-9999,np.nan).dropna(), color='dodgerblue', linestyle='dotted', label='Yukon-Kuskokwim Delta')
#ax2=ax.twinx();
#ln4=ax2.plot(validPredict, color='coral', linestyle='dotted')

lines = ln1 + ln2 + ln3 + ln4 + ln5 + ln6# + ln7 + ln8
labs = [line.get_label() for line in lines];
plt.legend(lines, labs, loc='lower left', fontsize=8)

ax.grid(linewidth=0.3);
ax.set_xlabel('Epochs', labelpad=10, fontsize=16);
ax.set_ylabel('Loss (cm)', labelpad=10, fontsize=16)
#ax.set(xticklabels=[])  # remove the tick labels
ax.tick_params(left=False)  # remove the ticks
#plt.ylabel('Active Layer Thickness (cm)')
plt.title('GeoCryoAI In Situ Module | Bidirectional LSTM Autoencoder Loss Function \n In Situ Thaw Depth Simulations (1969-2022)', pad=10)
#plt.xlabel('Year')
#plt.axis([0, 6, 0, 60])
#plt.legend(loc='best')
#plt.show()
plt.savefig('/Users/bradleygay/Downloads/bilstmae_insitu_ALT_loss.png',dpi=1000)

In [None]:
X_test_reframed.shape
Xscaler.fit_transform(X_test_reframed.reshape(215137,285))

In [None]:
y_test_inv = Xscaler.inverse_transform(y_test_reframed_sup.reshape(860536,1))
y_train_inv = Xscaler.inverse_transform(y_train_reframed_sup.reshape(7091828,1))
y_valid_inv = Xscaler.inverse_transform(y_valid_reframed_sup.reshape(1812564,1))

In [None]:
y_test_reframed_sup.shape, y_test_inv.shape, y_valid_inv.shape, y_train_inv.shape

In [None]:
plt.plot(y_train_reframed_sup.reshape(7091828,1))
plt.plot(y_valid_reframed_sup.reshape(1812564,1))
plt.plot(y_test_reframed_sup.reshape(860536,1))
plt.show()

In [None]:
plt.plot(y_train_inv)
plt.plot(y_valid_inv)
plt.plot(y_test_inv)
plt.show()
#plt.plot(y_test_reframed_sup.reshape(860536,1))

In [None]:
prediction = model.predict(X_test_reshaped)

In [None]:
prediction.shape

In [None]:
#prediction.shape
#prediction = Xscaler.inverse_transform(prediction.reshape(215134,1))

In [None]:
y_test_reshaped.shape

In [None]:
plt.plot(scaler.inverse_transform(prediction.reshape(215137,1)))
#plt.plot(y_test_reshaped.reshape(215137,1))

In [None]:
def plot_results(Y_test, Y_predicted, title = "Test Data and Predictions", index = None):
    """Step-plot every target column of the test set together with its predictions.

    The original signature was missing its first parameter (`def plot_results(, ...)`)
    and did not parse; `Y_test` is restored because the body reads it throughout.

    Parameters
    ----------
    Y_test : 2-D array indexed as [row, target]
        Observed values; one subplot row is created per column.
    Y_predicted : 2-D array indexed as [row, target]
        Predicted values. May have fewer rows than `Y_test`; predictions are then
        drawn against the last `len(Y_predicted)` x positions.
    title : str
        Figure-level title.
    index : sequence, optional
        x-axis values, one per row of `Y_test`. Defaults to 0..len(Y_test)-1.
    """
    if index is None:
        index = range(0, Y_test.shape[0])
    # Keep the user-supplied x values in a frame with a positional index so the
    # prediction offset can be applied by row position.
    df_index = pd.DataFrame(data = index, index = range(0, Y_test.shape[0]))
    df_index.columns = ["user_index"]

    # Number of leading test rows that have no matching prediction.
    shift = Y_test.shape[0] - Y_predicted.shape[0]

    fig, axes = plt.subplots(figsize = (9, 6), sharex = True, nrows = Y_test.shape[1], squeeze = False)

    for target, ax in enumerate(axes.flat):
        ax.step(df_index.values, Y_test[:,target], where = "post", label = "Testing Set", color = "blue")
        ax.step(df_index.loc[shift:, "user_index"].values, Y_predicted[:,target], where = "post",
                label = "Predictions", color = "red")

    plt.suptitle(title)
    # plt.legend() attaches the legend to the current (last created) axes.
    plt.legend()

In [None]:
y_predict_reframed_sup=model.predict(X_test_reframed_sup, verbose = 1, use_multiprocessing = True)
#y_predict_reframed_sup

In [None]:
y_predict_reframed_sup.shape

In [None]:
y_predict_reframed_sup=y_predict_reframed_sup.reshape(215134,1)

In [None]:
y_predict_reframed_sup = Xscaler.inverse_transform(y_predict_reframed_sup)

In [None]:
#plt.plot(y_test.values.reshape(215137,))
plt.plot(y_predict_reframed_sup)

In [None]:
# model_scores = model.evaluate(X_test_reframed_sup, y_test_reframed_sup.iloc[backward_steps:], batch_size = None, verbose = 1, 
#                               use_multiprocessing = True)
# model_scores

In [None]:
Y_predicted_reframed = bayesian_best_model.predict(X_test_reframed,  batch_size = 384, verbose = 1, use_multiprocessing = True)

Y_predicted_scaled = Y_predicted_reframed.reshape(Y_predicted_reframed.shape[0], Y_predicted_reframed.shape[1])

Y_predicted = scaler_Y.inverse_transform(Y_predicted_scaled)

In [None]:
def plot_results(y_test_reframed_sup, y_predict_reframed_sup, title = "Test Data and Predictions", index = None):
    """Draw observed and predicted series as step plots, one subplot row per target column.

    `y_test_reframed_sup` and `y_predict_reframed_sup` are indexed as [row, target].
    When the prediction array has fewer rows than the test array, it is plotted against
    the trailing x positions. `index` supplies the x values (default: 0..n-1) and
    `title` is used as the figure-level title.
    """
    n_obs = y_test_reframed_sup.shape[0]
    if index is None:
        index = range(0, n_obs)

    # x positions, addressable by row number
    x_lookup = pd.DataFrame(data = index, index = range(0, n_obs))
    x_lookup.columns = ["user_index"]

    # leading test rows without a prediction
    offset = n_obs - y_predict_reframed_sup.shape[0]

    fig, axes = plt.subplots(figsize = (9, 6), sharex = True, nrows = y_test_reframed_sup.shape[1], squeeze = False)

    for col, panel in enumerate(axes.flat):
        observed = y_test_reframed_sup[:,col]
        predicted = y_predict_reframed_sup[:,col]
        panel.step(x_lookup.values, observed, where = "post", label = "Testing Set", color = "blue")
        panel.step(x_lookup.loc[offset:, "user_index"].values, predicted, where = "post",
                   label = "Predictions", color = "red")

    plt.suptitle(title)
    plt.legend()

In [None]:
pd.DataFrame(y_test_reframed_sup.reshape(215134,4)).index

In [None]:
# Step-plot the test targets against the model predictions.
# The first line break in the title was typed as "/n"; it is corrected to "\n" so it
# renders as a newline like the second one.
plot_results(y_test_reframed_sup, y_predict_reframed_sup,
             title = "Active Layer Thickness \n Predictions v. Test Data \n via 8-layer C1DLSTMSAE Network")

In [None]:
Y_predicted=Y_predicted.reshape(397492,)#;Y_test=Y_test.reshape(668168,)

In [None]:
def qq_plot(Y_test, Y_predicted, title = "Test Data and Predictions", index = None):
    """Scatter predictions against observations, one subplot row per target column.

    Despite the name, no quantiles are computed: each panel plots predicted vs. observed
    values (red) on top of the observed-vs-observed identity line (blue).

    Parameters
    ----------
    Y_test : 2-D array indexed as [row, target]
        Observed values.
    Y_predicted : 2-D array indexed as [row, target]
        Predicted values. If it has fewer rows than `Y_test`, the first
        `len(Y_test) - len(Y_predicted)` observations are skipped when pairing.
    title : str
        Figure-level title.
    index : sequence, optional
        Accepted so the signature matches `plot_results`; the scatter does not use it.
        (The frame previously built from it was never read and has been removed.)
    """
    # Leading test rows that have no matching prediction.
    shift = Y_test.shape[0] - Y_predicted.shape[0]

    fig, axes = plt.subplots(figsize = (9, 6), sharex = True, nrows = Y_test.shape[1], squeeze = False)

    for target, ax in enumerate(axes.flat):
        ax.scatter(Y_test[shift:,target], Y_predicted[:,target], label = "Predictions", color = "red", s = 5,
                   alpha = 0.5)
        # Identity reference: points on this line are perfect predictions.
        ax.scatter(Y_test[:,target], Y_test[:,target], label = "Testing Set", color = "blue", s = 5)

    plt.suptitle(title)
    plt.legend()

In [None]:
# Scatter the scaled predictions against the test targets.
# The first line break in the title was typed as "/n"; it is corrected to "\n".
qq_plot(Y_test.values, Y_predicted_scaled, index = Y_test.index,
        title = "Active Layer Thickness \n Predictions v. Test Data via \n 7-layer Sequential Time-Distributed C1DLSTMSAE Network")

In [None]:
Y_predicted_reframed = model.predict(X_test_reframed)
Y_predicted = Y_predicted_reframed.reshape(Y_predicted_reframed.shape[0], Y_predicted_reframed.shape[1])

In [None]:
import pandas as pd
testim=pd.read_csv(r'/Users/bradleygay/test_store_ALT_2022.csv')

In [None]:
testim.index=testim.iloc[:,0]

In [None]:
testim=testim.drop(testim.columns[0],axis=1)

In [None]:
testim.index.name = None

In [None]:
#testim.index=pd.to_datetime(testim.index, format='%Y')

In [None]:
testim=testim.sort_index()

In [None]:
testim.index = pd.to_datetime(testim.index)

In [None]:
testim

In [None]:
plt.plot(scaler_Y.inverse_transform(testim))

In [None]:
# Plot inverse-scaled test targets against inverse-scaled predictions
# (title typo "Predicteed" corrected).
plot_results(scaler_Y.inverse_transform(Y_test.values), scaler_Y.inverse_transform(Y_predicted),
             index = Y_test.index, title = "Active Layer Thickness - Predicted v. Test Data")

In [None]:
test_scores = model.evaluate(X_test_reframed, Y_test[backward_steps:],
                                           batch_size=hp["batch_size"], use_multiprocessing=True,)

In [None]:
def metrics_print(test_data,test_predict):
    """Print RMSE, R^2 (as a percentage) and MAPE (as a percentage), each rounded to 2 decimals.

    `test_data` holds the reference values and `test_predict` the predictions; both are
    passed straight to the corresponding sklearn.metrics functions.
    """
    # Each metric is computed and printed in turn, in the same order as before.
    rmse = np.sqrt(sklearn.metrics.mean_squared_error(test_data, test_predict))
    print('Test RMSE: ', round(rmse, 2))

    r2_percent = sklearn.metrics.r2_score(test_data, test_predict)*100
    print('Test R^2 : ', round(r2_percent, 2) ,"%")

    mape_percent = sklearn.metrics.mean_absolute_percentage_error(test_data, test_predict)*100
    print('Test MAPE: ', round(mape_percent,2), '%')

In [None]:
print("##************** Linear Regression Results **************##")
metrics_print(prediction_df['Observed'], prediction_df['LR'])
print(" ")
print(" ")

print("##************** Deep Learning Results **************##")
metrics_print(prediction_df['Observed'], prediction_df['DNN'])
print(" ")
print(" ")

In [None]:
fa = plt.figure(figsize=(16,5))
plt.subplot(1,2,1)
plt.scatter(prediction_df['Observed'],prediction_df['LR'])
plt.xlabel('True Values [snow_depth]', fontsize=15)
plt.ylabel('Predictions [snow_depth]', fontsize=15)
plt.title("Linear Regression")


plt.subplot(1,2,2)
plt.scatter(prediction_df['Observed'],prediction_df['DNN'])
plt.xlabel('True Values [snow_depth]', fontsize=15)
plt.ylabel('Predictions [snow_depth]', fontsize=15)
plt.title("Deep Neural Network")

In [None]:
LR_error = prediction_df['Observed'] - prediction_df['LR']
DNN_error = prediction_df['Observed'] - prediction_df['DNN']

fa = plt.figure(figsize=(16,5))

plt.subplot(1,2,1)
LR_error.hist()
plt.xlabel('Error', fontsize=15)
plt.ylabel('Frequency', fontsize=15)
plt.title("Linear Regression")

plt.subplot(1,2,2)
DNN_error.hist()
plt.xlabel('Error', fontsize=15)
plt.ylabel('Frequency', fontsize=15)
plt.title("Deep Neural Network")

In [None]:
network.save('DNN')

## To load model, use;
model = tf.keras.models.load_model('DNN')

In [None]:
model.evaluate(X_test_reframed, Y_test.iloc[backward_steps:], batch_size = None, verbose = 1, use_multiprocessing = True)

In [None]:
def estimate_hyperband_load(max_epochs, factor, hyperband_iterations = 1, sec_per_epoch = None):
    """Estimate the cumulative epoch count (and optionally wall time) of a Hyperband search.

    The epoch count is
    ``round(hyperband_iterations * max_epochs * log(max_epochs, factor) ** 2)``.

    Parameters
    ----------
    max_epochs : int
        Maximum number of epochs used to train a single model.
    factor : int or float
        Logarithm base used in the estimate (the tuner's reduction factor).
    hyperband_iterations : int, default 1
        Number of full passes over the Hyperband algorithm.
    sec_per_epoch : float, optional
        Measured seconds per epoch. When given (0 included), the runtime is returned
        as a ``str(timedelta)``; when None, a placeholder message is returned.

    Returns
    -------
    (int, str)
        ``(total_epochs, estimate_runtime)``.
    """
    total_epochs = round(hyperband_iterations * max_epochs * (math.log(max_epochs, factor) ** 2))
    # Identity comparison is the idiomatic None test and never triggers element-wise
    # `==` on array-like inputs.
    if sec_per_epoch is None:
        estimate_runtime = "Unknown, provide sec_per_epoch to compute."
    else:
        estimate_runtime = str(timedelta(seconds = round(sec_per_epoch * total_epochs)))
    return total_epochs, estimate_runtime

In [None]:
estimate_hyperband_load(300, 10, sec_per_epoch = elapsed_time/len(history.history["val_loss"]))

In [None]:
hyperband_tuner = Hyperband(custom_HyperModel(),
                    hyperparameters = hp,
                    objective = "val_loss", 
                    project_name ="time_hyperband_LSTM_tuning",
                    max_epochs = 300, #maximum number of epochs to train one model
                    hyperband_iterations = 1,  #the number of times to iterate over the full Hyperband algorithm
                    factor = 10, # changed from 3 to 10
                    directory = main_dir,
                    #distribution_strategy = tf.distribute.MirroredStrategy(["/cpu:0","/cpu:1", "/cpu:2", "/cpu:3"]),
                    tuner_id = "Hyperband",
                    overwrite = True, 
                    )

In [None]:
# perform hyperparameters tuning
hyperband_tuner.search(X_train_reframed, 
                         Y_train, 
                         steps_per_epoch = None, 
                         shuffle = False, 
                         validation_split = 0.20, 
                         verbose = 1, #
                         callbacks = [early_stopping, 
                                      History(), 
                                    TerminateOnNaN()], 
                         use_multiprocessing = True, 
                        )

In [None]:
hyperband_best_hps = hyperband_tuner.get_best_hyperparameters()[0]
hyperband_best_model = hyperband_tuner.hypermodel.build(hyperband_best_hps)

In [None]:
hyperband_best_hps.values

In [None]:
hyperband_best_model.fit(X_train_reframed, Y_train[1:], 
                           epochs = 300,     
                           shuffle = False,
                           steps_per_epoch = None,
                           validation_split = 0,
                           batch_size = hyperband_best_hps["batch_size"],
                           callbacks = [EarlyStopping(monitor='loss', verbose = 0, patience = 3, min_delta = 1e-3, restore_best_weights = True), 
                                          History(), TerminateOnNaN()],
                           verbose = 1,
                           use_multiprocessing = True
                          )

In [None]:
Y_predicted_reframed = hyperband_best_model.predict(X_test_reframed)

Y_predicted = Y_predicted_reframed.reshape(Y_predicted_reframed.shape[0], Y_predicted_reframed.shape[1])

In [None]:
plot_results(scaler_Y.inverse_transform(Y_test.values), scaler_Y.inverse_transform(Y_predicted),
             index = Y_test.index, title = "Pollution - predicted vs test data")

In [None]:
hyperband_test_scores = hyperband_best_model.evaluate(X_test_reframed, Y_test[backward_steps:], batch_size=hyperband_best_hps["batch_size"], 
                                    use_multiprocessing=True,
                                 )
hyperband_test_scores

In [None]:
from sklearn.metrics import r2_score
r2_score(Y_test[backward_steps:], Y_predicted)

In [None]:
hyperband_best_model.save("best_model_with_hyperband")

In [None]:
tf.keras.models.load_model("best_model_with_hyperband")

In [None]:
################################################
################################################

In [None]:
from sklearn.model_selection import KFold
scores=[]
kFold=KFold(n_splits=5,shuffle=False)
for train_index,test_index in kFold.split(X):
    print("Train Index: ", train_index, "\n")
    print("Test Index: ", test_index)

In [None]:
legacy

In [None]:
import plotly.express as px
fig = px.parallel_coordinates(df, color="species_id",
                              dimensions=['sepal_width', 'sepal_length', 'petal_width',
                                          'petal_length'],
                              color_continuous_scale=px.colors.diverging.Tealrose,
                              color_continuous_midpoint=2)
fig.show()

In [None]:
print(list(divisorGenerator(132*8*8291)))

In [None]:
initinputs=Xtrainsc_sup.values.reshape(8291, 53, 94, 20, 1)#15,30,7129,154,1)

In [None]:
initinputs.shape

In [None]:
inputs = keras.Input(shape=(initinputs.shape[0], initinputs.shape[1],initinputs.shape[2], initinputs.shape[3], 
                            initinputs.shape[4]))

In [None]:
#keras.Input(shape=(inputs.shape[0],inputs.shape[1], inputs.shape[2],inputs.shape[3], inputs.shape[4]))#, df2.shape[5]))
inputs

In [None]:
# model.layers[0].input_shape

In [None]:
#########If data_format='channels_first' 6D tensor with shape: (samples, time, channels, rows, cols, depth)
#Input shape:
#If data_format='channels_last' 5D tensor with shape: (samples <inputs takes care of batch_size for us with None>, 
#time, rows, cols, depth, channels)

None, 382, None, 552, 4, 6, 128 <br>
timestep, width, height, number of the spectral band, kernel size and depth

In [None]:
# Build a Sequential model: a Masking layer, four ConvLSTM3D stages (each
# followed by time-distributed 3-D max pooling, batch normalisation and
# dropout), a Reshape, four Conv3DTranspose layers, and a time-distributed
# Dense(1) whose output is flattened.
# NOTE(review): ConvLSTM3D, Reshape, Conv3DTranspose and TimeDistributed are
# used unqualified below but are not imported in this cell -- confirm they are
# imported elsewhere in the notebook.
# NOTE(review): Masking is imported from standalone `keras` while the model is
# a `tf.keras.Sequential` -- confirm the two packages are interchangeable in
# the installed versions.
from keras.layers import Masking
model = tf.keras.Sequential()
# NOTE(review): the Masking layer declares input_shape=(2, 1) while the first
# ConvLSTM3D declares a 5-D input_shape taken from `inputs`; these look
# incompatible -- confirm which one is intended.
model.add(Masking(mask_value=-1, input_shape=(2, 1)))
model.add(ConvLSTM3D(filters=30, 
                     kernel_size=(3, 3, 3), 
                     strides=(1,1,1),
                     activation='relu', 
                     return_sequences=True,
                     padding='same',
                     data_format='channels_last',
                     input_shape=(inputs.shape[1], inputs.shape[2], inputs.shape[3], inputs.shape[4], inputs.shape[5])))
#model.add(MaxPooling3D(pool_size=(2, 2, 2)))
model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(2,2,2))))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(ConvLSTM3D(filters=16, 
                     kernel_size=(3, 3, 3), 
                     strides=(1,1,1),
                     activation='relu', 
                     return_sequences=True,
                     padding='same'))
                     #data_format='channels_last'))
                     #input_shape=(inputs.shape[0], inputs.shape[2], inputs.shape[3], inputs.shape[4], inputs.shape[5])))
model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(2,2,2))))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(ConvLSTM3D(filters=16, 
                     kernel_size=(3, 3, 3), 
                     strides=(1,1,1),
                     activation='relu', 
                     return_sequences=True,
                     padding='same'))
                     #data_format='channels_last'))
model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(2,2,2))))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(ConvLSTM3D(filters=8, 
                     kernel_size=(1, 1, 128), 
                     strides=(1,1,1),
                     activation='relu', 
                     return_sequences=True,
                     padding='same'))
                     #data_format='channels_last'))
#model.add(MaxPooling3D(pool_size=(2, 2, 2)))
model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=(1,1,1))))
model.add(BatchNormalization())
model.add(Dropout(0.2))
# NOTE(review): the Reshape target (45, 8291, 19, 8) and the declared
# input_shape (8291, 6, 11, 2, 8) have different element counts
# (56,710,440 vs 8,755,296) -- confirm the intended shapes.
model.add(Reshape((45, 8291, 19, 8), input_shape=(8291, 6, 11, 2, 8)))
model.add(Conv3DTranspose(8, (4, 4, 4), strides=(2,2,2), padding='same'))
model.add(Conv3DTranspose(16, (4, 4, 4), strides=(2,2,2), padding='same'))
model.add(Conv3DTranspose(16, (4, 4, 4), strides=(2,2,2), padding='same'))
model.add(Conv3DTranspose(32, (4, 4, 4), strides=(2,2,2), padding='same'))
model.add(TimeDistributed(Dense(1)))
model.add(Flatten())
# model.add(TimeDistributed(Dense(y_train[0])))
# model.add(Dense(x_train.shape[1]))
# NOTE(review): categorical cross-entropy with an accuracy metric is paired
# here with a Dense(1) output; other cells in this notebook score predictions
# with MSE/RMSE -- confirm the loss matches the task.
model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adam(learning_rate=0.0001),
              metrics=['accuracy'])
model.summary()


In [None]:
# Call summary() so the layer table is printed; the previous
# `print(model.summary)` only printed the bound-method repr.
model.summary()

## Look into

In [None]:
# model = tf.keras.Sequential()
# model.add(ConvLSTM1D(filters=376, 
#                      kernel_size=3, 
#                      strides=1,
#                      activation='relu', 
#                      return_sequences=True,
#                      padding='valid',
#                      data_format='channels_last',
#                      input_shape=(inputs.shape[1], inputs.shape[2], inputs.shape[3])))
# #model.add(MaxPool1D(pool_size=1))
# #model.add(layers.TimeDistributed(layers.MaxPooling1D(pool_size=2)))
# model.add(BatchNormalization())
# model.add(Dropout(0.2))
# model.add(ConvLSTM1D(filters=48, 
#                      kernel_size=3, 
#                      strides=1,
#                      activation='relu', 
#                      return_sequences=True,
#                      padding='valid'))
#                      #data_format='channels_last'))
#                      #input_shape=(inputs.shape[0], inputs.shape[2], inputs.shape[3], inputs.shape[4], inputs.shape[5])))
# #model.add(layers.TimeDistributed(layers.MaxPooling3D(pool_size=2)))
# model.add(BatchNormalization())
# model.add(Dropout(0.2))
# model.add(ConvLSTM1D(filters=16, 
#                      kernel_size=3, 
#                      strides=1,
#                      activation='relu', 
#                      return_sequences=True,
#                      padding='valid'))
#                      #data_format='channels_last'))
# #model.add(layers.TimeDistributed(layers.MaxPooling1D(pool_size=2)))
# model.add(BatchNormalization())
# model.add(Dropout(0.2))
# model.add(ConvLSTM1D(filters=8, 
#                      kernel_size=1, 
#                      strides=1,
#                      activation='relu', 
#                      return_sequences=True,
#                      padding='valid'))
#                      #data_format='channels_last'))
# #model.add(MaxPooling3D(pool_size=(2, 2, 2)))
# #model.add(layers.TimeDistributed(layers.MaxPooling1D(pool_size=1)))
# model.add(BatchNormalization())
# model.add(Dropout(0.2))
# model.add(Reshape((14183608, 1), input_shape=(1772957, 1, 8)))
# model.add(Conv1DTranspose(8, 4, strides=2, padding='valid'))
# model.add(Conv1DTranspose(16, 4, strides=2, padding='valid'))
# model.add(Conv1DTranspose(48, 4, strides=2, padding='valid'))
# model.add(Conv1DTranspose(376, 4, strides=2, padding='valid'))
# model.add(TimeDistributed(Dense(1)))
# #model.add(Flatten())
# # model.add(TimeDistributed(Dense(y_train[0])))
# # model.add(Dense(x_train.shape[1]))
# model.compile(loss='mse',#loss='categorical_crossentropy',
#               optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.0001),
#               metrics=['MeanAbsoluteError','MeanSquaredError','RootMeanSquaredError'])#metrics=['accuracy'])
# model.summary()

In [None]:
# print(model.summary())

In [None]:
#print(list(divisorGenerator(14183656)))

In [None]:
# Xtrainsc_sup_new=Xtrainsc_sup.reshape(1, 56, 31660, 450)
# ytrainsc_sup_new=ytrainsc_sup.reshape(1, 56, 31660, 5)

In [None]:
# # Fit data to model
# history = model.fit(Xtrainsc_sup, ytrainsc_sup,
#             batch_size=256,
#             epochs=10,
#             verbose=1,
#                    validation_split=0.2)

In [None]:
#Baseline model prior to training
#model.evaluate(Xtrainsc_sup,ytrainsc_sup,verbose=1)
#1: [0.9084790349006653, 0.9084790349006653, 0.9531416893005371]
#2: [0.9072189331054688, 0.9072189331054688, 0.9524804353713989]
#3: [0.9084791541099548, 0.9084791541099548, 0.9531500339508057]
#4: [0.9240716099739075, 0.9240716099739075, 0.9612789154052734]
#5: [0.8854997754096985, 0.8854997754096985, 0.9410284161567688]

In [None]:
#pd.DataFrame(y_test_reframed_sup.reshape(215134,4))
# #Naive Persistence Model
# Y_p = pd.DataFrame(y_test_reframed_sup.reshape(215134,4)).iloc[:, -1]
# np.mean(keras.losses.mean_squared_error(y_test_reframed_sup, Y_p))

In [None]:
import tensorflow.keras.backend as K

# Wrap each scaled, supervised split in a constant backend tensor.
Xtr, ytr, Xv, yv, Xt, yt = (
    K.constant(split)
    for split in (Xtrainsc_sup, ytrainsc_sup,
                  Xvalidsc_sup, yvalidsc_sup,
                  Xtestsc_sup, ytestsc_sup)
)

In [None]:
# Train on 1969-2018 and validate on 2019-2021.
# The target column 'ALT' is excluded from the feature tensors (same column
# mask used for the X/y split elsewhere in this notebook); previously the whole
# frame, target included, was passed as the model input.
history = model.fit(K.constant(df.loc['1969':'2018', df.columns != 'ALT']),
                    K.constant(df.loc['1969':'2018', 'ALT']),
                    epochs=20,
                    validation_data=(K.constant(df.loc['2019':'2021', df.columns != 'ALT']),
                                     K.constant(df.loc['2019':'2021', 'ALT'])))

In [None]:
insitumodule=model

In [None]:
inputs

In [None]:
X_train_reframed.shape, y_train_reframed.shape

In [None]:
#batch size
#available GPU memory bytes / 4 / (size of tensors + trainable parameters)
#16000 / 4 / (12 + 1)
#~256

In [None]:
#################

In [None]:
# from tensorflow.keras.models import Sequential, model_from_json
# json_file = open('model_lstmsae_042323_insituALT.json', 'r')
# loaded_model_json = json_file.read()
# json_file.close()
# loaded_model = model_from_json(loaded_model_json)
# # load weights into new model
# loaded_model.load_weights("model_lstmsae_042323_insituALT.h5")
# print("Loaded model from disk")

In [None]:
###################

In [None]:
# with open('trainHistoryDictALT', "rb") as file_pi:
#     history = pickle.load(file_pi)

In [None]:
# # early stopping implementation
# filepath="weights_bilstmsae_ALT.best_070723_experimental.hdf5"
# tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
# checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
# early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, mode='min') #patience=200, mode='min') 
# callbacks_list = [tensorboard_cb, checkpoint, early_stop]

# history=insitumodule.fit(X_train_reframed, y_train_reframed, validation_data=(X_valid_reframed, y_valid_reframed), epochs=10, batch_size=256, verbose=1, \
#                          shuffle=False, callbacks=callbacks_list)

In [None]:
# fig,ax = plt.subplots(figsize=(10,6), dpi=1000)
# ln1=ax.plot(history.history['loss'], color='magenta', linestyle='solid', label='Loss (Training Loss)')
# #ln2=ax.plot(history.history['val_loss'], color='dodgerblue', linestyle='solid', label='Val Loss (MSE)')
# ln2=ax.plot(history.history['mean_absolute_error'], color='red', linestyle='dashed', label='Training MAE')
# ln3=ax.plot(history.history['mean_squared_error'], color='green', linestyle='dashed', label='Training MSE')
# ln4=ax.plot(history.history['root_mean_squared_error'], color='blue', linestyle='dashed', label='Training RMSE')
# ln5=ax.plot(history.history['val_loss'], color='springgreen', linestyle='solid', label='Validation Loss')
# ln6=ax.plot(history.history['val_mean_absolute_error'], color='red', linestyle='dotted', label='Validation MAE')
# ln7=ax.plot(history.history['val_mean_squared_error'], color='green', linestyle='dotted', label='Validation MSE')
# ln8=ax.plot(history.history['val_mean_squared_error'], color='blue', linestyle='dotted', label='Validation RMSE')
# #ln2=ax.plot(sib.iloc[2:,7].replace(-9999,np.nan).dropna()color='springgreen', linestyle='dashed', label='Interior')
# #ln3=ax.plot(sib.iloc[2:,16].replace(-9999,np.nan).dropna(), color='magenta', linestyle='dotted', label='Seward Peninsula')
# #ln4=ax.plot(sib.iloc[2:,39].replace(-9999,np.nan).dropna(), color='dodgerblue', linestyle='dotted', label='Yukon-Kuskokwim Delta')
# #ax2=ax.twinx();
# #ln4=ax2.plot(validPredict, color='coral', linestyle='dotted')

# lines = ln1 + ln2 +ln3 + ln4 + ln5 + ln6 + ln7 + ln8
# labs = [line.get_label() for line in lines];
# plt.legend(lines, labs, loc='best', fontsize=8)

# ax.grid(linewidth=0.3);
# ax.set_xlabel('Epochs', labelpad=10, fontsize=16);
# ax.set_ylabel('Error (cm)', labelpad=10, fontsize=16)
# #ax.set(xticklabels=[])  # remove the tick labels
# ax.tick_params(left=False)  # remove the ticks
# #plt.ylabel('Active Layer Thickness (cm)')
# plt.title('GeoCryoAI In Situ Module | Bidirectional LSTM Autoencoder Loss Function \n In Situ Thaw Depth Simulations (1969-2022)', pad=10)
# #plt.xlabel('Year')
# #plt.axis([0, 6, 0, 60])
# #plt.legend(loc='best')
# plt.show()
# #plt.savefig('/Users/bradleygay/Downloads/bilstmae_insitu_ALT_loss.png',dpi=1000)

In [None]:
X_test_reframed.shape

In [None]:
Xscaler.inverse_transform(X_test_reframed.reshape(215136,456))

In [None]:
X_train_reframed.shape

In [None]:
#X_train_reframed_sup.shape
#1772957*376#666631832
#666631832/94#7091828.0
Xscaler.inverse_transform(X_train_reframed.reshape(1432317,456))

In [None]:
X_valid_reframed.shape

In [None]:
#X_valid_reframed_sup.shape
#453141*376#170381016
#170381016/94#1812564.0
Xscaler.inverse_transform(X_valid_reframed.reshape(453144,456))

In [None]:
#y_test_reframed.shape
# 215136 / 32 == 6723, so the test targets divide evenly into 32 blocks of 6723.
# The reshape is now attached to y_test_reframed; a line that starts with
# `.reshape(...)` on its own is a SyntaxError.
y_test_reframed.reshape(32, 6723, 1)

In [None]:
yscaler.inverse_transform(y_test_reframed.reshape(215136,1))

In [None]:
score_experimental = model2.evaluate(X_test_reframed, y_test_reframed, verbose = 1) 

In [None]:
y_predicted_reframed = model2.predict(X_test_reframed, verbose = 1, use_multiprocessing = True)

In [None]:
np.sqrt(mean_squared_error(y_test_reframed.reshape(215136,1), y_predicted_reframed.reshape(215136,1)))

In [None]:
print("R^2: {}".format(reg_all.score(X_test, y_test)))
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error: {}".format(rmse))

In [None]:
y_predicted_reframed=y_predicted_reframed.reshape(215136,1,1)

In [None]:
y_predicted_scaled = y_predicted_reframed.reshape(y_predicted_reframed.shape[0], y_predicted_reframed.shape[1])

In [None]:
#y_predicted_reframed = yscaler.inverse_transform(y_predicted_reframed)

In [None]:
y_predicted = yscaler.inverse_transform(y_predicted_scaled)

In [None]:
y_test_reframed.shape, testy.shape, testysc.shape

In [None]:
# y_predicted_reframed is reshaped to (215136, 1, 1) in an earlier cell, and
# scikit-learn's regression metrics only accept 1-D or 2-D inputs, so both
# arrays are collapsed to a single column before scoring (as the RMSE cell does).
MSE_testpred = sklearn.metrics.mean_squared_error(y_test_reframed.reshape(-1, 1),
                                                  y_predicted_reframed.reshape(-1, 1))
MSE_testpred

In [None]:
from sklearn.metrics import confusion_matrix
# NOTE(review): confusion_matrix is a classification metric. Elsewhere in this
# notebook y_test_reframed / y_predicted_reframed are passed through
# yscaler.inverse_transform and scored with MSE, so they look like continuous
# regression values -- confirm this cell is still wanted.
cm = confusion_matrix(y_test_reframed, y_predicted_reframed)
cm

In [None]:
from sklearn.model_selection import cross_val_score
# 10-fold cross-validation of `model` on the reframed training data (n_jobs=-1).
# NOTE(review): `model` appears to be a Keras model in this notebook, while
# cross_val_score expects a scikit-learn estimator -- confirm a wrapper is in
# place before relying on this cell.
accuracies = cross_val_score(estimator = model,X = X_train_reframed,y = y_train_reframed,cv = 10,n_jobs = -1)

In [None]:
mean = accuracies.mean()
mean

In [None]:
variance = accuracies.var()
variance

In [None]:
y_test_reframed.shape, y_predicted_reframed.shape, y_predicted_scaled.shape

In [None]:
y_test_reframed_inv=yscaler.inverse_transform(y_test_reframed.reshape(215136,1))

In [None]:
# def plot_results(Y_test, Y_predicted, title = "Test Data and Predictions", index = None):
#     if index is None:
#         index = range(0, Y_test.shape[0])
#     df_index = pd.DataFrame(data = index, index = range(0, Y_test.shape[0]))
#     df_index.columns = ["user_index"]  

#     shift = Y_test.shape[0] - Y_predicted.shape[0]

#     fig, axes = plt.subplots(figsize = (9, 6), sharex = True, nrows = Y_test.shape[1], squeeze = False)
    
#     for target, ax in enumerate(axes.flat):
#         ax.step(df_index.values, Y_test[:,target], where = "post", label = "Testing Set", color = "blue")
#         ax.step(df_index.loc[shift:, "user_index"].values, Y_predicted[:,target], where = "post", 
#                 label = "Predictions", color = "red")
    
#     plt.suptitle(title)
#     plt.legend()

In [None]:
# plot_results(y_test_reframed_inv, y_predicted_scaled, index = y_test_reframed_inv, 
#              title = "GeoCryoAI Loss | Active Layer Thickness /n Predictions v. Test Data")

In [None]:
# Draw all three splits in the same units. The test series is already
# inverse-transformed (y_test_reframed_inv), so the train and validation
# targets go through the same yscaler instead of being plotted in scaled units.
plt.plot(yscaler.inverse_transform(y_train_reframed.reshape(1432317,1)))
plt.plot(yscaler.inverse_transform(y_valid_reframed.reshape(453144,1)))
plt.plot(y_test_reframed_inv)

In [None]:
################################################################
#Model
################################################################

In [None]:
#plt.plot(history2.history['mean_absolute_error'])
#plt.plot(history2.history['mean_squared_error'])
#plt.plot(history2.history['val_mean_absolute_error'])
#plt.plot(history2.history['val_mean_squared_error'])

In [None]:
# fig,ax = plt.subplots(figsize=(10,6), dpi=1000)
# ln1=ax.plot(history2.history['loss'], color='magenta', linestyle='solid', label='Loss (MSE)')
# ln2=ax.plot(history2.history['val_loss'], color='dodgerblue', linestyle='solid', label='Val Loss (MSE)')
# ln3=ax.plot(history2.history['mean_absolute_error'], color='springgreen', linestyle='dotted', label='MAE')
# #ln4=ax.plot(history.history['mean_squared_error'], color='springgreen', linestyle='dashed', label='Seward Peninsula')
# #ln4=ax.plot(history2.history['root_mean_squared_error'], color='springgreen', linestyle='dashed', label='RMSE')
# ln4=ax.plot(history2.history['val_mean_absolute_error'], color='red', linestyle='dotted', label='Val MAE')
# #ln7=ax.plot(history.history['val_mean_squared_error'], color='red', linestyle='dashed', label='Seward Peninsula')
# #ln6=ax.plot(history2.history['val_root_mean_squared_error'], color='red', linestyle='dashed', label='Val RMSE')
# #ln2=ax.plot(sib.iloc[2:,7].replace(-9999,np.nan).dropna()color='springgreen', linestyle='dashed', label='Interior')
# #ln3=ax.plot(sib.iloc[2:,16].replace(-9999,np.nan).dropna(), color='magenta', linestyle='dotted', label='Seward Peninsula')
# #ln4=ax.plot(sib.iloc[2:,39].replace(-9999,np.nan).dropna(), color='dodgerblue', linestyle='dotted', label='Yukon-Kuskokwim Delta')
# #ax2=ax.twinx();
# #ln4=ax2.plot(validPredict, color='coral', linestyle='dotted')

# lines = ln1 + ln2 + ln3 + ln4 #+ ln5 #+ ln6# + ln7 + ln8
# labs = [line.get_label() for line in lines];
# plt.legend(lines, labs, loc='lower left', fontsize=8)

# ax.grid(linewidth=0.3);
# ax.set_xlabel('Epochs', labelpad=10, fontsize=16);
# ax.set_ylabel('Loss (cm)', labelpad=10, fontsize=16)
# #ax.set(xticklabels=[])  # remove the tick labels
# ax.tick_params(left=False)  # remove the ticks
# #plt.ylabel('Active Layer Thickness (cm)')
# plt.title('GeoCryoAI In Situ Module | Bidirectional LSTM Autoencoder Loss Function \n In Situ Thaw Depth Simulations (1969-2022)', pad=10)
# #plt.xlabel('Year')
# #plt.axis([0, 6, 0, 60])
# #plt.legend(loc='best')
# plt.show()
# #plt.savefig('/Users/bradleygay/Downloads/bilstmae_insitu_ALT_loss.png',dpi=1000)

In [None]:
# Validation-loss (MSE) curve for the run stored in `history2`, labelled for the
# CH4 flux experiment. For the training-loss or thaw-depth (ALT) variants, swap
# the history key, the y-label and the title accordingly.
fig, ax = plt.subplots(figsize=(10, 6), dpi=1000)

val_mse_curve = history2.history['val_mean_squared_error']
ln1 = ax.plot(val_mse_curve, color='coral', linestyle='solid', label='Validation Loss (MSE)')

ax.legend(loc='best', fontsize=8)
ax.grid(linewidth=0.3)
ax.set_xlabel('Full Iterations (Epochs)', labelpad=15, fontsize=10)
ax.set_ylabel('Validation Loss, MSE (nmolCH4m2s-1)', labelpad=15, fontsize=10)
ax.tick_params(left=False)  # remove the ticks
ax.set_title('GeoCryoAI Validation Loss | In Situ CH4 Flux Simulations [2011-2019]', pad=15, fontsize=14)

plt.show()
#plt.savefig('/Users/bradleygay/Downloads/bilstmae_insitu_ALT_loss.png',dpi=1000)

In [None]:
ex=history2

In [None]:
# Convert the history2.history dict to a pandas DataFrame.
hist_df = pd.DataFrame(history2.history)

# Persist the training history as JSON and CSV. The file names are handed
# straight to pandas so it opens and closes the files itself; writing CSV
# through a text handle opened without newline='' is discouraged by the pandas
# documentation and can produce blank rows on Windows.
# Swap 'CH4' for 'ALT' or 'CO2' in both names when saving those experiments.
hist_json_file = 'historyCH4_experimental.json'
hist_df.to_json(hist_json_file)

hist_csv_file = 'historyCH4_experimental.csv'
hist_df.to_csv(hist_csv_file)

In [None]:
#with open('trainHistoryDictALT_experimental', 'wb') as file_pi:
with open('trainHistoryDictCH4_experimental', 'wb') as file_pi:
#with open('trainHistoryDictCO2_experimental', 'wb') as file_pi:
    pickle.dump(history2.history, file_pi)

In [None]:
# from statsmodels.tsa.stattools import adfuller
# adfuller(np.array(y_test_reframed_sup).reshape(860536,))

In [None]:
#Z=X_train_reshaped.reshape(1772957,285)

In [None]:
# #Spatial Autocorrelation

# # Use your matrix here, instead of this random one
# #Z = np.random.rand(200,150)

# # Create the matrix of weigthts 
# w = lat2W(Z.shape[0], Z.shape[1])

# # Create the pysal Moran object 
# mi = Moran(Z, w)

# # Verify Moran's I results 
# print(mi.I) 
# print(mi.p_norm)

## Continue...

In [None]:
#geocryoai(X_train)
#img_file = '/Users/bradleygay/Downloads/model_arch.jpeg'
img_file = '/Users/bradleygay/Downloads/GeoCryoAI_Arch_042123.jpeg'
tf.keras.utils.plot_model(model, to_file=img_file, show_shapes=True, show_layer_names=True, dpi=1000)

In [None]:
#like a sore thumb

In [None]:
#realsc=realsc.reshape(239,37,425,1,744)

In [None]:
model.layers[0].input_shape

In [None]:
model.layers[0].output_shape

In [None]:
inputs.shape

In [None]:
print(list(divisorGenerator(450)))

In [None]:
outinputs=initinputs.reshape(15,30,7129, 154,1)

In [None]:
#outinputs.flatten().reshape(10,45,7129,154,1)
outinputs=outinputs.reshape(15, 30, 7129, 154, 1)

In [None]:
from sklearn.model_selection import train_test_split

# Separate the test data
x, x_test, y, y_test = train_test_split(outinputs, list(range(outinputs.shape[0])), test_size=0.1, shuffle=False)

# Split the remaining data to train and validation
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, shuffle=False)

In [None]:
# Second, 70/30 ordered split of the train+validation portion (`x`, `y`).
# NOTE(review): this rebinds y_train and y_test from the previous cell, which
# changes their lengths before the reshape(1,10) / reshape(1,2) calls further
# down -- confirm that overwriting them here is intended.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, shuffle=False)

In [None]:
X_train.shape, x_test.shape

In [None]:
# Reshape the three splits to hard-coded shapes.
# NOTE(review): reshape cannot change the element count. If x_train / x_val
# still hold 10 and 3 samples of shape (30, 7129, 154, 1) from the split above,
# the extra factor of 10 in the first two targets will raise -- confirm the
# intended shapes.
x_train=x_train.reshape(10, 10, 30,7129,154,1)
x_val=x_val.reshape(3, 10, 30, 7129,154,1)
x_test=x_test.reshape(2, 30,7129,154,1)

In [None]:
# x_train=x_train.reshape(1,324,7129,154,1,1)
# x_val=x_val.reshape(27,3,7129,154,1,1)
# x_test=x_test.reshape(15,3,7129,154,1,1)

In [None]:
y_train=np.asarray(y_train).reshape(1,10)
y_val=np.asarray(y_val).reshape(1,3)
y_test=np.asarray(y_test).reshape(1,2)

In [None]:
y_train.shape

In [None]:
model.layers[0].input_shape

In [None]:
# inputs2 = keras.Input(shape=(x_train.shape[0], x_train.shape[1],x_train.shape[2], x_train.shape[3], x_train.shape[4]))
# inputs3 = keras.Input(shape=(x_val.shape[0], x_val.shape[1],x_val.shape[2], x_val.shape[3], x_val.shape[4]))
# inputs3y = keras.Input(shape=(yv[0],yv[1],yv[2]))
# inputs4 = keras.Input(shape=(x_test.shape[0], x_test.shape[1],x_test.shape[2], x_test.shape[3], x_test.shape[4]))

In [None]:
# Convert every split (inputs and labels) to a TensorFlow tensor.
x_traint=tf.convert_to_tensor(x_train)
y_traint=tf.convert_to_tensor(y_train)
x_valt=tf.convert_to_tensor(x_val)
y_valt=tf.convert_to_tensor(y_val)
x_testt=tf.convert_to_tensor(x_test)
# Previously x_test was converted a second time and the test labels never were.
y_testt=tf.convert_to_tensor(y_test)

In [None]:
x_traint.shape, y_traint.shape, x_valt.shape, x_testt.shape

In [None]:
#model's first layer is looking for this shape: (None, 1, 450, 7129, 154, 1)
# x_traint.shape

In [None]:
# Fit data to model
# Train on the training tensors only. `x` / `y` are the combined
# train+validation portion from the first split, so fitting on them while
# validating on x_valt would score the model on samples it was trained on.
history = model.fit(x_traint, y_traint,
            batch_size=256,
            epochs=10,
            verbose=1,
            validation_data=(x_valt,y_valt))

In [None]:
import rioxarray # for the extension to load
import xarray

%matplotlib inline

In [None]:
x=xarray.open_dataarray(rast)

In [None]:
#Scandinavian Bliss

In [None]:
unw_tns.plot()

# Exploratory

In [None]:
# Column-wise split: every column except 'ALT' is a feature; 'ALT' is the target
# (kept as a one-column frame selection rather than a Series).
X, y = (df.loc[:, df.columns != 'ALT'],
        df.loc[:, df.columns == 'ALT'])

In [None]:
import shap

In [None]:
def f(X):
    """Prediction wrapper that feeds `regression` one slice per column of X.

    X is split along its second axis into a list of column slices, the list is
    passed to `regression.predict`, and the result is returned flattened.
    `regression` is looked up from the notebook's global namespace.
    """
    n_columns = X.shape[1]
    per_column_inputs = [X[:, col] for col in range(n_columns)]
    predictions = regression.predict(per_column_inputs)
    return predictions.flatten()

In [None]:

# Hold out 10% of the rows for testing, then 5% of the remainder for validation.
# NOTE(review): unlike the other splits in this notebook, shuffle=False is not
# passed here; if the rows are time-ordered, confirm a shuffled split is
# intended before lagged features are built from these frames.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.1, random_state=1)
Xtrain, Xvalid, ytrain, yvalid = train_test_split(Xtrain, ytrain, test_size=0.05, random_state=1) # 0.05 x 0.9 = 0.045 of all rows

In [None]:
# #CO2 (defined based on CO2 temporal coverage (5-years lag, 3-years lead)
# features=df.loc[:,df.columns != 'CO2_1_1_1']; target=df.loc[:,df.columns == 'CO2_1_1_1']
# Xtrain=features.loc['2003':'2018']; ytrain=target.loc['2003':'2018'] #use for training
# Xvalid=features.loc['2019':'2020']; yvalid=target.loc['2019':'2020'] #use for validation
# Xtest=features.loc['2020':'2021']; ytest=target.loc['2020':'2021'] #use for testing

In [None]:
# #CH4 (defined based on CH4 temporal coverage (5-years lag, 3-years lead)
# features=df.loc[:,df.columns != 'CH4_1_1_1']; target=df.loc[:,df.columns == 'CH4_1_1_1'] 
# Xtrain=features.loc['2003':'2018']; ytrain=target.loc['2003':'2018'] #use for training
# Xvalid=features.loc['2019':'2020']; yvalid=target.loc['2019':'2020'] #use for validation
# Xtest=features.loc['2019':'2021']; ytest=target.loc['2019':'2021'] #use for testing

In [None]:
#newXt=Xtrain.values.reshape(34764,51,90)

In [None]:
#Xtrain.loc[:,'CO2_1_1_1'].replace(-9999,np.nan).dropna() #2003-2018
#Xtrain.loc[:,'CH4_1_1_1'].replace(-9999,np.nan).dropna() #2003-2018, 2019-2020, 2021
#ytrain.loc['2003':'2021','ALT'].replace(-9999,np.nan).dropna() #2003-2021=62078

#df.loc['2003':'2021','ALT'].replace(-9999,np.nan).dropna() #2003-2021=78861
#78861*.8 #63089

In [None]:
#ytrain.replace(-9999,np.nan).dropna()

In [None]:
#X_train, X_test, y_train, y_test = train_test_split(features, y, test_size=0.3, shuffle=False)

In [None]:
df

In [None]:
Xtrain.shape, ytrain.shape, \
Xvalid.shape, yvalid.shape, \
Xtest.shape, ytest.shape

In [None]:
# Scale features and target with separate StandardScalers. Each scaler is
# fitted on the training split only and then applied to validation and test.
Xscaler, yscaler = StandardScaler(), StandardScaler()

Xtrainsc = Xscaler.fit_transform(Xtrain)
Xvalidsc = Xscaler.transform(Xvalid)
Xtestsc = Xscaler.transform(Xtest)

ytrainsc = yscaler.fit_transform(ytrain)
yvalidsc = yscaler.transform(yvalid)
ytestsc = yscaler.transform(ytest)

In [None]:
def series_to_supervised(data, lags = 1, forecasting_steps = 1, dropna=True):
    """Reframe a series as a table of lagged and lead copies of itself.

    Parameters
    ----------
    data : list, numpy array, pandas Series or pandas DataFrame
        Observations, one row per step. Anything `pd.DataFrame(data)` accepts
        works, including 1-D arrays, Series and nested lists.
    lags : int, default 1
        Number of past steps to include; adds one block of columns for each of
        t-lags, ..., t-1.
    forecasting_steps : int, default 1
        Number of steps from t onward to include; adds one block of columns for
        each of t, t+1, ..., t+forecasting_steps-1.
    dropna : bool, default True
        When True, drop every row that contains a NaN (the shifts introduce
        NaNs at the start and end of the table).

    Returns
    -------
    pandas.DataFrame
        Shifted copies concatenated column-wise. Columns are labelled with
        (original column, 't-k' | 't' | 't+k') tuples; duplicated labels are
        dropped, keeping the first occurrence.

    Raises
    ------
    ValueError
        From `pd.concat` when both `lags` and `forecasting_steps` are 0, since
        there is nothing to concatenate.
    """
    frame = pd.DataFrame(data)
    # Column labels come from the frame itself rather than from data.shape[1],
    # so inputs without a second dimension (1-D arrays, Series) and nested
    # lists get the right number of labels.
    source_columns = list(frame.columns)

    shifted, labels = [], []
    # Past steps, oldest first: t-lags ... t-1.
    for lag in range(lags, 0, -1):
        shifted.append(frame.shift(lag))
        labels += [(col, 't-%d' % lag) for col in source_columns]
    # Current step and future steps: t, t+1, ...
    for lead in range(0, forecasting_steps):
        shifted.append(frame.shift(-lead))
        suffix = 't' if lead == 0 else 't+%d' % lead
        labels += [(col, suffix) for col in source_columns]

    agg = pd.concat(shifted, axis=1)
    agg.columns = labels
    agg = agg.loc[:, ~agg.columns.duplicated()]
    if dropna:
        # Reassign instead of dropping in place on the sliced frame.
        agg = agg.dropna()
    return agg

In [None]:
Xtrainsc_sup=series_to_supervised(Xtrainsc,lags=3, forecasting_steps=2, dropna=True)

In [None]:
ytrainsc_sup=series_to_supervised(ytrainsc,lags=3, forecasting_steps=2,dropna=True)

In [None]:
Xvalidsc_sup=series_to_supervised(Xvalidsc,lags=3, forecasting_steps=2,dropna=True)

In [None]:
yvalidsc_sup=series_to_supervised(yvalidsc,lags=3, forecasting_steps=2,dropna=True)

In [None]:
Xtestsc_sup=series_to_supervised(Xtestsc,lags=3, forecasting_steps=2,dropna=True)

In [None]:
ytestsc_sup=series_to_supervised(ytestsc,lags=3, forecasting_steps=2,dropna=True)

In [None]:
Xtrainsc_sup.shape, ytrainsc_sup.shape, \
Xvalidsc_sup.shape, yvalidsc_sup.shape, \
Xtestsc_sup.shape, ytestsc_sup.shape

In [None]:
print(list(divisorGenerator(8788460)))

In [None]:
Xtrainsc_sup.values.reshape(33164,53,5,94);

In [None]:
# Reshape each supervised table to its hard-coded 3-D shape.
# np.asarray accepts both the DataFrames returned by series_to_supervised and
# the ndarrays this cell leaves behind, so re-running the cell no longer fails
# on a missing `.values` attribute.
Xtrainsc_sup=np.asarray(Xtrainsc_sup).reshape(56,31660,450);
ytrainsc_sup=np.asarray(ytrainsc_sup).reshape(56,31660,5);
Xvalidsc_sup=np.asarray(Xvalidsc_sup).reshape(13,34857,450);
yvalidsc_sup=np.asarray(yvalidsc_sup).reshape(13,34857,5);
Xtestsc_sup=np.asarray(Xtestsc_sup).reshape(3,71711,450);
ytestsc_sup=np.asarray(ytestsc_sup).reshape(3,71711,5)

In [None]:
Xtrainsc_sup.shape, ytrainsc_sup.shape, \
Xvalidsc_sup.shape, yvalidsc_sup.shape, \
Xtestsc_sup.shape, ytestsc_sup.shape

In [None]:
backend.clear_session()

In [None]:
# Print the shape of each split at every stage (raw, scaled, supervised), one
# shape per line, with a '///' line between groups.
print('\n///\n'.join(
    '\n'.join(str(stage.shape) for stage in group)
    for group in (
        (Xtrain, Xtrainsc, Xtrainsc_sup),
        (ytrain, ytrainsc, ytrainsc_sup),
        (Xvalid, Xvalidsc, Xvalidsc_sup),
        (yvalid, yvalidsc, yvalidsc_sup),
        (Xtest, Xtestsc, Xtestsc_sup),
        (ytest, ytestsc, ytestsc_sup),
    )
))
#df_trainsc.shape, df_Xtrainsc.shape, df_ytrainsc.shape, df_Xtrainsc_s2s.shape

In [None]:
# filepatha = Path('/Users/bradleygay/Downloads/CH4_X_trainsc.pkl')
# filepathb = Path('/Users/bradleygay/Downloads/CH4_y_trainsc.pkl')
# filepathc = Path('/Users/bradleygay/Downloads/CH4_X_testsc.pkl')
# filepathd = Path('/Users/bradleygay/Downloads/CH4_y_testsc.pkl')
# with open(filepatha, 'wb') as f:
#     pickle.dump(X_trainsc, f)
# with open(filepathb, 'wb') as f:
#     pickle.dump(y_trainsc, f)
# with open(filepathc, 'wb') as f:
#     pickle.dump(X_testsc, f)
# with open(filepathd, 'wb') as f:
#     pickle.dump(y_testsc, f)

In [None]:
# filepatha = Path('/Users/bradleygay/Downloads/CO2_X_trainsc.pkl')
# filepathb = Path('/Users/bradleygay/Downloads/CO2_y_trainsc.pkl')
# filepathc = Path('/Users/bradleygay/Downloads/CO2_X_testsc.pkl')
# filepathd = Path('/Users/bradleygay/Downloads/CO2_y_testsc.pkl')
# with open(filepatha, 'wb') as f:
#     pickle.dump(X_trainsc, f)
# with open(filepathb, 'wb') as f:
#     pickle.dump(y_trainsc, f)
# with open(filepathc, 'wb') as f:
#     pickle.dump(X_testsc, f)
# with open(filepathd, 'wb') as f:
#     pickle.dump(y_testsc, f)

In [None]:
Xtrainsc_sup.shape, type(Xtrainsc_sup), ytrainsc_sup.shape, type(ytrainsc_sup), \
df.shape, type(df)

In [None]:
#TNS
#68.6068, -149.2958
#68.6058, -149.3110
#INT
#63.8784, -149.2536
#63.8757, -149.2133
#SEW
#64.8618, -163.7002
#YKD
#61.2548, -163.2590
#61.2723, -163.2228

In [None]:
#ALT
Xtrainsc_sup.shape, ytrainsc_sup.shape, df.shape

######

#

######

# <>

# END OF CODEBASE FOR MANUSCRIPT 1

######