In [86]:
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import seaborn as sns
from d2d import *


filename = "/data/fast0/datasets/Rhone_data_continuous.h5"
f = h5py.File(filename, 'r')
print("Keys: %s" % f.keys())

Keys: <KeysViewHDF5 ['DAS Data', 'Discharge', 'Times']>


In [87]:
# # Put data in a data frame
# das = np.transpose([chan[750] for chan in f['DAS Data']])

# # Might want to try this later:
# # das = np.log10(np.transpose([chan[750] for chan in f['DAS Data']]))

# discharge = np.array(f['Discharge'])
# df = pd.DataFrame(data={'das':das,'discharge':discharge})

In [88]:
das_data_all = f['DAS Data'][:]
discharge = f['Discharge'][:]

df_all_chan = pd.DataFrame(das_data_all)
df_all_chan['Discharge'] = discharge
df_all_chan.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2487,2488,2489,2490,2491,2492,2493,2494,2495,Discharge
0,1737.318436,1932.321258,1788.269101,3706.977556,1831.756626,1768.674227,1908.850695,3690.341997,1804.14515,2437.436803,...,3603.932847,2066.0708,2215.66939,1881.772352,3990.883595,1584.96597,1787.281257,1862.91244,4160.545583,9.947179
1,8161.349279,8245.783723,8118.189935,528.834231,468.890511,714.71431,728.051775,158.440806,82.719811,116.949415,...,8238.452283,8204.901437,8194.671705,8228.769841,8211.066134,8229.370787,8218.436955,8210.143402,8178.032001,9.934657
2,8242.942236,8205.640252,8093.980026,510.832905,451.884623,709.510655,709.932307,153.715844,83.404382,116.112236,...,8184.500407,8221.490571,8202.429647,8194.927561,8197.675324,8188.718827,8189.658071,8225.017259,8163.918322,9.92929
3,8159.780047,8196.654224,8113.873839,489.734947,438.657688,690.418208,701.896139,158.058233,83.003254,112.787627,...,8173.860868,8156.187127,8151.113841,8235.843141,8145.360321,8192.392266,8169.275389,8129.926491,8161.652382,9.923923
4,8198.498465,8161.470921,8120.452338,521.425978,444.272741,689.710217,695.164917,160.24664,84.054029,111.125592,...,8198.827286,8234.815465,8219.987333,8140.961605,8193.51774,8157.154485,8241.648596,8229.194191,8234.212679,9.918557


In [89]:
# Plot the raw data

# fig,ax=plt.subplots(figsize=(15,5))
# ax.plot(das)
# ax.set_ylabel('DAS-measured strain rate')
# ax2=plt.twinx()
# ax2.plot(discharge,'-r')
# ax2.set_ylabel('Discharge')

In [90]:
# Split the data
column_indices = {name: i for i, name in enumerate(df_all_chan.columns)}
n = len(df_all_chan)

train_df = df_all_chan[0:int(n*0.7)]
val_df = df_all_chan[int(n*0.7):int(n*0.9)]
test_df = df_all_chan[int(n*0.9):]

In [95]:
input_columns = list(np.arange(0,2308,1))

In [91]:
# Normalize
train_mean = train_df.mean()
train_std = train_df.std()

train_df = (train_df - train_mean) / train_std
val_df = (val_df - train_mean) / train_std
test_df = (test_df - train_mean) / train_std

In [97]:
# # Visualize the normalized data
# df_std = (df_all_chan - train_mean) / train_std
# df_std = df_std.melt(var_name='Column', value_name='Normalized')
# plt.figure(figsize=(12, 6))
# ax = sns.violinplot(x='Column', y='Normalized', data=df_std)
# _ = ax.set_xticklabels(df.keys(), rotation=90)

# Does not like to visulaize with all the channels applied

In [121]:
single_step_window = WindowGenerator(
    input_width=1, label_width=1, shift=0,
    train_df=train_df, 
    val_df=val_df, 
    test_df=test_df,
    label_columns=['Discharge'],
    input_columns=input_columns)

In [99]:
linear = tf.keras.Sequential([
    tf.keras.layers.Dense(1)
])

lstm_model = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, time, lstm_units]
    tf.keras.layers.LSTM(32, return_sequences=True),
    # Shape => [batch, time, features]
    tf.keras.layers.Dense(units=1)
])

dnn_model = tf.keras.models.Sequential([
      layers.Dense(64, activation='relu'),
      layers.Dense(64, activation='relu'),
      layers.Dense(1)
  ])

In [116]:
MAX_EPOCHS = 20

def compile_and_fit(model, window, patience=5):
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                    patience=patience,
                                                    mode='min')

    model.compile(loss=tf.losses.MeanSquaredError(),
                optimizer=tf.optimizers.Adam(),
                metrics=[tf.metrics.MeanAbsoluteError()])

    history = model.fit(window.train, epochs=MAX_EPOCHS,
                      validation_data=window.val,
                      callbacks=[early_stopping])
    return history

In [111]:
history = compile_and_fit(linear, single_step_window)

val_performance={}
performance={}
val_performance['Linear'] = linear.evaluate(single_step_window.val)
performance['Linear'] = linear.evaluate(single_step_window.test, verbose=0)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20


In [122]:
# inputs, labels = single_step_window.example

# predictions = np.array(linear(inputs)).flatten()
# inputs = np.array(inputs).flatten()
# labels = np.array(labels).flatten()

# plt.subplots()
# plt.plot(labels,predictions,'ok')
# plt.plot((-1,1),(-1,1))
# plt.xlabel('Actual Normalized Discharge')
# plt.ylabel('Predicted Normalized Discharge')

# plt.subplots()
# plt.plot(inputs, labels,'ok')
# # for xx, yy in single_step_window.val.take(5):
# #     plt.plot(np.array(xx[:,0,:]).flatten(),np.array(yy).flatten(),'or')
# plt.xlabel('Actual Normalized DAS')
# plt.ylabel('Normalized Discharge (Actual and Modeled)')
# weights=linear.layers[0].kernel[:,0].numpy()
# biases=linear.layers[0].bias.numpy()
# plt.plot((-1,3.0),weights[0]*np.array((-1,3.0))+biases[0])

# # Compare to linear least squares
# from scipy import stats
# slope, intercept, r_value, p_value, std_err = stats.linregress(inputs,labels)
# plt.plot((-1,3.0),slope*np.array((-1,3.0))+intercept)

In [117]:
multi_step_window = WindowGenerator(
    input_width=500, label_width=1, shift=0,
    train_df=train_df, 
    val_df=val_df, 
    test_df=test_df,
    label_columns=['Discharge'],
    input_columns=input_columns)
# multi_step_window

# Multistep Model Runs

In [118]:
history = compile_and_fit(linear, multi_step_window)

val_performance['Multistep_Linear'] = linear.evaluate(multi_step_window.val)
performance['Multistep_Linear'] = linear.evaluate(multi_step_window.test, verbose=0)
performance

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20


{'Linear': [5.418895244598389, 1.952818512916565],
 'Multistep_Linear': [2.6468114852905273, 1.4173250198364258],
 'Multistep_LSTM': [2.745037078857422, 1.4381883144378662],
 'Multistep_DNN': [2.9065146446228027, 1.4924529790878296]}

In [119]:
history = compile_and_fit(lstm_model, multi_step_window)

val_performance['Multistep_LSTM'] = lstm_model.evaluate(multi_step_window.val)
performance['Multistep_LSTM'] = lstm_model.evaluate(multi_step_window.test, verbose=0)
performance

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20


{'Linear': [5.418895244598389, 1.952818512916565],
 'Multistep_Linear': [2.6468114852905273, 1.4173250198364258],
 'Multistep_LSTM': [1.9776830673217773, 1.204664945602417],
 'Multistep_DNN': [2.9065146446228027, 1.4924529790878296]}

In [120]:
history = compile_and_fit(dnn_model, multi_step_window)

val_performance['Multistep_DNN'] = dnn_model.evaluate(multi_step_window.val)
performance['Multistep_DNN'] = dnn_model.evaluate(multi_step_window.test, verbose=0)
performance

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20


{'Linear': [5.418895244598389, 1.952818512916565],
 'Multistep_Linear': [2.6468114852905273, 1.4173250198364258],
 'Multistep_LSTM': [1.9776830673217773, 1.204664945602417],
 'Multistep_DNN': [2.1063549518585205, 1.2446939945220947]}