In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix

from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras.initializers import Constant
from itertools import product
from tqdm import tqdm


from Libs.load_data import ClassificationDataLoader, DataLoader, get_dataset_split
from Libs.threshold import get_labels_physic
from Libs.keras_f1score import f1_m

2023-03-13 08:15:36.841342: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-13 08:15:37.117478: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-03-13 08:15:37.117594: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-03-13 08:15:38.783967: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

In [2]:
# initialize data loader
data_loader = DataLoader(run=100, N=1000, s=0.5, t=[0.01, 0.1, 0.5, 3], d=0.2, m=1)
# get the grid
grid_X, grid_y = data_loader.get_grid()
# get params dictionary
params = data_loader.get_params()

grid_X.shape, grid_y.shape

((100, 1, 4, 1, 1, 1000), (100, 1, 4, 1, 1, 1000))

In [3]:
# reshape grid_X and grid_y
# Xs, labels =  grid_X.reshape((grid_X.shape[0]*grid_X.shape[2], grid_X.shape[-1])), grid_y.reshape((grid_X.shape[0]*grid_X.shape[2], grid_X.shape[-1]))
# Xs.shape, labels.shape

# LSTM model with multiple all theta parameters

Let's start seeing what is going to happen with training and testing the NN with all the configurations of theta parameters

In [4]:
df_train,df_val,df_test = get_dataset_split(grid_X, grid_y, None, window_size=20, overlap_size=19,
                                            label_treshold=1, split_on_run=True, shuffle_run=False, 
                                            shuffle_window=False, test_size = 0.3, val_size=0.2, 
                                            get_validation=True, random_state=42)
df_train.shape, df_val.shape, df_test.shape

((192080, 21), (82320, 21), (117600, 21))

This cell shows the amounts of class 0 and 1 for each data set

In [6]:
# number of classes
print('Training set:')
print(df_train['future_flare'].value_counts(), '\n')
pos = df_train['future_flare'].value_counts()[0]
true = df_train['future_flare'].value_counts()[1]
print('validation set:')
print(df_val['future_flare'].value_counts(), '\n')
print('Test set:')
print(df_test['future_flare'].value_counts(), '\n')

Training set:
0    124288
1     67792
Name: future_flare, dtype: int64 

validation set:
0    53781
1    28539
Name: future_flare, dtype: int64 

Test set:
0    76596
1    41004
Name: future_flare, dtype: int64 



In [7]:
X_train, y_train = df_train.iloc[:,:-1].to_numpy(), df_train.future_flare.to_numpy()
X_val, y_val = df_val.iloc[:,:-1].to_numpy(), df_val.future_flare.to_numpy()
X_test, y_test = df_test.iloc[:,:-1].to_numpy(), df_test.future_flare.to_numpy()
X = np.vstack((X_train, X_val, X_test))
y = np.hstack((y_train, y_val, y_test))
print('X ## Train:', X_train.shape, 'Val:', X_val.shape, 'Test:', X_test.shape)
print('y ## Train:', y_train.shape, 'Val:', y_val.shape, 'Test:', y_test.shape)

X ## Train: (192080, 20) Val: (82320, 20) Test: (117600, 20)
y ## Train: (192080,) Val: (82320,) Test: (117600,)


Let's construct now the model

In [8]:
initial_bias = Constant([np.log(true/pos)])
# define model
model = Sequential()
model.add(Bidirectional(LSTM(20, activation='relu'), input_shape=(X_train.shape[1], 1)))
model.add(Dense(30, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(1, activation='sigmoid',bias_initializer=initial_bias))
model.compile(loss='binary_crossentropy', 
              optimizer='adam', 
              metrics=[f1_m, 'accuracy'])

print(model.summary())

2023-03-13 08:18:13.383453: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-03-13 08:18:13.383909: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-03-13 08:18:13.384082: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (83dc2cdd3c94): /proc/driver/nvidia/version does not exist
2023-03-13 08:18:13.392995: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 40)               3520      
 l)                                                              
                                                                 
 dense (Dense)               (None, 30)                1230      
                                                                 
 dense_1 (Dense)             (None, 10)                310       
                                                                 
 dense_2 (Dense)             (None, 1)                 11        
                                                                 
Total params: 5,071
Trainable params: 5,071
Non-trainable params: 0
_________________________________________________________________
None


In [9]:
epochs = 20
batch_size = 32
# define callbacks
callbacks = [
    ModelCheckpoint(
        os.path.join("models", "LSTM_allTheta_checkpoint.h5"), save_weights_only=True, monitor="val_loss"
    ),
    EarlyStopping(monitor="val_loss", patience=3, verbose=1),
]
# fit model
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_data=(X_val, y_val),
    verbose=1,
)

Epoch 1/20
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 14: early stopping


<keras.callbacks.History at 0x7f66df77d3d0>

Validation set results

In [10]:
scores = model.evaluate(X_val, y_val, verbose=0)
y_pred = np.round(model.predict(X_val), 0)

print("### Evaluation on validation set ###")
print("Accuracy: %.2f" % (accuracy_score(y_pred, y_val)))
print("F1 score: %.2f" % (f1_score(y_pred, y_val)))
#Create confusion matrix and normalizes it over predicted (columns)
result = confusion_matrix(y_val, y_pred)
print(result)

### Evaluation on validation set ###
Accuracy: 0.99
F1 score: 0.98
[[53521   260]
 [  889 27650]]


Test results

In [11]:
scores = model.evaluate(X_test, y_test, verbose=0)
y_pred = np.round(model.predict(X_test), 0)

print("### Evaluation on test set ###")
print("Accuracy: %.2f" % (accuracy_score(y_pred, y_test)))
print("F1 score: %.2f" % (f1_score(y_pred, y_test)))
#Create confusion matrix and normalizes it over predicted (columns)
result = confusion_matrix(y_test, y_pred)
print(result)

### Evaluation on test set ###
Accuracy: 0.99
F1 score: 0.98
[[76282   314]
 [ 1292 39712]]


# Interpolation

Same model as before, but now we are fitting only in the dataset coming from the generation with using just the extreme parameters: 

$\theta=0.01$ and $\theta=3$

and a fraction of the other dataset, coming from $\theta=0.1$ and $\theta=0.5$ as test set

In [12]:
params

{'run': 100,
 'sigma': [0.5],
 'theta': [0.01, 0.1, 0.5, 3],
 'mu': [1],
 'delta': [0.2],
 'N': 1000}

In [15]:
p = 'theta'
theta_train_list     = [0.01, 3]
theta_train_list_idx = [params[p].index(t) for t in theta_train_list]
theta_test_list      = [0.1, 0.5]
theta_test_list_idx  = [params[p].index(t) for t in theta_test_list]

# get the train and validation set, selecting the index for grid given the interpolation assuption
# notice that theta is the third parameter
df_train, df_val, _ = get_dataset_split(grid_X[:,:,theta_train_list_idx,:,:,:], 
                                        grid_y[:,:,theta_train_list_idx,:,:,:], 
                                        None, window_size=20, overlap_size=19,
                                        label_treshold=1, split_on_run=True, shuffle_run=False, 
                                        shuffle_window=False, test_size = 0.3, val_size=0.2, 
                                        get_validation=True, random_state=42)
# get the test set, selecting the index for grid given the interpolation assuption
# notice that theta is the third parameter
_, _, df_test = get_dataset_split(grid_X[:,:,theta_test_list_idx,:,:,:], 
                                  grid_y[:,:,theta_test_list_idx,:,:,:], 
                                  None, window_size=20, overlap_size=19,
                                  label_treshold=1, split_on_run=True, shuffle_run=False, 
                                  shuffle_window=False, test_size = 0.3, val_size=0.2, 
                                  get_validation=True, random_state=42)
df_train.shape, df_val.shape, df_test.shape

((96040, 21), (41160, 21), (58800, 21))

In [16]:
# number of classes
print('Training set:')
print(df_train['future_flare'].value_counts(), '\n')
pos = df_train['future_flare'].value_counts()[0]
true = df_train['future_flare'].value_counts()[1]
print('validation set:')
print(df_val['future_flare'].value_counts(), '\n')
print('Test set:')
print(df_test['future_flare'].value_counts(), '\n')

Training set:
0    67067
1    28973
Name: future_flare, dtype: int64 

validation set:
0    28312
1    12848
Name: future_flare, dtype: int64 

Test set:
0    35204
1    23596
Name: future_flare, dtype: int64 



In [17]:
X_train, y_train = df_train.iloc[:,:-1].to_numpy(), df_train.future_flare.to_numpy()
X_val, y_val = df_val.iloc[:,:-1].to_numpy(), df_val.future_flare.to_numpy()
X_test, y_test = df_test.iloc[:,:-1].to_numpy(), df_test.future_flare.to_numpy()
X = np.vstack((X_train, X_val, X_test))
y = np.hstack((y_train, y_val, y_test))
print('X ## Train:', X_train.shape, 'Val:', X_val.shape, 'Test:', X_test.shape)
print('y ## Train:', y_train.shape, 'Val:', y_val.shape, 'Test:', y_test.shape)

X ## Train: (96040, 20) Val: (41160, 20) Test: (58800, 20)
y ## Train: (96040,) Val: (41160,) Test: (58800,)


In [18]:
epochs = 20
batch_size = 32
# define callbacks
callbacks = [
    ModelCheckpoint(
        os.path.join("models", "LSTM_intrpTheta_checkpoint.h5"), save_weights_only=True, monitor="val_loss"
    ),
    EarlyStopping(monitor="val_loss", patience=3, verbose=1),
]
# fit model
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_data=(X_val, y_val),
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: early stopping


<keras.callbacks.History at 0x7f66d40d5f10>

Validation results

In [19]:
scores = model.evaluate(X_val, y_val, verbose=0)
y_pred = np.round(model.predict(X_val), 0)

print("### Evaluation on validation set ###")
print("Accuracy: %.2f" % (accuracy_score(y_pred, y_val)))
print("F1 score: %.2f" % (f1_score(y_pred, y_val)))
#Create confusion matrix and normalizes it over predicted (columns)
result = confusion_matrix(y_val, y_pred)
print(result)

### Evaluation on validation set ###
Accuracy: 0.99
F1 score: 0.98
[[28302    10]
 [  522 12326]]


Test results

In [20]:
scores = model.evaluate(X_test, y_test, verbose=0)
y_pred = np.round(model.predict(X_test), 0)

print("### Evaluation on test set ###")
print("Accuracy: %.2f" % (accuracy_score(y_pred, y_test)))
print("F1 score: %.2f" % (f1_score(y_pred, y_test)))
#Create confusion matrix and normalizes it over predicted (columns)
result = confusion_matrix(y_test, y_pred)
print(result)

### Evaluation on test set ###
Accuracy: 0.99
F1 score: 0.99
[[35177    27]
 [  668 22928]]


Results are still similar to the standard case

# Extrapolation

Same model as before, but now we are fitting only in the dataset coming from the generation without using the extreme parameters: 

$\theta=0.1$ and $\theta=0.5$

In [21]:
p = 'theta'
theta_train_list     = [0.1, 0.5]
theta_train_list_idx = [params[p].index(t) for t in theta_train_list]
theta_test_list      = [0.01, 3]
theta_test_list_idx  = [params[p].index(t) for t in theta_test_list]

# get the train and validation set, selecting the index for grid given the interpolation assuption
# notice that theta is the third parameter
df_train, df_val, _ = get_dataset_split(grid_X[:,:,theta_train_list_idx,:,:,:], 
                                        grid_y[:,:,theta_train_list_idx,:,:,:], 
                                        None, window_size=20, overlap_size=19,
                                        label_treshold=1, split_on_run=True, shuffle_run=False, 
                                        shuffle_window=False, test_size = 0.3, val_size=0.2, 
                                        get_validation=True, random_state=42)
# get the test set, selecting the index for grid given the interpolation assuption
# notice that theta is the third parameter
_, _, df_test = get_dataset_split(grid_X[:,:,theta_test_list_idx,:,:,:], 
                                  grid_y[:,:,theta_test_list_idx,:,:,:], 
                                  None, window_size=20, overlap_size=19,
                                  label_treshold=1, split_on_run=True, shuffle_run=False, 
                                  shuffle_window=False, test_size = 0.3, val_size=0.2, 
                                  get_validation=True, random_state=42)
df_train.shape, df_val.shape, df_test.shape

((96040, 21), (41160, 21), (58800, 21))

In [22]:
X_train, y_train = df_train.iloc[:,:-1].to_numpy(), df_train.future_flare.to_numpy()
X_val, y_val = df_val.iloc[:,:-1].to_numpy(), df_val.future_flare.to_numpy()
X_test, y_test = df_test.iloc[:,:-1].to_numpy(), df_test.future_flare.to_numpy()
X = np.vstack((X_train, X_val, X_test))
y = np.hstack((y_train, y_val, y_test))
print('X ## Train:', X_train.shape, 'Val:', X_val.shape, 'Test:', X_test.shape)
print('y ## Train:', y_train.shape, 'Val:', y_val.shape, 'Test:', y_test.shape)

X ## Train: (96040, 20) Val: (41160, 20) Test: (58800, 20)
y ## Train: (96040,) Val: (41160,) Test: (58800,)


Same model as before

In [24]:
initial_bias = Constant([np.log(true/pos)])
# define model
model = Sequential()
model.add(Bidirectional(LSTM(20, activation='relu'), input_shape=(X_train.shape[1], 1)))
model.add(Dense(30, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(1, activation='sigmoid',bias_initializer=initial_bias))
model.compile(loss='binary_crossentropy', 
              optimizer='adam', 
              metrics=[f1_m, 'accuracy'])

print(model.summary())

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_1 (Bidirectio  (None, 40)               3520      
 nal)                                                            
                                                                 
 dense_3 (Dense)             (None, 30)                1230      
                                                                 
 dense_4 (Dense)             (None, 10)                310       
                                                                 
 dense_5 (Dense)             (None, 1)                 11        
                                                                 
Total params: 5,071
Trainable params: 5,071
Non-trainable params: 0
_________________________________________________________________
None


In [25]:
epochs = 20
batch_size = 32
# define callbacks
callbacks = [
    ModelCheckpoint(
        os.path.join("models", "LSTM_extrpTheta_checkpoint.h5"), save_weights_only=True, monitor="val_loss"
    ),
    EarlyStopping(monitor="val_loss", patience=3, verbose=1),
]
# fit model
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_data=(X_val, y_val),
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 12: early stopping


<keras.callbacks.History at 0x7f66c502f040>

Validation results

In [26]:
scores = model.evaluate(X_val, y_val, verbose=0)
y_pred = np.round(model.predict(X_val), 0)

print("### Evaluation on validation set ###")
print("Accuracy: %.2f" % (accuracy_score(y_pred, y_val)))
print("F1 score: %.2f" % (f1_score(y_pred, y_val)))
#Create confusion matrix and normalizes it over predicted (columns)
result = confusion_matrix(y_val, y_pred)
print(result)

### Evaluation on validation set ###
Accuracy: 0.98
F1 score: 0.98
[[25074   395]
 [  348 15343]]


Test results

In [27]:
scores = model.evaluate(X_test, y_test, verbose=0)
y_pred = np.round(model.predict(X_test), 0)

print("### Evaluation on test set ###")
print("Accuracy: %.2f" % (accuracy_score(y_pred, y_test)))
print("F1 score: %.2f" % (f1_score(y_pred, y_test)))
#Create confusion matrix and normalizes it over predicted (columns)
result = confusion_matrix(y_test, y_pred)
print(result)

### Evaluation on test set ###
Accuracy: 0.98
F1 score: 0.96
[[40513   879]
 [  507 16901]]
