## 0 - Libraries

In [1]:
import os
os.environ['KERAS_BACKEND'] = 'torch'
from numpy import mean
from numpy import std
import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta
#choose kernel (keras-env) when run on Macbook Pro 
import keras
from keras import layers
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import torch
import torch.nn
import torch.optim
import torch.profiler
import torch.utils.data
import torchvision.datasets
import torchvision.models
import torchvision.transforms as T
from datetime import datetime
import calendar
import random
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import toml
import h5py
from keramss import *
import joblib
from pathlib import Path

## 1 - Config, seeding and data import

In [2]:
# Parse the run configuration; the handle is only needed while parsing.
with open('./model_config.toml','r') as f:
    config = toml.load(f)

# Unpack the settings used throughout the notebook, section by section.
seed = config['stat']['seed']

data_cfg = config['data']
t1 = data_cfg['t1']
t2 = data_cfg['t2']
sat = data_cfg['sat']
density_threshold = data_cfg['density_threshold']

model_cfg = config['model']
name = model_cfg['name']
f_train = model_cfg['f_train']
f_valid = model_cfg['f_valid']
f_test = model_cfg['f_test']
shuffle = model_cfg['shuffle']
OHM = model_cfg['OHM']
epochs = model_cfg['epochs']
patience = model_cfg['patience']

paths_cfg = config['paths']
data_path = paths_cfg['data']
saved_models_path = paths_cfg['saved_models']
log_path = paths_cfg['logs']

# Keep a copy of the configuration inside the model's output directory.
Path(f"{saved_models_path}/{name}").mkdir(parents=True, exist_ok=True)
with open(f"{saved_models_path}/{name}/model_config.toml",'w') as fsave:
    toml.dump(config,fsave)

# Seed every random number generator used by the notebook.
os.environ['PYTHONHASHSEED']=str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")
print(f"OHM={OHM}")
print(f"density_threshold={density_threshold}")

Using cuda device
OHM=True
density_threshold=0


In [3]:
# Build the dataset from the configured satellite, data path, time bounds,
# density threshold and OHM flag.
dataset = MMS_Dataset(sat,data_path,t1,t2,density_threshold,OHM=OHM)

# Split using the fractions declared in model_config.toml instead of
# hard-coded values, with a seeded generator so the split is reproducible.
# NOTE(review): assumes f_train + f_valid + f_test == 1 and that random_split
# (pulled in by the keramss star import) accepts fractional lengths — confirm.
train_data, val_data, test_data = random_split(
    dataset,
    [f_train, f_valid, f_test],
    generator=torch.Generator().manual_seed(seed),
)

# Batch size: 0.25% of the training set rounded to the closest power of two.
# For small training sets the power of two drops below 1 and int() would
# truncate it to 0, so the result is clamped to at least one sample.
batch_size = max(1, int(2**(np.round(np.log2(0.25*len(train_data)/100)))))
print(f"Batch size of ~0.25% of data (rounded to closest power of 2) : {batch_size}")

Batch size of ~0.25% of data (rounded to closest power of 2) : 32768


In [4]:
def first_dnn(input_size,output_size,scale,device):
    """
    Assemble and compile a fully connected Keras model.

    Layer stack, in order: Dense(input_size, relu), three Dense(64, relu)
    layers, Dropout(0.2) and a final Dense(output_size) without activation.
    The model is compiled with a mean-squared-error loss, the Adam optimizer
    (learning rate 0.0001) and the MaxError, MSE, R2Score and PCC metrics.

    Parameters
    ----------
    input_size : int
        Number of units of the first Dense layer.
    output_size : int
        Number of units of the last Dense layer.
    scale, device
        Forwarded unchanged to the MaxError and MSE metric constructors.

    Returns
    -------
    keras.Sequential
        The compiled (not yet fitted) model.
    """
    hidden_activation = 'relu'

    # Layers are created in stacking order, one add() call per layer.
    net = keras.Sequential()
    net.add(layers.Dense(input_size, activation=hidden_activation))
    for _ in range(3):
        net.add(layers.Dense(64, activation=hidden_activation))
    net.add(layers.Dropout(0.2))
    net.add(layers.Dense(output_size))

    adam = keras.optimizers.Adam(0.0001)
    tracked_metrics = [
        MaxError(scaler=scale,device=device),
        MSE(scaler=scale,device=device),
        keras.metrics.R2Score(
            class_aggregation=None, num_regressors=0, name="r2_score", dtype=None),
        PCC(),
    ]
    net.compile(loss='mean_squared_error', optimizer=adam, metrics=tracked_metrics)

    return net

In [5]:
# Build the scaler from the training split only and store it with the model
# artefacts before it is moved to the compute device.
scale = Standard_Scaler(train_data)
scaler_file = f'{saved_models_path}/{name}/scale.keras'
torch.save(scale, scaler_file)

# Apply the same scaler to each of the three splits.
X_train, y_train = scale.transform(dataset=train_data)
X_val, y_val = scale.transform(dataset=val_data)
X_test, y_test = scale.transform(dataset=test_data)

# Move the scaler and the training tensors to the selected device.
scale.to(device)
X_train = X_train.to(device)
y_train = y_train.to(device)

print(device)

cuda


In [6]:
# First-layer width is the number of columns of X_train minus 3; the model
# has 3 outputs.
n_columns = X_train.shape[1]
model = first_dnn(n_columns - 3, 3, scale, device)

#model.summary()

In [7]:
class LossAndErrorPrintingCallback(keras.callbacks.Callback):
    """
    Print a one-line summary of the training metrics after every epoch.

    The summary lists, when present in ``logs``: ``loss``, ``r2_score``,
    ``MSE`` and ``max_error``, each under its own name. The previous message
    had fewer placeholders than arguments (so ``MSE`` and ``max_error`` were
    silently dropped) and printed ``r2_score`` as "mean absolute error".
    """

    # Keys of ``logs`` that are reported, in display order.
    _REPORTED_KEYS = ("loss", "r2_score", "MSE", "max_error")

    @staticmethod
    def _format_value(value):
        """Format a scalar as ``{:7.2f}``; format a multi-element value element-wise."""
        try:
            return "{:7.2f}".format(float(value))
        except (TypeError, ValueError, RuntimeError):
            # Multi-element values (e.g. one entry per output) cannot be
            # converted with float(); format each element instead.
            return "[" + ", ".join("{:.2f}".format(float(item)) for item in value) + "]"

    def on_epoch_end(self, epoch, logs=None):
        """
        Print the reported metrics for ``epoch``.

        Parameters
        ----------
        epoch : int
            Index of the epoch that just finished.
        logs : dict or None
            Metric values for this epoch; missing keys are skipped.
        """
        logs = logs or {}
        reported = [
            "{} is {}".format(key, self._format_value(logs[key]))
            for key in self._REPORTED_KEYS
            if key in logs
        ]
        print("Epoch {}: {}.".format(epoch, ", ".join(reported)))

In [8]:
class TorchTensorBoard(keras.callbacks.Callback):
    '''
    TensorBoard logging callback built on ``torch.utils.tensorboard``.

    Adapted from https://github.com/keras-team/keras/issues/19121

    After each epoch, entries of ``logs`` whose key starts with ``"val_"`` are
    written to the "val" writer (with the prefix removed) and all other
    entries to the "train" writer. A value of length 3 is expanded into the
    tags ``<key>/x``, ``<key>/y``, ``<key>/z`` plus ``<key>/global``, the root
    mean square of the three components. The optimizer learning rate is
    written to the "train" writer as well.
    '''
    def __init__(self, path):
        """
        Parameters
        ----------
        path : str
            Root log directory; each writer logs into its own sub-directory.
        """
        # Initialise the base Callback state before adding our own.
        super().__init__()
        self._path = path
        self._writers = {}

    def writer(self, writer):
        """Return the SummaryWriter named ``writer``, creating it on first use."""
        if writer not in self._writers:
            # Imported lazily so tensorboard is only needed once logging starts.
            import torch.utils.tensorboard
            self._writers[writer] = torch.utils.tensorboard.SummaryWriter(os.path.join(self._path, writer))
        return self._writers[writer]

    def add_logs(self, writer, logs, step):
        """Write every ``tag: value`` pair of ``logs`` to ``writer`` at ``step``."""
        for key, value in logs.items():
            self.writer(writer).add_scalar(key, value, step)

    @staticmethod
    def _expand(tag, value):
        """Return the scalar entries to log for one ``tag``/``value`` pair."""
        try:
            is_triplet = not isinstance(value, float) and len(value) == 3
        except TypeError:
            # Scalars without a length (ints, 0-d tensors/arrays) are logged as-is.
            is_triplet = False
        if not is_triplet:
            return {tag: value}
        return {
            tag+'/x': value[0],
            tag+'/y': value[1],
            tag+'/z': value[2],
            tag+'/global': value.square().mean().sqrt(),
        }

    def on_epoch_end(self, epoch, logs=None):
        """Write the metrics of the finished epoch; steps are numbered from 1."""
        if not logs:
            return
        step = epoch + 1

        train_scalars = {}
        val_scalars = {}
        for key, value in logs.items():
            if key.startswith("val_"):
                val_scalars.update(self._expand(key[4:], value))
            else:
                train_scalars.update(self._expand(key, value))

        # Each writer receives its scalars exactly once per epoch (the calls
        # used to sit inside the loops and rewrote the dict for every key).
        self.add_logs("train", train_scalars, step)
        if isinstance(getattr(self.model, "optimizer", None), keras.optimizers.Optimizer):
            self.add_logs("train", {"learning_rate": self.model.optimizer.learning_rate.numpy()}, step)
        self.add_logs("val", val_scalars, step)

    def on_train_end(self, logs=None):
        """Flush all writers so the last epochs reach disk; writers stay open for reuse."""
        for summary_writer in self._writers.values():
            summary_writer.flush()


In [9]:
# Timestamped log directory, best-model checkpoint and early stopping,
# both callbacks monitoring the validation loss.
run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = f'{log_path}/{name}_' + run_stamp
filepath = f'{saved_models_path}/{name}/_weights.best.keras'

checkpoint = keras.callbacks.ModelCheckpoint(
    filepath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
earlystopping_callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=patience,
)

In [10]:
# Callbacks, in order: early stopping, best-model checkpoint, TensorBoard.
# LossAndErrorPrintingCallback() is intentionally left out.
my_callbacks = [
    earlystopping_callback,
    checkpoint,
    TorchTensorBoard(log_dir),
]

if True:
    model.fit(
        X_train,
        y_train,
        epochs=epochs,
        validation_data=(X_val, y_val),
        verbose=2,
        callbacks=my_callbacks,
        batch_size=batch_size,
    )
    # Save the final weights and the full model in the model's directory.
    model.save_weights(f'{saved_models_path}/{name}/model.weights.h5', overwrite=True)
    model.save(f'{saved_models_path}/{name}/model.keras')

Epoch 1/300

Epoch 1: val_loss improved from inf to 0.43676, saving model to /home/esevegnes/Workspace/Saved_models/hlayer_check/_weights.best.keras
358/358 - 35s - 99ms/step - MSE: 6.1809 - PCC: 0.7404 - loss: 0.6699 - max_error: 23.2060 - r2_score: 0.3296 - val_MSE: 25.8660 - val_PCC: 0.4012 - val_loss: 0.4368 - val_max_error: 8.2077 - val_r2_score: 0.5651
Epoch 2/300

Epoch 2: val_loss improved from 0.43676 to 0.33490, saving model to /home/esevegnes/Workspace/Saved_models/hlayer_check/_weights.best.keras
358/358 - 34s - 94ms/step - MSE: 4.8509 - PCC: 0.8008 - loss: 0.4176 - max_error: 21.0960 - r2_score: 0.5823 - val_MSE: 11.0107 - val_PCC: 0.8247 - val_loss: 0.3349 - val_max_error: 6.5654 - val_r2_score: 0.6656
Epoch 3/300

Epoch 3: val_loss improved from 0.33490 to 0.28500, saving model to /home/esevegnes/Workspace/Saved_models/hlayer_check/_weights.best.keras
358/358 - 33s - 93ms/step - MSE: 3.9820 - PCC: 0.8310 - loss: 0.3442 - max_error: 18.7153 - r2_score: 0.6557 - val_MSE: 4

In [None]:
# Optional continuation of a finished run: switch the guard to True to
# reload the saved model and keep training it up to epoch 1000.
if False:
    new_model = keras.models.load_model(f'{saved_models_path}/{name}/model.keras')
    # The extended run is saved in its own directory, which must exist
    # before the weights and the model are written.
    Path(f"{saved_models_path}/{name}_XL").mkdir(parents=True, exist_ok=True)
    # Train the reloaded model (it used to be loaded and then ignored) and
    # continue the epoch numbering after the configured first-run epochs.
    new_model.fit(X_train, y_train, epochs=1000,
            validation_data=(X_val, y_val), verbose=2,
            callbacks=my_callbacks,batch_size=batch_size, initial_epoch=epochs)
    new_model.save_weights(f'{saved_models_path}/{name}_XL/model.weights.h5', overwrite=True)
    new_model.save(f'{saved_models_path}/{name}_XL/model.keras')