In [1]:
# Retraining NN with Elegy then do HMC on Mana
# Author: Peter Nov 5 2022
# Edited by Linnea August/September 2023

# New Requirements:
# conda install python=3.9 numpy scipy pandas matplotlib
# conda install -c anaconda cudatoolkit
# pip install tensorflow
# pip install tensorflow-io\[tensorflow\] # Seems to want specific older tf versions

# pip install elegy==0.8.5 # Because 0.8.6 has error.
# pip install --upgrade "jax[cuda12_local]==0.4.13" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
# pip install tfp-nightly tensorflow_io tensorflow

"""
Train NN.
Authored by Peter July 2023
Edited by Linnea August/September 2023

New Requirements:
conda install python=3.9 numpy scipy pandas matplotlib
conda install -c anaconda cudatoolkit
pip install tensorflow
pip install tensorflow-io\[tensorflow\] # Seems to want specific older tf versions
"""

import os
from collections import defaultdict
import numpy as np
import h5py
import matplotlib.pyplot as plt
import datetime

import keras_core as keras


import tensorflow_io as tfio
from tensorflow.data import Dataset
from tensorflow.data.experimental import AUTOTUNE

#import tensorflow as tf 
# #os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = 'false'
# #os.environ['XLA_PYTHON_CLIENT_MEM_FRACTION'] = '.10'
# #os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# import jax
# import jax.numpy as jnp
# from jax import random
# from jax import vmap
# from jax import jit
# from jax import grad
# #assert jax.default_backend() == 'gpu'

# import elegy # pip install elegy
# import optax
# import tensorflow_io as tfio # pip install tensorflow-io
# #import tensorflow as tf # Recommended not to import this with jax because will also try to grab memory.
# from tensorflow.data import Dataset # Trying not to import tf. 

2023-08-29 15:36:05.431674: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-29 15:36:05.808605: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Using TensorFlow backend


In [2]:
# Build the train/validation tf.data pipelines by streaming the HDF5 file with tensorflow-io.
polarity = 'pos'  # name of the sub-directory of `path` that holds the training file
path = '/home/linneamw/sadow_lts/personal/linneamw/research/gcr/data/2023_07_01'
f = f'{path}/{polarity}/model_collection_1AU_90deg_0deg_fixed_training.h5'
# 8 input parameters for the NN: alpha, cmf, vspoles, cpa, pwr1par, pwr2par, pwr1perr, and pwr2perr.
# features = ['alpha', 'cmf', 'cpa', 'pwr1par', 'pwr1perr', 'pwr2par', 'pwr2perr', 'vspoles']

# Only the array shapes are read here; the arrays themselves are streamed by IODataset below.
with h5py.File(f, 'r') as h5:
    num_samples, num_inputs = h5['X_minmax'].shape
    num_targets, num_flux = h5['Y_log_scaled'].shape

# Dataset.zip stops at the shorter input, so a length mismatch would otherwise
# be truncated without any warning.
if num_samples != num_targets:
    raise ValueError(
        f'X_minmax has {num_samples} rows but Y_log_scaled has {num_targets} rows in {f}'
    )

x = tfio.IODataset.from_hdf5(f, dataset='/X_minmax')
y = tfio.IODataset.from_hdf5(f, dataset='/Y_log_scaled')

# Split: the first 90% of the rows (in file order) are used for training, the rest for validation.
# The split point is computed once, as a Python int, because take/skip expect an integer count.
TRAIN_FRACTION = .9
num_train = int(np.floor(num_samples * TRAIN_FRACTION))
full = Dataset.zip((x, y))
train = full.take(num_train)
test = full.skip(num_train)

# Batch. drop_remainder=True discards the final partial batch of each split.
BATCH_SIZE = 128
train = train.batch(BATCH_SIZE, drop_remainder=True).prefetch(AUTOTUNE)
test = test.batch(BATCH_SIZE, drop_remainder=True).prefetch(AUTOTUNE)

# Number of full batches in each split, derived from the actual split sizes so
# they agree with what the batched datasets yield.
steps_per_epoch = num_train // BATCH_SIZE
validation_steps = (num_samples - num_train) // BATCH_SIZE
print(f'Steps per epoch: {steps_per_epoch}')

#train_x = TFDatasetAdapter(train.map(lambda x,y: x))
#train_y = TFDatasetAdapter(train.map(lambda x,y: x))
#x = train.map(lambda x,y: x)
#y = train.map(lambda x,y: y)

2023-08-29 15:36:11.525145: W tensorflow_io/core/kernels/audio_video_mp3_kernels.cc:271] libmp3lame.so.0 or lame functions are not available
2023-08-29 15:36:11.525399: I tensorflow_io/core/kernels/cpu_check.cc:128] Your CPU supports instructions that this TensorFlow IO binary was not compiled to use: AVX AVX2 AVX512F FMA
2023-08-29 15:36:11.787467: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Steps per epoch: 14683


In [3]:
# Define model: two 256-unit SELU hidden layers and a linear output layer, all
# with the same L2 kernel regularizer.
# The input and output widths come from the shapes of the HDF5 arrays
# (num_inputs / num_flux, read when the dataset was built) instead of being
# hard-coded, so the model always matches the training file.
l2 = keras.regularizers.L2(l2=1e-6)
model = keras.Sequential(layers=[
    keras.layers.Input(shape=(num_inputs,)),
    keras.layers.Dense(256, activation='selu', kernel_regularizer=l2),
    keras.layers.Dense(256, activation='selu', kernel_regularizer=l2),
    keras.layers.Dense(num_flux, activation='linear', kernel_regularizer=l2),
])

optimizer = keras.optimizers.Adam(learning_rate=1e-4)
model_path = f'../models/model_v1.0_{polarity}.keras'  # Must end with keras.
# One TensorBoard run directory per execution, keyed by the start timestamp.
log_dir = f'../tensorboard_logs/fit/model_v1.0_{polarity}/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}'
callbacks = [
    # Halve the learning rate after 10 epochs without val_loss improvement.
    keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10),
    # Stop training after 20 epochs without val_loss improvement.
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=20),
    # Only overwrite the saved model when val_loss improves.
    keras.callbacks.ModelCheckpoint(filepath=model_path, save_best_only=True, monitor='val_loss'),
    keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1),
]
model.compile(loss='mse', optimizer=optimizer)

history = model.fit(
    train,
    epochs=100,
    #steps_per_epoch=steps_per_epoch, #6 * 10000, #10000, # 10k*128 is approximate size of training set.
    validation_data=test,
    #validation_steps=1000,
    # Keras documents `shuffle` as ignored for tf.data.Dataset inputs; batches
    # are consumed in the order the dataset yields them.
    shuffle=False,
    verbose=2,
    callbacks=callbacks,
)


Epoch 1/100


2023-08-29 15:36:13.430763: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x2b604c02abf0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2023-08-29 15:36:13.430805: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2023-08-29 15:36:13.476257: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-08-29 15:36:13.747292: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
  self.gen.throw(typ, value, traceback)


14683/14683 - 55s - 4ms/step - loss: 0.0016 - val_loss: 2.4655e-04 - learning_rate: 1.0000e-04
Epoch 2/100
14683/14683 - 52s - 4ms/step - loss: 1.8853e-04 - val_loss: 1.4227e-04 - learning_rate: 1.0000e-04
Epoch 3/100
14683/14683 - 56s - 4ms/step - loss: 1.1414e-04 - val_loss: 9.1052e-05 - learning_rate: 1.0000e-04
Epoch 4/100
14683/14683 - 57s - 4ms/step - loss: 7.5064e-05 - val_loss: 6.1310e-05 - learning_rate: 1.0000e-04
Epoch 5/100
14683/14683 - 56s - 4ms/step - loss: 5.3018e-05 - val_loss: 4.4690e-05 - learning_rate: 1.0000e-04
Epoch 6/100
14683/14683 - 54s - 4ms/step - loss: 3.9986e-05 - val_loss: 3.4751e-05 - learning_rate: 1.0000e-04
Epoch 7/100
14683/14683 - 54s - 4ms/step - loss: 3.2146e-05 - val_loss: 2.8631e-05 - learning_rate: 1.0000e-04
Epoch 8/100
14683/14683 - 54s - 4ms/step - loss: 2.7358e-05 - val_loss: 2.4891e-05 - learning_rate: 1.0000e-04
Epoch 9/100
14683/14683 - 55s - 4ms/step - loss: 2.4268e-05 - val_loss: 2.2447e-05 - learning_rate: 1.0000e-04
Epoch 10/100
1468

In [None]:
# Test model load: read a saved .keras file back from disk and run one
# prediction through the *loaded* model to confirm it is usable.
# NOTE(review): this path differs from the checkpoint path used by the training
# cell (model_v1.0_{polarity}.keras) -- confirm this is the file meant to be tested.
model_path = '../models/model_2_256_selu_l21e-6.keras'  # Must end with keras.
model2 = keras.models.load_model(model_path)
x = np.random.rand(1,8)  # a single random input row with 8 features
# Predict with model2 (the loaded model), not the in-memory `model`; otherwise
# the load is never exercised and the cell fails on a fresh kernel.
yhat = model2.predict(x)
print(yhat, yhat.shape)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 263ms/step
[[ 0.36066428  0.3642369   0.37132177  0.38184062  0.39962852  0.42059767
   0.4450127   0.4758604   0.50966704  0.54539305  0.5860867   0.6265509
   0.66512847  0.7015863   0.72998816  0.74664545  0.7489979   0.73533297
   0.7083866   0.66809285  0.6181542   0.55204064  0.4763261   0.38666186
   0.2848085   0.17946152  0.06527229 -0.05374647 -0.18028405 -0.3099442
  -0.4432806  -0.5711507 ]] (1, 32)


In [None]:
# # Make predictions.

# path = '/home/linneamw/sadow_lts/personal/linneamw/research/gcr/data/2023_07_01'
# infile = f'{path}/pos/model_collection_1AU_90deg_0deg_fixed.h5'
# outfile = f'{path}/pos/model_collection_1AU_90deg_0deg_fixed_processed.h5'