In [1]:
# Retraining NN with Elegy then do HMC on Mana
# Author: Peter Nov 5 2022
# Edited by Linnea August/September 2023

# New Requirements:
# conda install python=3.9 numpy scipy pandas matplotlib
# conda install -c anaconda cudatoolkit
# pip install tensorflow
# pip install tensorflow-io\[tensorflow\] # Seems to want specific older tf versions

# pip install elegy==0.8.5 # Because 0.8.6 has error.
# pip install --upgrade "jax[cuda12_local]==0.4.13" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
# pip install tfp-nightly tensorflow_io tensorflow

"""
Train NN.
Authored by Peter July 2023
Edited by Linnea August/September 2023

New Requirements:
conda install python=3.9 numpy scipy pandas matplotlib
conda install -c anaconda cudatoolkit
pip install tensorflow
pip install tensorflow-io\[tensorflow\] # Seems to want specific older tf versions
"""

import os
from collections import defaultdict
import numpy as np
import h5py
import matplotlib.pyplot as plt
import datetime

import keras_core as keras


import tensorflow_io as tfio
from tensorflow.data import Dataset
from tensorflow.data.experimental import AUTOTUNE

#import tensorflow as tf 
# #os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = 'false'
# #os.environ['XLA_PYTHON_CLIENT_MEM_FRACTION'] = '.10'
# #os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# import jax
# import jax.numpy as jnp
# from jax import random
# from jax import vmap
# from jax import jit
# from jax import grad
# #assert jax.default_backend() == 'gpu'

# import elegy # pip install elegy
# import optax
# import tensorflow_io as tfio # pip install tensorflow-io
# #import tensorflow as tf # Recommended not to import this with jax because will also try to grab memory.
# from tensorflow.data import Dataset # Trying not to import tf. 

2023-09-27 15:52:12.516771: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Using TensorFlow backend


# Train positive

In [2]:
# Build the train/validation tf.data pipelines for one polarity from the HDF5 file.
polarity = 'pos'
path = '/home/linneamw/sadow_lts/personal/linneamw/research/gcr/data/2023_07_01'
f = f'{path}/{polarity}/model_collection_1AU_90deg_0deg_fixed_training.h5'
# 8 input parameters for the NN: alpha, cmf, vspoles, cpa, pwr1par, pwr2par, pwr1perr, and pwr2perr.
# features = ['alpha', 'cmf', 'cpa', 'pwr1par', 'pwr1perr', 'pwr2par', 'pwr2perr', 'vspoles']

# Read only the dataset shapes up front; the rows themselves are streamed by tfio below.
with h5py.File(f, 'r') as h5:
    num_samples, num_inputs = h5['X_minmax'].shape
    num_targets, num_flux = h5['Y_log_scaled'].shape

# Dataset.zip stops at the shorter input, so a length mismatch would otherwise
# silently drop rows and make the split sizes below wrong.
if num_targets != num_samples:
    raise ValueError(
        f'X_minmax has {num_samples} rows but Y_log_scaled has {num_targets} rows in {f}'
    )

x = tfio.IODataset.from_hdf5(f, dataset='/X_minmax')
y = tfio.IODataset.from_hdf5(f, dataset='/Y_log_scaled')

# Split: the first 90% of rows (in file order) are for training, the rest for validation.
# The sizes are plain Python ints so take()/skip() do not depend on an implicit
# float -> int64 conversion of a NumPy float.
TRAIN_FRACTION = .9
num_train = int(num_samples * TRAIN_FRACTION)
num_test = num_samples - num_train
full = Dataset.zip((x, y))
train = full.take(num_train)
test = full.skip(num_train)

# Batch (a trailing partial batch is dropped in both splits).
BATCH_SIZE = 128
train = train.batch(BATCH_SIZE, drop_remainder=True).prefetch(AUTOTUNE)
test = test.batch(BATCH_SIZE, drop_remainder=True).prefetch(AUTOTUNE)

# Batch counts derived from the actual split sizes, so they match what the
# batched datasets above yield with drop_remainder=True.
steps_per_epoch = num_train // BATCH_SIZE
validation_steps = num_test // BATCH_SIZE
print(f'Steps per epoch: {steps_per_epoch}')

2023-09-27 15:52:17.144161: W tensorflow_io/core/kernels/audio_video_mp3_kernels.cc:271] libmp3lame.so.0 or lame functions are not available
2023-09-27 15:52:17.144364: I tensorflow_io/core/kernels/cpu_check.cc:128] Your CPU supports instructions that this TensorFlow IO binary was not compiled to use: AVX AVX2 FMA
2023-09-27 15:52:17.392395: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Steps per epoch: 14683


In [3]:
# Build, compile and train the fully connected network for the current `polarity`.
model_version = 'v3.0'

# Output locations: best checkpoint and a timestamped TensorBoard run directory.
model_path = f'../models/model_{model_version}_{polarity}.keras'  # Must end with keras.
log_dir = f'../../tensorboard_logs/fit/model_{model_version}_{polarity}/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}'

# Network: 8 inputs -> 256 selu -> 256 selu -> 32 linear outputs.
# Every Dense kernel shares the same L2 penalty.
l2 = keras.regularizers.L2(l2=1e-6)
layer_stack = [keras.layers.Input(shape=(8,))]
for units, activation in ((256, 'selu'), (256, 'selu'), (32, 'linear')):
    layer_stack.append(
        keras.layers.Dense(units, activation=activation, kernel_regularizer=l2)
    )
model = keras.Sequential(layers=layer_stack)

# All callbacks watch the validation loss.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10)
early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=20)
checkpoint = keras.callbacks.ModelCheckpoint(filepath=model_path, save_best_only=True, monitor='val_loss')
tensorboard = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
callbacks = [reduce_lr, early_stop, checkpoint, tensorboard]

optimizer = keras.optimizers.Adam(learning_rate=1e-4)
model.compile(loss='mae', optimizer=optimizer)

print("\nTensorboard log dir: ", log_dir)

# No explicit steps_per_epoch / validation_steps are passed; `train` and `test`
# are finite batched datasets and each is consumed in full every epoch.
# NOTE(review): Keras documents `shuffle` as ignored for dataset inputs - confirm;
# it is kept here to leave the call unchanged.
fit_options = dict(
    epochs=100,
    validation_data=test,
    shuffle=False,
    verbose=2,
    callbacks=callbacks,
)
history = model.fit(train, **fit_options)



Tensorboard log dir:  ../../tensorboard_logs/fit/model_v3.0_pos/20230927-155217
Epoch 1/100


2023-09-27 15:52:19.148859: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x2b121c00f130 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2023-09-27 15:52:19.148898: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2023-09-27 15:52:19.177846: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-09-27 15:52:19.506182: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
  self.gen.throw(typ, value, traceback)


14683/14683 - 59s - 4ms/step - loss: 0.0086 - val_loss: 0.0038 - learning_rate: 1.0000e-04
Epoch 2/100
14683/14683 - 79s - 5ms/step - loss: 0.0035 - val_loss: 0.0030 - learning_rate: 1.0000e-04
Epoch 3/100
14683/14683 - 82s - 6ms/step - loss: 0.0030 - val_loss: 0.0028 - learning_rate: 1.0000e-04
Epoch 4/100
14683/14683 - 81s - 6ms/step - loss: 0.0028 - val_loss: 0.0024 - learning_rate: 1.0000e-04
Epoch 5/100
14683/14683 - 82s - 6ms/step - loss: 0.0026 - val_loss: 0.0023 - learning_rate: 1.0000e-04
Epoch 6/100
14683/14683 - 82s - 6ms/step - loss: 0.0025 - val_loss: 0.0022 - learning_rate: 1.0000e-04
Epoch 7/100


KeyboardInterrupt: 

# Train negative

In [None]:
# Build the train/validation tf.data pipelines for one polarity from the HDF5 file.
polarity = 'neg'
path = '/home/linneamw/sadow_lts/personal/linneamw/research/gcr/data/2023_07_01'
f = f'{path}/{polarity}/model_collection_1AU_90deg_0deg_fixed_training.h5'
# 8 input parameters for the NN: alpha, cmf, vspoles, cpa, pwr1par, pwr2par, pwr1perr, and pwr2perr.
# features = ['alpha', 'cmf', 'cpa', 'pwr1par', 'pwr1perr', 'pwr2par', 'pwr2perr', 'vspoles']

# Read only the dataset shapes up front; the rows themselves are streamed by tfio below.
with h5py.File(f, 'r') as h5:
    num_samples, num_inputs = h5['X_minmax'].shape
    num_targets, num_flux = h5['Y_log_scaled'].shape

# Dataset.zip stops at the shorter input, so a length mismatch would otherwise
# silently drop rows and make the split sizes below wrong.
if num_targets != num_samples:
    raise ValueError(
        f'X_minmax has {num_samples} rows but Y_log_scaled has {num_targets} rows in {f}'
    )

x = tfio.IODataset.from_hdf5(f, dataset='/X_minmax')
y = tfio.IODataset.from_hdf5(f, dataset='/Y_log_scaled')

# Split: the first 90% of rows (in file order) are for training, the rest for validation.
# The sizes are plain Python ints so take()/skip() do not depend on an implicit
# float -> int64 conversion of a NumPy float.
TRAIN_FRACTION = .9
num_train = int(num_samples * TRAIN_FRACTION)
num_test = num_samples - num_train
full = Dataset.zip((x, y))
train = full.take(num_train)
test = full.skip(num_train)

# Batch (a trailing partial batch is dropped in both splits).
BATCH_SIZE = 128
train = train.batch(BATCH_SIZE, drop_remainder=True).prefetch(AUTOTUNE)
test = test.batch(BATCH_SIZE, drop_remainder=True).prefetch(AUTOTUNE)

# Batch counts derived from the actual split sizes, so they match what the
# batched datasets above yield with drop_remainder=True.
steps_per_epoch = num_train // BATCH_SIZE
validation_steps = num_test // BATCH_SIZE
print(f'Steps per epoch: {steps_per_epoch}')

In [None]:
# Build, compile and train the fully connected network for the current `polarity`.
model_version = 'v3.0'

# Output locations: best checkpoint and a timestamped TensorBoard run directory.
model_path = f'../models/model_{model_version}_{polarity}.keras'  # Must end with keras.
log_dir = f'../../tensorboard_logs/fit/model_{model_version}_{polarity}/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}'

# Network: 8 inputs -> 256 selu -> 256 selu -> 32 linear outputs.
# Every Dense kernel shares the same L2 penalty.
l2 = keras.regularizers.L2(l2=1e-6)
layer_stack = [keras.layers.Input(shape=(8,))]
for units, activation in ((256, 'selu'), (256, 'selu'), (32, 'linear')):
    layer_stack.append(
        keras.layers.Dense(units, activation=activation, kernel_regularizer=l2)
    )
model = keras.Sequential(layers=layer_stack)

# All callbacks watch the validation loss.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10)
early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=20)
checkpoint = keras.callbacks.ModelCheckpoint(filepath=model_path, save_best_only=True, monitor='val_loss')
tensorboard = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
callbacks = [reduce_lr, early_stop, checkpoint, tensorboard]

optimizer = keras.optimizers.Adam(learning_rate=1e-4)
model.compile(loss='mae', optimizer=optimizer)

print("\nTensorboard log dir: ", log_dir)

# No explicit steps_per_epoch / validation_steps are passed; `train` and `test`
# are finite batched datasets and each is consumed in full every epoch.
# NOTE(review): Keras documents `shuffle` as ignored for dataset inputs - confirm;
# it is kept here to leave the call unchanged.
fit_options = dict(
    epochs=100,
    validation_data=test,
    shuffle=False,
    verbose=2,
    callbacks=callbacks,
)
history = model.fit(train, **fit_options)
