# Fine-tuning the MLP from fastsim to fullsim
# Use the old weights from the first run
Run this notebook after `pretrain_MLP.ipynb`.

In [1]:
import sys
import os
import glob
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from tqdm import tqdm
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import initializers

import wandb
from wandb.keras import WandbCallback

# Seed NumPy and TensorFlow's global random generators with a fixed value.
np.random.seed(8)
tf.random.set_seed(8)

# Enable memory growth on every GPU TensorFlow can see. Iterating over the
# list (instead of indexing element 0) keeps this cell from raising IndexError
# when no GPU is visible.
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)
if not physical_devices:
    print("WARNING: no GPU visible to TensorFlow; training will run on CPU.")
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2023-05-19 11:51:20.791309: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2023-05-19 11:51:20.816200: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA A40 computeCapability: 8.6
coreClock: 1.74GHz coreCount: 84 deviceMemorySize: 44.56GiB deviceMemoryBandwidth: 648.29GiB/s
2023-05-19 11:51:20.816698: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2023-05-19 11:51:20.818934: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2023-05-19 11:51:20.821223: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2023-05-19 11:51:20.821719: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2023-0

In [2]:
# Open the fullsim train/test HDF5 files read-only. The handles are left open
# on purpose: the following cells slice datasets directly out of them.
f_full_train = h5py.File(
    "/global/ml4hep/spss/mfong/transfer_learning/train.h5",
    mode="r",
)
f_full_test = h5py.File(
    "/global/ml4hep/spss/mfong/transfer_learning/test.h5",
    mode="r",
)

In [3]:
# Names of the per-jet feature datasets read from each HDF5 file.
feature_keys = ['fjet_clus_eta', 'fjet_clus_phi', 'fjet_clus_pt']

# Print the on-disk shape of every feature dataset, one split after the other.
for split_title, split_file in (("Fullsim Train", f_full_train), ("Fullsim Test", f_full_test)):
    print(split_title)
    for k in feature_keys:
        print(k, split_file[k].shape)

Fullsim Train
fjet_clus_eta (42233012, 200)
fjet_clus_phi (42233012, 200)
fjet_clus_pt (42233012, 200)
Fullsim Test
fjet_clus_eta (2484117, 200)
fjet_clus_phi (2484117, 200)
fjet_clus_pt (2484117, 200)


In [4]:
# Number of leading rows to load from each split.
# To load every training row instead: num_samples = len(f_full_train["labels"])
num_samples = 4000000

# Read the first num_samples rows of each feature dataset and join the pieces
# along axis 1, giving one row per jet.
x_train = np.concatenate(
    tuple(f_full_train[name][:num_samples] for name in feature_keys),
    axis=1,
)
x_train.shape

(4000000, 600)

In [5]:
# Labels for the same leading num_samples rows that make up x_train.
train_labels = f_full_train["labels"]
y_train = train_labels[:num_samples]
y_train.shape

(4000000,)

In [6]:
# Build the test feature matrix the same way as x_train.
# NOTE(review): the slice reuses the training cap num_samples. The test file
# printed above has 2484117 rows, so at num_samples = 4000000 the whole split
# is loaded; a smaller num_samples would also truncate the test set - confirm
# that is intended.
x_test = np.concatenate(
    tuple(f_full_test[name][:num_samples] for name in feature_keys),
    axis=1,
)
x_test.shape

(2484117, 600)

In [7]:
# Labels matching the rows of x_test (same num_samples cap as the features).
test_labels = f_full_test["labels"]
y_test = test_labels[:num_samples]
y_test.shape

(2484117,)

In [8]:
# Fit the scaler on the training split only, then apply that same fitted
# scaler to both splits. copy=False asks scikit-learn to scale in place where
# it can instead of allocating a second copy of each matrix.
# NOTE(review): if the in-place scaling takes effect, re-running this cell
# would fit on already-scaled data - rebuild x_train/x_test first.
scaler = StandardScaler().fit(x_train)

x_train, x_test = (
    scaler.transform(features, copy=False) for features in (x_train, x_test)
)

In [9]:
# Set the WANDB_NOTEBOOK_NAME environment variable to this notebook's
# file name before any wandb run is started.
os.environ.update({"WANDB_NOTEBOOK_NAME": "tuning_MLP.ipynb"})

In [None]:
# Fine-tune the MLP on fullsim data, one wandb run per entry in
# NUM_PRETRAIN_ROWS_LIST. Every run starts from the same "old" fastsim
# checkpoint; num_pretrain_rows is only used to label the run, its plots and
# its saved weights.
# Candidate sweep values (currently disabled):
# NUM_PRETRAIN_ROWS_LIST = [0, 1000000, 2000000, 4000000, 8000000, 16000000, 32000000]
NUM_PRETRAIN_ROWS_LIST = [4000000]
# wandb_run_id_list = ["5ndumuik", "ovkhun2m", "cbwykdzs", "mfcusa0l", "kjyvjndx", "suz9cn8k"]    # wandb id of pretraining runs
config = {
    "batch_size": 256,
    "epochs": 400,
}


def _build_mlp():
    """Return the uncompiled 600 -> 64 -> 8 -> 1 MLP that gets fine-tuned."""
    mlp = Sequential()
    mlp.add(Dense(64, input_shape=(600,), activation='relu'))
    mlp.add(Dense(8, activation='relu'))
    mlp.add(Dense(1, activation='sigmoid'))
    return mlp


def _save_history_plot(history, metric, label, ylabel, title, path):
    """Plot the training and validation curves of one metric and save the figure.

    Args:
        history: the object returned by ``model.fit``.
        metric: key in ``history.history``; the validation key is ``val_<metric>``.
        label: legend label of the training curve (validation uses ``val_<label>``).
        ylabel: y-axis label.
        title: figure title.
        path: file the figure is written to.
    """
    plt.figure()
    plt.plot(history.history[metric], label=label)
    plt.plot(history.history[f"val_{metric}"], label=f"val_{label}")
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel("Epoch")
    plt.legend()
    plt.savefig(path)


# Create the output folders up front so a missing directory cannot make
# save_weights/savefig fail only after the whole training run has finished.
os.makedirs("output", exist_ok=True)
os.makedirs("models", exist_ok=True)

for num_pretrain_rows in NUM_PRETRAIN_ROWS_LIST:
    print(f"Starting tuning with {num_pretrain_rows} rows")
    rows_in_millions = num_pretrain_rows // 1000000
    run_name = f"fullsim_MLP_pretrainOld_{rows_in_millions}M_rows"

    config["num_pretrain_rows"] = num_pretrain_rows
    run = wandb.init(project="fullsim_MLP", name=run_name, config=config, reinit=True)

    model = _build_mlp()
    # Alternative starting point (disabled): the checkpoint pretrained on
    # exactly num_pretrain_rows rows.
    # if num_pretrain_rows != 0:
    #     model.load_weights(f"models/fastsim_MLP_{rows_in_millions}M_rows.h5")

    # load weights from old model
    model.load_weights("models/old_first_experiment/fast_sim_MLP.h5")

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # NOTE(review): the test split is passed as validation data, so the val_*
    # curves are not an independent held-out estimate - confirm this is intended.
    history = model.fit(
        x_train,
        y_train,
        epochs=config["epochs"],
        batch_size=config["batch_size"],
        shuffle=True,
        validation_data=(x_test, y_test),
        callbacks=[wandb.keras.WandbCallback()]
    )

    # Save the weights before plotting so a plotting error cannot lose them.
    model.save_weights(f"models/{run_name}.h5")

    plot_title = f"Fullsim MLP (Pretrained (old) for {rows_in_millions}M Rows)"
    _save_history_plot(history, "accuracy", "acc", "Accuracy", plot_title, f"output/{run_name}_acc.png")
    _save_history_plot(history, "loss", "loss", "Loss", plot_title, f"output/{run_name}_loss.png")

    wandb.finish()

Starting tuning with 4000000 rows


[34m[1mwandb[0m: Currently logged in as: [33mmingfong[0m. Use [1m`wandb login --relogin`[0m to force relogin


2023-05-19 03:17:34.212385: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2023-05-19 03:17:34.228962: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 2994330000 Hz
2023-05-19 03:17:34.231128: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55b6dd1f3720 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2023-05-19 03:17:34.231180: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2023-05-19 03:17:34.378029: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55b6dc503ab0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-05-19 03:17:34.378104: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA A40, Compute Capability 8.6
2023-05-19 03:17:34.379376: I tenso

Epoch 1/400


2023-05-19 03:22:56.914787: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10




2023-05-19 03:25:22.059023: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 5961880800 exceeds 10% of free system memory.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


2023-05-19 03:25:41.194120: W tensorflow/python/util/util.cc:329] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 2/400

2023-05-19 03:26:13.677443: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 5961880800 exceeds 10% of free system memory.


INFO:tensorflow:Assets written to: /global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 3/400

2023-05-19 03:27:04.488320: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 5961880800 exceeds 10% of free system memory.


INFO:tensorflow:Assets written to: /global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 4/400

2023-05-19 03:27:54.626988: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 5961880800 exceeds 10% of free system memory.


INFO:tensorflow:Assets written to: /global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 5/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 6/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 7/400
Epoch 8/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 9/400
Epoch 10/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 11/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 12/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 13/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 14/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 15/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 16/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 17/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 18/400
Epoch 19/400
Epoch 20/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 21/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 22/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 23/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 24/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 25/400
Epoch 26/400
Epoch 27/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 28/400
Epoch 29/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 30/400
Epoch 31/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_031724-r4azjtzy/files/model-best)... Done. 0.0s


Epoch 32/400