# Pretraining `num_rows` vs transferred performance
Pretrain fastsim weights on 1M, 2M, 4M, 8M, ... rows.

Then transfer the pretrained weights and run a fixed fullsim transfer training.

In [1]:
import sys
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

from tqdm import tqdm
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import initializers

import wandb
from wandb.keras import WandbCallback

# https://gitlab.cern.ch/atlas/ATLAS-top-tagging-open-data/-/blob/master/preprocessing.py
import preprocessing

# Seed NumPy and TensorFlow so that repeated runs start from the same random state.
np.random.seed(8)
tf.random.set_seed(8)

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
# Iterate over every visible GPU: indexing [0] raises IndexError when no GPU is
# visible (e.g. CUDA_VISIBLE_DEVICES points at a device that does not exist), and
# the setting has to be applied to each visible device, not only the first one.
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2023-05-19 02:46:57.046284: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2023-05-19 02:46:57.078873: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:41:00.0 name: NVIDIA A40 computeCapability: 8.6
coreClock: 1.74GHz coreCount: 84 deviceMemorySize: 44.56GiB deviceMemoryBandwidth: 648.29GiB/s
2023-05-19 02:46:57.079640: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2023-05-19 02:46:57.083264: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2023-05-19 02:46:57.086610: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2023-05-19 02:46:57.087622: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2023-0

In [2]:
# Select the training shards to load: everything in the directory except
# train_1.h5 ... train_14.h5.
train_file_names = os.listdir("/global/ml4hep/spss/mfong/transfer_learning/delphes_train")
# Alternative exclusion range used previously: range(7, 15)
excluded_file_names = {f"train_{i}.h5" for i in range(1, 15)}
# Filtering (rather than list.remove) does not raise when an excluded shard is
# absent from the directory; sorting makes the load order deterministic because
# os.listdir returns entries in arbitrary order.
train_file_names = sorted(
    name for name in train_file_names if name not in excluded_file_names
)
train_file_names
# f = h5py.File('/clusterfs/ml4hep/mfong/transfer_learning/delphes_train.h5', 'r')
# f2 = h5py.File('/clusterfs/ml4hep/mfong/transfer_learning/delphes_test.h5', 'r')

['train_0.h5']

In [3]:
# Count the rows in every selected shard so the feature matrix can be allocated once.
num_samples_per_file = []
for train_file_name in train_file_names:
    # Open each shard in a `with` block so the HDF5 handle is closed again;
    # only the dataset shape is needed here.
    with h5py.File("/global/ml4hep/spss/mfong/transfer_learning/delphes_train/" + train_file_name, 'r') as train_file:
        num_samples_per_file.append(train_file["fjet_clus_eta"].shape[0])
num_samples = sum(num_samples_per_file)

# Constituent-level datasets that are fed to the preprocessing step.
feature_keys = ['fjet_clus_eta', 'fjet_clus_phi', 'fjet_clus_pt', 'fjet_clus_E']
# Flattened width of one preprocessed row.
# NOTE(review): presumably 7 preprocessed features for each of 200 constituents;
# confirm against preprocessing.constituent.
num_features = 7*200

# Uninitialised buffer; it is filled shard by shard in a later cell.
x = np.empty((num_samples, num_features))

In [4]:
# Sanity check: the preallocated buffer should be (num_samples, num_features).
x.shape

(5000000, 1400)

In [5]:
# Fill the preallocated `x` buffer shard by shard with preprocessed constituents.
current_row = 0
for train_file_name, current_num_samples in tqdm(zip(train_file_names, num_samples_per_file)):
    # `f` is intentionally left open and bound: a later cell inspects its datasets.
    f = h5py.File("/global/ml4hep/spss/mfong/transfer_learning/delphes_train/" + train_file_name, 'r')
    row_stop = current_row + current_num_samples

    # Hand only the constituent-level datasets to the preprocessing routine.
    data_dict = {key: dataset for key, dataset in f.items() if key in feature_keys}

    # Write straight into the destination slice; the preprocessed array is not
    # bound to a name so it can be released as soon as the copy is done.
    x[current_row:row_stop] = preprocessing.constituent(data_dict, 200).reshape(
        f["fjet_clus_eta"].shape[0], x.shape[1]
    )

    current_row = row_stop

  log_pt = np.log(pt)
  log_energy = np.log(energy)
  lognorm_pt = np.log(pt / sum_pt[:,np.newaxis])
  lognorm_energy = np.log(energy / sum_energy[:,np.newaxis])
1it [04:42, 282.67s/it]


In [6]:
# Constituent-level datasets used as model inputs.
feature_keys = ['fjet_clus_eta', 'fjet_clus_phi', 'fjet_clus_pt', 'fjet_clus_E']
# Print the name and shape of every dataset in the most recently opened shard.
for name, dataset in f.items():
    print(name, dataset.shape)

fjet_clus_E (5000000, 200)
fjet_clus_eta (5000000, 200)
fjet_clus_phi (5000000, 200)
fjet_clus_pt (5000000, 200)
fjet_eta (5000000,)
fjet_m (5000000,)
fjet_phi (5000000,)
fjet_pt (5000000,)
labels (5000000,)
training_weights (5000000,)


In [7]:
# Load the labels of every selected shard into one preallocated vector, using the
# same shard order and row offsets as the feature matrix.
y = np.empty((num_samples))
current_row = 0
for train_file_name, current_num_samples in tqdm(
    zip(train_file_names, num_samples_per_file), total=len(train_file_names)
):
    # The labels are copied out immediately, so the handle can be closed right
    # away. A separate name is used so the still-open `f` handle is untouched.
    with h5py.File("/global/ml4hep/spss/mfong/transfer_learning/delphes_train/" + train_file_name, 'r') as label_file:
        y[current_row:current_row+current_num_samples] = label_file["labels"][:]
    current_row += current_num_samples

1it [00:00,  1.92it/s]


In [8]:
# Sanity check: feature matrix shape after loading.
x.shape

(5000000, 1400)

In [9]:
# Sanity check: there should be one label per feature row.
y.shape

(5000000,)

In [10]:
# Sequential (unshuffled) split: the first 80% of rows are used for training and
# the remaining rows are held out for validation.
num_samples = len(y)
num_train_samples = int(0.8 * num_samples)
# num_train_samples = num_samples - 2000000       # save 2M rows for test data

# Basic slices, so no data is copied here.
x_train, x_test = x[:num_train_samples], x[num_train_samples:]
y_train, y_test = y[:num_train_samples], y[num_train_samples:]

In [11]:
# Sanity check: size of the training split.
x_train.shape

(4000000, 1400)

In [12]:
# Sanity check: size of the held-out split.
x_test.shape

(1000000, 1400)

In [13]:
# scaler = StandardScaler()
# scaler.fit(x_train[:2000000])   # only use first 2M otherwise takes too long

# x_train = scaler.transform(x_train, copy=False)
# x_test = scaler.transform(x_test, copy=False)

In [14]:
# Set the notebook name through an environment variable before any wandb run starts.
# NOTE(review): presumably read by wandb to identify this notebook; confirm against the wandb docs.
os.environ["WANDB_NOTEBOOK_NAME"] = "pretrain_MLP.ipynb"

In [None]:
# Pretraining sweep: train a fresh MLP on the first N rows of the training split
# for every N in NUM_PRETRAIN_ROWS_LIST, log each run to wandb, and save the
# learning curves and the final weights for every run.

# NUM_PRETRAIN_ROWS_LIST = [1000000, 2000000, 4000000, 8000000, 16000000, 32000000]
NUM_PRETRAIN_ROWS_LIST = [100000, 200000, 400000, 800000, 1000000]

# config = wandb.config
# config.batch_size = 256
config = {
    "batch_size": 256,
    "epochs": 400,
}

# Fail before any training starts if a requested size exceeds the available
# rows: slicing would silently truncate and the run would be logged and saved
# under a row count it was not actually trained on.
too_large = [n for n in NUM_PRETRAIN_ROWS_LIST if n > len(x_train)]
if too_large:
    raise ValueError(
        f"Requested pretraining sizes {too_large} exceed the {len(x_train)} available training rows"
    )

# Create the output directories up front so a missing directory cannot abort
# the cell after a long training run.
os.makedirs("output", exist_ok=True)
os.makedirs("models", exist_ok=True)


def _save_history_plot(history, metric, label, ylabel, title, path):
    """Plot a training metric and its validation counterpart, then save the figure.

    Args:
        history: object returned by ``model.fit``; its ``history`` dict is read.
        metric: key of the training metric; the validation key is ``"val_" + metric``.
        label: legend label of the training curve; the validation curve is
            labelled ``"val_" + label``.
        ylabel: y-axis label.
        title: figure title.
        path: file the figure is written to.
    """
    fig = plt.figure()
    plt.plot(history.history[metric], label=label)
    plt.plot(history.history["val_" + metric], label="val_" + label)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel("Epoch")
    plt.legend()
    plt.savefig(path)
    # Display the figure, then close it so figures do not accumulate over the sweep.
    plt.show()
    plt.close(fig)


for num_pretrain_rows in NUM_PRETRAIN_ROWS_LIST:
    # config.num_pretrain_rows = num_pretrain_rows
    config["num_pretrain_rows"] = num_pretrain_rows

    # Build the shared name and title once; they are reused for the wandb run,
    # the plots and the weight file.
    run_tag = f"preprocess_fastsim_MLP_{num_pretrain_rows // 1000}K_rows"
    plot_title = f"Preprocessed Fastsim MLP ({num_pretrain_rows // 1000}K Rows)"

    run = wandb.init(project="pretrain_MLP", name=run_tag, config=config, reinit=True)
    exit_code = 1  # treated as failed until the whole iteration has completed
    try:
        model = Sequential()
        model.add(Dense(64, input_shape=(x_train.shape[1],), activation='relu'))
        model.add(Dense(8, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        history = model.fit(
            x_train[:config["num_pretrain_rows"]],
            y_train[:config["num_pretrain_rows"]],
            epochs=config["epochs"],
            batch_size=config["batch_size"],
            shuffle=True,
            validation_data=(x_test, y_test),
            callbacks=[WandbCallback()]
        )

        # Persist the weights before plotting so a plotting error cannot lose
        # the trained model.
        model.save_weights(f"models/{run_tag}.h5")

        _save_history_plot(
            history, "accuracy", "acc", "Accuracy", plot_title,
            f"output/{run_tag}_acc.png",
        )
        _save_history_plot(
            history, "loss", "loss", "Loss", plot_title,
            f"output/{run_tag}_loss.png",
        )
        exit_code = 0
    finally:
        # Always close the wandb run, even when training or saving raised.
        wandb.finish(exit_code=exit_code)

[34m[1mwandb[0m: Currently logged in as: [33mmingfong[0m. Use [1m`wandb login --relogin`[0m to force relogin


2023-05-19 02:52:04.621494: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2023-05-19 02:52:04.637520: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 2994330000 Hz
2023-05-19 02:52:04.639499: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55a0b000aa00 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2023-05-19 02:52:04.639543: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2023-05-19 02:52:04.783329: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55a0b0074a20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-05-19 02:52:04.783399: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA A40, Compute Capability 8.6
2023-05-19 02:52:04.784475: I tenso

Epoch 1/400


2023-05-19 02:58:08.166282: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


2023-05-19 03:00:14.259797: W tensorflow/python/util/util.cc:329] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 2/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 7/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 12/400
Epoch 13/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 14/400
Epoch 15/400
Epoch 16/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 17/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 18/400
Epoch 19/400
Epoch 20/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 21/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 22/400
Epoch 23/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 24/400
Epoch 25/400
Epoch 26/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 27/400
Epoch 28/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 29/400
Epoch 30/400
Epoch 31/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78/400
Epoch 79/400
Epoch 80/400
Epoch 81/400
Epoch 82/400
Epoch 83/400
Epoch 84/400
Epoch 85/400
Epoch 86/400
Epoch 87/400
Epoch 88/400
Epoch 89/400
Epoch 90/400
Epoch 91/400
Epoch 92/400
Epoch 93/400
Epoch 94/400
Epoch 95/400
Epoch 96/400
Epoch 97/400
Epoch 98/400
Epoch 99/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 100/400
Epoch 101/400
Epoch 102/400
Epoch 103/400
Epoch 104/400
Epoch 105/400
Epoch 106/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230519_025153-ckir1988/files/model-best)... Done. 0.0s


Epoch 107/400
Epoch 108/400
Epoch 109/400
Epoch 110/400
Epoch 111/400
Epoch 112/400
Epoch 113/400
Epoch 114/400
Epoch 115/400
Epoch 116/400
Epoch 117/400
Epoch 118/400
Epoch 119/400
Epoch 120/400
Epoch 121/400