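# Fine-tuning the MLP from fastsim to fullsim
Run this notebook after `pretrain_MLP.ipynb`: the fine-tuning loop below loads the pretrained fastsim weights from `models/fastsim_MLP_<N>M_rows.h5`.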

In [1]:
import sys
import os
import glob
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

from tqdm import tqdm
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import initializers

import wandb
from wandb.keras import WandbCallback

# Seed NumPy and TensorFlow so that runs are repeatable.
np.random.seed(8)
tf.random.set_seed(8)

# Enable on-demand GPU memory allocation for every visible GPU instead of
# letting TensorFlow reserve all device memory up front. Iterating over the
# list (rather than indexing [0]) keeps this cell from raising IndexError when
# no GPU is visible, and treats all visible GPUs consistently.
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2023-05-07 19:01:13.131371: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2023-05-07 19:01:13.155037: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:41:00.0 name: NVIDIA A40 computeCapability: 8.6
coreClock: 1.74GHz coreCount: 84 deviceMemorySize: 44.56GiB deviceMemoryBandwidth: 648.29GiB/s
2023-05-07 19:01:13.155846: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2023-05-07 19:01:13.158568: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2023-05-07 19:01:13.167681: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2023-05-07 19:01:13.182550: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2023-0

In [2]:
# Open the fullsim train and test HDF5 files read-only. The handles stay open
# because later cells slice datasets directly out of them.
f_full_train, f_full_test = (
    h5py.File(h5_path, 'r')
    for h5_path in (
        "/global/ml4hep/spss/mfong/transfer_learning/train.h5",
        "/global/ml4hep/spss/mfong/transfer_learning/test.h5",
    )
)

In [3]:
# Names of the HDF5 datasets that are used as model inputs.
feature_keys = ['fjet_clus_eta', 'fjet_clus_phi', 'fjet_clus_pt']

# Report the on-disk shape of every feature dataset, train split first.
for split_heading, split_file in (("Fullsim Train", f_full_train), ("Fullsim Test", f_full_test)):
    print(split_heading)
    for k in feature_keys:
        print(k, split_file[k].shape)

Fullsim Train
fjet_clus_eta (42233012, 200)
fjet_clus_phi (42233012, 200)
fjet_clus_pt (42233012, 200)
Fullsim Test
fjet_clus_eta (2484117, 200)
fjet_clus_phi (2484117, 200)
fjet_clus_pt (2484117, 200)


In [4]:
# Cap on the number of leading rows read from each HDF5 dataset.
# To use every available row instead: num_samples = len(f_full_train["labels"])
num_samples = 4000000
# Read the first num_samples rows of each feature dataset and join them
# column-wise into a single 2-D design matrix.
x_train = np.hstack([f_full_train[key][:num_samples] for key in feature_keys])
x_train.shape

(4000000, 600)

In [5]:
# Training labels: the same leading num_samples rows that are read for x_train.
train_rows = slice(None, num_samples)
y_train = f_full_train["labels"][train_rows]
y_train.shape

(4000000,)

In [6]:
# Test features, assembled column-wise in the same key order as the training
# matrix. The slice is capped at num_samples rows; the test file printed above
# holds fewer rows than that cap, so every test row is read.
x_test = np.hstack([f_full_test[key][:num_samples] for key in feature_keys])
x_test.shape

(2484117, 600)

In [7]:
# Test labels, sliced with the same num_samples cap as the test features.
test_rows = slice(None, num_samples)
y_test = f_full_test["labels"][test_rows]
y_test.shape

(2484117,)

In [8]:
# Fit the standardisation statistics on the training matrix only, then apply
# that one fitted scaler to both splits. copy=False asks scikit-learn to
# transform the arrays in place rather than allocating scaled copies.
scaler = StandardScaler().fit(x_train)
x_train, x_test = (
    scaler.transform(split_matrix, copy=False)
    for split_matrix in (x_train, x_test)
)

In [16]:
# Tell wandb which notebook file this session belongs to.
os.environ.update({"WANDB_NOTEBOOK_NAME": "tuning_MLP.ipynb"})

In [None]:
# Fine-tune one fullsim MLP per pretraining-set size. Each iteration builds the
# network, optionally loads the matching pretrained fastsim weights, trains on
# the fullsim data, logs to wandb, and saves accuracy/loss curves plus the
# tuned weights.
#
# Variant that also includes the "no pretraining" baseline (0 rows):
# NUM_PRETRAIN_ROWS_LIST = [0, 1000000, 2000000, 4000000, 8000000, 16000000, 32000000]
NUM_PRETRAIN_ROWS_LIST = [1000000, 2000000, 4000000, 8000000, 16000000, 32000000]
# wandb_run_id_list = ["5ndumuik", "ovkhun2m", "cbwykdzs", "mfcusa0l", "kjyvjndx", "suz9cn8k"]    # wandb id of pretraining runs
config = {
    "batch_size": 256,
    "epochs": 400,
}

# Make sure the directories written to below exist before a long run starts.
# (models/ must already hold the pretrained weights for non-zero row counts.)
os.makedirs("output", exist_ok=True)
os.makedirs("models", exist_ok=True)


def _save_history_plot(history, metric, label, ylabel, title, path):
    """Plot a training metric and its validation counterpart, then save it.

    Args:
        history: Keras History object returned by ``model.fit``.
        metric: Key in ``history.history`` for the training curve; the
            validation curve is read from ``"val_" + metric``.
        label: Legend label for the training curve; the validation curve is
            labelled ``"val_" + label``.
        ylabel: Text for the y-axis.
        title: Figure title.
        path: Destination file passed to ``savefig``.
    """
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label=label)
    ax.plot(history.history[f"val_{metric}"], label=f"val_{label}")
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel("Epoch")
    ax.legend()
    fig.savefig(path)
    # Render the figure in the notebook now, then release it so figures do not
    # pile up in memory across loop iterations.
    plt.show()
    plt.close(fig)


for num_pretrain_rows in NUM_PRETRAIN_ROWS_LIST:
    print(f"Starting tuning with {num_pretrain_rows} rows")
    # Shared "<N>M" tag used in the run name, plot titles and file names.
    rows_label = f"{num_pretrain_rows // 1000000}M"
    config["num_pretrain_rows"] = num_pretrain_rows

    # Using the run as a context manager finishes it even when training raises
    # or is interrupted, so a failed run is not left open.
    with wandb.init(project="fullsim_MLP", name=f"fullsim_MLP_pretrain_{rows_label}_rows", config=config, reinit=True) as run:
        # 600 inputs = the three 200-wide feature datasets joined column-wise.
        model = Sequential()
        model.add(Dense(64, input_shape=(600,), activation='relu'))
        model.add(Dense(8, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        # 0 rows means "train from scratch": keep the random initialisation.
        if num_pretrain_rows != 0:
            model.load_weights(f"models/fastsim_MLP_{rows_label}_rows.h5")
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

        # NOTE(review): the test split doubles as validation data here, so the
        # val_* curves (and anything selected on them) are test-set metrics.
        history = model.fit(
            x_train,
            y_train,
            epochs=config["epochs"],
            batch_size=config["batch_size"],
            shuffle=True,
            validation_data=(x_test, y_test),
            callbacks=[WandbCallback()]
        )

        plot_title = f"Fullsim MLP (Pretrained for {rows_label} Rows)"
        _save_history_plot(
            history, "accuracy", "acc", "Accuracy", plot_title,
            f"output/fullsim_MLP_pretrain_{rows_label}_rows_acc.png",
        )
        _save_history_plot(
            history, "loss", "loss", "Loss", plot_title,
            f"output/fullsim_MLP_pretrain_{rows_label}_rows_loss.png",
        )

        model.save_weights(f"models/fullsim_MLP_pretrain_{rows_label}_rows.h5")

Starting tuning with 1000000 rows


Epoch 1/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230509_002732-mkebgvja/files/model-best)... Done. 0.0s


Epoch 2/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230509_002732-mkebgvja/files/model-best)... Done. 0.0s


Epoch 3/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230509_002732-mkebgvja/files/model-best)... Done. 0.0s


Epoch 4/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230509_002732-mkebgvja/files/model-best)... Done. 0.0s


Epoch 5/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230509_002732-mkebgvja/files/model-best)... Done. 0.0s


Epoch 6/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230509_002732-mkebgvja/files/model-best)... Done. 0.0s


Epoch 7/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230509_002732-mkebgvja/files/model-best)... Done. 0.0s


Epoch 8/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230509_002732-mkebgvja/files/model-best)... Done. 0.0s


Epoch 9/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230509_002732-mkebgvja/files/model-best)... Done. 0.0s


Epoch 10/400


[34m[1mwandb[0m: Adding directory to artifact (/global/home/users/mfong/git/transfer-learning/wandb/run-20230509_002732-mkebgvja/files/model-best)... Done. 0.0s


Epoch 11/400
 2562/15625 [===>..........................] - ETA: 26s - loss: 0.4672 - accuracy: 0.7753