# Fine-tuning the MLP from fastsim to fullsim

In [1]:
import sys
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

from tqdm import tqdm
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import initializers

import wandb
from wandb.keras import WandbCallback

# Fix the NumPy and TensorFlow global seeds.
np.random.seed(8)
tf.random.set_seed(8)

# List the GPUs TensorFlow can see and turn on memory growth for each one.
# Iterating (instead of indexing [0]) avoids an IndexError when the list is
# empty and keeps the setting consistent if more than one GPU is visible.
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2023-05-04 02:32:48.705172: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2023-05-04 02:32:48.725798: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:41:00.0 name: NVIDIA A40 computeCapability: 8.6
coreClock: 1.74GHz coreCount: 84 deviceMemorySize: 44.56GiB deviceMemoryBandwidth: 648.29GiB/s
2023-05-04 02:32:48.726267: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2023-05-04 02:32:48.728243: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2023-05-04 02:32:48.730216: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2023-05-04 02:32:48.730692: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2023-0

In [2]:
# Open the fullsim train/test HDF5 files read-only. The handles are left open
# on purpose: later cells read datasets directly from them.
full_train_path = "/global/ml4hep/spss/mfong/transfer_learning/train.h5"
f_full_train = h5py.File(full_train_path, mode='r')

full_test_path = "/global/ml4hep/spss/mfong/transfer_learning/test.h5"
f_full_test = h5py.File(full_test_path, mode='r')

In [9]:
# Names of the per-jet datasets that are used as model inputs.
feature_keys = ['fjet_clus_eta', 'fjet_clus_phi', 'fjet_clus_pt']

# Print the shape of every feature dataset, first for the train file and then
# for the test file.
for split_title, h5_file in (("Fullsim Train", f_full_train),
                             ("Fullsim Test", f_full_test)):
    print(split_title)
    for k in feature_keys:
        print(k, h5_file[k].shape)

Fullsim Train
fjet_clus_eta (42233012, 200)
fjet_clus_phi (42233012, 200)
fjet_clus_pt (42233012, 200)
Fullsim Test
fjet_clus_eta (2484117, 200)
fjet_clus_phi (2484117, 200)
fjet_clus_pt (2484117, 200)


In [14]:
# Number of training rows, taken from the first axis of the label dataset.
num_samples = f_full_train["labels"].shape[0]

# Read each feature dataset and join them side by side along axis 1.
# The per-feature arrays are deliberately not bound to a name so they can be
# released as soon as the concatenated array exists.
x_train = np.concatenate(
    [f_full_train[feature_name][:num_samples] for feature_name in feature_keys],
    axis=1,
)
x_train.shape

(42233012, 600)

In [15]:
# Read the first `num_samples` training labels from the HDF5 file.
train_labels_dset = f_full_train["labels"]
y_train = train_labels_dset[:num_samples]
y_train.shape

(42233012,)

In [16]:
# Read each test feature dataset and join them side by side along axis 1.
# NOTE(review): `num_samples` is computed from the *training* labels, so here
# it only acts as an upper bound on the slice — consider slicing by the test
# set's own length instead.
x_test = np.concatenate(
    [f_full_test[feature_name][:num_samples] for feature_name in feature_keys],
    axis=1,
)
x_test.shape

(2484117, 600)

In [17]:
# Read the test labels from the HDF5 file.
# NOTE(review): `num_samples` is computed from the *training* labels, so here
# it only acts as an upper bound on the slice.
test_labels_dset = f_full_test["labels"]
y_test = test_labels_dset[:num_samples]
y_test.shape

(2484117,)

In [18]:
# Fit the standardisation on the training features only.
scaler = StandardScaler().fit(x_train)

# Apply the fitted scaler to both splits, one after the other. copy=False is
# passed so scikit-learn does not deliberately make an extra copy of the input.
x_train = scaler.transform(x_train, copy=False)
x_test = scaler.transform(x_test, copy=False)

In [20]:
# Pre-training row counts: 1M doubled five times, i.e. 1M, 2M, ..., 32M.
NUM_PRETRAIN_ROWS_LIST = [1000000 * 2 ** doubling for doubling in range(6)]

# W&B run ids; presumably one per entry of NUM_PRETRAIN_ROWS_LIST, in the same
# order — confirm against the runs before relying on the pairing.
wandb_run_id_list = [
    "5ndumuik",
    "ovkhun2m",
    "cbwykdzs",
    "mfcusa0l",
    "kjyvjndx",
    "suz9cn8k",
]