# TensorFlow Tutorial

In [1]:
# NOTE: TensorFlow can only make use of the GPU when the environment variables
# below are set properly. Each `echo` prints the live value so it can be
# compared against the expected value given in the comment directly above it.
# should be: /app/apps/rhel8/jupyter/2023-04/lib/python3.10/site-packages/nvidia/cudnn
!echo $CUDNN_PATH
# should be: /usr/local/cuda/lib64:/usr/local/cuda/lib64:/app/apps/rhel8/jupyter/2023-04/lib/python3.10/site-packages/nvidia/cudnn/lib:/app/apps/rhel8/jupyter/2023-04/lib/python3.10/site-packages/tensorrt_libs:/app/apps/rhel8/python-anaconda3/2023-04/lib::
!echo $LD_LIBRARY_PATH
# must include: /app/apps/rhel8/jupyter/2023-04/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/opt/jupyterhub/bin:/common/bin:/usr/local/cuda/bin:/usr/localsys/bin:/usr/share/Modules/bin
!echo $PATH

/app/apps/rhel8/jupyter/2023-04/lib/python3.10/site-packages/nvidia/cudnn
/usr/local/cuda/lib64:/usr/local/cuda/lib64:/app/apps/rhel8/jupyter/2023-04/lib/python3.10/site-packages/nvidia/cudnn/lib:/app/apps/rhel8/jupyter/2023-04/lib/python3.10/site-packages/tensorrt_libs:/app/apps/rhel8/python-anaconda3/2023-04/lib::
/app/apps/rhel8/jupyter/2023-04/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/opt/jupyterhub/bin:/common/bin:/usr/local/cuda/bin:/usr/localsys/bin:/usr/share/Modules/bin:/bin


In [2]:
import tensorflow as tf

2023-09-20 14:33:11.452882: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-09-20 14:33:11.507614: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Print the installed TensorFlow version so the environment used for this run
# is recorded in the notebook next to the results.
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.13.0


In [4]:
from tensorflow import keras
from tensorflow.keras import layers, models, datasets
import time
from tensorflow.python.client import device_lib
from numba import cuda 

In [5]:
def get_available_devices():
    """Return the names of the CPU and GPU devices TensorFlow reports locally.

    Asks ``device_lib.list_local_devices()`` for every local device and keeps
    only the entries whose ``device_type`` is ``'CPU'`` or ``'GPU'``, in the
    order they were reported.

    Returns:
        list: the ``name`` attribute of each matching device.
    """
    wanted_types = ('GPU', 'CPU')
    device_names = []
    for device in device_lib.list_local_devices():
        # Skip any other device types that may be reported.
        if device.device_type in wanted_types:
            device_names.append(device.name)
    return device_names
# Show which devices TensorFlow can see; a '/device:GPU:N' entry in the
# returned list means a GPU was detected.
get_available_devices()

2023-09-20 14:33:15.966850: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /device:GPU:0 with 14551 MB memory:  -> device: 0, name: Tesla P6, pci bus id: 0000:18:00.0, compute capability: 6.1


['/device:CPU:0', '/device:GPU:0']

In [6]:
# --- Data -------------------------------------------------------------------
# Fetch CIFAR10 (downloaded on first use) as train/test image and label arrays.
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
# Rescale pixel values so they lie between 0 and 1.
train_images = train_images / 255.0
test_images = test_images / 255.0

# --- Model ------------------------------------------------------------------
# Convolutional base followed by a stack of Dense layers; the final layer
# emits 10 raw logits (no activation), matching from_logits=True below.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10),
])

# --- Compile ----------------------------------------------------------------
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# --- Train (timed) ----------------------------------------------------------
# Wall-clock time around fit() only; the test split doubles as validation data.
start_time = time.time()
history = model.fit(
    train_images,
    train_labels,
    epochs=10,
    validation_data=(test_images, test_labels),
)
end_time = time.time()
print("Time to fit model on GPU: {:.2f} seconds".format(end_time - start_time))

# --- Evaluate ---------------------------------------------------------------
# `evaluate` returns the loss followed by the compiled metrics (accuracy here).
print("Evaluate on test data")
results = model.evaluate(test_images, test_labels)
print("test loss, test acc:", results)

2023-09-20 14:33:17.129761: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14551 MB memory:  -> device: 0, name: Tesla P6, pci bus id: 0000:18:00.0, compute capability: 6.1


Epoch 1/10


2023-09-20 14:33:19.646102: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2023-09-20 14:33:19.894156: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fdac74d9ba0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-09-20 14:33:19.894186: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Tesla P6, Compute Capability 6.1
2023-09-20 14:33:19.899571: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-09-20 14:33:20.051609: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Time to fit model on GPU: 62.78 seconds
Evaluate on test data
test loss, test acc: [0.8848978281021118, 0.7110000252723694]


In [7]:
# Free up the GPU memory held by this kernel so other people/processes can use it.
# `cuda` is numba's CUDA module (imported earlier in the notebook); reset() is
# called on whichever device numba reports as current.
# NOTE(review): presumably this tears down the CUDA context TensorFlow was
# using, so further TensorFlow GPU work in this kernel likely needs a kernel
# restart -- confirm before adding training cells after this point.
device = cuda.get_current_device()
device.reset()

In [8]:
# Run the author's evaluation script from the separate PDFF imputation project
# with its baseline_mlp model, in a shell, timing the whole run with `time`.
# NOTE(review): the working directory is an absolute, user-specific path, so
# this cell only runs where that directory exists. The flag meanings (-dd, -od,
# -m, -tr, -te, -i) are defined by evaluate_model.py, which is not shown here.
!cd /home/craut/wkspce/craut_lfs/MRI_PDFF_imputation/fit_models/ && time python evaluate_model.py -dd ../cut_data_again/real_data/ -od output2 -m baseline_mlp -tr -te -i simple

Num GPUs Available:  1
### TRAINING ###
(34634, 248) (34634,)
Fitting Model on All Data
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
identity Scale
MSE: 12.776887893676758
RMSE: 3.574477195739746
Pearson Correlation: 0.5881908265069317
Other: {}



Raw Scale
MSE: 12.776887893676758
RMSE: 3.574477195739746
Pearson Correlation: 0.5881908265069317
Other: {}




### TESTING ###
(8659, 248) (8659,)
identity Scale

MSE: 12.191680908203125
RMSE: 3.4916586875915527
Pearson Correlation: 0.5969078263768762
Other: {}



Raw Scale
MSE: 12.191680908203125
RMSE: 3.491