# Prediction time comparison between GPU and CPU

## Specs

In [1]:
# Record the CPU model and core/socket layout of the benchmark host.
!lscpu

/bin/bash: /home/alunos/a2162865/miniconda3/envs/tf/lib/libtinfo.so.6: no version information available (required by /bin/bash)
Arquitetura:                     x86_64
Modo(s) operacional da CPU:      32-bit, 64-bit
Ordem dos bytes:                 Little Endian
Tamanhos de endereço:            46 bits physical, 57 bits virtual
CPU(s):                          64
Lista de CPU(s) on-line:         0-63
Thread(s) per núcleo:            1
Núcleo(s) por soquete:           16
Soquete(s):                      4
Nó(s) de NUMA:                   1
ID de fornecedor:                GenuineIntel
Família da CPU:                  6
Modelo:                          106
Nome do modelo:                  Intel(R) Xeon(R) Silver 4316 CPU @ 2.30GHz
Step:                            6
CPU MHz:                         2294.669
BogoMIPS:                        4589.33
Virtualização:                   VT-x
cache de L1d:                    2 MiB
cache de L1i:                    2 MiB
cache de L2:               

In [2]:
# Record the GPU model, driver/CUDA version and current GPU load of the benchmark host.
!nvidia-smi

/bin/bash: /home/alunos/a2162865/miniconda3/envs/tf/lib/libtinfo.so.6: no version information available (required by /bin/bash)
Wed Jun 21 13:47:43 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 520.61.05    Driver Version: 520.61.05    CUDA Version: 11.8     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Quadro RTX 8000     On   | 00000000:01:00.0 Off |                  Off |
| 33%   24C    P8     7W / 260W |      1MiB / 49152MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                        

## Timing

### GPU

In [3]:
# Environment setup and imports for the GPU run.
import os
# Must be set before TensorFlow is imported; '1' filters out INFO-level C++ log messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

import pandas as pd
import tensorflow as tf

# Project-local modules (not shown in this notebook).
from main import Pipeline
from modeling.functional import rnn_stator_model
from utils.configs import rnn_stator_cfg

# Sanity check: the GPU timings below are only meaningful if this reports at least one GPU.
print(f"Num GPUs Available: {len(tf.config.list_physical_devices('GPU'))}\n")



Num GPUs Available: 1



  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Build the stator RNN on a fixed number of input features and restore its weights.
N_FEATURES = 10  # used both to select feature names and to size the model

# The CSV's first column is read as the index and holds the feature names;
# the first N_FEATURES rows are kept (presumably ordered by SHAP importance — confirm).
shap_ranking = pd.read_csv('out/RNN_stator/shap/shap_features_RNN_stator.csv', index_col=0)
feature_names = list(shap_ranking.head(N_FEATURES).index)

p = Pipeline(rnn_stator_model(N_FEATURES), rnn_stator_cfg, feature_names=feature_names)
# NOTE(review): the "_10" in the weights file name presumably refers to N_FEATURES;
# keep the two in sync if the feature count changes.
p.load_model_weights('out/models/RNN_stator_10.h5')

Model: RNN_stator
Num GPUs Available: 1

Batches in the training dataset: 245
Batches in the validation dataset: 9
Batches in the test dataset: 13



<main.Pipeline at 0x7fd19aaaba60>

In [5]:
# GPU single element prediction.
# The input is one random sample of shape [1, 42, 10]; the last dimension equals the
# 10 features used here (42 is presumably the sequence length — TODO confirm).
# The random tensor is created inside the timed statement, so its cost is part of the result.
# NOTE(review): Keras documents calling model(x) directly as the faster path for small
# inputs; predict() adds per-call overhead, which may dominate this measurement — confirm.
%timeit p.model.predict(tf.random.uniform([1,42,10]), verbose=0)

50.5 ms ± 1.67 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
# GPU test dataset prediction (~65000 elements; 13 batches per the pipeline summary).
# Times a full predict() pass over p.test_ds, so iterating the dataset is included
# in the figure, not only the model's forward pass.
# NOTE(review): in the recorded run this cell (32 s) was slower than the much larger
# training set (29.5 s); presumably the input pipeline or a first-pass cost dominates — verify.
%timeit p.model.predict(p.test_ds, verbose=0)

32 s ± 726 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
# GPU train dataset prediction (~1225000 elements; 245 batches per the pipeline summary).
# Times a full predict() pass over p.train_ds, including iteration over the dataset.
%timeit p.model.predict(p.train_ds, verbose=0)

29.5 s ± 446 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### CPU

In [1]:
# Environment setup and imports for the CPU run.
# NOTE: run this section in a fresh kernel (the execution counter restarts at 1 here);
# the variables below only take effect if set before TensorFlow is imported.
import os
# Hide all CUDA devices so TensorFlow runs on the CPU only. TensorFlow then logs a
# "failed call to cuInit: CUDA_ERROR_NO_DEVICE" error, which is expected in this mode.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
# '1' filters out INFO-level C++ log messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
import tensorflow as tf
import pandas as pd
# Sanity check: with the GPU hidden this must report 0.
print(f"Num GPUs Available: {len(tf.config.list_physical_devices('GPU'))}\n")

# Project-local modules (not shown in this notebook).
from main import Pipeline
from modeling.functional import rnn_stator_model
from utils.configs import rnn_stator_cfg



Num GPUs Available: 0



2023-06-21 14:24:16.055017: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the stator RNN on a fixed number of input features and restore its weights.
N_FEATURES = 10  # used both to select feature names and to size the model

# The CSV's first column is read as the index and holds the feature names;
# the first N_FEATURES rows are kept (presumably ordered by SHAP importance — confirm).
shap_ranking = pd.read_csv('out/RNN_stator/shap/shap_features_RNN_stator.csv', index_col=0)
feature_names = list(shap_ranking.head(N_FEATURES).index)

p = Pipeline(rnn_stator_model(N_FEATURES), rnn_stator_cfg, feature_names=feature_names)
# NOTE(review): the "_10" in the weights file name presumably refers to N_FEATURES;
# keep the two in sync if the feature count changes.
p.load_model_weights('out/models/RNN_stator_10.h5')

Model: RNN_stator
Num GPUs Available: 0

Batches in the training dataset: 245
Batches in the validation dataset: 9
Batches in the test dataset: 13



<main.Pipeline at 0x7f4bcba60a90>

In [3]:
# CPU single element prediction.
# The input is one random sample of shape [1, 42, 10]; the last dimension equals the
# 10 features used here (42 is presumably the sequence length — TODO confirm).
# The random tensor is created inside the timed statement, so its cost is part of the result.
# NOTE(review): Keras documents calling model(x) directly as the faster path for small
# inputs; predict() adds per-call overhead, which may dominate this measurement — confirm.
%timeit p.model.predict(tf.random.uniform([1,42,10]), verbose=0)

52.7 ms ± 2.96 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [4]:
# CPU test dataset prediction (~65000 elements; 13 batches per the pipeline summary).
# Times a full predict() pass over p.test_ds, so iterating the dataset is included
# in the figure, not only the model's forward pass.
%timeit p.model.predict(p.test_ds, verbose=0)

39 s ± 304 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
# CPU train dataset prediction (~1225000 elements; 245 batches per the pipeline summary).
# Times a full predict() pass over p.train_ds, including iteration over the dataset.
%timeit p.model.predict(p.train_ds, verbose=0)

2min 26s ± 227 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
