In [29]:
import os

import torch
import torchvision.transforms as transforms
from omegaconf import OmegaConf
from PIL import Image
from torch.profiler import ProfilerActivity, profile, tensorboard_trace_handler

from FishEye.models.model import FishNN
from FishEye.predict_model import preprocess_images

def enter_project_root():
    """Step up one directory when the kernel was started inside ``notebooks/``.

    The data, model and config paths used in this notebook are written
    relative to the parent of ``notebooks/``.

    Only the last component of the working directory is compared. A substring
    test on the whole path would also fire when some ancestor directory merely
    contains "notebooks" in its name, and would then climb one level further
    on every re-run of the cell. With the exact comparison the call is
    idempotent: once the parent directory is current, it does nothing.
    """
    if os.path.basename(os.getcwd()) == "notebooks":
        os.chdir("..")


enter_project_root()

In [4]:
# Read the sample images from disk, turn each one into a tensor and hand the
# resulting list to the project's preprocessing helper.
image_filenames = [
    "data/raw/NA_Fish_Dataset/Trout/00005.png",
    "data/raw/NA_Fish_Dataset/Red Mullet/00017.png",
]

to_tensor = transforms.ToTensor()
images = list(map(to_tensor, map(Image.open, image_filenames)))

images = preprocess_images(images)

In [10]:
DEVICE = "cpu"

# Restore the model from its checkpoint, then place both the model and the
# input batch on the device being profiled.
model = FishNN.load_from_checkpoint("models/epoch=99-step=600.ckpt", cfg=OmegaConf.load("config/config.yaml"))
model.to(DEVICE)
# Put any train/eval-dependent layers (dropout, batch norm, ...) into
# inference behaviour so the profiled forward pass does not mutate the model.
model.eval()
images = images.to(DEVICE)

# Only the forward pass is of interest here, so autograd recording is switched
# off; otherwise its bookkeeping shows up in both the time and memory columns.
with torch.no_grad():
    # One un-profiled pass first, so one-time setup cost of the very first
    # call does not end up in the measurements.
    model(images)

    with profile(activities=[ProfilerActivity.CPU], record_shapes=True, profile_memory=True) as prof:
        model(images)

STAGE:2024-01-18 11:34:35 7025:7025 ActivityProfilerController.cpp:312] Completed Stage: Warm Up
STAGE:2024-01-18 11:34:35 7025:7025 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2024-01-18 11:34:35 7025:7025 ActivityProfilerController.cpp:322] Completed Stage: Post Processing


In [11]:
# Aggregate the recorded events per operator and show the ten entries with
# the largest total CPU time.
cpu_averages = prof.key_averages()
print(cpu_averages.table(sort_by="cpu_time_total", row_limit=10))


----------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                        Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
----------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                aten::linear         0.14%       6.000us        59.13%       2.555ms       2.555ms          72 b           0 b             1  
                 aten::addmm        58.41%       2.524ms        58.71%       2.537ms       2.537ms          72 b          72 b             1  
                aten::conv2d         0.09%       4.000us        28.77%       1.243ms       1.243ms       7.97 Mb           0 b             1  
           aten::convolution         0.42%      18.000us        28.67%       1.239ms       1.239ms       7.97 Mb           0 b             1  

In [7]:
# Same aggregation, but keyed by operator *and* input shape, so calls of one
# operator with different tensor sizes are listed separately (top 30 rows by
# total CPU time).
cpu_averages_by_shape = prof.key_averages(group_by_input_shape=True)
print(cpu_averages_by_shape.table(sort_by="cpu_time_total", row_limit=30))

----------------------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  
                        Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls                                                                      Input Shapes  
----------------------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  
                aten::conv2d         0.05%      10.000us        70.00%      14.466ms      14.466ms             1                           [[2, 3, 445, 590], [16, 3, 3, 3], [16], [], [], [], []]  
           aten::convolution         0.15%      31.000us        69.95%      14.456ms      14.456ms             1                   [[2, 3, 445, 590], [16, 3, 3, 3], [16], [], [], [], [], [], []]  
          aten:

In [14]:
DEVICE = "cuda"

# Move the model and the input batch to the GPU before measuring.
model.to(DEVICE)
# Put any train/eval-dependent layers (dropout, batch norm, ...) into
# inference behaviour so the profiled forward pass does not mutate the model.
model.eval()
images = images.to(DEVICE)

# Only the forward pass is of interest here, so autograd recording is switched
# off; otherwise its bookkeeping shows up in both the time and memory columns.
with torch.no_grad():
    # One un-profiled pass first: the first CUDA call pays for one-time
    # initialisation, which would otherwise dominate a single-shot profile.
    # The synchronize makes sure that queued GPU work has finished before the
    # profiler starts.
    model(images)
    torch.cuda.synchronize()

    with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True, profile_memory=True) as prof:
        model(images)

STAGE:2024-01-18 11:36:16 7025:7025 ActivityProfilerController.cpp:312] Completed Stage: Warm Up
STAGE:2024-01-18 11:36:16 7025:7025 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2024-01-18 11:36:16 7025:7025 ActivityProfilerController.cpp:322] Completed Stage: Post Processing


In [19]:
# Per-operator, per-input-shape summary of the latest profile, ordered by
# total CUDA time (top 30 rows).
cuda_averages_by_shape = prof.key_averages(group_by_input_shape=True)
print(cuda_averages_by_shape.table(sort_by="cuda_time_total", row_limit=30))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls                                                                      Input Shapes  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  -------------------------------------------------------------------------

In [21]:
# Write the latest profile as a Chrome trace. To inspect it, open
# chrome://tracing in a Chrome browser and load the trace.json file.
trace_path = ".profiling/trace.json"
prof.export_chrome_trace(trace_path)

RuntimeError: Trace is already saved.

In [18]:
# Step plan for the profiler: skip the first step, use the second as profiler
# warm-up, then record the remaining ones. Without a schedule, `prof.step()`
# emits no step markers, so the per-step calls would have no useful effect.
WAIT_STEPS = 1
WARMUP_STEPS = 1
ACTIVE_STEPS = 8
TOTAL_STEPS = WAIT_STEPS + WARMUP_STEPS + ACTIVE_STEPS  # 10 forward passes

# Put any train/eval-dependent layers into inference behaviour, and disable
# autograd recording: only the forward pass is being measured.
model.eval()

with torch.no_grad(), profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    record_shapes=True,
    profile_memory=True,
    schedule=torch.profiler.schedule(wait=WAIT_STEPS, warmup=WARMUP_STEPS, active=ACTIVE_STEPS, repeat=1),
) as prof:
    for _ in range(TOTAL_STEPS):
        model(images)
        # Tell the profiler that one step has finished.
        prof.step()

# Write the recorded steps as a Chrome trace (viewable at chrome://tracing).
prof.export_chrome_trace(".profiling/trace.json")

STAGE:2024-01-18 11:52:01 7025:7025 ActivityProfilerController.cpp:312] Completed Stage: Warm Up
STAGE:2024-01-18 11:52:01 7025:7025 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2024-01-18 11:52:01 7025:7025 ActivityProfilerController.cpp:322] Completed Stage: Post Processing


In [25]:
DEVICE = "cuda"

# Step plan for the profiler: skip the first step, use the second as profiler
# warm-up, then record the remaining ones. Without a schedule, `prof.step()`
# emits no step markers, so the per-step calls would have no useful effect.
WAIT_STEPS = 1
WARMUP_STEPS = 1
ACTIVE_STEPS = 8
TOTAL_STEPS = WAIT_STEPS + WARMUP_STEPS + ACTIVE_STEPS  # 10 forward passes

model.to(DEVICE)
# Put any train/eval-dependent layers into inference behaviour, and (below)
# disable autograd recording: only the forward pass is being measured.
model.eval()
images = images.to(DEVICE)

with torch.no_grad(), profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    record_shapes=True,
    profile_memory=True,
    schedule=torch.profiler.schedule(wait=WAIT_STEPS, warmup=WARMUP_STEPS, active=ACTIVE_STEPS, repeat=1),
    # The handler writes the trace into this log directory for TensorBoard
    # once the recorded steps are complete.
    on_trace_ready=tensorboard_trace_handler(".profiling/log/modelv1"),
) as prof:
    for _ in range(TOTAL_STEPS):
        model(images)
        # Tell the profiler that one step has finished.
        prof.step()

STAGE:2024-01-18 12:12:47 7025:7025 ActivityProfilerController.cpp:312] Completed Stage: Warm Up
STAGE:2024-01-18 12:12:47 7025:7025 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2024-01-18 12:12:47 7025:7025 ActivityProfilerController.cpp:322] Completed Stage: Post Processing


In [33]:
from FishEye.train_model import train

# Load the configuration used for profiling runs and override its batch size.
prof_cfg = OmegaConf.load(".profiling/prof_config.yaml")
prof_cfg.trainer_hyperparameters.batch_size = 256

# `train` is given only the configuration; it does not receive the notebook's
# `model` or `images`, so nothing needs to be moved between devices here.
with profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    record_shapes=True,
    profile_memory=True,
    # The handler writes the trace into this log directory for TensorBoard
    # when the profiler finishes.
    on_trace_ready=tensorboard_trace_handler(".profiling/log/trainv3"),
) as prof:
    train(prof_cfg)

STAGE:2024-01-18 14:44:03 7025:7025 ActivityProfilerController.cpp:312] Completed Stage: Warm Up


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▃▂▅▂▁▂▅█▇▆
train_loss_epoch,▁▄▇██▅▅▃▂▂
trainer/global_step,▁▂▂▃▄▄▅▅▆▇███
val_acc,█▁
val_loss,█▁

0,1
epoch,10.0
test_acc,0.11628
test_loss,19237.50391
train_acc_epoch,0.22384
train_loss_epoch,15477.79395
trainer/global_step,20.0
val_acc,0.09302
val_loss,20753.71289


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eigil/miniconda3/envs/FishEye/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /home/eigil/DTU/02476-ML-Ops/models exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type               | Params
--------------------------------------------------
0 | accuracy   | MulticlassAccuracy | 0     
1 | classifier | Sequential         | 9.4 M 
2 | criterion  | CrossEntropyLoss   | 0     
--------------------------------------------------
9.4 M     Trainable params
0         Non-trainable params
9.4 M     Total params
37.596    Total estimated model params size (MB)


                                                                            

/home/eigil/miniconda3/envs/FishEye/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
/home/eigil/miniconda3/envs/FishEye/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
/home/eigil/miniconda3/envs/FishEye/lib/python3.10/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  3.94it/s, v_num=9wq1, train_acc_step=0.250, train_loss_step=1.11e+4, train_acc_epoch=0.326, train_loss_epoch=9.56e+3, val_acc=0.140, val_loss=8.83e+3] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  3.93it/s, v_num=9wq1, train_acc_step=0.250, train_loss_step=1.11e+4, train_acc_epoch=0.326, train_loss_epoch=9.56e+3, val_acc=0.140, val_loss=8.83e+3]


Restoring states from the checkpoint path at /home/eigil/DTU/02476-ML-Ops/models/epoch=9-step=20-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/eigil/DTU/02476-ML-Ops/models/epoch=9-step=20-v1.ckpt
/home/eigil/miniconda3/envs/FishEye/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 113.26it/s]


STAGE:2024-01-18 14:44:13 7025:7025 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2024-01-18 14:44:13 7025:7025 ActivityProfilerController.cpp:322] Completed Stage: Post Processing
