# Examples of using the model inspection class and the profiling functions

The inspection class can gather model parameters, gradients, activations and activation gradients.

The profiling module provides information about CPU and GPU timing, memory usage, and the number of floating-point operations (FLOPs).

In [1]:
import torch
from torchtrainer.models.resunet import ResUNet
from torchtrainer.inspector import Inspector
from torchtrainer.profiling import benchmark_model

# Fix the RNG seed so that the dummy data (and any random initialisation done
# when the model is built) is the same on every "Restart & Run All".
torch.manual_seed(0)

# Dummy data. In a real application this should be a batch of the dataset.
batch = torch.rand(8, 1, 224, 224)
# Dummy labels, one per sample of the batch. They are not used by the examples
# shown in this notebook.
labels = torch.randint(0, 1000, (8,))

model = ResUNet((3, 3, 3), (16, 32, 64))
# Put the model in evaluation mode; the trailing semicolon hides the cell output.
model.eval();

### Model inspection

In [2]:
# Check intermediate activations of the model, as well as gradients and parameters
insp = Inspector(model)
# For activations, we need to explicitly start tracking to set up the forward hooks.
insp.start_tracking_activations()

# Clear gradients left over from a previous run, so that re-running this cell
# does not accumulate them into the values reported below.
model.zero_grad()
# Apply model to batch
res = model(batch)
# Dummy calculation of loss and gradients, just as an example.
# `backward()` returns None, so the loss is stored before calling it.
loss = res.sum()
loss.backward()

# Remove the forward hooks
insp.stop_tracking_activations()

# Activations
acts = insp.get_activations()
# Parameters
params = insp.get_params()
# Gradients
grads = insp.get_grads()

print(acts)

[('resunet.conv1', tensor([[[[-4.6665e-02, -5.4258e-02,  3.9909e-02,  ...,  8.8836e-02,
            1.2364e-01,  6.7005e-02],
          [-1.3328e-01,  7.0730e-03,  6.0013e-02,  ...,  1.7016e-01,
            2.7159e-01,  1.2346e-01],
          [-2.9883e-02,  1.0430e-01,  1.3455e-01,  ...,  2.8247e-01,
            4.0626e-02,  7.4960e-02],
          ...,
          [-1.3242e-01, -1.3870e-01,  3.6823e-02,  ...,  1.1227e-01,
            4.4712e-02,  1.8767e-02],
          [-1.9456e-01, -1.9664e-01, -4.6604e-02,  ..., -2.5892e-02,
            1.5835e-01, -2.8012e-02],
          [ 4.3989e-02, -8.3725e-02,  1.0912e-02,  ...,  1.1101e-01,
            9.6274e-02, -9.5274e-03]],

         [[ 1.9907e-01,  4.8187e-02,  2.0805e-01,  ...,  1.1153e-01,
            1.9770e-01,  1.1518e-01],
          [ 1.3899e-01,  1.2978e-01,  2.4191e-01,  ...,  1.4351e-01,
            2.1882e-01,  1.1649e-01],
          [ 2.7736e-01,  1.5601e-01,  3.1698e-01,  ...,  1.8608e-01,
            1.8851e-01,  2.8714e-01],
 

Tracking activations involves copying all the data from the GPU to the CPU (to preserve GPU memory), which is expensive. We can provide an aggregation function that will be applied to the data before copying.

In [3]:
def agg_func(data, module_name, data_type):
    """Reduce a tensor to its two extreme values.

    Args:
        data: Tensor to summarise; only its ``min()`` and ``max()`` are used.
        module_name: Not used by this function. Presumably the name of the
            module the data came from -- confirm against ``Inspector``.
        data_type: Not used by this function. Presumably the kind of data
            being aggregated -- confirm against ``Inspector``.

    Returns:
        A new tensor with two elements: ``[minimum, maximum]`` of ``data``.
    """
    lowest = data.min()
    highest = data.max()
    return torch.tensor([lowest, highest])

# Every tracked activation is reduced by `agg_func` before being stored.
insp = Inspector(model, agg_func=agg_func)
insp.start_tracking_activations()
# Only the forward activations are read here, so there is no need to build the
# autograd graph during the forward pass.
with torch.no_grad():
    res = model(batch)
insp.stop_tracking_activations()

acts = insp.get_activations()
print(acts)

[('resunet.conv1', tensor([-0.8424,  0.6387])), ('resunet.bn1', tensor([-0.8424,  0.6387])), ('resunet.encoder.stage_0.0.conv1', tensor([-0.6121,  0.5898])), ('resunet.encoder.stage_0.0.bn1', tensor([-0.6121,  0.5898])), ('resunet.encoder.stage_0.0.conv2', tensor([-0.4284,  0.4042])), ('resunet.encoder.stage_0.0.bn2', tensor([-0.4284,  0.4042])), ('resunet.encoder.stage_0.0.downsample.0', tensor([-0.8522,  0.7301])), ('resunet.encoder.stage_0.0.downsample.1', tensor([-0.8522,  0.7301])), ('resunet.encoder.stage_0.0', tensor([0.0000, 0.8390])), ('resunet.encoder.stage_0.1.conv1', tensor([-0.8038,  0.5512])), ('resunet.encoder.stage_0.1.bn1', tensor([-0.8038,  0.5512])), ('resunet.encoder.stage_0.1.conv2', tensor([-0.5279,  0.6430])), ('resunet.encoder.stage_0.1.bn2', tensor([-0.5279,  0.6430])), ('resunet.encoder.stage_0.1', tensor([0.0000, 0.8425])), ('resunet.encoder.stage_0.2.conv1', tensor([-0.7371,  1.1194])), ('resunet.encoder.stage_0.2.bn1', tensor([-0.7371,  1.1194])), ('resunet

We can also track only specific modules:

In [4]:
# Restrict tracking to the listed modules; `agg_func` is passed by keyword,
# as in the previous example.
insp = Inspector(model, [model.conv1, model.mid_block], agg_func=agg_func)
insp.start_tracking_activations()
# Only the forward activations are read here, so there is no need to build the
# autograd graph during the forward pass.
with torch.no_grad():
    res = model(batch)
insp.stop_tracking_activations()

acts = insp.get_activations()
print(acts)

[('resunet.conv1', tensor([-0.8424,  0.6387])), ('resunet.mid_block', tensor([0.0000, 9.1394]))]


It is also possible to track activation gradients. Note, however, that they cannot be captured for a layer if the previous layer uses an in-place operation.

In [5]:
# Track the activation gradients of a single module.
insp = Inspector(model, [model.conv1])
insp.start_tracking_act_grads()
res = model(batch)
# NOTE(review): tracking is stopped before the backward pass, yet the recorded
# output shows that gradients are captured -- confirm this ordering is intended.
insp.stop_tracking_act_grads()
# `backward()` returns None, so the loss is stored before calling it.
loss = res.sum()
loss.backward()

act_grads = insp.get_act_grads()
print(act_grads)

[('resunet.conv1', [tensor([[[[ 0.0000e+00,  0.0000e+00,  2.4806e+03,  ..., -1.0865e+03,
           -5.4773e+02, -1.5957e+02],
          [ 0.0000e+00, -4.3985e+02, -9.8046e+02,  ...,  1.1040e+03,
           -3.8999e+02, -2.2753e+02],
          [ 0.0000e+00, -1.1126e+03, -1.7470e+03,  ...,  1.0601e+03,
           -1.1301e+03, -1.6156e+02],
          ...,
          [ 0.0000e+00,  0.0000e+00,  3.1023e+02,  ...,  1.1939e+03,
           -1.3787e+02,  1.2163e+02],
          [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           -5.0614e+02,  0.0000e+00],
          [-8.5486e+01,  0.0000e+00,  2.9515e+02,  ..., -2.9359e+02,
           -9.0852e+01,  0.0000e+00]],

         [[ 3.1095e+03, -4.7413e+03, -1.1404e+03,  ..., -1.5802e+02,
           -3.5118e+03,  4.7497e+01],
          [ 7.4652e+02, -4.2748e+02,  1.8760e+03,  ...,  6.3815e+02,
            1.2329e+03, -4.5365e+02],
          [ 5.0214e+03, -5.8283e+02,  5.6391e+03,  ...,  2.7216e+03,
            1.3817e+03, -3.3860e+02],


### Model profiling

In [6]:
# Shape of the input tensor handed to the benchmark.
tensor_shape = (8, 1, 224, 224)

# Training benchmark: gradients enabled, backward pass included, model info requested.
stats_train = benchmark_model(
    model,
    tensor_shape,
    no_grad=False,
    call_backward=True,
    use_float16=True,
    return_model_info=True,
)

# Inference benchmark: gradients disabled, no backward pass, no model info.
stats_val = benchmark_model(
    model,
    tensor_shape,
    no_grad=True,
    call_backward=False,
    use_float16=True,
    return_model_info=False,
)

# The units for each metric are also included in the dictionary
print(stats_train)
print(stats_val)

Unsupported operator aten::batch_norm encountered 47 time(s)
Unsupported operator aten::add_ encountered 19 time(s)
Unsupported operator aten::upsample_nearest2d encountered 3 time(s)
Unsupported operator aten::add_ encountered 19 time(s)


{'params': 0.450128, 'activations': 0.090116096, 'flops': 15.10699008, 'memory': 0.39917755126953125, 'time_cpu': 0.03401064872741699, 'time_gpu': 0.17510195922851562, 'info': ['params: M', 'activations: G', 'flops: G', 'memory: GiB', 'time_cpu: s', 'time_gpu: s']}
{'params': 0.0, 'activations': 0.0, 'flops': 0.0, 'memory': 0.15419673919677734, 'time_cpu': 0.013000726699829102, 'time_gpu': 0.01195849609375, 'info': ['params: M', 'activations: G', 'flops: G', 'memory: GiB', 'time_cpu: s', 'time_gpu: s']}
