# TREx API Examples

This notebook provides a set of TREx API examples.

In [1]:
import trex

# Common path prefix shared by the JSON files that describe the engine.
engine_name = "../tests/inputs/mobilenet.qat.onnx.engine"

# Build the plan from the graph, profile and metadata JSON files (in that order).
plan = trex.EnginePlan(
    f"{engine_name}.graph.json",
    f"{engine_name}.profile.json",
    f"{engine_name}.metadata.json",
)

## List `k` slowest layers

List the `k` slowest layers (here `k = 3`), ranked by their percentage of the total latency:

In [2]:
# Keep the three rows with the largest 'latency.pct_time' value.
top3 = plan.df.nlargest(3, 'latency.pct_time')

# Iterate over the selected rows directly instead of indexing them by position.
for row_label, layer in top3.iterrows():
    print("%s: %s" % (layer["Name"], layer["type"]))

features.15.conv.2.weight + QuantizeLinear_722 + Conv_726 + Add_728: Convolution
features.16.conv.2.weight + QuantizeLinear_771 + Conv_775 + Add_777: Convolution
features.13.conv.2.weight + QuantizeLinear_625 + Conv_629 + Add_631: Convolution


Compute the latency of the top-3 slowest layers:

In [3]:
# Sum the average latency and the percentage-of-total columns over the three slowest layers.
top3_latency, top3_percent = (
    top3[column].sum() for column in ('latency.avg_time', 'latency.pct_time')
)
print(f"top3 latency: {top3_latency:.6f} ms ({top3_percent:.2f}%)")

top3 latency: 0.045236 ms (9.62%)


In [4]:
# Select every layer of a given type with a dataframe query.
ltype = "Convolution"
convs = plan.df.query(f"type == \"{ltype}\"")

# Report how many layers matched, then their median average latency (one value per line).
print(
    f"There are {len(convs)} convolutions",
    convs['latency.avg_time'].median(),
    sep="\n",
)

There are 53 convolutions
0.00586459


In [5]:
# Same selection as the query-based cell above, this time through the EnginePlan helper.
convs2 = plan.get_layers_by_type('Convolution')
print(f"There are {len(convs2)} convolutions")
# Report the median of the frame built in this cell (`convs2`), not of `convs`
# from the earlier cell, so the output actually reflects what the helper returned.
print(convs2['latency.avg_time'].median())

There are 53 convolutions
0.00586459


## Access layer activations

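There are several ways to access a layer's inputs and outputs: as the raw list of dictionaries stored in the dataframe, as the compact string produced by `trex.clean_df`, or as `Activation` instances.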

In [6]:
# Raw form: the 'Inputs' cell of the first convolution, exactly as stored in the dataframe.
print(convs['Inputs'].iloc[0])

[{'Name': '317', 'Location': 'Device', 'Dimensions': [1, 3, 224, 224], 'Format/Datatype': 'Four wide channel vectorized row major Int8 format'}]


In [7]:
# Pass a copy of the convolution layers to trex's clean-up helper; the copy
# (not `convs2`) is the object handed to clean_df.
clean_convs = trex.clean_df(convs2.copy(), inplace=True)

# After cleaning, the first convolution's 'Inputs' entry is displayed as a compact string.
clean_convs['Inputs'].iloc[0]

'Int8 NC/4HW4'

As an `Activation` instance:

In [8]:
# Wrap the first convolution's inputs and outputs as activation objects.
inputs, outputs = trex.create_activations(convs.iloc[0])

# Show the attributes of the first input, one per line.
first_input = inputs[0]
print(
    first_input.name,
    first_input.shape,
    first_input.precision,
    first_input.format,
    first_input.size_bytes,
    sep="\n",
)

317
[1, 3, 224, 224]
INT8
Int8 NC/4HW4
150528


## Query and Grouping

In [9]:
# Group by type, and perform a sum reduction on the latency.
# The two latency columns are selected *before* the reduction so that only they
# are summed. Reducing the whole frame first would also try to aggregate the
# non-numeric layer columns, which is wasted work and depends on how the
# installed pandas version treats such columns in GroupBy.sum().
plan.df.groupby(["type"])[["latency.avg_time", "latency.pct_time"]].sum()

Unnamed: 0_level_0,latency.avg_time,latency.pct_time
type,Unnamed: 1_level_1,Unnamed: 2_level_1
Convolution,0.383324,81.491013
Pooling,0.004525,0.962048
Reformat,0.082539,17.546964


In [10]:
# The trex helper computes the per-type sum of 'latency.avg_time' in a single call;
# in its result 'type' is displayed as a regular column.
trex.group_sum_attr(plan.df, "type", "latency.avg_time")

Unnamed: 0,type,latency.avg_time
0,Convolution,0.383324
1,Pooling,0.004525
2,Reformat,0.082539


In [11]:
# Another trex convenience wrapper: group by 'type' and count the number of members in each group.
# The displayed result has one row per layer type, with the tally in a 'count' column.
trex.group_count(plan.df, "type")

Unnamed: 0,type,count
0,Convolution,53
1,Pooling,1
2,Reformat,18


In [12]:
# Split the cleaned convolutions by kernel shape using boolean row selection.
convs_1x1 = clean_convs.loc[clean_convs["attr.kernel"] == (1, 1)]
convs_3x3 = clean_convs.loc[clean_convs["attr.kernel"] == (3, 3)]

# Count how many convolutions there are for each kernel shape.
print(trex.group_count(clean_convs, "attr.kernel"))

# Show the table of all convolutions whose kernel shape is (1, 1).
trex.display_df(convs_1x1)

  attr.kernel  count
0      (1, 1)     35
1      (3, 3)     18




In [13]:
# Display the dataframe of all 1x1 convolutions whose cleaned "Outputs" string starts with "FP32"
# (presumably INT8 convolutions that produce FP32 outputs; confirm against the displayed table).
convs_1x1_fp32 = convs_1x1[convs_1x1["Outputs"].str.startswith("FP32")]
trex.display_df(convs_1x1_fp32)

Address already in use
Port 40000 is in use by another program. Either identify and stop that program, or start the server with a different port.




2023-04-18 15:32:02,269 - INFO     - Executing shutdown due to inactivity...
2023-04-18 15:32:02,935 - INFO     - Executing shutdown due to inactivity...
