In [2]:
import torch
from torch import nn
import torch
import torchvision.models as models
from torch.profiler import profile, record_function, ProfilerActivity
import cProfile

In [3]:
x = torch.rand(256, 512)
linear = nn.Linear(512, 256).eval()

with torch.inference_mode():
    with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
        linear(x)
print(prof.key_averages().table())

----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
          aten::linear         4.86%      88.000us       100.00%       1.811ms       1.811ms             1  
               aten::t         2.15%      39.000us        26.50%     480.000us     480.000us             1  
       aten::transpose        24.02%     435.000us        24.35%     441.000us     441.000us             1  
      aten::as_strided         0.39%       7.000us         0.39%       7.000us       3.500us             2  
           aten::addmm        48.81%     884.000us        68.64%       1.243ms       1.243ms             1  
          aten::expand         1.05%      19.000us         1.10%      20.000us      20.000us             1  
           aten::co

STAGE:2023-11-15 17:32:58 39315:39315 ActivityProfilerController.cpp:312] Completed Stage: Warm Up
STAGE:2023-11-15 17:32:58 39315:39315 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2023-11-15 17:32:58 39315:39315 ActivityProfilerController.cpp:322] Completed Stage: Post Processing


In [6]:
from torch.ao.quantization import get_default_qconfig
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx
from torch.ao.quantization import QConfigMapping

with torch.inference_mode():

    qconfig = get_default_qconfig('x86')
    qconfig_mapping = QConfigMapping().set_global(qconfig)
    example_inputs = torch.randn_like(x)
    prepared_model = prepare_fx(linear, qconfig_mapping, example_inputs)
    for _ in range(16):
        prepared_model(torch.randn_like(x))

    quantized_model = convert_fx(prepared_model)

    with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
        quantized_model(x)

print(prof.key_averages().table())
print(quantized_model.code)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
        aten::quantize_per_tensor        55.13%     543.000us        55.74%     549.000us     549.000us             1  
                       aten::item         0.71%       7.000us         1.02%      10.000us       2.500us             4  
        aten::_local_scalar_dense         0.30%       3.000us         0.30%       3.000us       0.750us             4  
                quantized::linear        39.80%     392.000us        41.32%     407.000us     407.000us             1  
    aten::_empty_affine_quantized         0.61%       6.000us         0.61%       6.000us       6.000us             1  
                    aten::q_scale       

STAGE:2023-11-15 17:41:34 39315:39315 ActivityProfilerController.cpp:312] Completed Stage: Warm Up
STAGE:2023-11-15 17:41:34 39315:39315 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2023-11-15 17:41:34 39315:39315 ActivityProfilerController.cpp:322] Completed Stage: Post Processing


In [8]:
from torch.ao.quantization import get_default_qconfig
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx
from torch.ao.quantization import QConfigMapping

with torch.inference_mode():

    qconfig = get_default_qconfig('fbgemm')
    qconfig_mapping = QConfigMapping().set_global(qconfig)
    example_inputs = torch.randn_like(x)
    prepared_model = prepare_fx(linear, qconfig_mapping, example_inputs)
    for _ in range(16):
        prepared_model(torch.randn_like(x))

    quantized_model = convert_fx(prepared_model)

    with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
        quantized_model(x)

print(prof.key_averages().table())
print(quantized_model.code)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
        aten::quantize_per_tensor        19.57%      92.000us        20.64%      97.000us      97.000us             1  
                       aten::item         2.13%      10.000us         2.34%      11.000us       2.750us             4  
        aten::_local_scalar_dense         0.21%       1.000us         0.21%       1.000us       0.250us             4  
                quantized::linear        71.28%     335.000us        73.40%     345.000us     345.000us             1  
    aten::_empty_affine_quantized         0.85%       4.000us         0.85%       4.000us       4.000us             1  
                    aten::q_scale       

STAGE:2023-11-15 17:41:44 39315:39315 ActivityProfilerController.cpp:312] Completed Stage: Warm Up
STAGE:2023-11-15 17:41:44 39315:39315 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2023-11-15 17:41:44 39315:39315 ActivityProfilerController.cpp:322] Completed Stage: Post Processing
