# Tutorial 3: PAPI profiling PyTorch on CPU and GPU

In this tutorial we will see how `CyPAPI` can be used to profile computations executed by PyTorch on the CPU as well as on the GPU.

The computation to profile is simple:
- Create two 1000x1000 matrices populated with random numbers
- Perform matrix multiplication and get the resulting matrix

In [3]:
# Import only the CyPAPI names this notebook uses rather than a wildcard,
# so it is clear where each name comes from.
from cypapi import PyPAPI_EventSet, pyPAPI_library_init

# Call the library initializer once up front; the cells below create their
# event sets after this has run.
pyPAPI_library_init()

In [4]:
import torch

## Running on the CPU

In [3]:
# Event set for the CPU section; the perf:: events are added to it in the next cell.
eventset = PyPAPI_EventSet()

In [4]:
# Clear the event set first (presumably so this cell can be re-run safely),
# then register the CPU events to count: retired instructions and CPU cycles.
eventset.cleanup()
for cpu_event in ('perf::INSTRUCTIONS', 'perf::CPU-CYCLES'):
    eventset.add_named_event(cpu_event)

In [5]:
# Set random seed for reproducibility.
# This is done before counting starts so that seeding is not part of the
# measurement; the GPU section of this notebook also seeds before start().
torch.manual_seed(42)

# Start counting the events registered on `eventset`
eventset.start()

# Generate random matrices
matrix_A = torch.rand(1000, 1000)
matrix_B = torch.rand(1000, 1000)

# Perform matrix multiplication
result = torch.mm(matrix_A, matrix_B)

# Stop counting and print the values returned by stop()
values = eventset.stop()
print(values)

# Print the matrices and the result (outside the measured region)
print("Matrix A:")
print(matrix_A)
print("\nMatrix B:")
print(matrix_B)
print("\nMatrix multiplication result:")
print(result)


[140032280, 48909058]
Matrix A:
tensor([[0.8823, 0.9150, 0.3829,  ..., 0.5472, 0.4700, 0.0297],
        [0.7294, 0.2729, 0.2407,  ..., 0.7834, 0.1775, 0.4530],
        [0.3940, 0.1965, 0.7266,  ..., 0.8451, 0.0134, 0.5785],
        ...,
        [0.2562, 0.4331, 0.6588,  ..., 0.7695, 0.6146, 0.7825],
        [0.1376, 0.6645, 0.2203,  ..., 0.7154, 0.8790, 0.2222],
        [0.5079, 0.6174, 0.7933,  ..., 0.0663, 0.4269, 0.6613]])

Matrix B:
tensor([[0.5722, 0.8092, 0.4314,  ..., 0.6777, 0.5199, 0.4520],
        [0.7586, 0.1246, 0.9024,  ..., 0.5859, 0.3754, 0.9578],
        [0.6388, 0.5677, 0.4634,  ..., 0.9149, 0.0695, 0.9546],
        ...,
        [0.7106, 0.9383, 0.1809,  ..., 0.3854, 0.7740, 0.6266],
        [0.2585, 0.1205, 0.9097,  ..., 0.9540, 0.9706, 0.5621],
        [0.6839, 0.7743, 0.9107,  ..., 0.6092, 0.9388, 0.6612]])

Matrix multiplication result:
tensor([[235.0628, 243.5534, 255.2339,  ..., 244.6632, 245.5978, 239.0247],
        [249.6568, 255.2137, 261.4881,  ..., 255.2853,

## Running on the GPU

In [5]:
# Separate event set for the GPU section; the cuda::: events are added to it in the next cell.
evtsetgpu = PyPAPI_EventSet()

In [None]:
# Clear the event set first (presumably so this cell can be re-run safely),
# then register the two CUDA events, both qualified with device=0.
evtsetgpu.cleanup()
for gpu_event in (
    'cuda:::dram__bytes_read.sum:device=0',
    'cuda:::sm__warps_launched.sum:device=0',
):
    evtsetgpu.add_named_event(gpu_event)

In [1]:
# Set random seed for reproducibility
torch.manual_seed(42)

# Use the GPU if one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Start counting the events registered on `evtsetgpu`
evtsetgpu.start()

# Generate random matrices on the selected device
matrix_A = torch.rand(1000, 1000, device=device)
matrix_B = torch.rand(1000, 1000, device=device)

# Perform matrix multiplication
result = torch.mm(matrix_A, matrix_B)

# CUDA kernels are launched asynchronously, so wait for the queued work to
# finish before reading the counters. This replaces the implicit wait that the
# device-to-host copy used to provide inside the measured region.
if device.type == "cuda":
    torch.cuda.synchronize(device)

# Stop counting and print the values returned by stop()
valuesgpu = evtsetgpu.stop()
print(valuesgpu)

# Transfer the result back to the CPU for printing. This happens after stop()
# so that the copy is not included in the measurement.
result_cpu = result.to("cpu")

# Print the matrices and the result
print("Matrix A:")
print(matrix_A)
print("\nMatrix B:")
print(matrix_B)
print("\nMatrix multiplication result:")
print(result_cpu)


NameError: name 'torch' is not defined