In [None]:
from collections import namedtuple
import numpy as np
import pandas as pd
import torch

In [None]:
# Show up to 200 characters per cell when pandas renders a frame.
pd.set_option('display.max_colwidth', 200)

In [None]:
# Load the profiler CSV export. The first five lines of the file are skipped and
# the column names are supplied explicitly instead of being read from the file.
file = '/workspace/DeepLearningMisc/resnet152_b80-nvtx.csv'
initdf = pd.read_csv(file, skiprows=5,
                 names=['start','duration','gridX','gridY','gridZ','blockX','blockY','blockZ',
                        'registersPerThread','staticSMem','dynamicSMem','size','throughput',
                        'srcMemType','dstMemType','device','context','stream','name','corrid'])
# staticSMem - KB, dynamicSMem - KB, size - MB, throughput - GB/s

# Drop the launch-geometry, memory-type and device columns; none of the visible
# cells below read them. Rebinding (instead of inplace=True) keeps the cell re-runnable.
initdf = initdf.drop(columns=['gridX','gridY','gridZ','blockX','blockY','blockZ',
                              'srcMemType','dstMemType','device'])

# demangling the name
# NOTE(review): torch._C._demangle is a private torch helper — confirm it exists in the torch version in use.
initdf['name'] = initdf['name'].apply(torch._C._demangle)

# Locate the single start/stop profile markers. regex=False matches the text
# literally, so the square brackets need no escaping (the previous "\[...\]"
# patterns were invalid escape sequences in a non-raw string literal).
startprof = initdf.index[initdf['name'].str.contains("[Marker] __start_profile", regex=False)].tolist()
assert len(startprof) == 1, \
    "expected exactly one __start_profile marker, found {}".format(len(startprof))
stopprof = initdf.index[initdf['name'].str.contains("[Marker] __stop_profile", regex=False)].tolist()
assert len(stopprof) == 1, \
    "expected exactly one __stop_profile marker, found {}".format(len(stopprof))

# Keep only the rows between the two markers (label-based slice, both ends inclusive).
initdf = initdf.loc[startprof[0]:stopprof[0], :]

# Rows where every one of these columns is missing are dropped; what remains is
# used below as the table of kernel / memory-operation records.
df = initdf.dropna(subset=['registersPerThread','staticSMem','dynamicSMem','size','throughput'], how='all')

## Working with markers

In [None]:
# contains markers and Cuda Launch Kernels
# Rows are kept when the name contains "[Range start]", "[Range end]" or "Marker",
# or is exactly "cudaLaunchKernel". regex=False matches the text literally, which
# avoids the invalid "\[" escape sequences the regex form needed.
rowNames = initdf['name']
markers = initdf[rowNames.str.contains("[Range start]", regex=False)
                 | rowNames.str.contains("[Range end]", regex=False)
                 | rowNames.str.contains("Marker", regex=False)
                 | (rowNames == "cudaLaunchKernel")]

In [None]:
# One open range: `index` is the row label of its "[Range start]" entry,
# `name` the cleaned-up operation name, `depth` how many ranges were already
# open when it started.
Marker = namedtuple('Marker', ['index', 'name', 'depth'])

In [None]:
stack = []        # ranges that are currently open, outermost first
opsToCorrid = {}  # Marker of a range -> correlation ids of the cudaLaunchKernel rows seen while it was open

# Walk the marker rows in order, keeping a stack of open ranges. Every
# cudaLaunchKernel row is attributed to all ranges open at that point, so an
# outer range also collects the kernels of the ranges nested inside it.
# The first and last rows are skipped via iloc[1:-1]; presumably these are the
# __start_profile / __stop_profile markers — TODO confirm.
for index, row in markers.iloc[1:-1].iterrows():
    name = row['name']
    if "[Range start]" in name:
        opName = name.replace('[Range start] ', '').replace(' (Domain: <unnamed>)', '')
        marker = Marker(index, opName, len(stack))
        stack.append(marker)
        opsToCorrid[marker] = []

    elif "[Range end]" in name:
        if not stack:
            # An end with nothing open used to crash on stack.pop(); report it
            # like the other anomalies and keep going.
            print(" *** range end without an open range: {}, {}".format(index, name))
            continue
        marker = stack.pop()
        top = markers.loc[marker.index, 'name']
        # Swap only the first "[Range start]" tag. A blanket replace("start", "end")
        # also rewrites "start" inside the operation name and then reports a
        # false mismatch for such ranges.
        match = top.replace("[Range start]", "[Range end]", 1)
        if match != name:
            print(" *** does not match; this shouldn't happen ideally")
            # Leave the range open: this end did not belong to it.
            stack.append(marker)

    elif name == "cudaLaunchKernel":
        for marker in stack:
            opsToCorrid[marker].append(row['corrid'])
        if len(stack) == 0:
            print(" *** Kernel with corrid: {} doesn't lie between any markers".format(row['corrid']))
    else:
        print(" *** wrong option")

In [None]:
# Remove, in place, every range that collected no kernel launches.
# The keys are gathered first because a dict cannot be resized while it is iterated.
delkeys = [op for op, launched in opsToCorrid.items() if len(launched) == 0]
for op in delkeys:
    opsToCorrid.pop(op, None)

In [None]:
# Flatten the per-range correlation ids into one list. A kernel launched inside
# nested ranges appears once per enclosing range, so the printed count includes
# those repeats.
allCorrids = [corrid for launched in opsToCorrid.values() for corrid in launched]
print(len(allCorrids))

# Collapse to the distinct correlation ids.
allCorrids = set(allCorrids)

In [None]:
corridToKernelIndex = {}  # cuda launch kernel correlation id -> index of kernel with corresponding correlation id
# Helper dict (not essential)
kernelIndexToRow = {}     # kernel row index -> that row of df

# Build the correlation id -> row labels lookup in a single pass over df,
# instead of rescanning the whole frame once per correlation id.
kernelCorrids = df['corrid'].dropna()
rowsByCorrid = {}
for rowLabel, kernelCorrid in zip(kernelCorrids.index, kernelCorrids):
    rowsByCorrid.setdefault(kernelCorrid, []).append(rowLabel)

for corrid in allCorrids:
    # int(corrid) finds the stored key even when the column is float, because
    # equal int and float values hash and compare equal (123 == 123.0).
    rowIndex = rowsByCorrid.get(int(corrid), [])
    assert len(rowIndex) == 1, \
        "expected exactly one kernel row for corrid {}, found {}".format(corrid, len(rowIndex))
    corridToKernelIndex[corrid] = rowIndex[0]

    # can remove
    kernelIndexToRow[rowIndex[0]] = df.loc[rowIndex[0]]

In [None]:
# Marker of a range -> row indices (in df) of the kernels launched inside it,
# obtained by translating each correlation id through corridToKernelIndex.
# add 6 to indices to get line numbers in csv
opsToKernelIndex = {
    op: [corridToKernelIndex[corrid] for corrid in launched]
    for op, launched in opsToCorrid.items()
}

print(len(opsToKernelIndex))

In [None]:
def _mappingRow(op, kids):
    """Build one output row for a range `op` and the row indices `kids` of its kernels.

    Returns [name, depth, kernel labels ("name[stream]"), kernel count,
    start of the first listed kernel, summed kernel durations].
    """
    kernelRows = [kernelIndexToRow[kid] for kid in kids]
    labels = [kernelRow['name'] + "[" + str(kernelRow['stream']) + "]" for kernelRow in kernelRows]
    # Durations are added in ascending row-index order.
    totalDuration = sum(kernelIndexToRow[kid]['duration'] for kid in sorted(kids))
    return [op.name, op.depth, labels, len(kids), kernelRows[0]['start'], totalDuration]


# One row per range that launched at least one kernel, written out as CSV.
mappingdf = pd.DataFrame([_mappingRow(op, kids) for op, kids in opsToKernelIndex.items()],
                         columns=['pyName', 'depth', 'kernelNames', 'numKernels', 'startTime', 'sumDuration'])
mappingdf.to_csv('/workspace/logs/resnet152_b80-mapping.csv')

In [None]:
# Top-level ranges only (depth 0). .copy() makes topdf an independent frame, so
# adding columns to it later neither touches mappingdf nor triggers pandas'
# SettingWithCopyWarning.
topdf = mappingdf[mappingdf['depth'] == 0].copy()

In [None]:
# duration = startTime of the next top-level row minus this row's startTime;
# the last row has no successor and gets NaN.
# assign() returns a new frame instead of writing into a filtered selection of
# mappingdf (which raises SettingWithCopyWarning), and makes the cell safe to re-run.
topdf = topdf.assign(duration=topdf['startTime'].shift(-1) - topdf['startTime'])
topdf.to_csv('/workspace/logs/resnet152_b80-mapping-top.csv')