# NCCL GPU Overlap Trace

This notebook provides the percentage of overlap for communication and compute kernels.

In [None]:
import pandas as pd
import plotly.offline as pyo

from IPython.display import display, HTML, Markdown
import matplotlib.pyplot as plt

import nsys_display

# Widen the notebook container to 95% so wide tables are easier to read.
wide_layout_css = "<style>.container { width:95% !important; }</style>"
display(HTML(wide_layout_css))

# Lift pandas' display limits: show full cell contents and every row.
for unlimited_option in ('display.max_colwidth', 'display.max_rows'):
    pd.set_option(unlimited_option, None)

# Initialise plotly's offline notebook mode.
pyo.init_notebook_mode()

## Statistics

This section shows the compute and communication statistics.

In [None]:
# Template cell: enter the stream ID of the compute stream and of each
# communication stream before running the cells below. The assignments are
# intentionally left blank, so this cell is not valid Python until every one
# of them has a value. The IDs are used as row labels when looking up streams
# in the per-stream table loaded in the next code cell.
# The trailing comments appear to be example values from one profiled run.
COMPUTE_ID =        # e.g. 7 -- single stream ID of the compute stream
TP_ID =             # e.g. 60 -- presumably the tensor-parallel communication stream; confirm
PP_ID =             # e.g. 56 -- presumably the pipeline-parallel stream; its time includes bubble time
DP_ID =             # e.g. 72 -- presumably the data-parallel communication stream; confirm
Others_ID =         # e.g. [1, 2, 3] -- list of the remaining stream IDs; their durations are summed

In [None]:
# Per-stream summary table. Rows are looked up by the stream IDs entered in
# the configuration cell, so the table must be indexed by stream ID.
stream_df = pd.read_parquet('grouped_stream.parquet')

# Denominator shared by every percentage below: the summed duration of all
# streams minus the summed "Communication Overlap Duration".
total_duration = (
    stream_df["Duration"].sum()
    - stream_df["Communication Overlap Duration"].sum()
)
# Fail early with a clear message instead of producing inf/NaN percentages
# that would only surface later inside the pie chart. Written as
# `not (x > 0)` so that a NaN total is rejected as well.
if not total_duration > 0:
    raise ValueError(
        "Total stream duration must be positive to compute percentages, "
        f"got {total_duration!r}; check 'grouped_stream.parquet'."
    )

# Column holding each stream's duration used for the breakdown.
exclusive_col = "Exclude Compute Overlap Duration"

# `.loc[row, column]` reads the single cell directly instead of first
# materialising the whole row (which may upcast dtypes) and indexing it again.
compute_duration = stream_df.loc[COMPUTE_ID, exclusive_col]
compute_percent = compute_duration / total_duration * 100

tp_duration = stream_df.loc[TP_ID, exclusive_col]
tp_percent = tp_duration / total_duration * 100

pp_duration = stream_df.loc[PP_ID, exclusive_col]
pp_percent = pp_duration / total_duration * 100

dp_duration = stream_df.loc[DP_ID, exclusive_col]
dp_percent = dp_duration / total_duration * 100

# Others_ID is a collection of stream IDs; their durations are added together.
others_duration = stream_df.loc[Others_ID, exclusive_col].sum()
others_percent = others_duration / total_duration * 100

In [None]:
# The slice order is shared by the labels, sizes, offsets and colours below.
labels = ['compute', 'tp', 'pp', 'dp', 'others']
sizes = [compute_percent, tp_percent, pp_percent, dp_percent, others_percent]
sizes_round = [round(share, 2) for share in sizes]
# Radial offset per slice: the first (compute) slice stays in place, the
# four communication slices are pulled outwards by decreasing amounts.
explode = (0, 0.6, 0.5, 0.4, 0.3)
colors = ['green', 'yellowgreen', 'blue', 'lightskyblue', 'gold']

# Legend entries that show the rounded percentage next to each label.
percentages = [f'{label} - {size}%' for label, size in zip(labels, sizes_round)]

fig, ax = plt.subplots()
ax.pie(
    sizes_round,
    explode=explode,
    labels=labels,
    colors=colors,
    startangle=140,
)

# Place the legend to the right of the pie.
ax.legend(
    title="Compute Communication percent",
    labels=percentages,
    loc="center left",
    bbox_to_anchor=(1, 0, 0.5, 1),
)

# Equal axis scaling keeps the pie circular.
ax.axis('equal')
plt.show()

## Per-kernel traces

The table displays overlap percentages for each kernel, corresponding to the individual rank selected from the drop-down menu.
All time values are in nanoseconds.

In [None]:
# Load the per-kernel trace and hand it to the nsys_display helper that
# renders the per-rank table.
rank_trace_path = 'rank_trace.parquet'
df = pd.read_parquet(rank_trace_path)
nsys_display.display_table_per_rank(df)

## Grouped traces

The table presents overlap percentages for each kernel, grouped by kernel name across all ranks.

In [None]:
# Load the grouped trace and render it as a rich table.
grouped_trace_path = 'grouped_trace.parquet'
grouped_df = pd.read_parquet(grouped_trace_path)
display(grouped_df)

## Files

The table associates each rank number with the original filename. Ranks are assigned assuming that the file names include the rank with sufficient zero padding for proper sorting. Otherwise, the actual rank may differ from the assigned ID.

In [None]:
# Load the files table and render it.
files_path = "files.parquet"
files_df = pd.read_parquet(files_path)
display(files_df)