# Usage of GC.Analysis.API for CPU Analysis 

In [None]:
#r "nuget: Microsoft.Diagnostics.Tracing.TraceEvent, 3.1.13"
#r "nuget: XPlot.Plotly"
#r "nuget: XPlot.Plotly.Interactive"
#r "nuget: Microsoft.Data.Analysis"
#r "nuget: Newtonsoft.Json"
#r "nuget: YamlDotNet"

using Etlx = Microsoft.Diagnostics.Tracing.Etlx;
using Microsoft.Data.Analysis;
using Microsoft.Diagnostics.Tracing.Analysis.GC;
using Microsoft.Diagnostics.Tracing.Analysis;
using Microsoft.Diagnostics.Tracing.Parsers.Clr;
using Microsoft.Diagnostics.Tracing;
using XPlot.Plotly;

using System.IO;
using Newtonsoft.Json;

## Building and Using The GC Analysis API

In [None]:
# Build the GC.Analysis.API project in the Release configuration; the next cell references the Release build of GC.Analysis.API.dll.
# NOTE(review): assumes this cell runs in a shell kernel where '#' starts a comment - confirm.
dotnet build -c Release "..\..\GC.Analysis.API\"

In [4]:
#r "..\..\..\..\..\..\artifacts\bin\GC.Analysis.API\Release\net8.0\GC.Analysis.API.dll"

using GC.Analysis.API;

## Creating the Analyzer

In [5]:
// Name of the process whose data is pulled out of both traces.
string processName = "Benchmarks";

// Traces under comparison: the baseline run and the comparand run.
string BASELINE_TRACE_PATH = @".\Traces\CPU_Baseline.etl.zip";
string COMPARAND_TRACE_PATH = @".\Traces\CPU_Comparand.etl.zip";

In [6]:
// Analyzer over the baseline trace, restricted to the process named by processName.
Analyzer baselineAnalyzer = new(
    tracePath: BASELINE_TRACE_PATH,
    processNames: new HashSet<string> { processName });

In [7]:
// Analyzer over the comparand trace, restricted to the process named by processName.
Analyzer comparandAnalyzer = new(
    tracePath: COMPARAND_TRACE_PATH,
    processNames: new HashSet<string> { processName });

## Summarization

In [None]:
// Summarize the baseline trace for the selected process; the cell has no trailing semicolon so the result is displayed as the cell output.
baselineAnalyzer.SummarizeTrace(processName: processName)

In [None]:
// Summarize the comparand trace for the selected process; the cell has no trailing semicolon so the result is displayed as the cell output.
comparandAnalyzer.SummarizeTrace(processName: processName)

In [None]:
// Fetch the GC data of the one process matching processName from each trace
// (Single() throws unless exactly one match exists), then compare the baseline against the comparand.
var baselineGCData = baselineAnalyzer.GetProcessGCData(processName).Single();
var comparandGCData = comparandAnalyzer.GetProcessGCData(processName).Single();

baselineGCData.Compare(new[] { comparandGCData })

## CPU Analysis

### Creating the CPU Analysis

For complete results, make sure you have followed the CPU analysis setup instructions [here](https://github.com/dotnet/performance/tree/main/src/benchmarks/gc/GC.Infrastructure/GC.Analysis.API#setting-up-for-cpu-analysis) before proceeding.

In [11]:
// Attach CPU analysis to the baseline analyzer, configured by the DefaultMethods.yaml file shipped with GC.Analysis.API.
baselineAnalyzer.AddCPUAnalysis(yamlPath: @"..\..\GC.Analysis.API\CPUAnalysis\DefaultMethods.yaml")

In [12]:
// Attach CPU analysis to the comparand analyzer, configured by the same DefaultMethods.yaml file used for the baseline.
comparandAnalyzer.AddCPUAnalysis(yamlPath: @"..\..\GC.Analysis.API\CPUAnalysis\DefaultMethods.yaml")

In [13]:
// CPU data of the one process matching processName in each trace
// (Single() throws unless exactly one match exists).
var baselineCPUAnalyzer = baselineAnalyzer.CPUAnalyzer;
CPUProcessData baselineCPUData = baselineCPUAnalyzer.GetCPUDataForProcessName(processName).Single();

var comparandCPUAnalyzer = comparandAnalyzer.CPUAnalyzer;
CPUProcessData comparandCPUData = comparandCPUAnalyzer.GetCPUDataForProcessName(processName).Single();

### Summarization

#### Major GC Phase Summarization

In [None]:
// Display the per-phase summary of the baseline CPU data.
baselineCPUData.GetPerPhaseSummary()

In [None]:
// Compare the baseline CPU data against the comparand; Compare takes a collection, so more comparands could be passed.
baselineCPUData.Compare(new [] { comparandCPUData })

#### Summarization By GC Type and Generation

In [None]:
// Display the per-generation summary of the comparand CPU data.
comparandCPUData.GetPerGenerationSummary() // TODO: Check where the background phases went. BGC count - attribution.

#### Aux Methods

In [None]:
// Per-GC cost of gc_heap::mark_phase in the baseline trace, reduced to the GC number and its Count.
baselineCPUData.GetPerGCMethodCost("gc_heap::mark_phase")
               .Select(cpuInfo => new { cpuInfo.GC.Number, cpuInfo.Count })

In [None]:
// Per-GC cost of gc_heap::mark_through_cards_for_segments in the comparand trace, counted only
// when called from gc_heap::relocate_phase, reduced to the GC number and its Count.
comparandCPUData.GetPerGCMethodCost(methodName: "gc_heap::mark_through_cards_for_segments",
                                    caller: "gc_heap::relocate_phase")
                .Select(cpuInfo => new { cpuInfo.GC.Number, cpuInfo.Count })

### Charting

#### Charting Counts For a Single GC Method

##### Charting Counts For a Single GC Method Using A Sorted Dictionary 

In [None]:
// Build a (series label, per-GC cost) pair for the comparand's mark phase and chart it.
var markPhaseCosts = comparandCPUData.GetPerGCMethodCost("gc_heap::mark_phase");
(string, List<CPUInfo>) data = ("gc_heap::mark_phase", markPhaseCosts);

CPUCharting.ChartCountForGCMethod(data, "Mark Phase Data")

##### Charting Counts For a Single GC Method Using CPU Process Data

In [None]:
// Chart the per-GC count of gc_heap::plan_phase in the comparand trace, with inclusive count.
// NOTE(review): relies on the default of isInclusiveCount, presumably true - confirm against the API.
comparandCPUData.ChartCountForGCMethod(methodName: "gc_heap::plan_phase", title: "Plan Phase")

In [None]:
// Chart the per-GC count of gc_heap::plan_phase in the baseline trace, with exclusive count (isInclusiveCount: false).
baselineCPUData.ChartCountForGCMethod(methodName: "gc_heap::plan_phase", title: "Plan Phase - Exclusive Count", isInclusiveCount: false)

##### Charting Counts For a Single GC Method With Caller

In [None]:
// Chart gc_heap::mark_through_cards_for_segments in the comparand trace, restricted to calls made from gc_heap::mark_phase.
comparandCPUData.ChartCountForGCMethod(methodName: "gc_heap::mark_through_cards_for_segments", 
                                      title: "Inc. cost of mark_through_cards_for_segments for the Mark Phase", 
                                      caller: "gc_heap::mark_phase")

In [None]:
// Chart gc_heap::mark_through_cards_for_segments in the baseline trace, restricted to calls made from gc_heap::relocate_phase.
baselineCPUData.ChartCountForGCMethod(methodName: "gc_heap::mark_through_cards_for_segments", 
                                      title: "Inc. cost of mark_through_cards_for_segments for the Relocate Phase", 
                                      caller: "gc_heap::relocate_phase")

#### Charting Counts For Multiple GC Methods

##### Charting Counts For Multiple GC Methods Using A List

In [None]:
// Pairs a GC method name (used as the series label) with its per-GC cost in the baseline trace.
(string, List<CPUInfo>) BaselineSeries(string methodName) =>
    (methodName, baselineCPUData.GetPerGCMethodCost(methodName));

(string, List<CPUInfo>) markPhaseData = BaselineSeries("gc_heap::mark_phase");
(string, List<CPUInfo>) planPhaseData = BaselineSeries("gc_heap::plan_phase");
(string, List<CPUInfo>) relocatePhaseData = BaselineSeries("gc_heap::relocate_phase");

// Plot the three series on one chart.
CPUCharting.ChartCountForGCMethods(new [] { markPhaseData, planPhaseData, relocatePhaseData }, "Custom Phase Data")

In [None]:
// GC numbers to restrict the costs to; each call below receives its own set built from this list.
var selectedGCNumbers = new[] { 196, 200 };

var markPhaseCosts = comparandCPUData.GetPerGCMethodCost(
    methodName: "gc_heap::mark_phase",
    gcsToConsider: new HashSet<int>(selectedGCNumbers));
(string, List<CPUInfo>) markPhaseData = ("gc_heap::mark_phase", markPhaseCosts);

var planPhaseCosts = comparandCPUData.GetPerGCMethodCost(
    methodName: "gc_heap::plan_phase",
    gcsToConsider: new HashSet<int>(selectedGCNumbers));
(string, List<CPUInfo>) planPhaseData = ("gc_heap::plan_phase", planPhaseCosts);

// Plot the mark and plan phase series for the selected GCs of the comparand trace.
CPUCharting.ChartCountForGCMethods(new [] { markPhaseData, planPhaseData }, "Custom Phase Data")

##### Charting Counts For The Same Phase Across Different Traces

In [None]:
// Per-GC cost of gc_heap::mark_phase in each trace; the first tuple element is the series label shown on the chart.
(string, List<CPUInfo>) baselineMarkPhaseData = 
    ("gc_heap::mark_phase - Baseline", baselineCPUData.GetPerGCMethodCost(methodName: "gc_heap::mark_phase"));

// The comparand series also holds mark phase data, so it is labelled as such
// (it was previously labelled "gc_heap::plan_phase - Comparand").
(string, List<CPUInfo>) comparandMarkPhaseData = 
    ("gc_heap::mark_phase - Comparand", comparandCPUData.GetPerGCMethodCost(methodName: "gc_heap::mark_phase"));

// Plot both traces' mark phase on one chart.
CPUCharting.ChartCountForGCMethods(new [] { baselineMarkPhaseData, comparandMarkPhaseData }, "Mark Phase: Baseline vs. Comparand")

##### Charting Counts For Multiple GC Methods Using CPU Process Data

In [None]:
// Chart the mark, plan, relocate and compact phases of the baseline trace together on one chart.
baselineCPUData.ChartCountForGCMethods(methodNames: new [] { "gc_heap::mark_phase", "gc_heap::plan_phase", "gc_heap::relocate_phase", "gc_heap::compact_phase" }, 
                                 title: "Major GC Phases")

#### Charting GC Data With CPU Data

In [28]:
// GC data of the one process matching processName in the COMPARAND trace (Single() throws unless exactly one match exists).
// Later cells chart against this, so CPU data paired with it should come from the same trace.
GCProcessData processData = comparandAnalyzer.GetProcessGCData(processName).Single();

In [None]:
// processData holds the comparand trace's GCs, so the CPU series must come from the comparand too;
// pairing baseline CPU counts with comparand pause durations would chart two unrelated traces.
(string, List<CPUInfo>) cpuData = ("Mark Phase", comparandCPUData.GetPerGCMethodCost("gc_heap::mark_phase"));

// Pause duration in milliseconds of every GC in the same trace.
(string, List<double>) gcData  = ("Msec", processData.GCs.Select(gc => gc.PauseDurationMSec));

// Plot the mark phase count and the pause duration together.
CPUCharting.ChartCountForGCMethodWithGCData(cpuData, gcData, "Mark Phase Inc. Count vs. Pause Duration (MSec)")

### Generic Charting

In [None]:
// processData holds the comparand trace's GCs, so the mark phase costs are taken from the comparand
// as well; otherwise the X axis (GC numbers) and Y axis (counts) would come from different traces.
(string, List<CPUInfo>) markPhaseData = 
    ("gc_heap::mark_phase", comparandCPUData.GetPerGCMethodCost(methodName: "gc_heap::mark_phase"));

// Keeps CPU entries that belong to a gen0 GC.
bool Gen0Filter(CPUInfo cpuInfo)
{
    return cpuInfo.GC.Generation == 0;
}

// Keeps gen0 GCs.
bool Gen0Filter(TraceGC gc)
{
    return gc.Generation == 0;
}

// Apply the same gen0 filter to both the CPU data and the GC list.
// NOTE(review): the chart assumes both filtered lists line up one-to-one - confirm every gen0 GC has a CPU entry.
List<CPUInfo> filteredGen0CPUData = markPhaseData.Item2.Where(Gen0Filter);
List<TraceGC> traceGC = processData.GCs.Where(Gen0Filter);

// X axis: GC number; Y axis: mark phase count for that GC.
GCCharting.ChartGCData("Mark Phase Inc. Count Per GC", 
                       new AxisInfo 
                       { 
                           Name = "Mark Phase Inc", 
                           XAxis = traceGC.Select(gc => (double)gc.Number), 
                           YAxis = filteredGen0CPUData.Select(gc => (double)gc.Count) 
                       })

### Mark Phase Analysis

#### Chart Per Generation and Type 

In [None]:
// Chart mark phase statistics for generation 0 GCs and the MarkStack root type.
processData.ChartStatisticsOfMarkPhaseByType(generation: 0, type: MarkRootType.MarkStack)

#### Chart All Mark Root Types For a Generation 

In [None]:
// Chart the average mark phase time for generation 0 GCs across the MarkStack, MarkHandles and MarkOlder root types.
processData.ChartAverageMarkPhaseTimeByMarkType(generation: 0, types: new [] { MarkRootType.MarkStack, MarkRootType.MarkHandles, MarkRootType.MarkOlder })

## Source Code Analysis

In [None]:
// Point the CPU data at a local source checkout, then annotate gc_heap::garbage_collect.
// NOTE(review): C:\runtime\ is a machine-specific path - adjust it to your own checkout.
baselineCPUData.SetSourcePath(@"C:\runtime\");
baselineCPUData.Annotate("gc_heap::garbage_collect")

## Affinitized CPU Analysis

### Summary

In [33]:
// GC data of the first process matching processName in the baseline trace.
// The helper below queries this object for affinitized CPU samples.
var gcData = baselineAnalyzer.GetProcessGCData(processName).First();

/// <summary>
/// Aggregates affinitized CPU samples, keyed by name, over the window between the end of
/// execution-engine suspension and the start of the GC, for every GC where that window
/// is longer than <paramref name="threshold"/>.
/// </summary>
/// <param name="gcs">GCs to inspect.</param>
/// <param name="threshold">Minimum gap (same unit as the *RelativeMSec timestamps) for a GC to be included; the comparison is strict.</param>
/// <returns>Name -> data whose NumberOfSamples is summed over all processors and all qualifying GCs.</returns>
Dictionary<string, AffinitizedCPUData> get_stats_gcs_with_large_diff(List<TraceGC> gcs, double threshold)
{
    // Per process name affinitized cpu data.
    Dictionary<string, AffinitizedCPUData> data = new();

    foreach (TraceGC gc in gcs)
    {
        // Suspension ends at pause start + suspend duration; computed once and reused below.
        double suspendEndRelativeMSec = gc.PauseStartRelativeMSec + gc.SuspendDurationMSec;
        double diff_between_suspend_ee_stop_and_gc_start = gc.StartRelativeMSec - suspendEndRelativeMSec;

        if (diff_between_suspend_ee_stop_and_gc_start > threshold)
        {
            // Processor -> < ProcessID, AffinitizedCPUData >
            Dictionary<int, Dictionary<int, AffinitizedCPUData>> affinitizedAnalysis =
                gcData.GetAffinitizedAnalysis(suspendEndRelativeMSec, gc.StartRelativeMSec);

            foreach (var processor in affinitizedAnalysis)
            {
                foreach (var process in processor.Value)
                {
                    // First time this name is seen: create its aggregate entry.
                    if (!data.TryGetValue(process.Value.Name, out var affinitizedCPUData))
                    {
                        data[process.Value.Name] = affinitizedCPUData = new AffinitizedCPUData
                        {
                            Name = process.Value.Name,
                            ProcessorNumber = -1, // All processors
                        };
                    }

                    affinitizedCPUData.NumberOfSamples += process.Value.NumberOfSamples;
                }
            }
        }
    }

    return data;
}

// Reuse gcData so the GC list and the affinitized analysis come from the same process data object.
// Only GCs whose suspend-end-to-start gap exceeds 8 are included.
Dictionary<string, AffinitizedCPUData> stats = get_stats_gcs_with_large_diff(gcData.GCs, 8);

#### Number of Samples

In [None]:
// Split the aggregated sample counts into the "GC Thread" entry and everything else.
float total_number_of_samples_from_gc_threads = 0;
float total_number_of_samples_from_non_gc_threads = 0;

foreach (var (name, cpuData) in stats)
{
    if (name == "GC Thread")
    {
        total_number_of_samples_from_gc_threads += cpuData.NumberOfSamples;
    }
    else
    {
        total_number_of_samples_from_non_gc_threads += cpuData.NumberOfSamples;
    }
}

Console.WriteLine($"Number of Samples from Threads >= GC Thread Priority of 14: {total_number_of_samples_from_non_gc_threads}");
Console.WriteLine($"Number of Samples from GC Threads : {total_number_of_samples_from_gc_threads}");

#### Per Process Summarization

In [35]:
// Total number of samples across every entry; used as the denominator for the percentages.
float total_samples = 0;
foreach (var cpuData in stats.Values)
{
    total_samples += cpuData.NumberOfSamples;
}

// One column per reported quantity; each entry of stats becomes one row.
StringDataFrameColumn process_name = new("Process Name");
DoubleDataFrameColumn number_of_samples = new("Number of Samples");
DoubleDataFrameColumn percentage_of_samples = new("% of Samples");

foreach (var (name, cpuData) in stats)
{
    // Share of all samples, as a percentage rounded to two decimals.
    double percentage = Math.Round(cpuData.NumberOfSamples / total_samples * 100, 2);

    process_name.Append(name);
    number_of_samples.Append(cpuData.NumberOfSamples);
    percentage_of_samples.Append(percentage);
    Console.WriteLine($"For: {name}, Number of Samples: {cpuData.NumberOfSamples}, % of Samples: {percentage}%");
}

var df = new DataFrame(process_name, number_of_samples, percentage_of_samples);

#### Save Results To Markdown 

In [None]:
// Show the per-process data frame in the notebook, then write it to AffinitizedResults.md
// (the "./" path is relative - presumably to the notebook's working directory; confirm).
df.Display();
df.ToMarkdown("./AffinitizedResults.md");

## Debugging

In [None]:
// Print the ID of the process hosting this notebook kernel, e.g. to attach a debugger.
// Environment.ProcessId (.NET 5+) returns the same ID without allocating a Process object that is never disposed.
Console.WriteLine($"Current Process ID: {Environment.ProcessId}");

#!about