# Volatility Report: To Quantify Volatility of Multiple Runs

In [None]:
#r "nuget: Microsoft.Diagnostics.Tracing.TraceEvent, 3.0.1"
#r "nuget: YamlDotnet" 
#r "nuget: XPlot.Plotly"
#r "nuget: XPlot.Plotly.Interactive"
#r "nuget: Microsoft.Data.Analysis, 0.19.1"
#r "nuget: Newtonsoft.Json"
#r "nuget: Microsoft.Playwright, 1.16.0"

using Etlx = Microsoft.Diagnostics.Tracing.Etlx;
using Microsoft.Data.Analysis;
using Microsoft.Diagnostics.Tracing.Analysis.GC;
using Microsoft.Diagnostics.Tracing.Analysis;
using Microsoft.Diagnostics.Tracing.Parsers.Clr;
using Microsoft.Diagnostics.Tracing;
using XPlot.Plotly;

using System.IO;
using Newtonsoft.Json;

In [None]:
#r "C:\GC.Analysis.API\GC.Analysis.API\bin\Debug\net6.0\GC.Analysis.API.dll"

using GC.Analysis.API;

## Helper Methods

In [None]:
// Consolidation of the runs from the infrastructure.
//
// Renames every "*.etl.zip" file directly under basePath in place:
//   - paths containing "baseline": the "baseline." token is dropped from the file name;
//   - all other files: the "run." token is dropped and the second dot-separated
//     segment (parsed as an integer) is increased by 10.
// Presumably the offset keeps run iterations from colliding with baseline
// iterations once both share one naming scheme - confirm with the infrastructure.
//
// basePath: directory that holds the traces; a trailing separator is optional.
// Throws whatever Directory.GetFiles, int.Parse and File.Move throw (e.g. a file
// name with fewer than four dot-separated segments, or a non-numeric segment).
public void MoveFiles(string basePath)
{
    // Offset added to the iteration number of non-baseline traces.
    const int RunIterationOffset = 10;

    foreach (string etlZipFile in Directory.GetFiles(basePath, "*.etl.zip"))
    {
        // Only the file name is rewritten so directory components are never altered.
        string fileName = Path.GetFileName(etlZipFile);
        string newFileName;

        // NOTE(review): the check looks at the whole path, so a directory whose name
        // contains "baseline" also selects this branch - confirm this is intended.
        if (etlZipFile.Contains("baseline"))
        {
            newFileName = fileName.Replace("baseline.", "");
        }
        else
        {
            string[] split = fileName.Replace("run.", "").Split(".");
            string newVal = (int.Parse(split[1]) + RunIterationOffset).ToString();
            newFileName = split[0] + "." + newVal + "." + split[2] + "." + split[3];
        }

        // Path.Combine inserts the separator that plain concatenation omitted when
        // basePath has no trailing slash.
        File.Move(etlZipFile, Path.Combine(basePath, newFileName));
    }
}

// Loads every analyzer that AnalyzerManager finds under basePath and returns,
// for each one, the first GC process data entry reported for "corerun".
// Throws (via First()) when an analyzer has no such entry.
public List<GCProcessData> GetTraces(string basePath)
{
    Dictionary<string, Analyzer> analyzers = AnalyzerManager.GetAllAnalyzers(basePath);

    return analyzers
        .Select(entry => entry.Value.GetProcessGCData("corerun").First())
        .ToList();
}

// Summary of a series of samples: extremes, spread ("Vol") and mean.
public class MinMaxDiff
{
    // Captures the samples and derives the statistics from one snapshot.
    // data: the samples; must be non-null and non-empty
    //       (ArgumentNullException / InvalidOperationException otherwise).
    public MinMaxDiff(IEnumerable<double> data)
    {
        // Materialize once: a deferred sequence would otherwise be re-evaluated for
        // Max, Min, All and Rest, which is wasteful and may yield inconsistent values.
        List<double> all = data.ToList();

        Max = all.Max();
        Min = all.Min();
        All = all;

        // Rest = the samples with one occurrence of the minimum and one of the maximum removed.
        List<double> rest = new(all);
        rest.Remove(Min);
        rest.Remove(Max);
        Rest = rest;
    }

    // Largest sample.
    public double Max { get; }
    // Smallest sample.
    public double Min { get; }
    // Spread relative to the minimum, in percent; +infinity when the minimum is 0.
    public double VolPercent => Min != 0 ? Vol / Min * 100 : double.PositiveInfinity;
    // Absolute spread: Max - Min.
    public double Vol => (Max - Min);
    // Arithmetic mean over all samples.
    public double Mean => All.Average();
    // Every sample, in input order.
    public IReadOnlyList<double> All { get; }
    // Samples left after dropping one minimum and one maximum.
    public IReadOnlyList<double> Rest { get; }
}

// Plots, per iteration index, the percentage difference of each value from the
// first value of the series, and displays the chart.
public void ShowVolatility(IEnumerable<double> data)
{
    // Every point is expressed relative to the first sample.
    var reference = data.First();

    var series = new Scatter
    {
        x = Enumerable.Range(0, data.Count()),
        y = data.Select(value => (value - reference) / reference * 100),
        mode = "markers+lines",
    };

    var chartLayout = new Layout.Layout
    {
        xaxis = new Xaxis { title = "Iteration #" },
        yaxis = new Yaxis { title = "Execution Time Diff %" },
        title = "Execution Time Diff % vs. Iterations",
    };

    Chart.Plot(series, chartLayout).Display();
}

// Plots two series on one chart - "baseline" and "comparand" - where each point is
// the percentage difference of a run's DurationMSec from the first run of its own
// series, and displays the chart.
public void ShowVolatilityComparison(IEnumerable<GCProcessData> baseline, IEnumerable<GCProcessData> comparand)
{
    var layout = new Layout.Layout
    {
        xaxis = new Xaxis { title = "Iteration #" },
        yaxis = new Yaxis { title = "Execution Time Diff %" },
        title = "Execution Time Diff % vs. Iterations",
    };

    // Builds the named series for one set of runs (used for both baseline and comparand).
    Scatter GetScatter(IEnumerable<GCProcessData> data, string name)
    {
        // Duration of the first run; every point is expressed relative to it.
        var reference = data.First().DurationMSec;

        return new Scatter
        {
            x = Enumerable.Range(0, data.Count()),
            y = data.Select(g => (g.DurationMSec - reference) / reference * 100),
            mode = "markers+lines",
            name = name
        };
    }

    Chart.Plot(new [] { GetScatter(baseline, "baseline"), GetScatter(comparand, "comparand") }, layout).Display();
}

// Displays a volatility table (Vol %, Mean, Vol, Min, Max) for one set of runs
// covering execution time, total allocations, total GC pause time and the GC pause
// contribution, then plots the execution-time volatility and displays a comparison
// of the fastest run against the slowest one.
public void SummarizeVolatility(List<GCProcessData> gcData)
{
    StringDataFrameColumn criteriaColumn   = new("Criteria");
    StringDataFrameColumn volPercentColumn = new("Vol %");
    StringDataFrameColumn meanColumn       = new("Mean");
    StringDataFrameColumn volColumn        = new("Vol");
    StringDataFrameColumn minColumn        = new("Min");
    StringDataFrameColumn maxColumn        = new("Max");

    // Appends one table row with the statistics of the selected metric.
    void AddRow(string criteria, Func<GCProcessData, double> metric)
    {
        MinMaxDiff stats = new MinMaxDiff(gcData.Select(metric));

        criteriaColumn.Append(criteria);
        volPercentColumn.Append(stats.VolPercent.ToString("N2"));
        meanColumn.Append(stats.Mean.ToString("N2"));
        volColumn.Append(stats.Vol.ToString("N2"));
        minColumn.Append(stats.Min.ToString("N2"));
        maxColumn.Append(stats.Max.ToString("N2"));
    }

    AddRow("Execution Time (MSec)",      gc => gc.DurationMSec);
    AddRow("Total Allocations (MB)",     gc => gc.Stats.TotalAllocatedMB);
    AddRow("Total GC Pause Time (MSec)", gc => gc.Stats.TotalPauseTimeMSec);
    AddRow("GC Pause Contribution %",    gc => gc.Stats.TotalPauseTimeMSec / gc.DurationMSec * 100);

    DataFrame summary = new DataFrame(criteriaColumn, volPercentColumn, meanColumn, volColumn, minColumn, maxColumn);
    summary.Display();

    ShowVolatility(gcData.Select(gc => gc.DurationMSec));

    // Compare the two extremes by execution time.
    GCProcessData fastest = gcData.MinBy(gc => gc.DurationMSec);
    GCProcessData slowest = gcData.MaxBy(gc => gc.DurationMSec);
    fastest.CompareNormalizedByMaxTotalAllocations(new [] { slowest }).Display();
}

// Displays a table comparing the volatility (Vol %) of a baseline set of runs with a
// comparand set for execution time, total allocations, total GC pause time and the
// GC pause contribution, then plots both execution-time series.
// The "Δ Vol %" column is always comparand Vol % minus baseline Vol %.
public void SummarizeComparativeVolatility(List<GCProcessData> baseline, List<GCProcessData> comparand)
{
    StringDataFrameColumn criteriaColumn   = new("Criteria");
    StringDataFrameColumn baselineVolPercentageColumn = new("Baseline Vol %");
    StringDataFrameColumn comparandVolPercentageColumn = new("Comparand Vol %");
    StringDataFrameColumn volPercentColumn = new("Δ Vol %");

    // Appends one row: the metric's Vol % for each set and their difference.
    // Using one helper for every row keeps the metric and the delta direction consistent.
    void AddRow(string criteria, Func<GCProcessData, double> metric)
    {
        MinMaxDiff baselineStats  = new MinMaxDiff(baseline.Select(metric));
        MinMaxDiff comparandStats = new MinMaxDiff(comparand.Select(metric));

        criteriaColumn.Append(criteria);
        baselineVolPercentageColumn.Append(baselineStats.VolPercent.ToString("N2"));
        comparandVolPercentageColumn.Append(comparandStats.VolPercent.ToString("N2"));
        volPercentColumn.Append((comparandStats.VolPercent - baselineStats.VolPercent).ToString("N2"));
    }

    // Duration MSec.
    AddRow("Execution Time (MSec)", gc => gc.DurationMSec);

    // Total Allocation.
    AddRow("Total Allocations (MB)", gc => gc.Stats.TotalAllocatedMB);

    // Total GC Pause Time (previously computed from TotalAllocatedMB by mistake).
    AddRow("Total Pause Time (MSec)", gc => gc.Stats.TotalPauseTimeMSec);

    // GC Pause Contribution % (delta previously had the opposite sign of the other rows).
    AddRow("GC Pause Contribution %", gc => gc.Stats.TotalPauseTimeMSec / gc.DurationMSec * 100);

    DataFrame dataFrame = new DataFrame(criteriaColumn, baselineVolPercentageColumn, comparandVolPercentageColumn, volPercentColumn);
    dataFrame.Display();

    ShowVolatilityComparison(baseline, comparand);
}

// File-based helpers: execution times are parsed from text files instead of traces.

// Reads one execution time from every file directly under path.
// The value is taken from the second space-separated token of the file's second
// line, with any "ms" removed, and parsed as an integer.
// Throws when a file has fewer than two lines, the line has fewer than two tokens,
// or the token is not an integer.
public List<double> GetExecutionTimesFromFiles(string path)
{
    return Directory.GetFiles(path)
        .Select(file =>
        {
            string[] lines = File.ReadAllText(file).Split("\n");
            string token = lines[1].Split(" ")[1];
            return (double)long.Parse(token.Replace("ms", ""));
        })
        .ToList();
}

// Collects the execution times of every sub-directory directly under basePath.
// Returns a map from the sub-directory's name to the times read from its files
// by GetExecutionTimesFromFiles.
public Dictionary<string, List<double>> GetAllExecutionTimesFromFiles(string basePath)
{
    Dictionary<string, List<double>> executionTimes = new();

    foreach (string directory in Directory.GetDirectories(basePath))
    {
        // The directory's own name (not its full path) is the key.
        string dirName = new DirectoryInfo(directory).Name;

        // Was GetExecutionTimes(...), a name that is not defined in this notebook.
        executionTimes[dirName] = GetExecutionTimesFromFiles(directory);
    }

    return executionTimes;
}

// Displays a volatility table (Vol %, Mean, Vol, Min, Max) with one row per
// sub-directory of basePath, using the execution times read from that
// sub-directory's files.
public void SummarizeFileVolatility(string basePath)
{
    // Was GetAllExecutionTimes(...), a name that is not defined in this notebook.
    Dictionary<string, List<double>> data = GetAllExecutionTimesFromFiles(basePath);

    StringDataFrameColumn scenarioColumn   = new("Scenario");
    StringDataFrameColumn volPercentColumn = new("Vol %");
    StringDataFrameColumn meanColumn       = new("Mean");
    StringDataFrameColumn volColumn        = new("Vol");
    StringDataFrameColumn minColumn        = new("Min");
    StringDataFrameColumn maxColumn        = new("Max");

    foreach (var d in data)
    {
        // Key = scenario (directory name), Value = its execution times.
        MinMaxDiff durationMinMaxDiff = new MinMaxDiff(d.Value);

        scenarioColumn.Append(d.Key);
        volPercentColumn.Append(durationMinMaxDiff.VolPercent.ToString("N2"));
        meanColumn.Append(durationMinMaxDiff.Mean.ToString("N2"));
        volColumn.Append(durationMinMaxDiff.Vol.ToString("N2"));
        minColumn.Append(durationMinMaxDiff.Min.ToString("N2"));
        maxColumn.Append(durationMinMaxDiff.Max.ToString("N2"));
    }

    DataFrame dataFrame = new(scenarioColumn, volPercentColumn, meanColumn, volColumn, minColumn, maxColumn);
    dataFrame.Display();
}

# Volatility Based on Trace Duration

In [None]:
// Directory passed to GetTraces; replace the placeholder with a real path before running.
string BASE_PATH = @"ADD YOUR PATH.";

// "corerun" GC process data for every trace found under BASE_PATH.
var traces = GetTraces(BASE_PATH);

## Summarize For A Single Set of Runs

In [None]:
// Show the volatility table and charts for the traces loaded above.
SummarizeVolatility(traces)

## Summarize Comparison

In [None]:
// Directories holding the two sets of runs to compare; replace the placeholders before running.
var baselinePath  = @"SOME BASELINE PATH";
var comparandPath = @"SOME COMPARAND PATH";

// "corerun" GC process data for each set.
List<GCProcessData> baselineTraces  = GetTraces(baselinePath);
List<GCProcessData> comparandTraces = GetTraces(comparandPath);

In [None]:
// Show the baseline vs. comparand volatility table and chart.
SummarizeComparativeVolatility(baselineTraces, comparandTraces)

## Debugging

In [None]:
// Id of the process executing this cell (presumably used to attach a debugger).
System.Diagnostics.Process.GetCurrentProcess().Id

In [None]:
#!about