From 743c88978ab88da7dd20d53dddca6bc8c0f07e15 Mon Sep 17 00:00:00 2001 From: lancelui-amzn Date: Mon, 13 Oct 2025 14:58:10 -0500 Subject: [PATCH] add aperf standard report data format (#47) * add aperf standard report data format * modify graph data format, add already processed data handling --- src/data.rs | 11 +++- src/data/cpu_utilization.rs | 45 +++++++++++++ src/data/data_formats.rs | 119 ++++++++++++++++++++++++++++++++-- src/data/java_profile.rs | 29 +++++---- src/html_files/flamegraphs.ts | 13 ++-- src/lib.rs | 11 ++++ src/report.rs | 46 ++++++++++++- src/visualizer.rs | 43 ++++++++++++ 8 files changed, 292 insertions(+), 25 deletions(-) diff --git a/src/data.rs b/src/data.rs index 2fde0e76..19ea18d9 100644 --- a/src/data.rs +++ b/src/data.rs @@ -2,7 +2,7 @@ pub mod aperf_runlog; pub mod aperf_stats; pub mod constants; pub mod cpu_utilization; -mod data_formats; +pub mod data_formats; pub mod diskstats; pub mod flamegraphs; pub mod hotline; @@ -19,6 +19,7 @@ pub mod systeminfo; pub mod utils; pub mod vmstat; +use crate::data::data_formats::AperfData; use crate::utils::{get_data_name_from_type, DataMetrics}; use crate::visualizer::{DataVisualizer, GetData, ReportParams}; use crate::{noop, InitParams, PerformanceData, VisualizationData, APERF_FILE_FORMAT}; @@ -354,6 +355,14 @@ macro_rules! 
processed_data { } } + pub fn process_raw_data_new(&mut self, raw_data: Vec) -> Result { + match self { + $( + ProcessedData::$processed_data(ref mut value) => Ok(value.process_raw_data_new(raw_data)?), + )* + } + } + pub fn custom_raw_data_parser(&mut self, parser_params: ReportParams) -> Result> { match self { $( diff --git a/src/data/cpu_utilization.rs b/src/data/cpu_utilization.rs index 8c7e3f63..72aede48 100644 --- a/src/data/cpu_utilization.rs +++ b/src/data/cpu_utilization.rs @@ -1,3 +1,4 @@ +use crate::data::data_formats::{AperfData, Series, TimeSeriesData, TimeSeriesMetric}; use crate::data::{CollectData, CollectorParams, Data, ProcessedData, TimeEnum}; use crate::utils::{get_data_name_from_type, DataMetrics, Metric}; use crate::visualizer::GetData; @@ -351,6 +352,50 @@ impl GetData for CpuUtilization { process_gathered_raw_data(buffer) } + fn process_raw_data_new(&mut self, raw_data: Vec) -> Result { + let mut time_series = TimeSeriesData::default(); + let cpu_util_metrics = [ + "aggregate", + "user", + "nice", + "system", + "irq", + "softirq", + "idle", + "iowait", + "steal", + ]; + for metric in cpu_util_metrics { + time_series + .metrics + .insert(metric.to_string(), TimeSeriesMetric::default()); + } + + for metric in cpu_util_metrics { + let series = Series::new(Some(metric.to_string())); + time_series + .metrics + .get_mut("aggregate") + .unwrap() + .series + .push(series); + } + + for buffer in raw_data { + let raw_value = match buffer { + Data::CpuUtilizationRaw(ref value) => value, + _ => panic!("Invalid Data type in raw file"), + }; + let _stat = KernelStats::from_reader(raw_value.data.as_bytes()).unwrap(); + let _time_now = match raw_value.time { + TimeEnum::DateTime(value) => value, + _ => panic!("Has to be datetime"), + }; + //TODO: parse raw values and add to time_series + } + Ok(AperfData::TimeSeries(time_series)) + } + fn get_calls(&mut self) -> Result> { Ok(vec!["keys".to_string(), "values".to_string()]) } diff --git 
a/src/data/data_formats.rs b/src/data/data_formats.rs index f861f0f3..52c8ca8f 100644 --- a/src/data/data_formats.rs +++ b/src/data/data_formats.rs @@ -1,26 +1,133 @@ use serde::{Deserialize, Serialize}; +use std::collections::HashMap; /** * This module defines generalized data types of all Aperf processed data used by the * frontend JavaScripts. */ +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(untagged)] +pub enum AperfData { + TimeSeries(TimeSeriesData), + Text(TextData), + KeyValue(KeyValueData), + Graph(GraphData), +} + +impl AperfData { + pub fn get_format_name(&self) -> String { + match self { + AperfData::TimeSeries(_) => "time_series".to_string(), + AperfData::Text(_) => "text".to_string(), + AperfData::KeyValue(_) => "key_value".to_string(), + AperfData::Graph(_) => "graph".to_string(), + } + } +} + +#[derive(Serialize, Deserialize, Debug, Default, Clone)] +pub struct TimeSeriesData { + pub metrics: HashMap, + pub sorted_keys: Vec, +} + +#[derive(Serialize, Deserialize, Debug, Default, Clone)] +pub struct TimeSeriesMetric { + pub series: Vec, + pub metadata: HashMap, + pub stats: Statistics, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Series { + #[serde(skip_serializing_if = "Option::is_none")] + pub series_name: Option, + pub time_diff: Vec, + pub values: Vec, +} + +impl Series { + pub fn new(series_name: Option) -> Self { + Series { + series_name, + time_diff: Vec::new(), + values: Vec::new(), + } + } +} + +#[derive(Serialize, Deserialize, Debug, Default, Clone)] +pub struct Statistics { + pub avg: f64, + pub std: f64, + pub min: f64, + pub max: f64, + pub p50: f64, + pub p90: f64, + pub p99: f64, + pub p99_9: f64, +} + +impl Statistics { + pub fn new() -> Self { + Statistics { + avg: 0.0, + std: 0.0, + min: 0.0, + max: 0.0, + p50: 0.0, + p90: 0.0, + p99: 0.0, + p99_9: 0.0, + } + } +} + +#[derive(Serialize, Deserialize, Debug, Default, Clone)] +pub struct TextData { + pub lines: Vec, +} + +#[derive(Serialize, 
Deserialize, Debug, Default, Clone)] +pub struct KeyValueData { + pub key_value_groups: HashMap, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct KeyValueGroup { + pub key_values: HashMap, +} + +impl KeyValueGroup { + pub fn new() -> Self { + KeyValueGroup { + key_values: HashMap::new(), + } + } +} + +#[derive(Serialize, Deserialize, Debug, Default, Clone)] +pub struct GraphData { + pub graph_groups: HashMap, +} + #[derive(Serialize, Deserialize, Debug, Default, Clone)] -pub struct HtmlData { - pub data_type: String, - pub graphs: Vec, +pub struct GraphGroup { + pub group_name: String, + pub graphs: HashMap, } #[derive(Serialize, Deserialize, Debug, Clone)] -pub struct HtmlDataGraph { +pub struct Graph { pub graph_name: String, pub graph_path: String, pub graph_size: Option, } -impl HtmlDataGraph { +impl Graph { pub fn new(graph_name: String, graph_path: String, graph_size: Option) -> Self { - HtmlDataGraph { + Graph { graph_name, graph_path, graph_size, diff --git a/src/data/java_profile.rs b/src/data/java_profile.rs index dc620ecc..6c1779f6 100644 --- a/src/data/java_profile.rs +++ b/src/data/java_profile.rs @@ -1,4 +1,4 @@ -use crate::data::data_formats::{HtmlData, HtmlDataGraph}; +use crate::data::data_formats::{Graph, GraphGroup}; use crate::data::{CollectData, CollectorParams, ProcessedData}; use crate::utils::{get_data_name_from_type, DataMetrics}; use crate::visualizer::GetData; @@ -301,7 +301,7 @@ impl CollectData for JavaProfileRaw { #[derive(Serialize, Deserialize, Debug, Default, Clone)] pub struct JavaProfile { - pub data: Vec, + pub data: Vec, } impl JavaProfile { @@ -328,8 +328,8 @@ impl GetData for JavaProfile { let mut profile_metrics = Vec::from(PROFILE_METRICS); profile_metrics.push("legacy"); for metric in profile_metrics { - let mut java_profile_data = HtmlData::default(); - java_profile_data.data_type = String::from(metric); + let mut java_profile_data = GraphGroup::default(); + java_profile_data.group_name = 
String::from(metric); for (process, process_names) in &process_map { let filename = if metric == "legacy" { @@ -349,16 +349,19 @@ impl GetData for JavaProfile { &params.data_dir, &params.report_dir.join(relative_path), ) { - java_profile_data.graphs.push(HtmlDataGraph::new( - format!( - "JVM: {}, PID: {} ({})", - process_names.first().map_or("unknown", |s| s.as_str()), - process, - metric + java_profile_data.graphs.insert( + process.clone(), + Graph::new( + format!( + "JVM: {}, PID: {} ({})", + process_names.first().map_or("unknown", |s| s.as_str()), + process, + metric + ), + format!("{}/{}", relative_path, filename), + Some(file_size), ), - format!("{}/{}", relative_path, filename), - Some(file_size), - )); + ); } } diff --git a/src/html_files/flamegraphs.ts b/src/html_files/flamegraphs.ts index e951eae8..78c194e9 100644 --- a/src/html_files/flamegraphs.ts +++ b/src/html_files/flamegraphs.ts @@ -4,15 +4,20 @@ function getJavaFlamegraphInfo(run, container_id, run_data, metric){ if (handleNoData(container_id, run_data)) return; let values = JSON.parse(run_data['values']); - let data = values.find((d) => d['data_type'] == metric); + let data = values.find((d) => d['group_name'] == metric); - let sorted = data['graphs'].filter((graph) => !graph["graph_name"].includes('-')).toSorted((x, y) => y["graph_size"] - x["graph_size"]); - - if(sorted.length == 0){ + if (!data || !data['graphs'] || Object.keys(data['graphs']).length === 0) { var h3 = document.createElement('h3'); h3.innerText = `No data collected.`; addElemToNode(container_id, h3); + return; + } + + let graphs = []; + for (let key in data['graphs']) { + graphs.push(data['graphs'][key]); } + let sorted = graphs.filter((graph) => !graph["graph_name"].includes('-')).sort((x, y) => y["graph_size"] - x["graph_size"]); for(let graph of sorted){ var h3 = document.createElement('h3'); diff --git a/src/lib.rs b/src/lib.rs index ad7dbdc2..e7a85f6a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -469,6 +469,17 @@ impl
VisualizationData { Ok(()) } + pub fn unpack_data_new(&mut self, name: String) -> Result<()> { + for (dvname, datavisualizer) in self.visualizers.iter_mut() { + debug!("Unpacking data for: {}", dvname); + if datavisualizer.process_raw_data_new(name.clone()).is_err() { + // TODO: remove once all are implemented + debug!("process_raw_data_new not implemented for: {}", dvname); + } + } + Ok(()) + } + pub fn get_api(&mut self, name: String) -> Result { let api = self.visualizers.get(&name).unwrap().api_name.clone(); Ok(api) diff --git a/src/report.rs b/src/report.rs index 76d2f886..79cb9f84 100644 --- a/src/report.rs +++ b/src/report.rs @@ -1,3 +1,4 @@ +use crate::data::data_formats::AperfData; use crate::data::JS_DIR; use crate::{data, PDError, VisualizationData}; use anyhow::Result; @@ -22,6 +23,23 @@ pub struct Report { pub name: Option, } +#[derive(Serialize, Deserialize, Debug, Clone)] +struct ReportData { + data_name: String, + data_format: String, + runs: HashMap, +} + +impl ReportData { + fn new(data_name: String) -> Self { + ReportData { + data_name, + data_format: String::new(), + runs: HashMap::new(), + } + } +} + #[derive(Serialize, Deserialize, Debug, Clone)] struct Api { name: String, @@ -245,7 +263,8 @@ pub fn report(report: &Report, tmp_dir: &PathBuf) -> Result<()> { /* Init visualizers */ for dir in dir_paths { let name = visualization_data.init_visualizers(dir.to_owned(), tmp_dir, &report_name)?; - visualization_data.unpack_data(name)?; + visualization_data.unpack_data(name.clone())?; + visualization_data.unpack_data_new(name)?; } /* Generate visualizer JS files */ @@ -312,6 +331,31 @@ pub fn report(report: &Report, tmp_dir: &PathBuf) -> Result<()> { write!(out_file, "{}", str_out_data)?; } + /* Get visualizer data unified */ + let visualizer_names = visualization_data.get_visualizer_names()?; // TODO: remove after replacing old get visualizer data + let out_loc = report_name.join("data/js/processed_data.js"); + let mut out_file = 
File::create(out_loc)?; + writeln!(out_file, "processed_data = {{")?; + for name in visualizer_names { + let mut report_data = ReportData::new(name.clone()); + for run_name in &run_names { + let visualizer = visualization_data + .visualizers + .get_mut(&name) + .ok_or(PDError::VisualizerHashMapEntryError(name.to_string()))?; + let data = match visualizer.run_values_new.get(run_name) { + Some(data) => data, + None => continue, + }; + report_data.runs.insert(run_name.clone(), data.clone()); + report_data.data_format = data.get_format_name(); + } + let out_data = serde_json::to_string(&report_data)?; + write!(out_file, r#""{}": "#, name.clone())?; + writeln!(out_file, "{},", out_data)?; + } + write!(out_file, "}}")?; + let out_analytics = report_name.join("data/js/analytics.js"); let mut out_file = File::create(out_analytics)?; let stats = visualization_data.get_analytics()?; diff --git a/src/visualizer.rs b/src/visualizer.rs index 82a3c576..b9302419 100644 --- a/src/visualizer.rs +++ b/src/visualizer.rs @@ -1,3 +1,4 @@ +use crate::data::data_formats::AperfData; use crate::utils::DataMetrics; use crate::{data::Data, data::ProcessedData, get_file, PDError}; use anyhow::Result; @@ -33,6 +34,7 @@ pub struct DataVisualizer { pub data: ProcessedData, pub file_handle: Option, pub run_values: HashMap>, + pub run_values_new: HashMap, pub js_file_name: String, pub js: String, pub api_name: String, @@ -53,6 +55,7 @@ impl DataVisualizer { data, file_handle: None, run_values: HashMap::new(), + run_values_new: HashMap::new(), js_file_name, js, api_name, @@ -151,6 +154,42 @@ impl DataVisualizer { Ok(()) } + pub fn process_raw_data_new(&mut self, name: String) -> Result<()> { + if !self.data_available.get(&name).unwrap() { + debug!("Raw data unavailable for: {}", self.api_name); + return Ok(()); + } + debug!("Processing raw data new for: {}", self.api_name); + + let mut raw_data = Vec::new(); + loop { + match bincode::deserialize_from::<_, Data>(self.file_handle.as_ref().unwrap()) { 
+ Ok(v) => raw_data.push(v), + Err(e) => match *e { + // EOF + bincode::ErrorKind::Io(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => { + break + } + // Ignore invalid enum variant errors, raw data wont be used by self.data + bincode::ErrorKind::Custom(ref msg) + if msg.contains("expected variant index") => + { + break + } + e => panic!( + "Error when Deserializing {} data at {} : {}", + self.api_name, + self.report_params.data_file_path.display().to_string(), + e + ), + }, + }; + } + self.run_values_new + .insert(name.clone(), self.data.process_raw_data_new(raw_data)?); + Ok(()) + } + pub fn get_data( &mut self, name: String, @@ -281,6 +320,10 @@ pub trait GetData { unimplemented!(); } + fn process_raw_data_new(&mut self, _raw_data: Vec) -> Result { + Err(PDError::VisualizerUnsupportedAPI.into()) // TODO: remove when all are implemented + } + fn custom_raw_data_parser(&mut self, _params: ReportParams) -> Result> { unimplemented!(); }