Skip to content

Commit

Permalink
Made multiple improvements to summarize script
Browse files Browse the repository at this point in the history
The following features were added:

   * Ability to summarize more than one file
   * Calculate standard deviation
   * Print summary in table
  • Loading branch information
kyleknap committed Aug 23, 2016
1 parent 1791893 commit f6417aa
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 25 deletions.
1 change: 1 addition & 0 deletions requirements-dev.txt
@@ -1,2 +1,3 @@
-r requirements-test.txt
psutil>=4.1.0,<5.0.0
tabulate==0.7.5
158 changes: 133 additions & 25 deletions scripts/performance/summarize
Expand Up @@ -12,14 +12,45 @@ Run this script with::
And that should output::
Total time: 1.810 seconds
Max memory: 114.0 MiB
Max cpu: 208.3 percent
Average memory: 67.3 MiB
Average cpu: 140.5 percent
+-----------------------+----------+----------------------+
| Metric | Mean | Standard Deviation |
+=======================+==========+======================+
| Total Time (seconds) | 1.200 | 0.0 |
+-----------------------+----------+----------------------+
| Maximum Memory | 42.3 MiB | 0 Bytes |
+-----------------------+----------+----------------------+
| Maximum CPU (percent) | 88.1 | 0.0 |
+-----------------------+----------+----------------------+
| Average Memory | 33.9 MiB | 0 Bytes |
+-----------------------+----------+----------------------+
| Average CPU (percent) | 30.5 | 0.0 |
+-----------------------+----------+----------------------+
The script can also be run with multiple files:
./summarize -f performance.csv performance-2.csv
And will have a similar output:
+-----------------------+----------+----------------------+
| Metric | Mean | Standard Deviation |
+=======================+==========+======================+
| Total Time (seconds) | 1.155 | 0.0449999570847 |
+-----------------------+----------+----------------------+
| Maximum Memory | 42.5 MiB | 110.0 KiB |
+-----------------------+----------+----------------------+
| Maximum CPU (percent) | 94.5 | 6.45 |
+-----------------------+----------+----------------------+
| Average Memory | 35.6 MiB | 1.7 MiB |
+-----------------------+----------+----------------------+
| Average CPU (percent) | 27.5 | 3.03068181818 |
+-----------------------+----------+----------------------+
"""
import argparse
import csv
from math import sqrt

from tabulate import tabulate


def human_readable_size(value):
Expand Down Expand Up @@ -51,8 +82,16 @@ class Summarizer(object):
self._start_time = None
self._end_time = None
self._totals = {
'time': [],
'average_memory': [],
'average_cpu': [],
'max_memory': [],
'max_cpu': [],
}
self._averages = {
'memory': 0.0,
'cpu': 0.0
'cpu': 0.0,

}
self._maximums = {
'memory': 0.0,
Expand All @@ -61,43 +100,90 @@ class Summarizer(object):

@property
def total_time(self):
    """Mean total run time in seconds, averaged across all processed files.

    The stale pre-change return (end_time - start_time) that shadowed this
    implementation has been removed.
    """
    return self._average_across_all_files('time')

@property
def max_cpu(self):
    """Mean of the per-file maximum CPU usage (percent) across all files.

    The stale pre-change return of self._maximums['cpu'] has been removed.
    """
    return self._average_across_all_files('max_cpu')

@property
def max_memory(self):
    """Mean of the per-file maximum memory usage, as a human readable string.

    The stale pre-change return of self._maximums['memory'] has been removed.
    """
    return human_readable_size(
        self._average_across_all_files('max_memory'))

@property
def average_cpu(self):
    """Mean of the per-file average CPU usage (percent) across all files.

    The stale pre-change call to self._average('cpu') has been removed.
    """
    return self._average_across_all_files('average_cpu')

@property
def average_memory(self):
    """Mean of the per-file average memory usage, as a human readable string.

    The stale pre-change call to self._average('memory') has been removed.
    """
    return human_readable_size(
        self._average_across_all_files('average_memory'))

@property
def std_dev_total_time(self):
    """Standard deviation of the total run time across all files."""
    deviation = self._standard_deviation_across_all_files('time')
    return deviation

@property
def std_dev_max_cpu(self):
    """Standard deviation of per-file maximum CPU usage (percent)."""
    deviation = self._standard_deviation_across_all_files('max_cpu')
    return deviation

@property
def std_dev_max_memory(self):
    """Standard deviation of per-file maximum memory, human readable."""
    deviation = self._standard_deviation_across_all_files('max_memory')
    return human_readable_size(deviation)

@property
def std_dev_average_cpu(self):
    """Standard deviation of per-file average CPU usage (percent)."""
    deviation = self._standard_deviation_across_all_files('average_cpu')
    return deviation

@property
def std_dev_average_memory(self):
    """Standard deviation of per-file average memory, human readable."""
    deviation = self._standard_deviation_across_all_files('average_memory')
    return human_readable_size(deviation)

def _average(self, name):
return self._totals[name]/self._num_rows
def _average_across_all_files(self, name):
return sum(self._totals[name])/len(self._totals[name])

def _standard_deviation_across_all_files(self, name):
mean = self._average_across_all_files(name)
differences = [total - mean for total in self._totals[name]]
sq_differences = [difference ** 2 for difference in differences]
return sqrt(sum(sq_differences)/len(self._totals[name]))

def summarize(self):
    """Print the processed data as a grid table: one row per metric,
    with its mean and standard deviation across all processed files.

    The stale pre-change ``print('Total time: ...')`` lines that preceded
    the table code have been removed.
    """
    table = [
        ['Total Time (seconds)', '%.3f' % self.total_time,
         self.std_dev_total_time],
        ['Maximum Memory', self.max_memory, self.std_dev_max_memory],
        ['Maximum CPU (percent)', '%.1f' % self.max_cpu,
         self.std_dev_max_cpu],
        ['Average Memory', self.average_memory,
         self.std_dev_average_memory],
        ['Average CPU (percent)', '%.1f' % self.average_cpu,
         self.std_dev_average_cpu],
    ]
    print(
        tabulate(
            table, headers=['Metric', 'Mean', 'Standard Deviation'],
            tablefmt="grid"
        )
    )

def process(self, args):
    """Process the data from every CSV file named on the command line.

    :param args: parsed argparse namespace carrying ``benchmark_files``,
        a list of CSV file paths.

    The stale pre-change ``with open(args.benchmark_file, 'rb')`` line
    (single-file version) has been removed.
    """
    for benchmark_file in args.benchmark_files:
        self.process_individual_file(benchmark_file)

def process_individual_file(self, benchmark_file):
    """Process one benchmark CSV file and fold its results into the
    cross-file totals.

    :param benchmark_file: path to a CSV file produced by the benchmark
        script.

    The duplicated ``self._end_time = self._get_time(row)`` line (diff
    residue) has been collapsed to a single assignment.
    """
    # NOTE(review): 'rb' + csv.reader is the Python 2 idiom; under
    # Python 3 this needs open(benchmark_file, 'r', newline='') --
    # confirm the target interpreter before changing.
    with open(benchmark_file, 'rb') as f:
        reader = csv.reader(f)
        # Process each row from the CSV file
        for row in reader:
            self.process_data_row(row)
        # The last row seen marks the end of the run.
        # NOTE(review): an empty CSV leaves `row` unbound -> NameError.
        self._end_time = self._get_time(row)
    self._finalize_processed_data_for_file()

def process_data_row(self, row):
# If the row is the first row collect the start time.
Expand All @@ -112,14 +198,31 @@ class Summarizer(object):
index = self.DATA_INDEX_IN_ROW[name]
# Get the data point.
data_point = float(row[index])
self._add_to_total(name, data_point)
self._add_to_average(name, data_point)
self._account_for_maximum(name, data_point)

def _finalize_processed_data_for_file(self):
# Add numbers to the total, which keeps track of data over
# all files provided.
self._totals['time'].append(self._end_time - self._start_time)
self._totals['max_cpu'].append(self._maximums['cpu'])
self._totals['max_memory'].append(self._maximums['memory'])
self._totals['average_cpu'].append(
self._averages['cpu']/self._num_rows)
self._totals['average_memory'].append(
self._averages['memory']/self._num_rows)

# Reset some of the data needed to be tracked for each specific
# file.
self._num_rows = 0
self._maximums = self._maximums.fromkeys(self._maximums, 0.0)
self._averages = self._averages.fromkeys(self._averages, 0.0)

def _get_time(self, row):
return float(row[self.DATA_INDEX_IN_ROW['time']])

def _add_to_total(self, name, data_point):
self._totals[name] += data_point
def _add_to_average(self, name, data_point):
self._averages[name] += data_point

def _account_for_maximum(self, name, data_point):
if data_point > self._maximums[name]:
Expand All @@ -129,8 +232,13 @@ class Summarizer(object):
def main():
parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument(
'-f', '--benchmark-file', required=True,
help='The CSV output file from the benchmark script.')
'-f', '--benchmark-files', required=True, nargs='+',
help=(
'The CSV output file from the benchmark script. If you provide'
'more than one of these files, it will give you the average '
'across all of the files for each metric.'
)
)
args = parser.parse_args()
summarizer = Summarizer()
summarizer.process(args)
Expand Down

0 comments on commit f6417aa

Please sign in to comment.