Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test_corpus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ on:
type: boolean
description: 'Regenerate results'
required: true
default: true
default: false
workflow_call:
inputs:
ref:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/xtest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:

- name: Run tests
run: |

if [[ "${{ matrix.python }}" == "python3.4" ]]; then
(cd /usr/lib64/python3.4/test && python3.4 make_ssl_certs.py)
elif [[ "${{ matrix.python }}" == "python3.5" ]]; then
Expand Down
166 changes: 121 additions & 45 deletions corpus_test/generate_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from result import Result, ResultReader

ENHANCED_REPORT = os.environ.get('ENHANCED_REPORT', False)
ENHANCED_REPORT = os.environ.get('ENHANCED_REPORT', True)


@dataclass
Expand Down Expand Up @@ -64,6 +64,9 @@ def mean_percent_of_original(self) -> float:
def larger_than_original(self) -> Iterable[Result]:
    """Yield every entry with a 'Minified' outcome whose minified size exceeds its original size"""
    for entry in self.entries.values():
        # Entries with any other outcome are never reported
        was_minified = entry.outcome == 'Minified'
        if was_minified and entry.original_size < entry.minified_size:
            yield entry

Expand Down Expand Up @@ -91,10 +94,18 @@ def compare_size_increase(self, base: 'ResultSet') -> Iterable[Result]:
"""

for result in self.entries.values():
if result.outcome != 'Minified':
# This result was not minified, so we can't compare
continue

if result.corpus_entry not in base.entries:
continue

base_result = base.entries[result.corpus_entry]
if base_result.outcome != 'Minified':
# The base result was not minified, so we can't compare
continue

if result.minified_size > base_result.minified_size:
yield result

Expand All @@ -104,10 +115,17 @@ def compare_size_decrease(self, base: 'ResultSet') -> Iterable[Result]:
"""

for result in self.entries.values():
if result.outcome != 'Minified':
continue

if result.corpus_entry not in base.entries:
continue

base_result = base.entries[result.corpus_entry]
if base_result.outcome != 'Minified':
# The base result was not minified, so we can't compare
continue

if result.minified_size < base_result.minified_size:
yield result

Expand Down Expand Up @@ -164,6 +182,103 @@ def format_difference(compare: Iterable[Result], base: Iterable[Result]) -> str:
else:
return s

def report_larger_than_original(results_dir: str, python_versions: Iterable[str], minifier_sha: str) -> Iterable[str]:
    """
    Generate the 'Larger than original' section of the report as markdown

    The section heading and table header are yielded first, followed by one table row
    for each entry returned by the summary's larger_than_original(), ordered by
    original size within each Python version.

    Python versions for which result_summary() raises FileNotFoundError are skipped.

    :param results_dir: Passed to result_summary() to locate the results
    :param python_versions: The Python versions to include, e.g. ['3.11']
    :param minifier_sha: The sha whose results are reported
    :return: The chunks of markdown that make up the section
    """
    yield '''
## Larger than original

| Corpus Entry | Original Size | Minified Size |
|--------------|--------------:|--------------:|'''

    for python_version in python_versions:
        try:
            summary = result_summary(results_dir, python_version, minifier_sha)
        except FileNotFoundError:
            continue

        larger_than_original = sorted(summary.larger_than_original(), key=lambda result: result.original_size)

        for entry in larger_than_original:
            # The signed difference in brackets is how much the minified output grew
            yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - entry.original_size:+}) |'

def report_unstable(results_dir: str, python_versions: Iterable[str], minifier_sha: str) -> Iterable[str]:
    """
    Generate the 'Unstable' section of the report as markdown

    The section heading and table header are yielded first, followed by one table row
    for each entry returned by the summary's unstable_minification(), ordered by
    original size within each Python version.

    Python versions for which result_summary() raises FileNotFoundError are skipped.

    :param results_dir: Passed to result_summary() to locate the results
    :param python_versions: The Python versions to include
    :param minifier_sha: The sha whose results are reported
    :return: The chunks of markdown that make up the section
    """
    yield '''
## Unstable

| Corpus Entry | Python Version | Original Size |
|--------------|----------------|--------------:|'''

    for python_version in python_versions:
        try:
            summary = result_summary(results_dir, python_version, minifier_sha)
        except FileNotFoundError:
            continue

        unstable = sorted(summary.unstable_minification(), key=lambda result: result.original_size)

        for entry in unstable:
            yield f'| {entry.corpus_entry} | {python_version} | {entry.original_size} |'

def report_exceptions(results_dir: str, python_versions: Iterable[str], minifier_sha: str) -> Iterable[str]:
    """
    Generate the 'Exceptions' section of the report as markdown

    The section heading and table header are yielded first, followed by one table row
    for each entry returned by the summary's exception(), ordered by original size
    within each Python version. The entry's outcome is shown in the 'Exception' column.
    If no rows were produced at all, a single 'None' row is yielded instead.

    Python versions for which result_summary() raises FileNotFoundError are skipped.

    :param results_dir: Passed to result_summary() to locate the results
    :param python_versions: The Python versions to include
    :param minifier_sha: The sha whose results are reported
    :return: The chunks of markdown that make up the section
    """
    yield '''
## Exceptions

| Corpus Entry | Python Version | Exception |
|--------------|----------------|-----------|'''

    exceptions_found = False

    for python_version in python_versions:
        try:
            summary = result_summary(results_dir, python_version, minifier_sha)
        except FileNotFoundError:
            continue

        exceptions = sorted(summary.exception(), key=lambda result: result.original_size)

        for entry in exceptions:
            exceptions_found = True
            yield f'| {entry.corpus_entry} | {python_version} | {entry.outcome} |'

    if not exceptions_found:
        # Placeholder row so the table is not rendered with a header only
        yield ' None | | |'

def report_larger_than_base(results_dir: str, python_versions: Iterable[str], minifier_sha: str, base_sha: str) -> Iterable[str]:
    """
    Generate the 'Top 10 Larger than base' section of the report as markdown

    The section heading and table header are yielded first. For each Python version the
    entries returned by compare_size_increase() against the base results are sorted by
    original size, and the first ten of them each produce a table row. If no rows were
    produced at all, a single 'N/A' row is yielded instead.

    Python versions for which loading the minifier_sha results raises FileNotFoundError
    are skipped.

    :param results_dir: Passed to result_summary() to locate the results
    :param python_versions: The Python versions to include, e.g. ['3.11']
    :param minifier_sha: The sha whose results are reported
    :param base_sha: The sha whose results are used as the baseline
    :return: The chunks of markdown that make up the section
    """
    yield '''
## Top 10 Larger than base

| Corpus Entry | Original Size | Minified Size |
|--------------|--------------:|--------------:|'''

    there_are_some_larger_than_base = False

    for python_version in python_versions:
        try:
            summary = result_summary(results_dir, python_version, minifier_sha)
        except FileNotFoundError:
            continue

        # NOTE(review): a FileNotFoundError raised while loading the base results is not
        # caught here and will propagate to the caller - confirm that is intended
        base_summary = result_summary(results_dir, python_version, base_sha)
        larger_than_base = sorted(summary.compare_size_increase(base_summary), key=lambda result: result.original_size)[:10]

        for entry in larger_than_base:
            there_are_some_larger_than_base = True
            # The signed difference in brackets is relative to the base minified size
            yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - base_summary.entries[entry.corpus_entry].minified_size:+}) |'

    if not there_are_some_larger_than_base:
        yield '| N/A | N/A | N/A |'

def report_slowest(results_dir: str, python_versions: Iterable[str], minifier_sha: str) -> Iterable[str]:
    """
    Generate the 'Top 10 Slowest' section of the report as markdown

    The section heading and table header are yielded first, followed by one table row
    for each of the ten entries with the largest time, per Python version.

    Python versions for which result_summary() raises FileNotFoundError are skipped,
    matching the other report_* section generators.

    :param results_dir: Passed to result_summary() to locate the results
    :param python_versions: The Python versions to include, e.g. ['3.11']
    :param minifier_sha: The sha whose results are reported
    :return: The chunks of markdown that make up the section
    """
    yield '''
## Top 10 Slowest

| Corpus Entry | Original Size | Minified Size | Time |
|--------------|--------------:|--------------:|-----:|'''

    for python_version in python_versions:
        try:
            summary = result_summary(results_dir, python_version, minifier_sha)
        except FileNotFoundError:
            # No results for this Python version, so there is nothing to rank
            continue

        for entry in sorted(summary.entries.values(), key=lambda entry: entry.time, reverse=True)[:10]:
            yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} | {entry.time:.3f} |'

def report(results_dir: str, minifier_ref: str, minifier_sha: str, base_ref: str, base_sha: str) -> Iterable[str]:
"""
Expand Down Expand Up @@ -236,50 +351,11 @@ def format_size_change_detail() -> str:
)

if ENHANCED_REPORT:
yield '''
## Larger than original

| Corpus Entry | Original Size | Minified Size |
|--------------|--------------:|--------------:|'''

for python_version in ['3.11']:
summary = result_summary(results_dir, python_version, minifier_sha)
larger_than_original = sorted(summary.larger_than_original(), key=lambda result: result.original_size)

for entry in larger_than_original:
yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - entry.original_size:+}) |'

yield '''
## Top 10 Larger than base

| Corpus Entry | Original Size | Minified Size |
|--------------|--------------:|--------------:|'''

there_are_some_larger_than_base = False

for python_version in ['3.11']:
summary = result_summary(results_dir, python_version, minifier_sha)
base_summary = result_summary(results_dir, python_version, base_sha)
larger_than_original = sorted(summary.compare_size_increase(base_summary), key=lambda result: result.original_size)[:10]

for entry in larger_than_original:
there_are_some_larger_than_base = True
yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - base_summary.entries[entry.corpus_entry].minified_size:+}) |'

if not there_are_some_larger_than_base:
yield '| N/A | N/A | N/A |'

yield '''
## Top 10 Slowest

| Corpus Entry | Original Size | Minified Size | Time |
|--------------|--------------:|--------------:|-----:|'''

for python_version in ['3.11']:
summary = result_summary(results_dir, python_version, minifier_sha)

for entry in sorted(summary.entries.values(), key=lambda entry: entry.time, reverse=True)[:10]:
yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} | {entry.time:.3f} |'
yield from report_larger_than_original(results_dir, ['3.11'], minifier_sha)
yield from report_larger_than_base(results_dir, ['3.11'], minifier_sha, base_sha)
yield from report_slowest(results_dir, ['3.11'], minifier_sha)
yield from report_unstable(results_dir, ['2.7', '3.3', '3.4', '3.5', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11'], minifier_sha)
yield from report_exceptions(results_dir, ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11'], minifier_sha)


def main():
Expand Down
60 changes: 52 additions & 8 deletions corpus_test/generate_results.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import argparse
import datetime
import gzip
import os
import sys
import time


import logging


import python_minifier
from result import Result, ResultWriter

Expand All @@ -23,8 +29,13 @@ def minify_corpus_entry(corpus_path, corpus_entry):
:rtype: Result
"""

with open(os.path.join(corpus_path, corpus_entry), 'rb') as f:
source = f.read()
if os.path.isfile(os.path.join(corpus_path, corpus_entry + '.py.gz')):
with gzip.open(os.path.join(corpus_path, corpus_entry + '.py.gz'), 'rb') as f:
source = f.read()
else:
with open(os.path.join(corpus_path, corpus_entry), 'rb') as f:
source = f.read()


result = Result(corpus_entry, len(source), 0, 0, '')

Expand Down Expand Up @@ -72,21 +83,54 @@ def corpus_test(corpus_path, results_path, sha, regenerate_results):
:param str sha: The python-minifier sha we are testing
:param bool regenerate_results: Regenerate results even if they are present
"""
corpus_entries = os.listdir(corpus_path)

python_version = '.'.join([str(s) for s in sys.version_info[:2]])

log_path = 'results_' + python_version + '_' + sha + '.log'
print('Logging in GitHub Actions is absolute garbage. Logs are going to ' + log_path)

logging.basicConfig(filename=os.path.join(results_path, log_path), level=logging.DEBUG)

corpus_entries = [entry[:-len('.py.gz')] for entry in os.listdir(corpus_path)]

results_file_path = os.path.join(results_path, 'results_' + python_version + '_' + sha + '.csv')

if os.path.isfile(results_file_path) and not regenerate_results:
print('Results file already exists: %s', results_file_path)
return
if os.path.isfile(results_file_path):
logging.info('Results file already exists: %s', results_file_path)
if regenerate_results:
os.remove(results_file_path)

total_entries = len(corpus_entries)
logging.info('Testing python-minifier on %d entries' % total_entries)
tested_entries = 0

start_time = time.time()
next_checkpoint = time.time() + 60

with ResultWriter(results_file_path) as result_writer:
logging.info('%d results already present' % len(result_writer))

for entry in corpus_entries:
print(entry)
if entry in result_writer:
continue

logging.debug(entry)

result = minify_corpus_entry(corpus_path, entry)
result_writer.write(result)
tested_entries += 1

sys.stdout.flush()

if time.time() > next_checkpoint:
percent = len(result_writer) / total_entries * 100
time_per_entry = (time.time() - start_time) / tested_entries
entries_remaining = len(corpus_entries) - len(result_writer)
time_remaining = int(entries_remaining * time_per_entry)
logging.info('Tested %d/%d entries (%d%%) %s seconds remaining' % (len(result_writer), total_entries, percent, time_remaining))
sys.stdout.flush()
next_checkpoint = time.time() + 60

logging.info('Finished')

def bool_parse(value):
    """
    Parse the text of a boolean option

    Only the word 'true' is treated as True. The comparison ignores case and
    surrounding whitespace, so 'True' or ' TRUE ' are accepted as well.
    Anything else, including values that are not strings, is False.

    :param str value: The text to parse
    :rtype: bool
    """
    try:
        normalised = value.strip().lower()
    except AttributeError:
        # Not a string-like value, so it can't be the word 'true'
        return False

    return normalised == 'true'
Expand Down
32 changes: 31 additions & 1 deletion corpus_test/result.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os


class Result(object):

def __init__(self, corpus_entry, original_size, minified_size, time, outcome):
Expand All @@ -21,15 +24,37 @@ def __init__(self, results_path):
:param str results_path: The path to the results file
"""
self._results_path = results_path
self._size = 0
self._existing_result_set = set()

if not os.path.isfile(self._results_path):
return

with open(self._results_path, 'r') as f:
for line in f:
if line != 'corpus_entry,original_size,minified_size,time,result\n':
self._existing_result_set.add(line.split(',')[0])

self._size += len(self._existing_result_set)

def __enter__(self):
    """
    Open the results file for appending and write the CSV header line

    :return: This object, with the open file available as self.results
    """
    # Open exactly once, in append mode, so that anything already in the file is kept.
    # Opening with 'w' would truncate the file, and opening twice would leave the
    # first handle unclosed.
    self.results = open(self._results_path, 'a')

    # The header is written every time the file is opened, so a file that has been
    # appended to more than once contains repeated header lines.
    self.results.write('corpus_entry,original_size,minified_size,time,result\n')
    return self

def __exit__(self, exc_type, exc_val, exc_tb):
    """
    Close the results file

    Nothing is returned, so an exception raised inside the with block is not suppressed.
    """
    results_file = self.results
    results_file.close()

def __contains__(self, item):
"""
:param str item: The name of the entry in the corpus
:return bool: True if the entry already exists in the results file
"""
return item in self._existing_result_set

def __len__(self):
return self._size

def write(self, result):
"""
:param Result result: The result to write to the file
Expand All @@ -41,6 +66,7 @@ def write(self, result):
str(result.time) + ',' + result.outcome + '\n'
)
self.results.flush()
self._size += 1


class ResultReader:
Expand All @@ -66,7 +92,11 @@ def __next__(self):
"""
:return Result: The next result in the file
"""

line = self.results.readline()
while line == 'corpus_entry,original_size,minified_size,time,result\n':
line = self.results.readline()

if line == '':
raise StopIteration
else:
Expand Down
7 changes: 7 additions & 0 deletions docs/source/transforms/remove_explicit_return_none.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Sample input for the docs page named by this file's path (transforms/remove_explicit_return_none).
# NOTE(review): presumably the explicit `return None` statements are the point of the example - keep them as written.
def important(a):
if a > 3:
return a
if a < 2:
return None
a.adjust(1)
return None
Loading