In [1]:
import ast
import glob
import re


def get_n_longest_funcs(dir, n):
    function_definitions = []
    for path in glob.glob(f"{dir}/**/*.py", recursive=True):
        with open(path) as f:
            root = ast.parse(f.read(), path)

        for node in ast.walk(root):
            if isinstance(node, ast.FunctionDef):
                length = node.end_lineno - node.lineno
                definition = (node.name, length, path)
                function_definitions.append(definition)

    function_definitions.sort(key=lambda x: x[1], reverse=True)
    return function_definitions[:n]


print("========== Great Expectations ==========")
for func in get_n_longest_funcs("great_expectations", 20):
    print(func)

print("\n========== Tests ==========")
for func in get_n_longest_funcs("tests", 20):
    print(func)

('expect_column_kl_divergence_to_be_less_than', 450, 'great_expectations/dataset/dataset.py')
('get_dataset', 424, 'great_expectations/self_check/util.py')
('test_yaml_config', 401, 'great_expectations/data_context/data_context.py')
('_register_metric_functions', 396, 'great_expectations/expectations/metrics/map_metric_provider.py')
('_validate', 358, 'great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py')
('validate', 328, 'great_expectations/data_asset/data_asset.py')
('generate_expectation_tests', 309, 'great_expectations/self_check/util.py')
('validate', 279, 'great_expectations/validator/validator.py')
('column_condition_partial', 245, 'great_expectations/expectations/metrics/map_metric_provider.py')
('validation_operator_run', 244, 'great_expectations/cli/v012/validation_operator.py')
('profile_datasource', 231, 'great_expectations/data_context/data_context.py')
('column_pair_condition_partial', 226, 'great_expectations/expectations/metrics/map_metr

In [23]:
import statistics

def parse_test_results():
    with open("test_performance.txt") as f:
        contents = f.read()

    pattern = r"(\d*\.\d{2})s call" # Can check: 'call', 'setup' and 'teardown'
    r = re.compile(pattern)

    durations = [float(t) for t in r.findall(contents)]
    return durations
    
test_results = parse_test_results()
print(f"Count: {len(test_results)}")
print(f"Count Over 0.1s: {len([x for x in test_results if x > 0.1])}")
print(f"Slowest: {test_results[:10]}")
print(f"Mean: {statistics.mean(test_results)}")
print(f"Median: {statistics.median(test_results)}")
print(f"Mode: {statistics.mode(test_results)}")

Count: 5909
Count Over 0.1s: 984
Slowest: [37.34, 16.7, 12.07, 9.79, 9.57, 9.46, 9.39, 9.1, 8.58, 8.47]
Mean: 0.12851582332035877
Median: 0.01
Mode: 0.0
