In [1]:
import ast
import glob
import re


def get_n_longest_funcs(dir, n):
    """Return the ``n`` longest function definitions found under ``dir``.

    Every ``*.py`` file below ``dir`` (searched recursively) is parsed with
    ``ast``; all function definitions in the tree are measured, including
    methods, nested functions and ``async def`` functions.

    Args:
        dir: Root directory to search.
        n: Maximum number of entries to return.

    Returns:
        A list of ``(name, length, path)`` tuples sorted by ``length`` in
        descending order. ``length`` is ``end_lineno - lineno``, i.e. the
        line span of the definition not counting its first line.

    Raises:
        SyntaxError: If a discovered file cannot be parsed.
    """
    # Coroutines are distinct node types in the AST; match both so that
    # ``async def`` functions are not silently left out of the ranking.
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)

    function_definitions = []
    for path in glob.glob(f"{dir}/**/*.py", recursive=True):
        # Hand raw bytes to the parser so it applies Python's own source
        # encoding rules (UTF-8 default, BOM, coding cookie) instead of the
        # platform's locale encoding.
        with open(path, "rb") as f:
            root = ast.parse(f.read(), path)

        for node in ast.walk(root):
            if isinstance(node, function_nodes):
                length = node.end_lineno - node.lineno
                function_definitions.append((node.name, length, path))

    # list.sort is stable, so functions of equal length keep discovery order.
    function_definitions.sort(key=lambda x: x[1], reverse=True)
    return function_definitions[:n]


# Report the 20 longest functions for the library and for the test suite.
# Each entry pairs the heading to print with the directory to scan.
for heading, directory in (
    ("========== Great Expectations ==========", "great_expectations"),
    ("\n========== Tests ==========", "tests"),
):
    print(heading)
    for func in get_n_longest_funcs(directory, 20):
        print(func)

('expect_column_kl_divergence_to_be_less_than', 450, 'great_expectations/dataset/dataset.py')
('get_dataset', 424, 'great_expectations/self_check/util.py')
('test_yaml_config', 401, 'great_expectations/data_context/data_context.py')
('_register_metric_functions', 396, 'great_expectations/expectations/metrics/map_metric_provider.py')
('_validate', 358, 'great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py')
('validate', 328, 'great_expectations/data_asset/data_asset.py')
('generate_expectation_tests', 309, 'great_expectations/self_check/util.py')
('validate', 279, 'great_expectations/validator/validator.py')
('column_condition_partial', 245, 'great_expectations/expectations/metrics/map_metric_provider.py')
('validation_operator_run', 244, 'great_expectations/cli/v012/validation_operator.py')
('profile_datasource', 231, 'great_expectations/data_context/data_context.py')
('column_pair_condition_partial', 226, 'great_expectations/expectations/metrics/map_metr

In [2]:
def parse_coverage_report(path="test_coverage.txt"):
    """Print per-file coverage for ``great_expectations`` files, lowest first.

    The report is read as whitespace-separated columns. Only rows whose first
    column starts with ``great_expectations`` are kept; the coverage value is
    taken from the first later column that ends with ``%``. Blank lines,
    headers, separators and rows without a percentage column are skipped.

    Args:
        path: Location of the text coverage report. Defaults to
            ``test_coverage.txt`` in the current working directory.

    Returns:
        None. Each ``(name, percent)`` tuple is printed on its own line in
        ascending order of ``percent``.

    Raises:
        ValueError: If a percentage column is not an integer (e.g. ``87.5%``).
    """
    with open(path) as f:
        contents = f.readlines()

    vals = []
    for line in contents:
        # str.split() with no arguments already strips surrounding whitespace
        # and yields an empty list for blank lines.
        parts = line.split()
        if not parts or not parts[0].startswith("great_expectations"):
            continue

        # Locate the percentage by its "%" suffix rather than by position so
        # rows with additional columns do not break the parse.
        cover = next((p for p in parts[1:] if p.endswith("%")), None)
        if cover is None:
            continue

        vals.append((parts[0], int(cover.strip("%"))))

    # Stable sort: files with equal coverage keep their order from the report.
    vals.sort(key=lambda x: x[1])
    for val in vals:
        print(val)
        
# Run the report; the function prints its rows and has no return value.
parse_coverage_report()

('great_expectations/cli/build_docs.py', 0)
('great_expectations/cli/checkpoint.py', 0)
('great_expectations/cli/checkpoint_script_template.py', 0)
('great_expectations/cli/datasource.py', 0)
('great_expectations/cli/docs.py', 0)
('great_expectations/cli/init.py', 0)
('great_expectations/cli/project.py', 0)
('great_expectations/cli/python_subprocess.py', 0)
('great_expectations/cli/store.py', 0)
('great_expectations/cli/util.py', 0)
('great_expectations/cli/v012/checkpoint_script_template.py', 0)
('great_expectations/cli/validation_operator.py', 0)
('great_expectations/expectations/core/expect_column_bootstrapped_ks_test_p_value_to_be_greater_than.py', 0)
('great_expectations/expectations/core/expect_column_chisquare_test_p_value_to_be_greater_than.py', 0)
('great_expectations/expectations/core/expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than.py', 0)
('great_expectations/expectations/metrics/column_aggregate_metric.py', 0)
('great_expectations/expectations/me

In [109]:
def aggregate_simple_test_performance(filename):
    """Aggregate timings from a report with ``<secs>s <verb> <test id>`` rows.

    The test id is whatever follows the verb, starting at the text ``tests``
    and running to the end of the line.

    Args:
        filename: Path of the timing report to read.

    Returns:
        The value returned by ``_aggregate_test_performance`` for this file.
    """
    return _aggregate_test_performance(
        filename,
        r"(\d*\.\d{2})s (\w+)\s+(tests.+)",
    )

def aggregate_complex_test_performance(filename):
    """Aggregate timings from a report with a bracketed field before the test id.

    Rows must look like ``<secs>s <verb> [<text>] tests.<rest of line>``; the
    bracketed field is matched but not captured.

    Args:
        filename: Path of the timing report to read.

    Returns:
        The value returned by ``_aggregate_test_performance`` for this file.
    """
    return _aggregate_test_performance(
        filename,
        r"(\d*\.\d{2})s (\w+)\s+\[[\s\w.?]+\]\s*(tests\..+)",
    )

def _aggregate_test_performance(filename, pattern):
    with open(filename) as f:
        contents = f.read()

    r = re.compile(pattern)
    verb_map = {"setup": 0, "call": 1, "teardown": 2}
    performance_map = {}
    
    for duration, verb, test in r.findall(contents):
        idx = verb_map[verb]
        if test not in performance_map:
            performance_map[test] = [0, 0, 0]
        performance_map[test][idx] += float(duration)
        
    res = [(sum(v), v, k) for k,v in performance_map.items()]
    res.sort(reverse=True)
    print("Slowest Tests:")
    for i, r in enumerate(res[:10]):
        print(f"{i+1}: {r}")
    
    print(f"\nTotal Test Time: {sum(v[0] for v in res)/60:.2f} mins")
    print(f"Average Test Time: {sum(v[0] for v in res)/len(res):.2f} secs")
    print(f"% Above Threshold (0.1s): {len([v[0] for v in res if v[0] > 0.1]) * 100 / len(res):.2f}%")

    return performance_map

In [110]:
# Compatibility 3.8
compatability = aggregate_complex_test_performance("performance/compatability.txt")

Slowest Tests:
1: (15.72, [0.0, 15.72, 0.0], 'tests.data_context.store.test_store_backends/test_TupleS3StoreBackend_list_over_1000_keys')
2: (10.67, [0.26, 10.41, 0.0], 'tests.profile.test_user_configurable_profiler_v2_batch_kwargs/test_profiler_all_expectation_types')
3: (9.64, [0.96, 8.68, 0.0], 'tests.profile.test_user_configurable_profiler_v3_batch_request/test_profiled_dataset_passes_own_validation')
4: (9.059999999999999, [0.54, 8.52, 0.0], 'tests.profile.test_user_configurable_profiler_v3_batch_request/test_profiler_all_expectation_types_pandas')
5: (8.379999999999999, [0.87, 7.51, 0.0], 'tests.rule_based_profiler.test_profiler_user_workflows/test_bobby_profiler_user_workflow_multi_batch_row_count_range_rule_and_column_ranges_rule_oneshot_sampling_method')
6: (7.57, [0.0, 7.57, 0.0], 'tests.integration.test_script_runner/test_integration_tests[tests/integration/fixtures/yellow_tripdata_pandas_fixture/one_multi_batch_request_one_validator.py]')
7: (6.92, [1.05, 5.87, 0.0], 'tests

In [111]:
# Comprehensive 3.8
# Prints the timing summary and keeps the per-test map for later inspection.
comprehensive = aggregate_complex_test_performance("performance/comprehensive.txt")

Slowest Tests:
1: (49.28, [4.0, 45.28, 0.0], 'tests.profile.test_user_configurable_profiler_v3_batch_request/test_profiler_all_expectation_types_spark[test_backends0]')
2: (20.88, [0.0, 20.88, 0.0], 'tests.data_context.store.test_store_backends/test_TupleS3StoreBackend_list_over_1000_keys')
3: (14.14, [0.34, 13.8, 0.0], 'tests.profile.test_user_configurable_profiler_v2_batch_kwargs/test_profiler_all_expectation_types')
4: (12.02, [0.01, 12.01, 0.0], 'tests.integration.test_script_runner/test_integration_tests[tests/integration/fixtures/yellow_tripdata_pandas_fixture/one_multi_batch_request_one_validator.py]')
5: (11.72, [1.15, 10.57, 0.0], 'tests.profile.test_user_configurable_profiler_v3_batch_request/test_profiled_dataset_passes_own_validation')
6: (11.27, [0.73, 10.54, 0.0], 'tests.profile.test_user_configurable_profiler_v3_batch_request/test_profiler_all_expectation_types_pandas')
7: (10.15, [0.01, 10.14, 0.0], 'tests.integration.test_script_runner/test_integration_tests[tests/inte

In [112]:
# Usage Statistics
# This report is parsed with the simple pattern (no bracketed field per row).
usage_stats = aggregate_simple_test_performance("performance/usage_stats.txt")

Slowest Tests:
1: (2.43, [0.0, 2.43, 0.0], 'tests/integration/usage_statistics/test_integration_usage_statistics.py::test_graceful_failure_with_no_internet')
2: (1.04, [0.0, 1.04, 0.0], 'tests/integration/usage_statistics/test_usage_statistics_messages.py::test_usage_statistics_message[data_context.test_yaml_config_32]')
3: (0.74, [0.0, 0.74, 0.0], 'tests/integration/usage_statistics/test_usage_stats_common_messages_are_sent_v3api.py::test_common_usage_stats_are_sent_no_mocking')
4: (0.58, [0.0, 0.58, 0.0], 'tests/integration/usage_statistics/test_usage_statistics_messages.py::test_usage_statistics_message[cli.checkpoint.script_3]')
5: (0.39, [0.0, 0.39, 0.0], 'tests/integration/usage_statistics/test_usage_statistics_messages.py::test_usage_statistics_message[cli.project.upgrade_1]')
6: (0.39, [0.0, 0.39, 0.0], 'tests/integration/usage_statistics/test_integration_usage_statistics.py::test_send_malformed_data')
7: (0.33, [0.0, 0.33, 0.0], 'tests/integration/usage_statistics/test_usage_s

In [113]:
# MySQL
# Prints the timing summary and keeps the per-test map for later inspection.
mysql = aggregate_complex_test_performance("performance/mysql.txt")

Slowest Tests:
1: (18.47, [0.0, 18.47, 0.0], 'tests.data_context.store.test_store_backends/test_TupleS3StoreBackend_list_over_1000_keys')
2: (12.17, [0.3, 11.87, 0.0], 'tests.profile.test_user_configurable_profiler_v2_batch_kwargs/test_profiler_all_expectation_types')
3: (10.040000000000001, [0.98, 9.06, 0.0], 'tests.profile.test_user_configurable_profiler_v3_batch_request/test_profiled_dataset_passes_own_validation')
4: (9.690000000000001, [0.63, 9.06, 0.0], 'tests.profile.test_user_configurable_profiler_v3_batch_request/test_profiler_all_expectation_types_pandas')
5: (9.61, [0.01, 9.6, 0.0], 'tests.integration.test_script_runner/test_integration_tests[tests/integration/fixtures/yellow_tripdata_pandas_fixture/one_multi_batch_request_one_validator.py]')
6: (8.66, [0.01, 8.65, 0.0], 'tests.integration.test_script_runner/test_integration_tests[tests/integration/fixtures/yellow_tripdata_pandas_fixture/multiple_batch_requests_one_validator_one_step.py]')
7: (8.61, [0.01, 8.6, 0.0], 'tests.

In [107]:
# MSSQL
# Prints the timing summary and keeps the per-test map for later inspection.
mssql = aggregate_complex_test_performance("performance/mssql.txt")

Slowest Tests:
1: (23.8, [0.0, 23.8, 0.0], 'tests.data_context.store.test_store_backends/test_TupleS3StoreBackend_list_over_1000_keys')
2: (14.92, [0.37, 14.55, 0.0], 'tests.profile.test_user_configurable_profiler_v2_batch_kwargs/test_profiler_all_expectation_types')
3: (13.14, [0.79, 12.35, 0.0], 'tests.profile.test_user_configurable_profiler_v3_batch_request/test_profiler_all_expectation_types_pandas')
4: (12.61, [1.27, 11.34, 0.0], 'tests.profile.test_user_configurable_profiler_v3_batch_request/test_profiled_dataset_passes_own_validation')
5: (11.73, [0.01, 11.72, 0.0], 'tests.integration.test_script_runner/test_integration_tests[tests/integration/fixtures/yellow_tripdata_pandas_fixture/one_multi_batch_request_one_validator.py]')
6: (10.97, [0.01, 10.96, 0.0], 'tests.integration.test_script_runner/test_integration_tests[tests/integration/fixtures/yellow_tripdata_pandas_fixture/multiple_batch_requests_one_validator_one_step.py]')
7: (10.530000000000001, [1.12, 9.41, 0.0], 'tests.rule

In [108]:
# CLI Integration
# This report is parsed with the simple pattern (no bracketed field per row).
cli_integration = aggregate_simple_test_performance("performance/cli_integration.txt")

Slowest Tests:
1: (12.48, [6.02, 6.46, 0.0], 'tests/cli/test_checkpoint.py::test_checkpoint_run_happy_path_with_successful_validation_spark[test_backends0]')
2: (8.31, [0.38, 7.93, 0.0], 'tests/cli/test_suite.py::test_suite_edit_multiple_datasources_with_sql_with_no_additional_args_without_citations_runs_notebook_opens_jupyter[test_backends0]')
3: (8.26, [0.32, 7.94, 0.0], 'tests/cli/test_suite.py::test_suite_edit_multiple_datasources_with_no_additional_args_with_citations_runs_notebook_opens_jupyter')
4: (8.1, [0.39, 7.71, 0.0], 'tests/cli/test_suite.py::test_suite_edit_multiple_datasources_with_sql_with_no_additional_args_with_citations_runs_notebook_opens_jupyter[test_backends0]')
5: (8.01, [0.28, 7.73, 0.0], 'tests/cli/test_suite.py::test_suite_edit_multiple_datasources_with_no_additional_args_without_citations_runs_notebook_opens_jupyter')
6: (7.58, [0.23, 7.35, 0.0], 'tests/cli/test_suite.py::test_suite_new_interactive_valid_batch_request_from_json_file_in_notebook_runs_notebook_