diff --git a/shared/django_apps/dummy_settings.py b/shared/django_apps/dummy_settings.py index a012310e5..e009cf5d8 100644 --- a/shared/django_apps/dummy_settings.py +++ b/shared/django_apps/dummy_settings.py @@ -34,6 +34,7 @@ "shared.django_apps.reports", "shared.django_apps.staticanalysis", "shared.django_apps.test_analytics", + "shared.django_apps.timeseries", ] # Needed for makemigrations to work diff --git a/shared/django_apps/timeseries/__init__.py b/shared/django_apps/timeseries/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/shared/django_apps/timeseries/migrations/0001_initial.py b/shared/django_apps/timeseries/migrations/0001_initial.py new file mode 100644 index 000000000..8a7e77314 --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0001_initial.py @@ -0,0 +1,47 @@ +# Generated by Django 3.1.13 on 2022-05-23 20:35 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="Measurement", + fields=[ + ("timestamp", models.DateTimeField(primary_key=True, serialize=False)), + ("owner_id", models.BigIntegerField()), + ("repo_id", models.BigIntegerField()), + ("flag_id", models.BigIntegerField(null=True)), + ("branch", models.TextField(null=True)), + ("commit_sha", models.TextField(null=True)), + ("name", models.TextField()), + ("value", models.FloatField()), + ], + ), + migrations.RunSQL( + "ALTER TABLE timeseries_measurement DROP CONSTRAINT timeseries_measurement_pkey;", + reverse_sql="", + ), + migrations.AddIndex( + model_name="measurement", + index=models.Index( + fields=[ + "owner_id", + "repo_id", + "flag_id", + "branch", + "name", + "timestamp", + ], + name="timeseries__owner_i_2cc713_idx", + ), + ), + migrations.RunSQL( + "SELECT create_hypertable('timeseries_measurement', 'timestamp');", + reverse_sql="", + ), + ] diff --git a/shared/django_apps/timeseries/migrations/0002_continuous_aggregates.py 
b/shared/django_apps/timeseries/migrations/0002_continuous_aggregates.py new file mode 100644 index 000000000..2cba7d131 --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0002_continuous_aggregates.py @@ -0,0 +1,37 @@ +# Generated by Django 3.1.13 on 2022-05-23 20:46 + +from django.db import migrations + + +class Migration(migrations.Migration): + # can't create these views in a transaction + atomic = False + + dependencies = [ + ("timeseries", "0001_initial"), + ] + + operations = [ + migrations.RunSQL( + f""" + create materialized view timeseries_measurement_summary_{days}day + with (timescaledb.continuous) as + select + owner_id, + repo_id, + flag_id, + branch, + name, + time_bucket(interval '{days} days', timestamp) as timestamp_bin, + avg(value) as value_avg, + max(value) as value_max, + min(value) as value_min, + count(value) as value_count + from timeseries_measurement + group by + owner_id, repo_id, flag_id, branch, name, timestamp_bin; + """, + reverse_sql=f"drop materialized view timeseries_measurement_summary_{days}day;", + ) + for days in [1, 7, 30] + ] diff --git a/shared/django_apps/timeseries/migrations/0003_cagg_policies.py b/shared/django_apps/timeseries/migrations/0003_cagg_policies.py new file mode 100644 index 000000000..51d35611e --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0003_cagg_policies.py @@ -0,0 +1,24 @@ +# Generated by Django 3.1.13 on 2022-05-24 14:51 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("timeseries", "0002_continuous_aggregates"), + ] + + operations = [ + migrations.RunSQL( + f""" + select add_continuous_aggregate_policy( + 'timeseries_measurement_summary_{name}', + start_offset => NULL, + end_offset => NULL, + schedule_interval => INTERVAL '24 hours' + ); + """, + reverse_sql=f"select remove_continuous_aggregate_policy('timeseries_measurement_summary_{name}');", + ) + for name in ["1day", "7day", "30day"] + ] diff --git 
a/shared/django_apps/timeseries/migrations/0004_measurement_summaries.py b/shared/django_apps/timeseries/migrations/0004_measurement_summaries.py new file mode 100644 index 000000000..c75568406 --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0004_measurement_summaries.py @@ -0,0 +1,84 @@ +# Generated by Django 3.1.13 on 2022-05-25 20:02 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("timeseries", "0003_cagg_policies"), + ] + + operations = [ + migrations.CreateModel( + name="MeasurementSummary1Day", + fields=[ + ( + "timestamp_bin", + models.DateTimeField(primary_key=True, serialize=False), + ), + ("owner_id", models.BigIntegerField()), + ("repo_id", models.BigIntegerField()), + ("flag_id", models.BigIntegerField()), + ("branch", models.TextField()), + ("name", models.TextField()), + ("value_avg", models.FloatField()), + ("value_max", models.FloatField()), + ("value_min", models.FloatField()), + ("value_count", models.FloatField()), + ], + options={ + "db_table": "timeseries_measurement_summary_1day", + "ordering": ["timestamp_bin"], + "abstract": False, + "managed": False, + }, + ), + migrations.CreateModel( + name="MeasurementSummary30Day", + fields=[ + ( + "timestamp_bin", + models.DateTimeField(primary_key=True, serialize=False), + ), + ("owner_id", models.BigIntegerField()), + ("repo_id", models.BigIntegerField()), + ("flag_id", models.BigIntegerField()), + ("branch", models.TextField()), + ("name", models.TextField()), + ("value_avg", models.FloatField()), + ("value_max", models.FloatField()), + ("value_min", models.FloatField()), + ("value_count", models.FloatField()), + ], + options={ + "db_table": "timeseries_measurement_summary_30day", + "ordering": ["timestamp_bin"], + "abstract": False, + "managed": False, + }, + ), + migrations.CreateModel( + name="MeasurementSummary7Day", + fields=[ + ( + "timestamp_bin", + models.DateTimeField(primary_key=True, serialize=False), + ), + 
("owner_id", models.BigIntegerField()), + ("repo_id", models.BigIntegerField()), + ("flag_id", models.BigIntegerField()), + ("branch", models.TextField()), + ("name", models.TextField()), + ("value_avg", models.FloatField()), + ("value_max", models.FloatField()), + ("value_min", models.FloatField()), + ("value_count", models.FloatField()), + ], + options={ + "db_table": "timeseries_measurement_summary_7day", + "ordering": ["timestamp_bin"], + "abstract": False, + "managed": False, + }, + ), + ] diff --git a/shared/django_apps/timeseries/migrations/0005_uniqueness_constraints.py b/shared/django_apps/timeseries/migrations/0005_uniqueness_constraints.py new file mode 100644 index 000000000..6aba3f03e --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0005_uniqueness_constraints.py @@ -0,0 +1,35 @@ +# Generated by Django 3.1.13 on 2022-06-07 19:07 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("timeseries", "0004_measurement_summaries"), + ] + + operations = [ + migrations.AddConstraint( + model_name="measurement", + constraint=models.UniqueConstraint( + condition=models.Q(flag_id__isnull=False), + fields=( + "name", + "owner_id", + "repo_id", + "flag_id", + "commit_sha", + "timestamp", + ), + name="timeseries_measurement_flag_unique", + ), + ), + migrations.AddConstraint( + model_name="measurement", + constraint=models.UniqueConstraint( + condition=models.Q(flag_id__isnull=True), + fields=("name", "owner_id", "repo_id", "commit_sha", "timestamp"), + name="timeseries_measurement_noflag_unique", + ), + ), + ] diff --git a/shared/django_apps/timeseries/migrations/0006_auto_20220718_1311.py b/shared/django_apps/timeseries/migrations/0006_auto_20220718_1311.py new file mode 100644 index 000000000..3008c3639 --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0006_auto_20220718_1311.py @@ -0,0 +1,41 @@ +# Generated by Django 3.2.12 on 2022-07-18 13:11 + +from django.db import migrations, 
models + + +class Migration(migrations.Migration): + dependencies = [ + ("timeseries", "0005_uniqueness_constraints"), + ] + + operations = [ + migrations.CreateModel( + name="Dataset", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.TextField()), + ("repository_id", models.IntegerField()), + ("backfilled", models.BooleanField(default=False)), + ], + ), + migrations.AddIndex( + model_name="dataset", + index=models.Index( + fields=["name", "repository_id"], name="timeseries__name_f96a15_idx" + ), + ), + migrations.AddConstraint( + model_name="dataset", + constraint=models.UniqueConstraint( + fields=("name", "repository_id"), name="name_repository_id_unique" + ), + ), + ] diff --git a/shared/django_apps/timeseries/migrations/0007_auto_20220727_2011.py b/shared/django_apps/timeseries/migrations/0007_auto_20220727_2011.py new file mode 100644 index 000000000..e1f2fa715 --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0007_auto_20220727_2011.py @@ -0,0 +1,45 @@ +# Generated by Django 3.2.12 on 2022-07-27 20:11 + +from django.db import migrations, models + +from shared.django_apps.core.models import DateTimeWithoutTZField + + +class Migration(migrations.Migration): + """ + BEGIN; + -- + -- Add field created_at to dataset + -- + ALTER TABLE "timeseries_dataset" ADD COLUMN "created_at" timestamp NULL; + -- + -- Add field updated_at to dataset + -- + ALTER TABLE "timeseries_dataset" ADD COLUMN "updated_at" timestamp NULL; + -- + -- Alter field id on dataset + -- + COMMIT; + """ + + dependencies = [ + ("timeseries", "0006_auto_20220718_1311"), + ] + + operations = [ + migrations.AddField( + model_name="dataset", + name="created_at", + field=DateTimeWithoutTZField(null=True), + ), + migrations.AddField( + model_name="dataset", + name="updated_at", + field=DateTimeWithoutTZField(null=True), + ), + migrations.AlterField( + model_name="dataset", + name="id", + 
field=models.AutoField(primary_key=True, serialize=False), + ), + ] diff --git a/shared/django_apps/timeseries/migrations/0008_auto_20220802_1838.py b/shared/django_apps/timeseries/migrations/0008_auto_20220802_1838.py new file mode 100644 index 000000000..a71713308 --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0008_auto_20220802_1838.py @@ -0,0 +1,24 @@ +# Generated by Django 3.2.12 on 2022-08-02 18:38 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("timeseries", "0007_auto_20220727_2011"), + ] + + operations = [ + migrations.RunSQL( + f""" + select remove_continuous_aggregate_policy('timeseries_measurement_summary_{name}'); + select add_continuous_aggregate_policy( + 'timeseries_measurement_summary_{name}', + start_offset => NULL, + end_offset => NULL, + schedule_interval => INTERVAL '1 h' + ); + """, + ) + for name in ["1day", "7day", "30day"] + ] diff --git a/shared/django_apps/timeseries/migrations/0009_auto_20220804_1305.py b/shared/django_apps/timeseries/migrations/0009_auto_20220804_1305.py new file mode 100644 index 000000000..af0c37d09 --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0009_auto_20220804_1305.py @@ -0,0 +1,37 @@ +# Generated by Django 3.2.12 on 2022-08-04 13:05 + +import datetime + +from django.db import migrations + +from shared.django_apps.core.models import DateTimeWithoutTZField + + +class Migration(migrations.Migration): + """ + BEGIN; + -- + -- Alter field created_at on dataset + -- + -- + -- Alter field updated_at on dataset + -- + COMMIT; + """ + + dependencies = [ + ("timeseries", "0008_auto_20220802_1838"), + ] + + operations = [ + migrations.AlterField( + model_name="dataset", + name="created_at", + field=DateTimeWithoutTZField(default=datetime.datetime.now, null=True), + ), + migrations.AlterField( + model_name="dataset", + name="updated_at", + field=DateTimeWithoutTZField(default=datetime.datetime.now, null=True), + ), + ] diff --git 
a/shared/django_apps/timeseries/migrations/0010_auto_20230123_1453.py b/shared/django_apps/timeseries/migrations/0010_auto_20230123_1453.py new file mode 100644 index 000000000..c725e7857 --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0010_auto_20230123_1453.py @@ -0,0 +1,23 @@ +# Generated by Django 3.2.12 on 2023-01-23 14:53 + +from django.conf import settings +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("timeseries", "0009_auto_20220804_1305"), + ] + + # disable real time aggregates + # https://docs.timescale.com/timescaledb/latest/how-to-guides/continuous-aggregates/real-time-aggregates/#real-time-aggregates + + operations = [ + migrations.RunSQL( + f""" + alter materialized view timeseries_measurement_summary_{name} set (timescaledb.materialized_only = true); + """, + ) + for name in ["1day", "7day", "30day"] + if not settings.TIMESERIES_REAL_TIME_AGGREGATES + ] diff --git a/shared/django_apps/timeseries/migrations/0011_measurement_measurable_id.py b/shared/django_apps/timeseries/migrations/0011_measurement_measurable_id.py new file mode 100644 index 000000000..8f21456df --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0011_measurement_measurable_id.py @@ -0,0 +1,17 @@ +# Generated by Django 4.1.7 on 2023-04-28 19:45 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("timeseries", "0010_auto_20230123_1453"), + ] + + operations = [ + migrations.AddField( + model_name="measurement", + name="measurable_id", + field=models.TextField(null=True), + ), + ] diff --git a/shared/django_apps/timeseries/migrations/0012_auto_20230501_1929.py b/shared/django_apps/timeseries/migrations/0012_auto_20230501_1929.py new file mode 100644 index 000000000..926ca1706 --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0012_auto_20230501_1929.py @@ -0,0 +1,17 @@ +# Generated by Django 4.1.7 on 2023-05-01 19:29 + +from django.db import 
migrations + +from shared.django_apps.migration_utils import RiskyRunSQL + + +class Migration(migrations.Migration): + dependencies = [ + ("timeseries", "0011_measurement_measurable_id"), + ] + + operations = [ + RiskyRunSQL( + "update timeseries_measurement set measurable_id = case when name = 'coverage' then repo_id::text when name = 'flag_coverage' then flag_id::text end where measurable_id is null;" + ), + ] diff --git a/shared/django_apps/timeseries/migrations/0013_measurable_indexes_caggs.py b/shared/django_apps/timeseries/migrations/0013_measurable_indexes_caggs.py new file mode 100644 index 000000000..1792ecd5e --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0013_measurable_indexes_caggs.py @@ -0,0 +1,220 @@ +# Generated by Django 4.1.7 on 2023-05-05 13:23 + +from django.conf import settings +from django.db import migrations, models + +from shared.django_apps.migration_utils import RiskyAddConstraint, RiskyAddIndex + + +class Migration(migrations.Migration): + """ + BEGIN; + -- + -- Alter field measurable_id on measurement + -- + ALTER TABLE "timeseries_measurement" ALTER COLUMN "measurable_id" SET NOT NULL; + -- + -- Remove index timeseries__owner_i_2cc713_idx from measurement + -- + DROP INDEX IF EXISTS "timeseries__owner_i_2cc713_idx"; + -- + -- Create index timeseries__owner_i_08d6fe_idx on field(s) owner_id, repo_id, measurable_id, branch, name, timestamp of model measurement + -- + CREATE INDEX "timeseries__owner_i_08d6fe_idx" ON "timeseries_measurement" ("owner_id", "repo_id", "measurable_id", "branch", "name", "timestamp"); + -- + -- Create constraint timeseries_measurement_unique on model measurement + -- + ALTER TABLE "timeseries_measurement" ADD CONSTRAINT "timeseries_measurement_unique" UNIQUE ("name", "owner_id", "repo_id", "measurable_id", "commit_sha", "timestamp"); + -- + -- Raw SQL operation + -- + + drop materialized view timeseries_measurement_summary_1day; + create materialized view timeseries_measurement_summary_1day + with 
(timescaledb.continuous) as + select + owner_id, + repo_id, + measurable_id, + branch, + name, + time_bucket(interval '1 days', timestamp) as timestamp_bin, + avg(value) as value_avg, + max(value) as value_max, + min(value) as value_min, + count(value) as value_count + from timeseries_measurement + group by + owner_id, repo_id, measurable_id, branch, name, timestamp_bin + with no data; + select add_continuous_aggregate_policy( + 'timeseries_measurement_summary_1day', + start_offset => NULL, + end_offset => NULL, + schedule_interval => INTERVAL '1 h' + ); + + -- + -- Raw SQL operation + -- + + drop materialized view timeseries_measurement_summary_7day; + create materialized view timeseries_measurement_summary_7day + with (timescaledb.continuous) as + select + owner_id, + repo_id, + measurable_id, + branch, + name, + time_bucket(interval '7 days', timestamp) as timestamp_bin, + avg(value) as value_avg, + max(value) as value_max, + min(value) as value_min, + count(value) as value_count + from timeseries_measurement + group by + owner_id, repo_id, measurable_id, branch, name, timestamp_bin + with no data; + select add_continuous_aggregate_policy( + 'timeseries_measurement_summary_7day', + start_offset => NULL, + end_offset => NULL, + schedule_interval => INTERVAL '1 h' + ); + + -- + -- Raw SQL operation + -- + + drop materialized view timeseries_measurement_summary_30day; + create materialized view timeseries_measurement_summary_30day + with (timescaledb.continuous) as + select + owner_id, + repo_id, + measurable_id, + branch, + name, + time_bucket(interval '30 days', timestamp) as timestamp_bin, + avg(value) as value_avg, + max(value) as value_max, + min(value) as value_min, + count(value) as value_count + from timeseries_measurement + group by + owner_id, repo_id, measurable_id, branch, name, timestamp_bin + with no data; + select add_continuous_aggregate_policy( + 'timeseries_measurement_summary_30day', + start_offset => NULL, + end_offset => NULL, + 
schedule_interval => INTERVAL '1 h' + ); + + -- + -- Raw SQL operation + -- + + alter materialized view timeseries_measurement_summary_1day set (timescaledb.materialized_only = true); + + -- + -- Raw SQL operation + -- + + alter materialized view timeseries_measurement_summary_7day set (timescaledb.materialized_only = true); + + -- + -- Raw SQL operation + -- + + alter materialized view timeseries_measurement_summary_30day set (timescaledb.materialized_only = true); + + COMMIT; + """ + + dependencies = [ + ("timeseries", "0012_auto_20230501_1929"), + ] + + operations = ( + [ + migrations.AlterField( + model_name="measurement", + name="measurable_id", + field=models.TextField(), + ), + migrations.RemoveIndex( + model_name="measurement", + name="timeseries__owner_i_2cc713_idx", + ), + RiskyAddIndex( + model_name="measurement", + index=models.Index( + fields=[ + "owner_id", + "repo_id", + "measurable_id", + "branch", + "name", + "timestamp", + ], + name="timeseries__owner_i_08d6fe_idx", + ), + ), + RiskyAddConstraint( + model_name="measurement", + constraint=models.UniqueConstraint( + fields=( + "name", + "owner_id", + "repo_id", + "measurable_id", + "commit_sha", + "timestamp", + ), + name="timeseries_measurement_unique", + ), + ), + ] + + [ + migrations.RunSQL( + f""" + drop materialized view timeseries_measurement_summary_{days}day; + create materialized view timeseries_measurement_summary_{days}day + with (timescaledb.continuous) as + select + owner_id, + repo_id, + measurable_id, + branch, + name, + time_bucket(interval '{days} days', timestamp) as timestamp_bin, + avg(value) as value_avg, + max(value) as value_max, + min(value) as value_min, + count(value) as value_count + from timeseries_measurement + group by + owner_id, repo_id, measurable_id, branch, name, timestamp_bin + with no data; + select add_continuous_aggregate_policy( + 'timeseries_measurement_summary_{days}day', + start_offset => NULL, + end_offset => NULL, + schedule_interval => INTERVAL '1 h' + 
); + """ + ) + for days in [1, 7, 30] + ] + + [ + migrations.RunSQL( + f""" + alter materialized view timeseries_measurement_summary_{days}day set (timescaledb.materialized_only = true); + """ + ) + for days in [1, 7, 30] + if not settings.TIMESERIES_REAL_TIME_AGGREGATES + ] + ) diff --git a/shared/django_apps/timeseries/migrations/0014_remove_measurement_timeseries_measurement_flag_unique_and_more.py b/shared/django_apps/timeseries/migrations/0014_remove_measurement_timeseries_measurement_flag_unique_and_more.py new file mode 100644 index 000000000..c94dafb7b --- /dev/null +++ b/shared/django_apps/timeseries/migrations/0014_remove_measurement_timeseries_measurement_flag_unique_and_more.py @@ -0,0 +1,39 @@ +import django.utils.timezone +from django.db import migrations + +from shared.django_apps.core.models import DateTimeWithoutTZField +from shared.django_apps.migration_utils import ( # Generated by Django 4.1.7 on 2023-05-15 20:46 + RiskyRemoveConstraint, + RiskyRemoveField, +) + + +class Migration(migrations.Migration): + dependencies = [ + ("timeseries", "0013_measurable_indexes_caggs"), + ] + + operations = [ + RiskyRemoveConstraint( + model_name="measurement", + name="timeseries_measurement_flag_unique", + ), + RiskyRemoveConstraint( + model_name="measurement", + name="timeseries_measurement_noflag_unique", + ), + RiskyRemoveField( + model_name="measurement", + name="flag_id", + ), + migrations.AlterField( + model_name="dataset", + name="created_at", + field=DateTimeWithoutTZField(default=django.utils.timezone.now, null=True), + ), + migrations.AlterField( + model_name="dataset", + name="updated_at", + field=DateTimeWithoutTZField(default=django.utils.timezone.now, null=True), + ), + ] diff --git a/shared/django_apps/timeseries/migrations/__init__.py b/shared/django_apps/timeseries/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/shared/django_apps/timeseries/models.py b/shared/django_apps/timeseries/models.py new file mode 
100644 index 000000000..16ac1548d --- /dev/null +++ b/shared/django_apps/timeseries/models.py @@ -0,0 +1,186 @@ +from datetime import datetime, timedelta +from enum import Enum + +import django.db.models as models +from django.utils import timezone +from django_prometheus.models import ExportModelOperationsMixin + +from shared.django_apps.core.models import DateTimeWithoutTZField + +TIMESERIES_APP_LABEL = "timeseries" + + +class Interval(Enum): + INTERVAL_1_DAY = 1 + INTERVAL_7_DAY = 7 + INTERVAL_30_DAY = 30 + + +class MeasurementName(Enum): + COVERAGE = "coverage" + FLAG_COVERAGE = "flag_coverage" + COMPONENT_COVERAGE = "component_coverage" + # For tracking the entire size of a bundle report by its name + BUNDLE_ANALYSIS_REPORT_SIZE = "bundle_analysis_report_size" + # For tracking the size of a category of assets of a bundle report by its name + BUNDLE_ANALYSIS_JAVASCRIPT_SIZE = "bundle_analysis_javascript_size" + BUNDLE_ANALYSIS_STYLESHEET_SIZE = "bundle_analysis_stylesheet_size" + BUNDLE_ANALYSIS_FONT_SIZE = "bundle_analysis_font_size" + BUNDLE_ANALYSIS_IMAGE_SIZE = "bundle_analysis_image_size" + # For tracking individual asset size via its UUID + BUNDLE_ANALYSIS_ASSET_SIZE = "bundle_analysis_asset_size" + + +class Measurement(ExportModelOperationsMixin("timeseries.measurement"), models.Model): + # TimescaleDB requires that `timestamp` be part of every index (since data is + # partitioned by `timestamp`). Since an auto-incrementing primary key would + # not satisfy this requirement we can make `timestamp` the primary key. + # `timestamp` may not be unique though so we drop the uniqueness constraint in + # a migration. 
+ timestamp = models.DateTimeField(null=False, primary_key=True) + + owner_id = models.BigIntegerField(null=False) + repo_id = models.BigIntegerField(null=False) + measurable_id = models.TextField(null=False) + branch = models.TextField(null=True) + + # useful for updating a measurement if needed + commit_sha = models.TextField(null=True) + + # the name of the measurement (i.e. "coverage") + name = models.TextField(null=False, blank=False) + value = models.FloatField(null=False) + + class Meta: + app_label = TIMESERIES_APP_LABEL + indexes = [ + # for querying measurements + models.Index( + fields=[ + "owner_id", + "repo_id", + "measurable_id", + "branch", + "name", + "timestamp", + ] + ), + ] + constraints = [ + # for updating measurements + models.UniqueConstraint( + fields=[ + "name", + "owner_id", + "repo_id", + "measurable_id", + "commit_sha", + "timestamp", + ], + name="timeseries_measurement_unique", + ), + ] + + +class MeasurementSummary( + ExportModelOperationsMixin("timeseries.measurement_summary"), models.Model +): + timestamp_bin = models.DateTimeField(primary_key=True) + owner_id = models.BigIntegerField() + repo_id = models.BigIntegerField() + measurable_id = models.TextField() + branch = models.TextField() + name = models.TextField() + value_avg = models.FloatField() + value_max = models.FloatField() + value_min = models.FloatField() + value_count = models.FloatField() + + @classmethod + def agg_by(cls, interval: Interval) -> models.Manager: + model_classes = { + Interval.INTERVAL_1_DAY: MeasurementSummary1Day, + Interval.INTERVAL_7_DAY: MeasurementSummary7Day, + Interval.INTERVAL_30_DAY: MeasurementSummary30Day, + } + + model_class = model_classes.get(interval) + if not model_class: + raise ValueError(f"cannot aggregate by '{interval}'") + return model_class.objects + + class Meta: + app_label = TIMESERIES_APP_LABEL + abstract = True + # these are backed by TimescaleDB "continuous aggregates" + # (materialized views) + managed = False + ordering = 
["timestamp_bin"] + + +class MeasurementSummary1Day(MeasurementSummary): + class Meta(MeasurementSummary.Meta): + db_table = "timeseries_measurement_summary_1day" + + +# Timescale's origin for time buckets is Monday 2000-01-03 +# Weekly aggregate bins will thus be Monday-Sunday +class MeasurementSummary7Day(MeasurementSummary): + class Meta(MeasurementSummary.Meta): + db_table = "timeseries_measurement_summary_7day" + + +# Timescale's origin for time buckets is 2000-01-03 +# 30 day offsets will be aligned on that origin +class MeasurementSummary30Day(MeasurementSummary): + class Meta(MeasurementSummary.Meta): + db_table = "timeseries_measurement_summary_30day" + + +class Dataset(ExportModelOperationsMixin("timeseries.dataset"), models.Model): + id = models.AutoField(primary_key=True) + + # this will likely correspond to a measurement name above + name = models.TextField(null=False, blank=False) + + # not a true foreign key since repositories are in a + # different database + repository_id = models.IntegerField(null=False) + + # indicates whether the backfill task has completed for this dataset + # TODO: We're not really using this field anymore as a backfill task takes very long for this to be populated when finished. + # The solution would be to somehow have a celery task return when it's done, hence the TODO + backfilled = models.BooleanField(null=False, default=False) + + created_at = DateTimeWithoutTZField(default=timezone.now, null=True) + updated_at = DateTimeWithoutTZField(default=timezone.now, null=True) + + class Meta: + app_label = TIMESERIES_APP_LABEL + indexes = [ + models.Index( + fields=[ + "name", + "repository_id", + ] + ), + ] + constraints = [ + models.UniqueConstraint( + fields=[ + "name", + "repository_id", + ], + name="name_repository_id_unique", + ), + ] + + def is_backfilled(self) -> bool: + """ + Returns `False` for an hour after creation. 
+ + TODO: this should eventually read `self.backfilled` which will be updated via the worker + """ + if not self.created_at: + return False + return datetime.now() > self.created_at + timedelta(hours=1) diff --git a/shared/django_apps/timeseries/tests/__init__.py b/shared/django_apps/timeseries/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/shared/django_apps/timeseries/tests/factories.py b/shared/django_apps/timeseries/tests/factories.py new file mode 100644 index 000000000..31371cbad --- /dev/null +++ b/shared/django_apps/timeseries/tests/factories.py @@ -0,0 +1,28 @@ +import random +from datetime import datetime + +import factory +from factory.django import DjangoModelFactory + +from shared.django_apps.timeseries import models + + +class MeasurementFactory(DjangoModelFactory): + class Meta: + model = models.Measurement + + owner_id = 1 + repo_id = 1 + name = "testing" + branch = "master" + value = factory.LazyAttribute(lambda _: random.random() * 1000) + timestamp = factory.LazyAttribute(lambda _: datetime.now()) + + +class DatasetFactory(DjangoModelFactory): + class Meta: + model = models.Dataset + + repository_id = 1 + name = "testing" + backfilled = False diff --git a/shared/django_apps/timeseries/tests/test_db.py b/shared/django_apps/timeseries/tests/test_db.py new file mode 100644 index 000000000..66d845e01 --- /dev/null +++ b/shared/django_apps/timeseries/tests/test_db.py @@ -0,0 +1,31 @@ +from unittest.mock import patch + +import pytest +from django.conf import settings +from django.db import connections +from django.test import TransactionTestCase + + +@pytest.mark.skipif( + not settings.TIMESERIES_ENABLED, reason="requires timeseries data storage" +) +class DatabaseTests(TransactionTestCase): + databases = {"timeseries"} + + @patch("django.db.backends.postgresql.base.DatabaseWrapper.is_usable") + def test_db_reconnect(self, is_usable): + timeseries_database_engine = settings.DATABASES["timeseries"]["ENGINE"] + 
settings.DATABASES["timeseries"]["ENGINE"] = "codecov.db" + + is_usable.return_value = True + + with connections["timeseries"].cursor() as cursor: + cursor.execute("SELECT 1") + + is_usable.return_value = False + + # it should reconnect and not raise an error + with connections["timeseries"].cursor() as cursor: + cursor.execute("SELECT 1") + + settings.DATABASES["timeseries"]["ENGINE"] = timeseries_database_engine diff --git a/shared/django_apps/timeseries/tests/test_models.py b/shared/django_apps/timeseries/tests/test_models.py new file mode 100644 index 000000000..413ad3594 --- /dev/null +++ b/shared/django_apps/timeseries/tests/test_models.py @@ -0,0 +1,132 @@ +from datetime import datetime, timezone + +import pytest +from django.conf import settings +from django.test import TransactionTestCase +from freezegun import freeze_time + +from shared.django_apps.timeseries.models import Dataset, Interval, MeasurementSummary + +from .factories import DatasetFactory, MeasurementFactory + + +@pytest.mark.skipif( + not settings.TIMESERIES_ENABLED, reason="requires timeseries data storage" +) +class MeasurementTests(TransactionTestCase): + databases = {"timeseries"} + + def test_measurement_agg_1day(self): + MeasurementFactory( + timestamp=datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc), value=1 + ) + MeasurementFactory( + timestamp=datetime(2022, 1, 1, 1, 0, 0, tzinfo=timezone.utc), value=2 + ) + MeasurementFactory( + timestamp=datetime(2022, 1, 1, 1, 0, 1, tzinfo=timezone.utc), value=3 + ) + MeasurementFactory( + timestamp=datetime(2022, 1, 2, 0, 0, 0, tzinfo=timezone.utc), value=4 + ) + MeasurementFactory( + timestamp=datetime(2022, 1, 2, 0, 1, 0, tzinfo=timezone.utc), value=5 + ) + + results = MeasurementSummary.agg_by(Interval.INTERVAL_1_DAY).all() + + assert len(results) == 2 + assert results[0].value_avg == 2 + assert results[0].value_min == 1 + assert results[0].value_max == 3 + assert results[0].value_count == 3 + assert results[1].value_avg == 4.5 + assert 
results[1].value_min == 4 + assert results[1].value_max == 5 + assert results[1].value_count == 2 + + def test_measurement_agg_7day(self): + # Week 1: Monday, Tuesday, Sunday + MeasurementFactory(timestamp=datetime(2022, 1, 3), value=1) + MeasurementFactory(timestamp=datetime(2022, 1, 4), value=2) + MeasurementFactory(timestamp=datetime(2022, 1, 9), value=3) + + # Week 2: Monday, Sunday + MeasurementFactory(timestamp=datetime(2022, 1, 10), value=4) + MeasurementFactory(timestamp=datetime(2022, 1, 16), value=5) + + results = MeasurementSummary.agg_by(Interval.INTERVAL_7_DAY).all() + + assert len(results) == 2 + assert results[0].value_avg == 2 + assert results[0].value_min == 1 + assert results[0].value_max == 3 + assert results[0].value_count == 3 + assert results[1].value_avg == 4.5 + assert results[1].value_min == 4 + assert results[1].value_max == 5 + assert results[1].value_count == 2 + + def test_measurement_agg_30day(self): + # Timescale's origin for time buckets is 2000-01-03 + # 30 day offsets will be aligned on that origin + + MeasurementFactory(timestamp=datetime(2000, 1, 3), value=1) + MeasurementFactory(timestamp=datetime(2000, 1, 4), value=2) + MeasurementFactory(timestamp=datetime(2000, 2, 1), value=3) + + MeasurementFactory(timestamp=datetime(2000, 2, 2), value=4) + MeasurementFactory(timestamp=datetime(2000, 2, 11), value=5) + + results = MeasurementSummary.agg_by(Interval.INTERVAL_30_DAY).all() + + assert len(results) == 2 + assert results[0].value_avg == 2 + assert results[0].value_min == 1 + assert results[0].value_max == 3 + assert results[0].value_count == 3 + assert results[1].value_avg == 4.5 + assert results[1].value_min == 4 + assert results[1].value_max == 5 + assert results[1].value_count == 2 + + def test_measurement_agg_invalid(self): + with self.assertRaises(ValueError): + MeasurementSummary.agg_by("invalid").all() + + +@pytest.mark.skipif( + not settings.TIMESERIES_ENABLED, reason="requires timeseries data storage" +) +class 
DatasetTests(TransactionTestCase): + databases = {"timeseries"} + + @freeze_time("2022-01-01T01:00:01+0000") + def test_is_backfilled_true(self): + dataset = DatasetFactory() + + Dataset.objects.filter(pk=dataset.pk).update( + created_at=datetime(2022, 1, 1, 0, 0, 0) + ) + + dataset.refresh_from_db() + assert dataset.is_backfilled() == True + + @freeze_time("2022-01-01T00:59:59+0000") + def test_is_backfilled_false(self): + dataset = DatasetFactory() + + Dataset.objects.filter(pk=dataset.pk).update( + created_at=datetime(2022, 1, 1, 0, 0, 0) + ) + + dataset.refresh_from_db() + assert dataset.is_backfilled() == False + + def test_is_backfilled_no_created_at(self): + dataset = DatasetFactory() + + Dataset.objects.filter(pk=dataset.pk).update(created_at=None) + + dataset.refresh_from_db() + assert dataset.is_backfilled() == False