Replace table jobset_input_alts by a JSONB column

We could have used the classic entity-attribute-value (EAV) model instead, but that would involve lots of self-joins and we'd also need to put every single property value into a series of text fields. If we wanted nesting, as we have for the "attrs" value of the "build" and "sysbuild" types, we'd have to introduce even more madness by creating yet another EAV table. Of course, we could avoid the nesting by serializing the values into the text fields, but the serialization format would be something like JSON anyway, and PostgreSQL has had a native JSON type since version 9.2 and, since version 9.4, a JSONB type which is even indexable. The really nasty part was converting every jobset input type into JSON, because during a schema upgrade we can't rely on PL/Perl being available, and every alternative outside the database seems hackish: it would mean modifying the schema updater code to run a Perl function just for upgrade 51 (which would also need to be retained). Right now, this only updates the database and the DBIx schema (including deflation/inflation of properties to/from JSON) but not the code, which is still using jobset_input_alts and will obviously fail now. Signed-off-by: aszlig <aszlig@redmoonstudios.org> Issue: NixOS#279
aszlig · Aug 4, 2016 · e0a2656 · e0a2656
1 parent d00399b
commit e0a2656
Show file tree

Hide file tree

Showing 4 changed files with 182 additions and 174 deletions.
diff --git a/src/lib/Hydra/Schema/JobsetInputAlts.pm b/src/lib/Hydra/Schema/JobsetInputAlts.pm
diff --git a/src/lib/Hydra/Schema/JobsetInputs.pm b/src/lib/Hydra/Schema/JobsetInputs.pm
@@ -57,6 +57,11 @@ __PACKAGE__->table("jobset_inputs");
   data_type: 'text'
   is_nullable: 0
 
+=head2 properties
+
+  data_type: 'jsonb'
+  is_nullable: 0
+
 =head2 email_responsible
 
   data_type: 'integer'
@@ -74,6 +79,8 @@ __PACKAGE__->add_columns(
   { data_type => "text", is_nullable => 0 },
   "type",
   { data_type => "text", is_nullable => 0 },
+  "properties",
+  { data_type => "jsonb", is_nullable => 0 },
   "email_responsible",
   { data_type => "integer", default_value => 0, is_nullable => 0 },
 );
@@ -111,37 +118,13 @@ __PACKAGE__->belongs_to(
   { is_deferrable => 0, on_delete => "CASCADE", on_update => "CASCADE" },
 );
 
-=head2 jobset_input_alts
-
-Type: has_many
-
-Related object: L<Hydra::Schema::JobsetInputAlts>
-
-=cut
-
-__PACKAGE__->has_many(
-  "jobset_input_alts",
-  "Hydra::Schema::JobsetInputAlts",
-  {
-    "foreign.input"   => "self.name",
-    "foreign.jobset"  => "self.jobset",
-    "foreign.project" => "self.project",
-  },
-  undef,
-);
-
-
-# Created by DBIx::Class::Schema::Loader v0.07045 @ 2016-07-07 08:50:21
-# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:sLMvhyXlxLIzfAGTsVMF0A
 
-my %hint = (
-    relations => {
-        "jobset_input_alts" => "value"
-    }
-);
+# Created by DBIx::Class::Schema::Loader v0.07045 @ 2016-07-08 02:55:18
+# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:OZ3l6psoJLYr8ct9lDPJxw
 
-sub json_hint {
-    return \%hint;
-}
+# Transparently convert the "properties" column: the JSON text stored in the
+# database is decoded into a Perl data structure when a row is read, and the
+# Perl data structure is encoded back to JSON text when a row is written.
+#
+# NOTE(review): the JSON module is called by its fully qualified name and no
+# "use JSON;" is visible in this part of the file -- confirm it is loaded
+# elsewhere. decode_json/encode_json operate on UTF-8 encoded bytes; verify
+# this matches what the database driver hands over for non-ASCII values.
+__PACKAGE__->inflate_column("properties", {
+    inflate => sub {
+        my ($json_text) = @_;
+        return JSON::decode_json($json_text);
+    },
+    deflate => sub {
+        my ($data) = @_;
+        return JSON::encode_json($data);
+    },
+});
 
 1;
diff --git a/src/sql/hydra.sql b/src/sql/hydra.sql
@@ -94,27 +94,13 @@ create table jobset_inputs (
     jobset        text not null,
     name          text not null,
     type          text not null, -- "svn", "path", "uri", "string", "boolean", "nix"
+    properties    jsonb not null,
     email_responsible integer not null default 0, -- whether to email committers to this input who change a build
     primary key   (project, jobset, name),
     foreign key   (project, jobset) references jobsets(project, name) on delete cascade on update cascade
 );
 
 
-create table jobset_input_alts (
-    project       text not null,
-    jobset        text not null,
-    input         text not null,
-    alt_nr         integer not null,
-
-    -- urgh
-    value         text, -- for most types, a URI; for 'path', an absolute path; for 'string', an arbitrary value
-    revision      text, -- for repositories
-
-    primary key   (project, jobset, input, alt_nr),
-    foreign key   (project, jobset, input) references jobset_inputs(project, jobset, name) on delete cascade on update cascade
-);
-
-
 create table jobs (
     project       text not null,
     jobset        text not null,
@@ -616,8 +602,6 @@ create index index_cached_subversion_inputs_on_uri_revision on cached_subversion
 create index index_cached_bazaar_inputs_on_uri_revision on cached_bazaar_inputs(uri, revision);
 create index index_jobset_eval_members_on_build on jobset_eval_members(build);
 create index index_jobset_eval_members_on_eval on jobset_eval_members(eval);
-create index index_jobset_input_alts_on_input on jobset_input_alts(project, jobset, input);
-create index index_jobset_input_alts_on_jobset on jobset_input_alts(project, jobset);
 create index index_projects_on_enabled on projects(enabled);
 create index index_release_members_on_build on release_members(build);
 

diff --git a/src/sql/upgrade-51.sql b/src/sql/upgrade-51.sql
@@ -0,0 +1,168 @@
+-- Abort the upgrade early, with a readable error message, when the server is
+-- too old to run this script.
+--
+-- The JSONB type itself exists since PostgreSQL 9.4, but the helper functions
+-- in this script use jsonb_build_object(), jsonb_object_agg() and the jsonb
+-- "||" operator, which were only introduced in PostgreSQL 9.5. Checking for
+-- 9.4 would let the check pass and the upgrade fail later with an obscure
+-- "function does not exist" error.
+--
+-- Returns: always null; the function is only called for its side effect.
+-- Raises:  an exception if server_version_num is below 90500.
+create function fail_on_old_pgsql_version() returns boolean as $$
+declare
+    human_ver text;
+    machine_ver integer;
+begin
+    -- server_version_num is the numeric form, e.g. 90500 for 9.5.0.
+    select setting into machine_ver
+        from pg_settings where name = 'server_version_num';
+    if machine_ver < 90500 then
+        -- Only look up the human readable version when it is needed for the
+        -- error message.
+        select setting into human_ver
+            from pg_settings where name = 'server_version';
+        raise exception using message = 'You need at least PostgreSQL version'
+                                     || ' 9.5 in order to upgrade to the new'
+                                     || ' Hydra schema version. Unfortunately'
+                                     || ' you are running version '
+                                     || human_ver || ' right now. Please'
+                                     || ' update your PostgreSQL server.';
+    end if;
+    return null;
+end;
+$$ language plpgsql;
+
+select fail_on_old_pgsql_version();  -- run the version check before any schema change below
+drop function fail_on_old_pgsql_version();  -- one-shot helper; do not leave it behind in the schema
+
+-- This is a version of "parseJobName" from src/lib/Hydra/Helper/AddBuilds.pm,
+-- which used an overly complicated regular expression to parse the string into
+-- its components (a bit restructured/annotated to make it easier to read):
+--
+--   /^ (?: (?:
+--    (
+--      # project name:
+--      (?:[A-Za-z_][A-Za-z0-9-_]*)
+--    ) : )? (
+--      # jobset name:
+--      (?:[A-Za-z_][A-Za-z0-9-_\.]*)
+--    ) : )? (
+--      # job name:
+--      (?:(?:[A-Za-z_][A-Za-z0-9-_]*)(?:\\.(?:[A-Za-z_][A-Za-z0-9-_]*))*)
+--    ) \s*
+--      (\[ \s* (
+--        ([\w]+) (?{ $key = $^N; }) \s* = \s* \"
+--        ([\w\-]+) (?{ $attrs{$key} = $^N; }) \"
+--      \s* )* \])?
+--    $
+--   /x
+--
+-- The original description of "parseJobName" was:
+--
+--   Parse a job specification of the form `<project>:<jobset>:<job>
+--   [attrs]'.  The project, jobset and attrs may be omitted.  The
+--   attrs have the form `name = "value"'.
+--
+-- Fortunately, we only need to take care of valid specifications and we
+-- thankfully no longer need to put the results into a string again but
+-- rather into a JSONB object.
+--
+-- So to simplify, first a few observations:
+--
+--  * The fields are separated by ':', and no ':' is allowed within
+--    <project>, <jobset> or <job>.
+--  * The [attrs] value starts with a '[', and it isn't allowed in
+--    <project>, <jobset> and <job> either.
+--  * Fortunately, the attribute keys and values don't allow '"' or '['
+--    either.
+--
+-- These observations allow us to match very strictly solely on the basis
+-- of the mentioned delimiters.
+--
+-- Parameters:
+--   value  job specification string, e.g. 'project:jobset:job [k = "v"]'
+-- Returns:
+--   a JSONB object with the key "job", plus "jobset" and "project" when they
+--   are given, plus "attrs" (an object mapping attribute names to values)
+--   when at least one attribute could be parsed.
+-- Raises:
+--   an exception if the part before '[' does not consist of one to three
+--   ':'-separated fields.
+create function parse_jobname(value text) returns jsonb as $$
+declare
+    attrs_raw text[];
+    attrs_regex text;
+    attrs jsonb;
+
+    spec_raw text[];
+    spec_parts text[];
+    spec jsonb;
+begin
+    -- Everything before the first '[' is the <project>:<jobset>:<job> part.
+    spec_raw := regexp_matches(value, E'^\\s*([^[]+)');
+    -- Everything between the first '[' and the final ']' is the attribute list.
+    attrs_raw := regexp_matches(value, E'\\[(.*)\\]\\s*$');
+    -- One attribute: key = "value", with optional whitespace around '='.
+    attrs_regex := E'\\s*([^=]+?)\\s*=\\s*"([^"]+)"';
+
+    -- Strip the whitespace separating <job> from "[attrs]" (or trailing the
+    -- specification) first; otherwise it would become part of the job name.
+    spec_parts := regexp_split_to_array(
+        regexp_replace(spec_raw[1], E'\\s+$', ''),
+        E'\\s*:\\s*'
+    );
+    case array_length(spec_parts, 1)
+        -- Could be more beautiful but this is less error-prone:
+        when 1 then spec := jsonb_build_object('job',     spec_parts[1]);
+        when 2 then spec := jsonb_build_object('jobset',  spec_parts[1],
+                                               'job',     spec_parts[2]);
+        when 3 then spec := jsonb_build_object('project', spec_parts[1],
+                                               'jobset',  spec_parts[2],
+                                               'job',     spec_parts[3]);
+        -- Also reached when nothing matched at all (array_length is null).
+        else raise exception 'Unable to parse job name "%".', value;
+    end case;
+
+    if array_length(attrs_raw, 1) > 0 then
+        -- Collect all key/value pairs into a single JSONB object.
+        select jsonb_object_agg(m[1], m[2]) into attrs
+        from regexp_matches(attrs_raw[1], attrs_regex, 'g') as m;
+        -- An empty "[]" yields no pairs; omit "attrs" entirely in that case.
+        if attrs is not null then
+            spec := spec || jsonb_build_object('attrs', attrs);
+        end if;
+    end if;
+
+    return spec;
+end;
+$$ language plpgsql;
+
+-- Decompose the stringly typed, whitespace-separated value of an SCM input.
+-- This is very hacky and always converts the last argument to boolean.
+-- Sole reason for this is to support the deepClone argument for Git types.
+--
+-- Parameters:
+--   value  the raw input value: a URI, optionally followed by up to two more
+--          whitespace-separated fields
+--   n1     JSON key under which the second field is stored (as text)
+--   n2     JSON key under which the third field is stored as a boolean, or
+--          null to ignore a third field
+-- Returns:
+--   a JSONB object with "uri" and, if the fields are present, n1 and n2.
+create function unscm(value text, n1 text, n2 text) returns jsonb as $$
+declare
+    fields text[];
+    result jsonb;
+begin
+    -- Trim the value and split on runs of whitespace. Splitting on a single
+    -- space would turn 'uri  branch' into the fields 'uri', '' and 'branch',
+    -- storing an empty branch and mistaking the branch for the boolean flag.
+    fields := regexp_split_to_array(
+        regexp_replace(value, E'^\\s+|\\s+$', '', 'g'),
+        E'\\s+'
+    );
+    result := jsonb_build_object('uri', fields[1]);
+    if array_length(fields, 1) > 1 then
+        result := result || jsonb_build_object(n1, fields[2]);
+    end if;
+    if array_length(fields, 1) > 2 and n2 is not null then
+        -- Any non-empty third field counts as true.
+        result := result || jsonb_build_object(n2, char_length(fields[3]) > 0);
+    end if;
+    return result;
+end;
+$$ language plpgsql;
+
+-- Convert the old textual value of a jobset input into the JSONB object that
+-- is stored in the new "properties" column, depending on the input type.
+--
+-- Yes, this function can be done inline as well, but in PL/pgSQL we can use
+-- multiple case matches in one single line.
+--
+-- Parameters:
+--   type  the jobset input type, e.g. 'git', 'build' or 'string'
+--   val   the old value from jobset_input_alts
+-- Returns:
+--   the JSONB properties object; unknown types are kept as {"value": val}
+--   and reported with a warning.
+-- Raises:
+--   an exception if val cannot be cast to boolean ('boolean') or integer
+--   ('eval'), or if parse_jobname() rejects it ('build', 'sysbuild').
+--
+-- All branches build jsonb directly (jsonb_build_object) so that the result
+-- matches the declared return type without an implicit json -> jsonb
+-- conversion on return.
+create function migrate_jobset_alts(type text, val text) returns jsonb as $$
+begin
+    case type
+        when 'boolean' then return jsonb_build_object('value', val::boolean);
+        when 'build', 'sysbuild' then return parse_jobname(val);
+        when 'bzr', 'bzr-checkout' then return jsonb_build_object('uri', val);
+        when 'darcs' then return jsonb_build_object('uri', val);
+        when 'eval' then return jsonb_build_object('number', val::integer);
+        when 'git' then return unscm(val, 'branch', 'deepClone');
+        when 'hg' then return unscm(val, 'id', null);
+        when 'string' then return jsonb_build_object('value', val);
+        when 'nix', 'path' then return jsonb_build_object('value', val);
+        when 'svn', 'svn-checkout' then return unscm(val, 'revision', null);
+        else raise warning 'Unknown jobset type "%", treating "%" as text.',
+                           type, val;
+             return jsonb_build_object('value', val);
+    end case;
+end;
+$$ language plpgsql;
+
+-- Add the new column as nullable first so that it can be populated below;
+-- the NOT NULL constraint is added once every row has a value.
+alter table jobset_inputs add column properties jsonb null;
+
+-- Intentionally updates every row of jobset_inputs.
+--
+-- Each input gets the converted value of its first alternative (the one with
+-- the lowest alt_nr). The primary key of jobset_input_alts is
+-- (project, jobset, input, alt_nr), so the subquery yields at most one row.
+-- Inputs without any alternative get an empty object; leaving them NULL would
+-- make the "set not null" below fail and abort the whole upgrade.
+-- NOTE(review): confirm that an empty object is what the application expects
+-- for inputs that never had a value.
+-- NOTE(review): alternatives beyond the first one are not migrated and are
+-- lost when jobset_input_alts is dropped below.
+update jobset_inputs j set properties = coalesce((
+    select migrate_jobset_alts(j.type, a.value)
+    from jobset_input_alts a
+    where a.project = j.project
+      and a.jobset = j.jobset
+      and a.input = j.name
+    order by a.alt_nr
+    limit 1
+), '{}'::jsonb);
+
+drop table jobset_input_alts;
+
+alter table jobset_inputs alter column properties set not null;
+
+-- The conversion helpers are only needed for this upgrade.
+drop function migrate_jobset_alts(text, text);
+drop function unscm(text, text, text);
+drop function parse_jobname(text);