From 25a56c879d0f19b82776e2cee4026f61c49e86a4 Mon Sep 17 00:00:00 2001 From: ragi Date: Fri, 29 Mar 2019 19:47:43 +0900 Subject: [PATCH 1/4] Modify redshift adapter to support late binding view --- app/batches/synchronize_data_sources.rb | 14 +++++++ .../data_source_adapters/redshift_adapter.rb | 40 +++++++++++++------ 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/app/batches/synchronize_data_sources.rb b/app/batches/synchronize_data_sources.rb index a4a7e1d..7bf0752 100644 --- a/app/batches/synchronize_data_sources.rb +++ b/app/batches/synchronize_data_sources.rb @@ -42,6 +42,7 @@ def self.import_table_memo!(schema_memo, table_memos, source_table) columns = source_table.columns import_table_memo_raw_dataset!(table_memo, source_table, columns) + import_view_query!(table_memo, source_table) column_names = columns.map(&:name) column_memos.reject {|memo| column_names.include?(memo.name) }.each {|memo| memo.update!(linked: false) } @@ -82,4 +83,17 @@ def self.import_table_memo_raw_dataset_rows!(table_memo, source_table, columns) end end private_class_method :import_table_memo_raw_dataset_rows! + + def self.import_view_query!(table_memo, source_table) + query = source_table.fetch_view_query + query_plan = source_table.fetch_view_query_plan + if query && query_plan + ViewMetaDatum.find_or_initialize_by(table_memo_id: table_memo.id) do |meta_data| + meta_data.query = query + meta_data.explain = query_plan + meta_data.save + end + end + end + private_class_method :import_view_query! end diff --git a/app/models/data_source_adapters/redshift_adapter.rb b/app/models/data_source_adapters/redshift_adapter.rb index fd07b79..9574c40 100644 --- a/app/models/data_source_adapters/redshift_adapter.rb +++ b/app/models/data_source_adapters/redshift_adapter.rb @@ -6,9 +6,14 @@ def fetch_table_names query_result = source_base_class.connection.query(<<~SQL, 'SCHEMA') SELECT table_schema, table_name, table_type FROM ( - SELECT table_schema, table_name, table_type FROM svv_tables WHERE table_schema = ANY (current_schemas(false)) or table_type = 'EXTERNAL TABLE' + SELECT table_schema, table_name, table_type + FROM svv_tables + WHERE table_schema = ANY (current_schemas(false)) or table_type = 'EXTERNAL TABLE' + UNION - SELECT schemaname as table_schema, viewname AS table_name, 'VIEW' FROM pg_views WHERE schemaname = ANY (current_schemas(false)) + + SELECT DISTINCT view_schema, view_name, 'LATE BINDING' + FROM pg_get_late_binding_view_cols() cols(view_schema name, view_name name, col_name name, col_type varchar, col_num int) ) tables ORDER BY table_schema, table_name; SQL @@ -18,32 +23,29 @@ def fetch_table_names @base_table_names = table_groups['BASE TABLE'] || [] @external_table_names = table_groups['EXTERNAL TABLE'] || [] @view_names = table_groups['VIEW'] || [] + @late_binding_view_names = table_groups['LATE BINDING'] || [] - (@base_table_names + @external_table_names + @view_names).uniq + (@base_table_names + @external_table_names + @view_names + @late_binding_view_names).uniq rescue ActiveRecord::ActiveRecordError, PG::Error => e raise DataSource::ConnectionBad.new(e) end def fetch_columns(table) adapter = connection.pool.connection - if spectrum?(table) - connection.query(<<~SQL, 'COLUMN').map { |name, sql_type| Column.new(name, sql_type, "NULL", true) } - SELECT columnname, external_type FROM svv_external_columns WHERE tablename = '#{table.table_name}'; - SQL - else - connection.columns(table.full_table_name).map { |c| Column.new(c.name, c.sql_type, adapter.quote(c.default), c.null) } - end + connection.query(<<~SQL, 'COLUMN').map { |name, sql_type, default, nullable| Column.new(name, sql_type, adapter.quote(default), nullable || false) } + SELECT column_name, data_type, column_default, is_nullable FROM svv_columns WHERE table_schema = '#{table.schema_name}' and table_name = '#{table.table_name}'; + SQL rescue ActiveRecord::ActiveRecordError, Mysql2::Error, PG::Error => e raise DataSource::ConnectionBad.new(e) end def fetch_rows(table, limit) - return [] if spectrum?(table) + return [] if spectrum?(table) || late_binding_view?(table) super end def fetch_count(table) - return 0 if spectrum?(table) + return 0 if spectrum?(table) || late_binding_view?(table) super end @@ -61,5 +63,19 @@ def spectrum?(table) raise "@external_table_names must be defined, execute fetch_table befor spectrum?" unless instance_variable_defined?(:@external_table_names) @external_table_names.include?(table.full_table_name.split('.')) end + + def view?(table) + base_view?(table) || late_binding_view?(table) + end + + def base_view?(table) + raise "@view_names must be defined, execute fetch_table befor view?" unless instance_variable_defined?(:@view_names) + @view_names.include?(table.full_table_name.split('.')) + end + + def late_binding_view?(table) + raise "@late_binding_view_names must be defined, execute fetch_table befor late_binding_view?" unless instance_variable_defined?(:@late_binding_view_names) + @late_binding_view_names.include?(table.full_table_name.split('.')) + end end end From 9b98462a8e1b8467e7bdcec21db00719e2e77a22 Mon Sep 17 00:00:00 2001 From: ragi Date: Fri, 29 Mar 2019 19:48:54 +0900 Subject: [PATCH 2/4] Add view meta data model to show view query at table memo page --- app/controllers/table_memos_controller.rb | 1 + .../data_source_adapters/redshift_adapter.rb | 14 ++++++++ .../data_source_adapters/standard_adapter.rb | 11 +++++++ app/models/data_source_table.rb | 8 +++++ app/models/table_memo.rb | 1 + app/models/view_meta_datum.rb | 3 ++ app/views/table_memos/_raw_dataset.html.haml | 29 ++++++++++++++++ .../table_memos/_view_meta_data.html.haml | 14 ++++++++ app/views/table_memos/show.html.haml | 33 +++---------------- db/view_meta_data.schema | 6 ++++ 10 files changed, 91 insertions(+), 29 deletions(-) create mode 100644 app/models/view_meta_datum.rb create mode 100644 app/views/table_memos/_raw_dataset.html.haml create mode 100644 app/views/table_memos/_view_meta_data.html.haml create mode 100644 db/view_meta_data.schema diff --git a/app/controllers/table_memos_controller.rb b/app/controllers/table_memos_controller.rb index b3abb24..dc23e8f 100644 --- a/app/controllers/table_memos_controller.rb +++ b/app/controllers/table_memos_controller.rb @@ -15,6 +15,7 @@ def show(database_name, schema_name, name) @raw_dataset_columns = @raw_dataset.columns.order(:position) @raw_dataset_rows = @raw_dataset.rows.pluck(:row) end + @view_meta_data = @table_memo.view_meta_data end def edit(id) diff --git a/app/models/data_source_adapters/redshift_adapter.rb b/app/models/data_source_adapters/redshift_adapter.rb index 9574c40..1225cbe 100644 --- a/app/models/data_source_adapters/redshift_adapter.rb +++ b/app/models/data_source_adapters/redshift_adapter.rb @@ -49,6 +49,20 @@ def fetch_count(table) super end + def fetch_view_query(table) + return nil unless view?(table) + adapter = connection.pool.connection + connection.query(<<~SQL, 'VIEW QUERY').join("\n") + SELECT definition FROM pg_views WHERE schemaname = '#{table.schema_name}' and viewname = '#{table.table_name}'; + SQL + end + + def fetch_view_query_plan(query) + return nil if query.blank? + query = query.sub(/create view .*?as/, '').sub('with no schema binding', '') + super + end + private def group_by_table_type(records) diff --git a/app/models/data_source_adapters/standard_adapter.rb b/app/models/data_source_adapters/standard_adapter.rb index cf53bfc..acc0e7c 100644 --- a/app/models/data_source_adapters/standard_adapter.rb +++ b/app/models/data_source_adapters/standard_adapter.rb @@ -33,6 +33,17 @@ def fetch_count(table) raise DataSource::ConnectionBad.new(e) end + def fetch_view_query(view) + raise NotImplementedError + end + + def fetch_view_query_plan(query) + adapter = connection.pool.connection + connection.query("EXPLAIN #{query}", 'EXPLAIN').join("\n") + rescue ActiveRecord::ActiveRecordError, Mysql2::Error, PG::Error => e + raise DataSource::ConnectionBad.new(e) + end + def source_base_class return DynamicTable.const_get(source_base_class_name) if DynamicTable.const_defined?(source_base_class_name) diff --git a/app/models/data_source_table.rb b/app/models/data_source_table.rb index e9e6eab..5e998ea 100644 --- a/app/models/data_source_table.rb +++ b/app/models/data_source_table.rb @@ -22,4 +22,12 @@ def fetch_rows(limit=20) def fetch_count @data_source.access_logging { data_source_adapter.fetch_count(self) } end + + def fetch_view_query + @view_query ||= @data_source.access_logging { data_source_adapter.fetch_view_query(self) } + end + + def fetch_view_query_plan + @view_query_plan ||= @data_source.access_logging { data_source_adapter.fetch_view_query_plan(@view_query) } + end end diff --git a/app/models/table_memo.rb b/app/models/table_memo.rb index 3794f51..177e175 100644 --- a/app/models/table_memo.rb +++ b/app/models/table_memo.rb @@ -7,6 +7,7 @@ class TableMemo < ApplicationRecord belongs_to :schema_memo has_one :raw_dataset, class_name: "TableMemoRawDataset", dependent: :destroy + has_one :view_meta_data, class_name: "ViewMetaDatum", dependent: :destroy has_many :column_memos, dependent: :destroy has_many :logs, -> { order(:id) }, class_name: "TableMemoLog", dependent: :destroy diff --git a/app/models/view_meta_datum.rb b/app/models/view_meta_datum.rb new file mode 100644 index 0000000..a69acd5 --- /dev/null +++ b/app/models/view_meta_datum.rb @@ -0,0 +1,3 @@ +class ViewMetaDatum < ApplicationRecord + belongs_to :table_memo +end diff --git a/app/views/table_memos/_raw_dataset.html.haml b/app/views/table_memos/_raw_dataset.html.haml new file mode 100644 index 0000000..82456e9 --- /dev/null +++ b/app/views/table_memos/_raw_dataset.html.haml @@ -0,0 +1,29 @@ +.box.data-box + .box-header + %h3.box-title Data + .box-body + - if @raw_dataset + %table.table.table-hover.table-bordered.table-striped.text-sm{ role: "grid" } + %tr + - @raw_dataset_columns.each do |column| + %th #{column.name} (#{column.sql_type}) + - if @table_memo.masked? + %tr + %td.text-center{ colspan: @raw_dataset_columns.size } + = t("masked_table") + - else + - database_name = @table_memo.database_memo.name + - table_name = @table_memo.name + - @raw_dataset_rows.each do |row| + %tr + - Array.wrap(row).each_with_index do |value, i| + - column = @raw_dataset_columns[i] + %td + - if MaskedDatum.masked_column?(database_name, table_name, column.name) + = t("masked_text") + - else + = value + .pull-right.block + #{@raw_dataset_rows.try(:size).to_i} / #{@raw_dataset.count || '?' } records + - else + = t("no_preview_dataset") diff --git a/app/views/table_memos/_view_meta_data.html.haml b/app/views/table_memos/_view_meta_data.html.haml new file mode 100644 index 0000000..88b0724 --- /dev/null +++ b/app/views/table_memos/_view_meta_data.html.haml @@ -0,0 +1,14 @@ +.box.data-box + .box-header + %h3.box-title View Query + .box-body + %pre + %code= @view_meta_data.query + +.box.data-box + .box-header + %h3.box-title View Query Plan + .box-body + %pre + %code= @view_meta_data.explain + diff --git a/app/views/table_memos/show.html.haml b/app/views/table_memos/show.html.haml index 6fe8953..4ceba36 100644 --- a/app/views/table_memos/show.html.haml +++ b/app/views/table_memos/show.html.haml @@ -60,32 +60,7 @@ %th Nullable = render partial: "column_memo", collection: @table_memo.column_memos.sort_by(&:display_order) -.box.data-box - .box-header - %h3.box-title Data - .box-body - - if @raw_dataset - %table.table.table-hover.table-bordered.table-striped.text-sm{ role: "grid" } - %tr - - @raw_dataset_columns.each do |column| - %th #{column.name} (#{column.sql_type}) - - if @table_memo.masked? - %tr - %td.text-center{ colspan: @raw_dataset_columns.size } - = t("masked_table") - - else - - database_name = @table_memo.database_memo.name - - table_name = @table_memo.name - - @raw_dataset_rows.each do |row| - %tr - - Array.wrap(row).each_with_index do |value, i| - - column = @raw_dataset_columns[i] - %td - - if MaskedDatum.masked_column?(database_name, table_name, column.name) - = t("masked_text") - - else - = value - .pull-right.block - #{@raw_dataset_rows.try(:size).to_i} / #{@raw_dataset.count || '?' } records - - else - = t("no_preview_dataset") +- if @view_meta_data + = render partial: "view_meta_data" +- else + = render partial: "raw_dataset" diff --git a/db/view_meta_data.schema b/db/view_meta_data.schema new file mode 100644 index 0000000..74a0817 --- /dev/null +++ b/db/view_meta_data.schema @@ -0,0 +1,6 @@ +create_table "view_meta_data", force: :cascade do |t| + t.integer "table_memo_id", null: false + t.text "query", null: false + t.text "explain", null: false + t.datetime "created_at", null: false +end From e62bf91c7f4693b638142afc3cebadafdaa57387 Mon Sep 17 00:00:00 2001 From: ragi Date: Mon, 1 Apr 2019 16:31:46 +0900 Subject: [PATCH 3/4] Fix mysql spec to add analyze table Mysql2 adapter uses information_schema.tables to count rows. The table update when analyze table. If select table rows before analyze, table_rows return 0. --- spec/models/data_source_adapters/mysql2_adapter_spec.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/models/data_source_adapters/mysql2_adapter_spec.rb b/spec/models/data_source_adapters/mysql2_adapter_spec.rb index 6fdb975..0b9a432 100644 --- a/spec/models/data_source_adapters/mysql2_adapter_spec.rb +++ b/spec/models/data_source_adapters/mysql2_adapter_spec.rb @@ -26,6 +26,7 @@ def execute_sql(sql) CREATE TABLE IF NOT EXISTS #{table.table_name} (id INT PRIMARY KEY); TRUNCATE TABLE #{table.table_name}; INSERT INTO #{table.table_name} VALUES (1); + ANALYZE TABLE #{table.table_name}; SQL end From 43fde8d37f4434ecfa428368748e056320d35562 Mon Sep 17 00:00:00 2001 From: ragi Date: Mon, 1 Apr 2019 16:59:16 +0900 Subject: [PATCH 4/4] Add fetch_view_query & fetch_view_query_plan to superclass --- app/models/data_source_adapters/base.rb | 8 ++++++++ app/models/data_source_adapters/standard_adapter.rb | 4 ---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/app/models/data_source_adapters/base.rb b/app/models/data_source_adapters/base.rb index bfddf6b..db88750 100644 --- a/app/models/data_source_adapters/base.rb +++ b/app/models/data_source_adapters/base.rb @@ -20,6 +20,14 @@ def fetch_count(table) nil end + def fetch_view_query(view) + nil + end + + def fetch_view_query_plan(query) + raise NotImplementedError + end + def reset! raise NotImplementedError end diff --git a/app/models/data_source_adapters/standard_adapter.rb b/app/models/data_source_adapters/standard_adapter.rb index acc0e7c..1cf8499 100644 --- a/app/models/data_source_adapters/standard_adapter.rb +++ b/app/models/data_source_adapters/standard_adapter.rb @@ -33,10 +33,6 @@ def fetch_count(table) raise DataSource::ConnectionBad.new(e) end - def fetch_view_query(view) - raise NotImplementedError - end - def fetch_view_query_plan(query) adapter = connection.pool.connection connection.query("EXPLAIN #{query}", 'EXPLAIN').join("\n")