Skip to content
This repository has been archived by the owner on Apr 4, 2018. It is now read-only.

Commit

Permalink
Merge pull request #5 from alphagov/add-more-queries
Browse files Browse the repository at this point in the history
Script to show content items with missing topics
  • Loading branch information
Davidslv committed Apr 29, 2016
2 parents 55a229c + 5e96a72 commit d15a3ee
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 2 deletions.
24 changes: 24 additions & 0 deletions bin/missing_topics_and_browse
@@ -0,0 +1,24 @@
#!/usr/bin/env ruby

# Command-line wrapper around Database#find_missing_topics_and_browse!.
#
# Options:
#   -f FORMAT, --format=FORMAT   output format, one of csv or text
#                                (defaults to text)
#
# On any option parsing error the message and the usage text are written to
# stderr and the script exits with a non-zero status.

require 'optparse'
require './lib/database'

options = {format: :text}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options]"

  # The array restricts the accepted values; OptionParser converts the
  # matching argument to the corresponding symbol.
  opts.on("-fFORMAT", "--format=FORMAT", [:csv, :text],
          "Output format: csv or text (default: text)") do |form|
    options[:format] = form
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => error
  # ParseError is the common ancestor of InvalidOption (unknown switch),
  # InvalidArgument (value outside csv/text) and MissingArgument (bare -f),
  # so every user mistake gets the same friendly treatment.
  abort "#{error.message}\n\n#{parser.help}"
end

database = Database.new
database.find_missing_topics_and_browse!(output: options[:format])
51 changes: 49 additions & 2 deletions lib/database.rb
@@ -1,3 +1,4 @@
require 'csv'
require 'pg'

class Database
Expand Down Expand Up @@ -52,7 +53,8 @@ def compare!
end

# Identify rummager content that is not in the publishing api.
# This content is ignored in other queries.
# If the target of a link is not in the publishing api, it is
# ignored by the other queries.
def find_unmatched_base_paths!
query = <<-SQL
SELECT DISTINCT base_path FROM rummager
Expand Down Expand Up @@ -108,6 +110,52 @@ def find_missing_publishing_api_link_types!(publishing_app:)
puts "#{results.ntuples} missing #{publishing_app} links found"
end

# All items with missing topics or browse pages.
#
# Selects the (base_path, link_type) pairs of type 'topics' or
# 'mainstream_browse_pages' that are in the rummager table but not in the
# publishing_api table, joins them to api_content on base_path, and prints
# base_path, link_type, format and publishing_app for each match to stdout.
#
# output - :csv  prints a header row followed by one CSV row per result.
#          :text prints one "name value" line per column, a separator line
#                after each result, and a final row count.
#          Any other value prints nothing (the query is still executed).
def find_missing_topics_and_browse!(output:)
  query = <<-SQL
    WITH missing_links as (
      SELECT
        base_path, link_type
      FROM rummager
      WHERE link_type in ('topics', 'mainstream_browse_pages')
      EXCEPT
      SELECT
        base_path, link_type
      FROM publishing_api
      WHERE link_type in ('topics', 'mainstream_browse_pages')
    )
    SELECT base_path, link_type, format, publishing_app
    FROM
      missing_links
    JOIN
      api_content USING(base_path)
  SQL

  results = @connection.exec(query)

  # Must stay in the same order as the SELECT list above.
  columns = %w(base_path link_type format publishing_app)

  case output
  when :csv
    # Kernel#CSV with a block yields a CSV writer bound to $stdout.
    CSV do |csv|
      csv << columns
      results.each_row { |row| csv << row }
    end
  when :text
    results.each_row do |row|
      columns.zip(row).each do |name, value|
        # Interpolate rather than concatenate: a nil value (SQL NULL) is
        # printed as an empty string instead of raising a TypeError.
        puts "#{'%-20s' % name} #{value}"
      end
      puts '------------------------------'
    end

    puts "#{results.ntuples} rows found"
  end
end

def summarise_missing_publishing_api_link_types!
query = <<-SQL
WITH missing_links as (
Expand Down Expand Up @@ -138,5 +186,4 @@ def summarise_missing_publishing_api_link_types!
end

end

end

0 comments on commit d15a3ee

Please sign in to comment.