Skip to content
This repository has been archived by the owner on Apr 4, 2018. It is now read-only.

Script to show content items with missing topics #5

Merged
merged 2 commits into from Apr 29, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 24 additions & 0 deletions bin/missing_topics_and_browse
@@ -0,0 +1,24 @@
#!/usr/bin/env ruby

# Command-line entry point: parses the output format option and asks the
# Database to report content items with missing topics or browse pages.

require 'optparse'
require './lib/database'

# Text output is the default when no --format option is given.
options = { format: :text }

parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options]"

  # The [:csv, :text] list makes OptionParser reject any other value and
  # hand the block the matching symbol.
  opts.on("-fFORMAT", "--format=FORMAT", [:csv, :text],
          "Output format: csv or text (default: text)") do |form|
    options[:format] = form
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => error
  # ParseError is the common parent of InvalidOption, InvalidArgument (raised
  # for a format outside the allowed list) and MissingArgument, so every
  # command-line mistake ends with the message and usage text on stderr and a
  # non-zero exit status instead of a backtrace.
  abort "#{error.message}\n\n#{parser.help}"
end

database = Database.new
database.find_missing_topics_and_browse!(output: options[:format])
51 changes: 49 additions & 2 deletions lib/database.rb
@@ -1,3 +1,4 @@
require 'csv'
require 'pg'

class Database
Expand Down Expand Up @@ -52,7 +53,8 @@ def compare!
end

# Identify rummager content that is not in the publishing api.
# This content is ignored in other queries.
# If the target of a link is not in the publishing api it gets
# ignored by the other queries.
def find_unmatched_base_paths!
query = <<-SQL
SELECT DISTINCT base_path FROM rummager
Expand Down Expand Up @@ -108,6 +110,52 @@ def find_missing_publishing_api_link_types!(publishing_app:)
puts "#{results.ntuples} missing #{publishing_app} links found"
end

# All items with missing topics or browse pages.
#
# Takes every (base_path, link_type) pair in rummager whose link type is
# 'topics' or 'mainstream_browse_pages', removes the pairs that also exist in
# publishing_api, and joins what is left to api_content on base_path to pick
# up format and publishing_app. Pairs whose base_path has no api_content row
# are dropped by that join.
#
# The report is printed to standard output.
#
# output - :csv prints a header row followed by one CSV row per result.
#          :text prints one "name value" line per column, a separator after
#          each result, and a final row count.
#          Any other value runs the query but prints nothing.
def find_missing_topics_and_browse!(output:)
  query = <<-SQL
    WITH missing_links as (
      SELECT
        base_path, link_type
      FROM rummager
      WHERE link_type in ('topics', 'mainstream_browse_pages')

      EXCEPT

      SELECT
        base_path, link_type
      FROM publishing_api
      WHERE link_type in ('topics', 'mainstream_browse_pages')
    )

    SELECT base_path, link_type, format, publishing_app
    FROM
      missing_links
    JOIN
      api_content USING(base_path)
  SQL

  results = @connection.exec(query)

  # Same order as the SELECT list above; used as CSV header and text labels.
  columns = %w(base_path link_type format publishing_app)

  case output
  when :csv
    # CSV with a block and no arguments writes to $stdout.
    CSV do |csv|
      csv << columns
      results.each_row { |row| csv << row }
    end
  when :text
    results.each_row do |row|
      columns.zip(row).each do |name, value|
        # %s renders a nil column value as an empty string, where string
        # concatenation would raise TypeError.
        puts "%-20s %s" % [name, value]
      end
      puts '------------------------------'
    end

    puts "#{results.ntuples} rows found"
  end
end

def summarise_missing_publishing_api_link_types!
query = <<-SQL
WITH missing_links as (
Expand Down Expand Up @@ -138,5 +186,4 @@ def summarise_missing_publishing_api_link_types!
end

end

end