-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add script for PDF attachment report
This script produces a CSV of counts of published PDF attachments per organisation owning the associated manual. This was produced for the Content Operating Model team.
- Loading branch information
1 parent
1adcd46
commit 95f3264
Showing
1 changed file
with
84 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#!/usr/bin/env ruby | ||
|
||
require File.expand_path("../../config/environment", __FILE__) | ||
require 'csv' | ||
|
||
# Edition states that document_edition_never_published? treats as "has been published".
POST_PUBLICATION_STATES = %w(published archived).freeze

# Reporting periods, both overridable from the environment.
#
# ENV values are always Strings, so they are parsed explicitly here: an unparsed
# String would fail later when compared against timestamps (start date) or when
# `.days` is called on it (period length). Invalid values raise ArgumentError
# immediately rather than part-way through the report.
first_period_start_date = Date.parse(ENV.fetch('FIRST_PERIOD_START_DATE', '2016-01-01'))
# Base 10 is forced so a value such as "030" is read as thirty, not as octal.
last_time_period_days = Integer(ENV.fetch('LAST_TIME_PERIOD_DAYS', '30'), 10)
last_time_period_start_date = last_time_period_days.days.ago
|
||
# All manual records; this collection supplies the organisation slugs and is
# iterated again by the main counting loop.
manual_records = ManualRecord.all
unique_owning_organisation_slugs = manual_records.map { |record| record.organisation_slug }.uniq

# Builds a fresh hash mapping every owning organisation slug to a zero count.
build_zeroed_counts = lambda do
  unique_owning_organisation_slugs.each_with_object({}) { |slug, counts| counts[slug] = 0 }
end

# One independent set of counters per reported period.
organisation_published_pdfs_total_counts_hash = build_zeroed_counts.call
organisation_published_pdfs_since_first_period_counts_hash = build_zeroed_counts.call
organisation_published_pdfs_since_second_period_counts_hash = build_zeroed_counts.call
|
||
# Reports whether a document edition's timestamp falls on or after +date+.
#
# The timestamp compared is the edition's exported_at when present, otherwise
# its updated_at.
#
# @param document_edition [#exported_at, #updated_at] the edition to inspect
# @param date [Comparable] the earliest timestamp that counts as "after"
# @return [Boolean]
def document_published_after_date?(document_edition, date)
  published_at = document_edition.exported_at || document_edition.updated_at
  published_at >= date
end
|
||
# Reports whether a document edition's state is absent from POST_PUBLICATION_STATES.
#
# @param document_edition [#state] the edition to inspect
# @return [Boolean] true when the edition's state is not a post-publication state
def document_edition_never_published?(document_edition)
  edition_state = document_edition.state
  POST_PUBLICATION_STATES.none? { |published_state| published_state == edition_state }
end
|
||
# Collects the document ids referenced by every edition of +manual+, flattened
# into a single list with duplicates removed (first occurrence order is kept).
#
# @param manual [#editions] a manual whose editions each respond to document_ids
# @return [Array] the distinct document ids across all of the manual's editions
def all_unique_document_ids_for_manual(manual)
  ids_per_edition = manual.editions.map { |edition| edition.document_ids }
  ids_per_edition.flatten.uniq
end
|
||
# Matches filenames whose extension is ".pdf". Anchored with \z so only the true
# end of the string counts (`$` would also match before an embedded newline), and
# case-insensitive so attachments named e.g. "REPORT.PDF" are counted too.
pdf_filename_pattern = /\.pdf\z/i

# Tally, per owning organisation, the distinct PDF attachments found on
# post-publication document editions of each manual that has ever been published.
manual_records.to_a.each do |manual|
  next unless manual.has_ever_been_published?

  organisation_slug = manual.organisation_slug

  # Attachment file ids already counted for this manual, so an attachment carried
  # across several editions is only counted once.
  unique_pdf_attachment_file_ids_for_manual = Set.new

  # Rather than examine each manual edition and its set of document editions and attachments in turn,
  # we instead get all unique document ids associated with this manual, then walk through
  # the editions of these documents in version order to find unique PDF attachments and their
  # publication times.
  all_unique_document_ids_for_manual(manual).each do |document_id|
    document_editions = SpecialistDocumentEdition.where(document_id: document_id).order(:version_number)

    document_editions.each do |document_edition|
      next if document_edition_never_published?(document_edition)

      document_edition.attachments.each do |attachment|
        next if unique_pdf_attachment_file_ids_for_manual.include?(attachment.file_id)
        # Regexp#=~ returns nil for a nil filename, so such attachments are skipped.
        next unless pdf_filename_pattern =~ attachment.filename

        organisation_published_pdfs_total_counts_hash[organisation_slug] += 1

        if document_published_after_date?(document_edition, first_period_start_date)
          organisation_published_pdfs_since_first_period_counts_hash[organisation_slug] += 1
        end

        if document_published_after_date?(document_edition, last_time_period_start_date)
          organisation_published_pdfs_since_second_period_counts_hash[organisation_slug] += 1
        end

        # Because editions are visited in version order, the attachment is attributed
        # to the first post-publication edition it appears on.
        unique_pdf_attachment_file_ids_for_manual << attachment.file_id
      end
    end
  end
end
|
||
# The report is written under Rails.root with today's date in the filename.
report_date_stamp = Time.zone.today.strftime('%Y-%m-%d')
document_report_filename = Rails.root.join("content-operating-report-for-pdf-documents-#{report_date_stamp}.csv")

# Column headings; the two period columns are labelled from the configured periods.
header_row = [
  "Organisation",
  "Total published PDF attachments",
  "#{first_period_start_date} - present PDF attachments",
  "Last #{last_time_period_days} days PDF attachments"
]

# Counter hashes in the same order as their columns in header_row.
counts_in_column_order = [
  organisation_published_pdfs_total_counts_hash,
  organisation_published_pdfs_since_first_period_counts_hash,
  organisation_published_pdfs_since_second_period_counts_hash
]

# One row per owning organisation: titleized slug followed by its three counts.
CSV.open(document_report_filename, 'w') do |document_csv|
  document_csv << header_row

  unique_owning_organisation_slugs.each do |organisation_slug|
    row_counts = counts_in_column_order.map { |counts| counts[organisation_slug] }
    document_csv << [organisation_slug.titleize, *row_counts]
  end
end