Skip to content
This repository has been archived by the owner on Aug 11, 2023. It is now read-only.

Refactor data importer #26

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
29 changes: 25 additions & 4 deletions README.md
@@ -1,15 +1,36 @@
# licence-finder

To get set up with Licence Finder, first make sure you're up to date with puppet.
To get set up with Licence Finder, first make sure you're up to date
with puppet.

## Loading Records

Before starting, you'll need to drop the MongoDB database:

bundle exec rake db:mongoid:drop

Load the data into MongoDB

bundle exec rake data_import:all
bundle exec rake import:all

Load the data into Elasticsearch

bundle exec rake search:index

Migrating your Licence data to use gds_id instead of correlation_id (This happens automatically on importing licences)
## Creating Records

Each model has a corresponding rake task to easily create records.
Running `rake -T` will give a complete description of how to use each
task with the parameters required. Here's an example for creating licences:

bundle exec rake "create:licence[Licences to play music in an odd shaped building,Copyright,9000]"

## Exporting Records

You can export all the data stored in your local database

bundle exec rake export:all

Or specific models, depending on your needs

bundle exec rake licence_migrate
bundle exec rake export:sector
4 changes: 2 additions & 2 deletions app/models/licence.rb
Expand Up @@ -14,11 +14,11 @@ class Licence

validates :name, :presence => true
validates :regulation_area, :presence => true

# Looks up a single record by correlation_id.
#
# correlation_id - value matched against the correlation_id field.
#
# Returns whatever `first` yields for the `where` query.
def self.find_by_correlation_id(correlation_id)
  matches = where(correlation_id: correlation_id)
  matches.first
end

# Looks up a single record by gds_id.
#
# gds_id - value matched against the gds_id field.
#
# Returns whatever `first` yields for the `where` query.
def self.find_by_gds_id(gds_id)
  matches = where(gds_id: gds_id)
  matches.first
end
Expand Down
1 change: 1 addition & 0 deletions data/json/activity.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions data/json/licence.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions data/json/licencelink.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions data/json/sector.json

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions lib/data_exporter.rb
@@ -0,0 +1,15 @@
# Writes every record of a model class to a JSON file under data/json.
class DataExporter
  # Serialises all records of +klass+ and saves them to
  # data/json/<lowercased class name>.json.
  #
  # klass - a class responding to +all+ (returning an enumerable of records)
  #         whose +to_s+ gives the name used for the output file.
  #
  # Returns the result of save_to_file.
  def self.export_for(klass)
    # to_a materialises the collection directly; no manual accumulation needed.
    records = klass.all.to_a
    save_to_file(records.to_json, klass.to_s.downcase)
  end

  # Writes +data+ to data/json/<filename>.json, replacing any existing content.
  #
  # The path is relative, so it resolves against the current working
  # directory, and the data/json directory must already exist.
  #
  # data     - String to write.
  # filename - base name of the target file, without extension.
  #
  # Returns the value of File#write (the number of bytes written).
  def self.save_to_file(data, filename)
    File.open("data/json/#{filename}.json", 'w') do |file|
      file.write(data)
    end
  end
end
47 changes: 15 additions & 32 deletions lib/data_importer.rb
@@ -1,38 +1,21 @@
require 'csv'
require 'json'

class DataImporter

# Opens the data file via open_data_file (not defined in this view), runs a
# new importer instance over it, and closes the handle afterwards.
def self.update
fh = open_data_file
begin
new(fh).run
ensure
# Close the handle even when the import raises.
fh.close
end
end

# Builds the path of a file inside the application's data directory.
#
# filename - name of the file within the data directory.
#
# Returns the result of joining Rails.root, 'data' and filename.
def self.data_file_path(filename)
  app_root = Rails.root
  app_root.join('data', filename)
end

# Stores the given file handle; #run reads it back as the CSV input.
def initialize(fh)
@filehandle = fh
end

def run
counter = 0
CSV.new(@filehandle, headers: true).each do |row|
counter += process_row(row)
done(counter, "\r")
# Reads data/json/<lowercased class name>.json, parses it as JSON and calls
# klass.create! once for each parsed record.
#
# klass - model class; klass.to_s.downcase selects the JSON file, and the
#         class must respond to create!.
#
# NOTE(review): the file is closed manually rather than through a block or
# ensure, so the handle stays open if file.read or JSON.parse raises.
def self.import_for(klass)
# The path is relative, so it resolves against the current working directory.
file = File.open("data/json/#{klass.to_s.downcase}.json", "rb")
records = JSON.parse(file.read)
file.close

# A JSON document consisting of `null` parses to nil; skip the import then.
unless records.nil?
records.each do |record|
# symbolise keys.
# record.keys returns a separate array, so the hash can be modified while
# looping; if to_sym raises, the inline rescue keeps the original key.
record.keys.each do |key|
record[(key.to_sym rescue key) || key] = record.delete(key)
end

# create! is given the record hash with symbol keys as its attributes.
klass.create!(record)
end
end
# NOTE(review): this call looks like a leftover from the CSV-based `run`
# method; `counter` is never assigned inside import_for — confirm it is gone
# from the final file.
done(counter, "\n")
end

private

# Prints a progress line of the form "Imported <counter> <ClassName>.<nl>".
#
# counter - value shown as the number of imported items.
# nl      - line terminator appended to the message.
#
# The class name is printed without any enclosing module namespace.
def done(counter, nl)
  short_name = self.class.name.split('::').last
  print "Imported #{counter} #{short_name}.#{nl}"
end
end

Dir[File.join(File.dirname(__FILE__), "data_importer/**/*.rb")].each {|f| require f}
33 changes: 0 additions & 33 deletions lib/data_importer/activities.rb

This file was deleted.

63 changes: 0 additions & 63 deletions lib/data_importer/licences.rb

This file was deleted.

56 changes: 0 additions & 56 deletions lib/data_importer/sectors.rb

This file was deleted.

26 changes: 26 additions & 0 deletions lib/tasks/create.rake
@@ -0,0 +1,26 @@
# Rake tasks that create a single record from positional task arguments and
# print the created record. Every task depends on :environment.
namespace :create do
  desc "Create a new Activity entry and save it to the database"
  task :activity, [:name, :correlation_id] => :environment do |_task, args|
    name = args[:name]
    correlation_id = args[:correlation_id]
    record = Activity.create!(name: name, correlation_id: correlation_id)
    next if record.nil?

    puts "Created activity: #{record.inspect}"
  end

  desc "Create a new Licence entry and save it to the database"
  task :licence, [:name, :regulation_area, :gds_id] => :environment do |_task, args|
    name = args[:name]
    regulation_area = args[:regulation_area]
    gds_id = args[:gds_id]
    record = Licence.create!(name: name, regulation_area: regulation_area, gds_id: gds_id)
    next if record.nil?

    puts "Created licence: #{record.inspect}"
  end

  desc "Create a relationship between an Activity, Licence, and Sector and save it to the database"
  task :licence_link, [:activity_id, :licence_id, :sector_id] => :environment do |_task, args|
    activity_id = args[:activity_id]
    licence_id = args[:licence_id]
    sector_id = args[:sector_id]
    record = LicenceLink.create!(activity_id: activity_id, licence_id: licence_id, sector_id: sector_id)
    next if record.nil?

    puts "Created licence link: #{record.inspect}"
  end

  desc "Create a new Sector entry and save it to the database"
  task :sector, [:name, :layer, :correlation_id] => :environment do |_task, args|
    name = args[:name]
    layer = args[:layer]
    correlation_id = args[:correlation_id]
    record = Sector.create!(name: name, layer: layer, correlation_id: correlation_id)
    next if record.nil?

    puts "Created sector: #{record.inspect}"
  end
end
31 changes: 0 additions & 31 deletions lib/tasks/data_import.rake

This file was deleted.

26 changes: 26 additions & 0 deletions lib/tasks/export.rake
@@ -0,0 +1,26 @@
require 'data_exporter'

# Rake tasks that dump model records through DataExporter.export_for.
# One task is defined per model, plus export:all, which runs them in order.
namespace :export do
  # Task name => model constant name. Constants are looked up only when a task
  # runs, i.e. after its :environment prerequisite.
  exportable = {
    :activity => "Activity",
    :licence => "Licence",
    :licence_link => "LicenceLink",
    :sector => "Sector"
  }

  desc "Export all records for all models"
  task :all => exportable.keys

  exportable.each do |task_name, model_name|
    desc "Export all #{model_name} records to a local file in the /data/json folder"
    task task_name => :environment do
      DataExporter.export_for(Object.const_get(model_name))
    end
  end
end
26 changes: 26 additions & 0 deletions lib/tasks/import.rake
@@ -0,0 +1,26 @@
require 'data_importer'

# Rake tasks that load model records from the JSON files through
# DataImporter.import_for. One task is defined per model, plus import:all.
namespace :import do
  # Prerequisites run in the listed order: sectors first, licence links last.
  desc "Import all records from the JSON files"
  task :all => ["import:sector", "import:activity", "import:licence", "import:licence_link"]

  desc "Import all activity data"
  task :activity => :environment do
    DataImporter.import_for(Activity)
  end

  desc "Import all licence data"
  task :licence => :environment do
    DataImporter.import_for(Licence)
  end

  # Previously shared the "Import all licence data" description with
  # import:licence, making the two indistinguishable in `rake -T`.
  desc "Import all licence link data"
  task :licence_link => :environment do
    DataImporter.import_for(LicenceLink)
  end

  desc "Import all sector data"
  task :sector => :environment do
    DataImporter.import_for(Sector)
  end
end