Skip to content

Commit

Permalink
Allow GraduationService to process CSV data
Browse files Browse the repository at this point in the history
The registrar provides data in CSV format and folks have been
converting it to JSON for processing by the application.

Rather than converting the data to JSON and then parsing it back
to a hash from JSON, this change enables the application to
process the CSV file directly from the registrar.

JSON support is still provided for backwards compatibility. This
also allows the change to be deployed to production independently of
cutting over the data format in the registrar workflow.
  • Loading branch information
mark-dce committed Jun 9, 2023
1 parent f5cbf3d commit 72820b2
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 5 deletions.
24 changes: 21 additions & 3 deletions app/services/graduation_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# 1. Checks the repository for works in the `approved` workflow state
# 2. For each of these works, query the registrar data to see if the student has graduated
# 3. If so, call GraduationJob for the given work and the graduation_date returned by registrar data
# @param [String] The path to the data JSON file. Expects location of registrar data to be set in REGISTRAR_DATA_PATH, e.g., in .env.production
# @param [RegistrarFeed] a RegistrarFeed object - expected to have a valid registrar graduation_records file attached
# @example How to call this service
# GraduationService.run
class GraduationService
Expand All @@ -20,7 +20,7 @@ def initialize(registrar_feed)
Rails.logger.warn "Graduation service: Running graduation service for #{registrar_feed.to_global_id}"
@registrar_feed = registrar_feed
@registrar_feed.published_etds = published_etds
@registrar_data = JSON.parse(registrar_feed.graduation_records.download)
@registrar_data = parse_registrar_file
@graduation_report = GraduationReport.new
raise FormatError unless contains_graduation_data
end
Expand All @@ -42,6 +42,24 @@ def run
update_registrar_feed(approved_etds, publishable_etds)
end

# Header converter that lower-cases CSV column names, so lookups such as
# row['etd record key'] work regardless of the capitalization used in the file.
DOWNCASE_CONVERTER = ->(header) { header.downcase }

# Read registrar data from the CSV or JSON file attached to the registrar feed.
# Responsible for massaging the input file into the same structure as the JSON feed:
# a hash keyed by registrar key.
#
# CSV rows are keyed by their 'etd record key' column. Rows whose key is blank
# (for example, trailing empty lines) are skipped instead of being stored under nil.
#
# @return [Hash] a hash of registrar keys pointing at the associated student graduation records
# @raise [ArgumentError] if the attachment is neither 'text/csv' nor 'application/json'
def parse_registrar_file
  grad_records = @registrar_feed.graduation_records
  case grad_records.content_type
  when 'text/csv'
    rows = CSV.parse(grad_records.download, headers: true, header_converters: DOWNCASE_CONVERTER)
    rows.each_with_object({}) do |row, records|
      key = row['etd record key']
      next if key.nil? || key.strip.empty?

      records[key] = row.to_hash
    end
  when 'application/json'
    JSON.parse(grad_records.download)
  else
    raise ArgumentError, "Unexpected content type: #{grad_records.content_type.inspect}"
  end
end

# Save process stats and the report to the registrar feed
def update_registrar_feed(approved_etds, publishable_etds)
@registrar_feed.approved_etds = approved_etds.count
Expand Down Expand Up @@ -171,7 +189,7 @@ def published_etds
# If these keys and values are not present, the file
# probably does not contain correctly formatted graduation data.
# True when at least one record carries a 'degree status date' containing a
# YYYY-MM-DD date. A missing, nil, or non-string date is treated as "no date"
# instead of raising.
# @return [Boolean]
def contains_graduation_data
  @registrar_data.any? { |_key, record| record.fetch('degree status date', '').to_s.match?(/\d{4}-\d{2}-\d{2}/) }
end

# PROGRAM_MAP: Keys = Laevigata degree codes (degree_tesim); Values = corresponding Registrar academic program codes
Expand Down
11 changes: 9 additions & 2 deletions spec/factories/registrar_feeds.rb
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
FactoryBot.define do
registrar_sample = Rack::Test::UploadedFile.new(
registrar_sample_json = Rack::Test::UploadedFile.new(
Rails.root.join('spec', 'fixtures', 'registrar_feeds', 'registrar_sample.json'), 'application/json'
)
registrar_sample_csv = Rack::Test::UploadedFile.new(
Rails.root.join('spec', 'fixtures', 'registrar_feeds', 'registrar_sample.csv'), 'text/csv'
)
sample_report = Rack::Test::UploadedFile.new(
Rails.root.join('spec', 'fixtures', 'registrar_feeds', 'graduation_report.csv'), 'text/csv'
)

factory :registrar_feed do
graduation_records { registrar_sample }
graduation_records { registrar_sample_csv }
approved_etds { 1 }
graduated_etds { 0 }
published_etds { 2 }

factory :json_registrar_feed do
graduation_records { registrar_sample_json }
end

factory :completed_registrar_feed do
report { sample_report }
status { 'completed' }
Expand Down
8 changes: 8 additions & 0 deletions spec/fixtures/registrar_feeds/registrar_sample.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
etd record key,public person id,directory last name,directory first name,directory middle name,preferred email address,home address 1,home address 2,home address 3,home address city,home address state,home address postal code,home address country code,home address country descr,ferpa suppression flag,acad career code,acad career descr,acad program code,acad program descr,primary acad plan code,primary acad plan descr,secondary acad plan code,secondary acad plan descr,program status descr,degree code,degree status descr,degree status date
P0000001-GSAS-PHD,P0000001,Doe,John, ,jdoe@example.com,123 Fake St, , ,Atlanta,GA,30301,USA,United States,N,GSAS,School of Graduate Studies,PHD,Doctor of Philosophy,BBSPHD,Biological and Biomedical Sci., , ,AC,PHD, ,
P0000002-UCOL-LIBAS,P0000002,Smith,Jane,Cinderlla,jsmith@example.com,321 Ash Way, , ,Atlanta,GA,30301,USA,United States,N,UCOL,Undergraduate Emory College,LIBAS,Liberal Arts & Sciences,POLISCIBA,Political Science,LACSND,Latin Amer. & Caribbean Stu.,AC,BS,Awarded,2017-05-18
P0000003-UCOL-LIBAS,P0000003,Hood,Riding,Red,rhood@example.com,12 Nana Ct,"","",Atlanta,GA,30301,USA,United States,N,UCOL,Undergraduate Emory College,LIBAS,Liberal Arts & Sciences,MATHCSBS,Mathematics & Computer Science, , ,CM,BS,Awarded,2017-03-16
P0000004-THEO-MDV,P0000004,Smith,Jim,James,jim.s@example.com,123 Fake Dr,"","",Atlanta,GA,30301,USA,United States,N,THEO,Theology,MDV,Master of Divinity,MDVDIVIN,Divinity, , ,CM,MDV,Awarded,2018-01-12
P0000004-THEO-THD,P0000004,Smith,Jim,James,jim.s@example.com,123 Fake Dr,"","",Atlanta,GA,30301,USA,United States,N,THEO,Theology,THD,Doctor of Theology,THDCOUNSEL,Pastoral Counseling, , ,AC,THD,Awarded,2020-05-23
P0000005-GSAS-PHD,P0000005,Anderson,John, ,janders@example.com,123 Fake Dr,"","",Atlanta,GA,30301,USA,United States,N,GSAS,School of Graduate Studies,PHD,Doctor of Philosophy,CHEMPHD,Chemistry, , ,CM,PHD,Awarded,2020-05-25
P0000006-UBUS-BBA,P0000006,Dieu-le-Veut,Anne,"",adv@example.com,50 Rue de Mer,"","",Atlanta,GA,30301,USA,United States,N,UBUS,Undergraduate Business,BBA,Bachelor of Business Admin,BUSBBA,Business Administration,BUSMUSIC,Music,CM,BBA,Awarded,2022-05-25
31 changes: 31 additions & 0 deletions spec/services/graduation_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,37 @@
end
end

describe '#parse_registrar_file' do
  # One record that both sample fixtures are expected to yield once parsed.
  let(:expected_record) do
    {
      'P0000006-UBUS-BBA' => hash_including(
        'public person id' => 'P0000006',
        'directory last name' => 'Dieu-le-Veut',
        'degree status date' => '2022-05-25'
      )
    }
  end

  context 'with JSON data' do
    let(:feed) { FactoryBot.create(:json_registrar_feed) }

    it 'extracts records successfully' do
      expect(grad_service.parse_registrar_file).to include(expected_record)
    end
  end

  context 'with CSV data' do
    let(:feed) { FactoryBot.create(:registrar_feed) }

    it 'extracts records successfully' do
      expect(grad_service.parse_registrar_file).to include(expected_record)
    end
  end

  it 'raises an error on unexpected content types' do
    # Stub the attachment's content type so the parser reaches its fallback branch.
    allow(feed.graduation_records).to receive(:content_type).and_return('text/html')
    expect { grad_service.parse_registrar_file }.to raise_exception(ArgumentError, /html/)
  end
end

describe "#find_registrar_match" do
describe "for exact matches" do
let(:etd_solr_doc) { { 'id' => 'MatchingETD', 'depositor_ssim' => ['P0000003'], 'school_tesim' => ['Emory College'], 'degree_tesim' => ['B.S.'] } }
Expand Down

0 comments on commit 72820b2

Please sign in to comment.