Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/mwcrawler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
require 'mwcrawler/classes'
require 'mwcrawler/courses'
require 'mwcrawler/departments'
require 'mwcrawler/subjects'
require 'mwcrawler/helpers'
require 'mwcrawler/crawler'

Expand Down
5 changes: 5 additions & 0 deletions lib/mwcrawler/crawler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ class Crawler
end
end

# Scrapes subjects, delegating the actual lookup to Subjects.scrap.
#
# @param department [String, Integer] department code or subject id; which one
#   is decided by the lookup key present in +options+
# @param options [Hash] handed to Options.init first, then forwarded untouched
#   to Subjects.scrap (which reads +:by_id+ / +:by_department+)
# @return [Object] whatever Subjects.scrap returns
def subjects(department, options = { log: false })
  Options.init(options)
  Subjects.scrap(department, options)
end

def semester
page = Helpers.set_crawler(nil, 'graduacao/default.aspx', exact: true)
page.css("a[title='Período Atual'] span").first.text
Expand Down
49 changes: 49 additions & 0 deletions lib/mwcrawler/subjects.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# frozen_string_literal: true

module Mwcrawler
  # Scrapes subjects from the course-offer pages: either every subject listed
  # for a department, or a single subject looked up by its id.
  module Subjects
    # Level reported for every scraped subject.
    LEVEL = 'graduação'
    private_constant :LEVEL

    # Runs the lookup selected in +options+.
    #
    # @param department_or_id [String, Integer] department code when
    #   +options[:by_department]+ is set, subject id when +options[:by_id]+ is set
    # @param options [Hash] lookup selector; +:by_id+ wins when both keys are truthy
    # @return [Hash, Array<Hash>] a single subject hash for +:by_id+, an array of
    #   subject hashes for +:by_department+; each hash has the keys
    #   +:code+, +:name+, +:department+ and +:level+
    # @raise [ArgumentError] when neither +:by_id+ nor +:by_department+ is truthy
    def self.scrap(department_or_id, options)
      return subject_by_id(department_or_id) if options[:by_id]
      return subject_by_department(department_or_id) if options[:by_department]

      raise ArgumentError, 'second argument not specified. You can find a subject by department code or id'
    end

    # Loads the offer page of +department+ and extracts every listed subject.
    private_class_method def self.subject_by_department(department)
      page = Helpers.set_crawler(department, 'graduacao/oferta_dis.aspx?cod=', exact: true)
      scrap_row(department, page)
    end

    # Loads the detail page of the subject +id+ and extracts that subject.
    private_class_method def self.subject_by_id(id)
      page = Helpers.set_crawler(id, 'graduacao/oferta_dados.aspx?cod=', exact: true)
      row_init_by_id(page)
    end

    # Builds one subject hash from a subject detail page.
    private_class_method def self.row_init_by_id(page)
      # Look the table up once instead of once per field.
      table = page.css('#datatable')[0]
      department_link = page.css('#datatable tr:first-child a').first
      { code: table.css('tr:nth-child(2) td').text.to_i,
        name: table.css('tr:nth-child(3) td').text,
        # The department code is the first run of digits in the link target.
        department: department_link['href'][/\d+/].to_i,
        level: LEVEL }
    end

    # Builds one subject hash per row of a department offer page.
    private_class_method def self.scrap_row(dep_code, page)
      # Query each column a single time; querying inside the per-row loop made
      # the work grow quadratically with the number of rows.
      codes = page.css('#datatable tr td:nth-child(1)').map(&:text)
      names = page.css('#datatable tr td:nth-child(2)').map(&:text)
      # zip follows the code column, so a row lacking a name cell gets name: nil.
      codes.zip(names).map do |code, name|
        { code: code.to_i, name: name, department: dep_code.to_i, level: LEVEL }
      end
    end
  end
end
44 changes: 44 additions & 0 deletions spec/fixtures/subjects/subject_116441.yml

Large diffs are not rendered by default.

44 changes: 44 additions & 0 deletions spec/fixtures/subjects/subjects_cic.yml

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions spec/subjects_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# frozen_string_literal: true

RSpec.describe Mwcrawler::Subjects do
  # Lists every subject of department 116 through the public Crawler entry
  # point, replaying the recorded 'subjects/subjects_cic' cassette.
  context 'Subjects by department: CIC' do
    before :all do
      VCR.use_cassette('subjects/subjects_cic') do
        @subjects = Mwcrawler::Crawler.new.subjects('116', by_department: true)
      end
    end

    it { expect(@subjects).to be_a_kind_of Array }

    # An Array can never be nil, so assert that rows were actually scraped.
    it { expect(@subjects).not_to be_empty }

    it { expect(@subjects.first.keys).to include(:code, :department, :name, :level) }

    it { expect(@subjects.first).to include(code: be_integer) }

    it { expect(@subjects.first).to include(code: be_nonzero) }

    it { expect(@subjects.first).to include(department: be_integer) }

    it { expect(@subjects.first).to include(department: be_nonzero) }
  end

  # Looks a single subject up by id, replaying the recorded
  # 'subjects/subject_116441' cassette.
  context 'crawls subject by id' do
    before :all do
      VCR.use_cassette('subjects/subject_116441') do
        @subject = Mwcrawler::Crawler.new.subjects('116441', by_id: true)
      end
    end

    it { expect(@subject).to be_a_kind_of Hash }

    it { expect(@subject.keys).to include(:code, :department, :name, :level) }

    it { expect(@subject).to include(code: be_integer) }

    it { expect(@subject).to include(code: be_nonzero) }

    it { expect(@subject).to include(department: be_integer) }

    it { expect(@subject).to include(department: be_nonzero) }
  end

  # scrap rejects the call before requesting any page when neither :by_id nor
  # :by_department is given, so this example needs no cassette.
  context 'without a lookup mode' do
    it { expect { described_class.scrap('116', {}) }.to raise_error(ArgumentError) }
  end
end