-
Notifications
You must be signed in to change notification settings - Fork 0
/
lacaixa.rb
166 lines (145 loc) · 4.52 KB
/
lacaixa.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# encoding: utf-8
require 'capybara'
require 'capybara/dsl'
require 'show_me_the_cookies'
require 'fileutils'
require 'pry'
# Global Capybara settings applied as soon as this file is loaded: finders
# wait up to 20 (seconds, per Capybara's documentation) and hidden elements
# are not skipped when searching the page.
Capybara.ignore_hidden_elements = false
Capybara.default_wait_time = 20

# Register the :selenium driver so that it drives a Firefox browser.
Capybara.register_driver(:selenium) { |app|
  Capybara::Selenium::Driver.new(app, :browser => :firefox)
}
module Autility
  # Public: A scraper for the utility invoices listed in the LaCaixa online
  # banking mailbox.
  #
  # Examples
  #
  #   # Download every invoice from September of the current year to /tmp.
  #   LaCaixa.scrape("user", "password", 9, "/tmp")
  #
  #   # Download every invoice from December 2012 to /tmp.
  #   LaCaixa.scrape("user", "password", 12, "/tmp", 2012)
  #
  class LaCaixa
    include Capybara::DSL
    include ShowMeTheCookies

    # Internal: Host used as Capybara.app_host while logging in.
    LOGIN_HOST = 'http://empresa.lacaixa.es'.freeze

    # Internal: Host set as Capybara.app_host right before the Document is
    # built, and the fallback URL of the download request.
    DOWNLOAD_HOST = "https://loc6.lacaixa.es".freeze

    # Internal: Name of the cookie handed over to the Document.
    SESSION_COOKIE = "JSESSIONID_CTX".freeze

    # Internal: Selector of the form whose action and inputs make up the
    # download request.
    DOWNLOAD_FORM = "form[name=\"datos\"]".freeze

    # Internal: Fields forced onto the download request, overriding any input
    # of the same name found in the form.
    # NOTE(review): values were presumably captured from the site's own
    # "save as PDF" request; confirm against the website before changing them.
    FORCED_FIELDS = {
      "PN" => "COM",
      "PE" => "39",
      "RESOLUCION" => "300",
      "CANAL_MOVIMIENTO" => "INT",
      "target" => "Fr1",
      "PAGINA_SOLICITADA" => "00001",
      "FLAG_PDF_INICIAL" => "S",
      "FLUJO" => "COM,10,:COM,51:SCP,23:GFI,7,''",
      "CLICK_ORIG" => "FLX_COM_4",
      "OPCION" => ""
    }.freeze

    # Public: Instantiates a new scraper and fires it to download the utility
    # invoices from LaCaixa.
    #
    # args - The same arguments accepted by LaCaixa#initialize.
    #
    # Returns an Array with the String path of every saved invoice.
    def self.scrape(*args)
      new(*args).scrape
    end

    # Public: Initializes a scraper.
    #
    # user     - The String user name typed into the login form.
    # password - The String password typed into the login form.
    # month    - The Integer (or String) month whose invoices are wanted.
    # folder   - The String path of the folder where invoices are saved.
    # year     - The Integer year whose invoices are wanted (default: the
    #            current year). Pass it explicitly to fetch, for instance,
    #            last December's invoices in January.
    def initialize(user, password, month, folder, year = Time.now.year)
      @user     = user
      @password = password
      @month    = month
      @folder   = folder
      @year     = year
    end

    # Public: Logs in to the LaCaixa website and downloads the invoices of
    # the configured month and year into @folder (created if missing).
    #
    # index - The zero-based Integer position of a single invoice among the
    #         listed rows matching the month and year, or nil to download
    #         all of them (default: nil).
    #
    # Returns the String path of the saved file when index is given, or an
    #   Array of such paths (one per matching row) when index is nil.
    # Raises IndexError if index does not point at a matching row.
    def scrape(index = nil)
      setup_capybara
      log_in
      FileUtils.mkdir_p(@folder)
      return document unless index

      filename = "#{@folder}/lacaixa_#{month}_#{@year}__#{index}.pdf"
      document(index).save(filename)
      filename
    end

    private

    # Internal: Logs in to the LaCaixa website.
    #
    # Returns nothing.
    def log_in
      visit "/home/empreses_ca.html"
      fill_in "u", :with => @user
      fill_in "p", :with => @password
      first(".loginbut").click
    end

    # Internal: Navigates to the invoice listing and either prepares the
    # download of one invoice or downloads all of them.
    #
    # index - The zero-based Integer position of the wanted invoice among the
    #         matching rows, or nil to download every matching invoice
    #         (default: nil).
    #
    # Returns the Document to be fetched (memoized in @document) when index
    #   is given, or an Array of String paths of the saved invoices when
    #   index is nil (nothing is memoized in that case).
    # Raises IndexError if index does not point at a matching row.
    # Raises RuntimeError if the session cookie cannot be found.
    def document(index = nil)
      @document ||= begin
        open_mailbox

        within_second_frame do
          within_frame('Cos') do
            rows = invoice_rows
            # `return` leaves #document altogether, skipping the memoization,
            # exactly when every invoice has to be downloaded.
            return scrape_each(rows.size) unless index

            row = rows.fetch(index) do
              raise IndexError,
                    "no invoice at index #{index} for #{month}/#{@year} " \
                    "(#{rows.size} found)"
            end
            row.find("a").click
          end
        end

        url, params = download_request
        Capybara.app_host = DOWNLOAD_HOST
        Document.new(url, :post, session_cookie, params)
      end
    end

    # Internal: Waits for a frame to show up and runs the block inside the
    # second frame of the page.
    #
    # Returns nothing meaningful.
    def within_second_frame(&block)
      wait_until { find('frame') }
      within_frame(all('frame')[1][:name], &block)
    end

    # Internal: Clicks the '#buzon1.msn' link found in the first frame nested
    # inside the second frame of the page.
    #
    # Returns nothing.
    def open_mailbox
      within_second_frame do
        within_frame(find('frame')[:name]) do
          find('#buzon1.msn a').click
        end
      end
    end

    # Internal: Clicks the '#lbl_Varios' link and collects the table rows
    # whose first cell mentions the configured month and year. Must be called
    # from within the 'Cos' frame.
    #
    # Returns an Array of row elements.
    def invoice_rows
      # Fixed pauses around the click.
      # NOTE(review): presumably the listing loads asynchronously; confirm
      # whether the wait_until below would be enough on its own.
      sleep 3
      find("#lbl_Varios a").click
      sleep 3
      wait_until { find('#enlaceDescr') }

      period = %r{#{month}/#{@year}}
      all('.table_generica tr').select { |tr| tr.find("td").text =~ period }
    end

    # Internal: Downloads the invoices at positions 0...count, each with a
    # brand new scraper (which sets Capybara up and logs in again).
    #
    # count - The Integer number of matching rows.
    #
    # Returns an Array with the String path of every saved invoice.
    def scrape_each(count)
      (0...count).map do |idx|
        LaCaixa.new(@user, @password, @month, @folder, @year).scrape(idx)
      end
    end

    # Internal: Reads the download form shown in the 'Fr1' frame.
    #
    # Returns a two-element Array: the String action of the form and the
    #   Hash of its input names to values with FORCED_FIELDS applied on top.
    def download_request
      # Declared out here so the assignments inside the blocks reach them.
      action = DOWNLOAD_HOST
      fields = {}

      within_second_frame do
        within_frame('Cos') do
          within_frame("Fr1") do
            action = find(DOWNLOAD_FORM)[:action]
            within(DOWNLOAD_FORM) do
              fields = all('input').each_with_object({}) do |input, acc|
                acc[input[:name]] = input[:value]
              end.update(FORCED_FIELDS)
            end
          end
        end
      end

      [action, fields]
    end

    # Internal: Wraps the browser's session cookie in a Cookie.
    #
    # Returns the Cookie.
    # Raises RuntimeError if the browser holds no such cookie.
    def session_cookie
      found = get_me_the_cookie(SESSION_COOKIE)
      unless found
        raise "cookie #{SESSION_COOKIE} not found; the login may have failed"
      end

      Cookie.new(SESSION_COOKIE, found[:value])
    end

    # Internal: Returns the String configured month padded with zeros to the
    # left up to two characters.
    def month
      @month.to_s.rjust(2, '0')
    end

    # Internal: Sets the configuration for capybara to work with the LaCaixa
    # website.
    #
    # Returns nothing.
    def setup_capybara
      Capybara.run_server = false
      Capybara.current_driver = :selenium
      Capybara.app_host = LOGIN_HOST
      Capybara.default_wait_time = 20
      Capybara.ignore_hidden_elements = false
    end
  end
end