This repository has been archived by the owner on Mar 9, 2024. It is now read-only.
/
minibrowser.rb
147 lines (133 loc) · 4.68 KB
/
minibrowser.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
=begin
This file is part of Ruby-MediaWiki.
Ruby-MediaWiki is free software: you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Ruby-MediaWiki is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with Ruby-MediaWiki. If not, see
<http://www.gnu.org/licenses/>.
=end
require 'net/http'
require 'net/https'
require 'cgi'
module MediaWiki
  ##
  # The MiniBrowser is used to perform GET and POST requests
  # over HTTP and HTTPS, supporting:
  # * HTTP-Auth encoding in URLs (proto://user:password@host/...)
  # * Cookie support (cookies are collected from POST responses and
  #   sent with every subsequent request)
  # * HTTP Redirection (max. 10 in a row)
  #
  # All interaction with MiniBrowser is normally done by
  # MediaWiki::Wiki.
  class MiniBrowser
    # Maximum number of consecutive redirects followed by #get_content.
    MAX_REDIRECTS = 10

    ##
    # Initialize a MiniBrowser instance
    #
    # No connection is opened here; that happens per request.
    # For HTTPS URLs SSL is switched on. Peer verification against
    # +ca_file+ is only configured when that file exists; otherwise the
    # verify mode is left at whatever Net::HTTP defaults to.
    # url:: [URI::HTTP] or [URI::HTTPS]
    # ca_file:: [String] Path to a CA certificate file (nil to skip)
    def initialize(url, ca_file = "/etc/ssl/certs/ca-certificates.crt")
      @url = url
      @http = Net::HTTP.new(@url.host, @url.port)
      if @url.is_a?(URI::HTTPS)
        @http.use_ssl = true
        if ca_file && File.exist?(ca_file)
          @http.verify_mode = OpenSSL::SSL::VERIFY_PEER
          @http.ca_file = ca_file
        end
      end
      @user_agent = 'WikiBot'
      @cookies = {}
    end

    ##
    # Add cookies to the volatile cookie cache
    #
    # Each entry is a raw Set-Cookie value. Only the leading
    # name=value pair is kept; attributes after the first ';' are
    # dropped. Entries without a name=value pair are ignored. The
    # given strings are never modified.
    # cookies:: [Array] of [String] (a single [String] or nil is accepted, too)
    def add_cookie(cookies)
      Array(cookies).each do |raw|
        # sub (not gsub!) works on a copy, so frozen header values are
        # fine and the caller's data stays intact.
        pair = raw.to_s.sub(/;.*$/, '')
        if (match = pair.match(/([^=]+)=(.*)/))
          @cookies[match[1]] = match[2]
        end
      end
    end

    ##
    # Get the cookie cache in a serialized form ready for HTTP.
    # result:: [String] "name=value" pairs joined by ";" (empty if no cookies)
    def cookies
      @cookies.map { |key, value| "#{key}=#{value}" }.join(";")
    end

    ##
    # Perform a GET request
    #
    # This method accepts 10 HTTP redirects at max. The body is
    # returned for successful responses and also for 404 responses.
    # Raises RuntimeError on too many redirects, on a redirect without
    # Location header and on any other response class.
    # url:: [String]
    # result:: [String] Document
    def get_content(url)
      redirects_left = MAX_REDIRECTS
      @http.start do |http|
        loop do
          raise "too many redirects" if redirects_left < 1
          response = http.request(build_request(Net::HTTP::Get, url))
          case response
          when Net::HTTPSuccess, Net::HTTPNotFound
            # return leaves the method; Net::HTTP#start still closes
            # the connection on the way out.
            return response.body
          when Net::HTTPRedirection
            redirects_left -= 1
            url = redirect_location(response)
          else
            raise "Unknown Response: #{response.inspect}"
          end
        end
      end
    end

    ##
    # Perform a POST request
    #
    # Cookies from the response are stored in the cookie cache.
    # Will switch to MiniBrowser#get_content upon HTTP redirect.
    # Raises RuntimeError for responses that are neither a success
    # nor a redirect.
    # url:: [String]
    # data:: [Hash] POST data (keys and values are converted with to_s)
    # result:: [String] Document
    def post_content(url, data)
      post_data = data.map { |key, value|
        "#{CGI::escape(key.to_s)}=#{CGI::escape(value.to_s)}"
      }.join('&')

      response = nil
      @http.start do |http|
        response = http.request(build_request(Net::HTTP::Post, url), post_data)
      end

      case response
      when Net::HTTPSuccess
        store_cookies(response)
        response.body
      when Net::HTTPRedirection
        location = redirect_location(response)
        store_cookies(response)
        # The connection opened above is closed by now, so get_content
        # can start a fresh one.
        get_content(location)
      else
        raise "Unknown Response on #{url}: #{response.inspect}"
      end
    end

    private

    # Build a request of the given class carrying the common headers,
    # the current cookies and HTTP basic auth if the base URL has a user.
    def build_request(request_class, url)
      request = request_class.new(url, {'Content-Type' => 'application/x-www-form-urlencoded',
                                        'User-Agent' => @user_agent,
                                        'Cookie' => cookies})
      request.basic_auth(@url.user, @url.password) if @url.user
      request
    end

    # Copy all Set-Cookie headers of a response into the cookie cache.
    def store_cookies(response)
      return unless response['Set-Cookie']
      # get_fields yields one entry per header line; [] would join them.
      fields = if response.respond_to?(:get_fields)
                 response.get_fields('Set-Cookie')
               else
                 response['Set-Cookie']
               end
      add_cookie(fields)
    end

    # Log and return the redirect target of a response; raise if the
    # server sent a redirect status without a Location header.
    def redirect_location(response)
      location = response['Location']
      MediaWiki::logger.debug("Redirecting to #{location}")
      raise "Redirect without Location header: #{response.inspect}" if location.nil?
      location
    end
  end
end