|
| 1 | +require 'mail' |
| 2 | +require 'yaml' |
| 3 | +require 'logger' |
| 4 | +require 'haml' |
| 5 | +require 'filemagic/ext' |
| 6 | +require_relative 'config' |
| 7 | +require 'heathen' |
| 8 | + |
| 9 | +module AutoHeathen |
| 10 | + class EmailProcessor |
| 11 | + include AutoHeathen::Config |
| 12 | + |
# The only email headers allowed through when a mail is forwarded onward to
# LEG_wikilex; deliver_onward removes every header whose name is not listed here
# (the comparison there is case-insensitive).
# Frozen so the whitelist cannot be modified by accident at runtime.
ONWARD_HEADERS = %w[Date From To Subject Content-Type Content-Transfer-Encoding Mime-Version].freeze
| 15 | + |
| 16 | + attr_reader :cfg, :logger |
| 17 | + |
# Builds the processor and its logger.
#
# The built-in defaults listed below are handed to load_config together with
# config_file and cfg; how the three sources are merged is decided by load_config
# (presumably file and explicit settings override the defaults - confirm in AutoHeathen::Config).
#
# @param cfg a hash of configuration settings (built-in default in brackets):
#   deliver:       [true]          If false, email is not actually sent (useful for testing)
#   language:      ['en']          Language passed to the converter
#   from:          ['autoheathen'] Who to say the response email is from
#   cc_blacklist:  [nil]           Array of addresses to excise from the CC list of responses
#                                  - used to avoid infinite loops in autoheathen
#   email:         [nil]           Email to send the response to (NOTE(review): not read in this class - confirm usage)
#   verbose:       [false]         If truthy the logger runs at DEBUG level, otherwise INFO
#   mail_host:     ['localhost']   Mail relay host
#   mail_port:     [25]            Mail relay port
#   logger:        [nil]           Optional logger object; a silent Logger is created when absent
#   text_template: ['config/autoheathen.text.haml'] Template for the text part of the response email
#   html_template: ['config/autoheathen.html.haml'] Template for the HTML part of the response email
# @param config_file optional configuration file, passed through to load_config
def initialize(cfg = {}, config_file = nil)
  defaults = {
    deliver: true,
    language: 'en',
    from: 'autoheathen',
    cc_blacklist: nil,
    email: nil,
    verbose: false,
    mail_host: 'localhost',
    mail_port: 25,
    logger: nil,
    text_template: 'config/autoheathen.text.haml',
    html_template: 'config/autoheathen.html.haml',
  }
  @cfg = load_config(defaults, config_file, cfg)

  # Fall back to a logger that discards everything when none was configured.
  @logger = @cfg[:logger] || Logger.new(nil)
  # Note: the level is set even on a caller-supplied logger.
  if @cfg[:verbose]
    @logger.level = Logger::DEBUG
  else
    @logger.level = Logger::INFO
  end
end
| 47 | + |
# Return-to-sender entry point: runs #process with the email's own From value
# as the recipient and the return-to-sender flag set.
# @param email the mail object to process (must respond to #from)
# @return whatever #process returns
def process_rts(email)
  sender = email.from
  process(email, sender, true)
end
| 51 | + |
# Converts every attachment of the given email and delivers the results.
#
# Each attachment is decoded, mapped to a conversion action via get_action and
# handed to a Heathen::Converter. A failure is recorded for that attachment only
# (it never aborts the run), so one bad file does not block the others.
#
# @param email   the mail object to process (must respond to has_attachments?,
#                attachments, from and subject)
# @param mail_to the recipient(s) the results are delivered to
# @param is_rts  when true the results are sent via deliver_rts, otherwise the
#                original mail is forwarded via deliver_onward
# @return [Array<Hash>, nil] one hash per attachment with the keys
#   :orig_filename, :orig_content, :filename, :content and :error
#   (false on success, otherwise the error message);
#   nil when the email carries no attachments (nothing is delivered then)
def process(email, mail_to, is_rts = false)
  unless email.has_attachments?
    logger.info "From: #{email.from} Subject: (#{email.subject}) Files: no attachments"
    return
  end

  logger.info "From: #{email.from} Subject: (#{email.subject}) Files: #{email.attachments.map(&:filename).join(',')}"

  #
  # Convert the attachments
  #
  documents = email.attachments.map do |attachment|
    input_source = nil
    begin
      # Decode first, so the original content is still available for forwarding
      # even if setting up or running the converter fails.
      input_source = attachment.body.decoded
      converter = Heathen::Converter.new(logger: logger)
      action = get_action(input_source.content_type)
      logger.info " convert #{attachment.filename} using action: #{action}"
      data = converter.convert(action, input_source, @cfg[:language])
      converted_filename = Heathen::Filename.suggest(attachment.filename, data.mime_type)
      { orig_filename: attachment.filename, orig_content: input_source, filename: converted_filename, content: data, error: false }
    rescue StandardError => e
      { orig_filename: attachment.filename, orig_content: input_source, filename: nil, content: nil, error: e.message }
    end
  end

  #
  # deliver the results
  #
  if is_rts
    deliver_rts email, documents, mail_to
  else
    deliver_onward email, documents, mail_to
  end

  #
  # Summarise the processing
  #
  logger.info "Results of conversion"
  documents.each do |doc|
    if doc[:content].nil?
      logger.info " #{doc[:orig_filename]} was not converted (#{doc[:error]}) "
    else
      logger.info " #{doc[:orig_filename]} was converted successfully"
    end
  end

  documents
end
| 106 | + |
# Forwards the original email to mail_to, with the converted documents replacing
# the original attachments.
#
# The incoming mail object is modified in place: CC is cleared, the recipient and
# subject are rewritten, every header not listed in ONWARD_HEADERS is removed,
# and all attachment parts are replaced. Attachments that could not be converted
# are re-attached with their original name and content.
#
# @param email     the original mail object (mutated)
# @param documents the per-attachment result hashes produced by #process
# @param mail_to   the recipient(s) of the forwarded mail
def deliver_onward(email, documents, mail_to)
  logger.info "Sending response mail to #{mail_to}"
  email.cc([]) # No CCing, just send to the recipient
  email.to mail_to
  # A mail without a Subject header yields nil; treat that as an empty subject
  # instead of failing on nil.start_with?
  subject = email.subject.to_s
  email.subject "#{'Fwd: ' unless subject.start_with? 'Fwd:'}#{subject}"
  email.return_path email.from unless email.return_path
  # something weird goes on with Sharepoint, where the doc is dropped on the floor
  # so, remove any offending headers
  email.message_id = nil # make sure of message_id too
  allowed = ONWARD_HEADERS.map(&:downcase)
  # Take a snapshot of the header names first: headers are deleted while looping.
  email.header.map(&:name).each do |name|
    email.header[name] = nil unless allowed.include?(name.downcase)
  end
  email.received = nil # make sure of received
  # replace attachments with converted files
  email.parts.delete_if(&:attachment?)
  documents.each do |doc|
    if doc[:content]
      email.add_file filename: doc[:filename], content: doc[:content]
    else # preserve non-converted attachments when forwarding
      email.add_file filename: doc[:orig_filename], content: doc[:orig_content]
    end
  end
  email.delivery_method :smtp, address: @cfg[:mail_host], port: @cfg[:mail_port]
  deliver email
end
| 137 | + |
# Sends the converted documents back in a newly built response mail.
#
# The response is sent from cfg[:from] to mail_to. Addresses CC'd on the original
# mail are CC'd on the response too, except the original To addresses and anything
# in cfg[:cc_blacklist]. Only successfully converted documents are attached. The
# text and HTML parts are rendered from the Haml templates named by
# cfg[:text_template] and cfg[:html_template].
#
# @param email     the original mail object (read only here)
# @param documents the per-attachment result hashes produced by #process
# @param mail_to   the recipient(s) of the response
def deliver_rts(email, documents, mail_to)
  logger.info "Sending response mail to #{mail_to}"
  mail = Mail.new
  mail.from @cfg[:from]
  mail.to mail_to
  # CCs to the original email will get a copy of the converted files as well.
  # Array() guards against a mail with a Cc but no To header, and against an
  # unset blacklist. The subtraction prevents an autoheathen infinite loop!
  if email.cc
    mail.cc(email.cc - Array(email.to) - Array(@cfg[:cc_blacklist]))
  end
  # Don't prepend yet another Re: (a missing Subject header is treated as empty)
  subject = email.subject.to_s
  mail.subject "#{'Re: ' unless subject.start_with? 'Re:'}#{subject}"
  # Construct received path
  # TODO: is this in the right order?
  #rcv = "by localhost(autoheathen); #{Time.now.strftime '%a, %d %b %Y %T %z'}"
  #[email.received,rcv].flatten.each { |rec| mail.received rec.to_s }
  mail.return_path email.return_path if email.return_path
  mail.header['X-Received'] = email.header['X-Received'] if email.header['X-Received']
  documents.each do |doc|
    next if doc[:content].nil?
    mail.add_file filename: doc[:filename], content: doc[:content]
  end
  # The part blocks below are evaluated by Mail with a different self, so
  # everything they need is captured in local variables first.
  cfg = @cfg
  me = self
  mail.text_part do
    body Haml::Engine.new(me.read_file(cfg[:text_template])).render(Object.new, to: mail_to, documents: documents, cfg: cfg)
  end
  mail.html_part do
    content_type 'text/html; charset=UTF-8'
    body Haml::Engine.new(me.read_file(cfg[:html_template])).render(Object.new, to: mail_to, documents: documents, cfg: cfg)
  end
  mail.delivery_method :smtp, address: @cfg[:mail_host], port: @cfg[:mail_port]
  deliver mail
end
| 172 | + |
# Single choke point for sending mail, so that actual delivery can be stubbed
# out in RSpec or disabled through the :deliver setting.
# @param mail the mail object to send (must respond to deliver! and to)
# @return the result of the logger.debug call made for the chosen branch
def deliver(mail)
  dry_run = !@cfg[:deliver]
  if dry_run
    return logger.debug("Files would have been emailed to #{mail.to}, but #{self.class.name} is configured not to")
  end

  mail.deliver!
  logger.debug("Files were emailed to #{mail.to}")
end
| 182 | + |
# Reads a file and returns its content.
#
# The name is tried as given first (relative to the current working directory).
# If no such file exists, it is resolved against the project base directory,
# taken to be three directory levels above this source file's real path.
#
# Uses only File methods, so no extra require (such as 'pathname') is needed.
#
# @param filename path of the file to read
# @return [String] the file content
# @raise [Errno::ENOENT] if the file exists in neither location
def read_file(filename)
  path = filename
  unless File.exist?(path)
    base_dir = File.expand_path('../../..', File.realpath(__FILE__))
    # absolute_path leaves an already absolute filename untouched and, unlike
    # expand_path, does not give a leading "~" any special meaning.
    path = File.absolute_path(filename, base_dir)
  end
  File.read(path)
end
| 191 | + |
# Conversion action for each explicitly supported MIME type.
# Any image/* type is additionally handled by get_action (OCR).
CONVERSION_ACTIONS = {
  'application/pdf' => 'ocr',
  'text/html' => 'pdf',
  'application/zip' => 'pdf',
  'application/msword' => 'pdf',
  'application/vnd.oasis.opendocument.text' => 'pdf',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'pdf',
  'application/vnd.ms-excel' => 'pdf',
  'application/vnd.ms-office' => 'pdf',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'pdf',
  'application/vnd.ms-powerpoint' => 'pdf',
  'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'pdf',
}.freeze

# Returns the conversion action for a content type.
#
# Parameters after the first ";" (e.g. "; charset=binary") are ignored, and the
# type is matched case-insensitively with surrounding whitespace removed.
#
# @param content_type the content type string, optionally with parameters
# @return [String] 'ocr' or 'pdf'
# @raise RuntimeError if there is no conversion action for the content type
#   (this includes a nil or empty content type)
def get_action(content_type)
  # Keep only the media type itself: everything before the first ";".
  ct = content_type.to_s[/\A[^;]*/].strip.downcase
  op = CONVERSION_ACTIONS[ct]
  op = 'ocr' if !op && ct.start_with?('image/')
  raise "Conversion from #{ct} is not supported" unless op
  op
end
| 213 | + |
| 214 | + end |
| 215 | +end |
| 216 | + |
| 217 | + |
0 commit comments