Skip to content

Commit 0ef1fed

Browse files
author
Joe Blackman
committed
Ported autoheathen from heathen repo
1 parent 34b0649 commit 0ef1fed

File tree

14 files changed

+664
-12
lines changed

14 files changed

+664
-12
lines changed

Gemfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ gem 'iso-639'
1010
gem 'wkhtmltopdf'
1111
gem 'sidekiq'
1212
gem 'rest-client'
13+
gem 'mail'
1314

1415
gem 'rspec'
1516
gem 'rack-test'

Gemfile.lock

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ GEM
2323
iso-639 (0.2.5)
2424
json (1.8.1)
2525
kgio (2.9.3)
26+
mail (2.6.3)
27+
mime-types (>= 1.16, < 3)
2628
mime-types (2.4.3)
2729
netrc (0.10.2)
2830
rack (1.6.0)
@@ -79,6 +81,7 @@ DEPENDENCIES
7981
haml
8082
iso-639
8183
json
84+
mail
8285
mime-types
8386
rack-test
8487
rest-client

bin/autoheathen

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env ruby
#
# Script:       autoheathen
# Description:  This script takes an encoded mail message from standard input and submits
#               any attachments of the allowed MIME types to Heathen for conversion.
#
#               The resulting converted files are then delivered in accordance with the
#               command line switches provided.
#
# Usage:        {mailfile} | bundle exec {path to}/autoheathen { -r | -t EMAIL | -s } {opts}
#
# Options:      -r  return converted files to sender email address (the default)
#               -t  send converted files to the specified mail inbox
#               -s  don't deliver, just output a summary
#
#               Run "autoheathen -h" to get a list of other options, which are basically
#               configuration settings for mail host, etc. It is also possible
#               to load all of the configuration from a YAML file (-C flag); options
#               given on the command line take precedence over the file.
#
require 'optparse'
require 'pathname'
require 'logger' # Logger is instantiated below by the -v switch

base_dir = Pathname.new(__FILE__).realpath.parent.parent
$: << base_dir.to_s
$: << (base_dir + 'lib').to_s
require 'autoheathen'
require 'mail'

cfg = {}
config_file = nil
mode = :return_to_sender
mail_to = nil

OptionParser.new do |opts|
  opts.on( '-r', '--return-to-sender', 'Converted files will be emailed back to sender' ) { mode = :return_to_sender }
  opts.on( '-t', '--to EMAIL', 'Converted files will be emailed to this address' ) { |e| mode = :email; mail_to = e }
  opts.on( '-s', '--summary', 'Don\'t deposit the converted file, just log a summary' ) { cfg[:deliver] = false }
  # The switch must name its argument, otherwise OptionParser treats it as a
  # boolean flag and hands the block `true` instead of the language code.
  opts.on( '-l', '--language LANGUAGE', 'Document language' ) { |l| cfg[:language] = l }
  opts.on( '-M', '--mail-host MAILHOST', 'Mail server for sending replies' ) { |h| cfg[:mail_host] = h }
  opts.on( '-P', '--mail-port PORT', Integer, 'Mail server port' ) { |p| cfg[:mail_port] = p }
  # Kept apart from cfg: the processor takes the file as its own constructor argument.
  opts.on( '-C', '--config FILE', 'Configuration YAML file' ) { |file| config_file = file }
  opts.on( '-v', '--verbose', 'Running commentary' ) do
    cfg[:logger] = Logger.new(STDOUT)
    cfg[:verbose] = true # the processor only logs at debug level when :verbose is set
  end
end.parse!

email = Mail.read_from_string $stdin.read
processor = AutoHeathen::EmailProcessor.new(cfg, config_file)
case mode
when :return_to_sender
  processor.process_rts email
when :email
  processor.process email, mail_to
end

lib/autoheathen.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
require 'heathen'
2+
require_relative 'autoheathen/config'
3+
require_relative 'autoheathen/email_processor'

lib/autoheathen/config.rb

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
require 'yaml' # load_config parses YAML; do not rely on another file having loaded it

module AutoHeathen
  # Mixin providing layered configuration loading.
  module Config
    # Builds a configuration hash with symbol keys from three layers, each
    # overriding the previous one: defaults, then the YAML file (only if a
    # name is given and the file exists), then overwrites.
    # @param defaults    hash of default settings
    # @param config_file optional path of a YAML file holding a hash of settings
    # @param overwrites  hash of settings that take precedence over everything else
    # @return a new hash with symbolized keys
    def load_config defaults={}, config_file=nil, overwrites={}
      cfg = symbolize_keys(defaults)
      if config_file && File.exist?(config_file)
        cfg.merge! symbolize_keys(YAML.load_file(config_file))
      end
      cfg.merge! symbolize_keys(overwrites) # non-file opts have precedence
    end

    # Returns a copy of the hash in which every String key is converted to a
    # Symbol, recursing into nested hashes. Non-String keys are kept as they
    # are; nil (or false) is treated as an empty hash.
    # @param hash the hash to convert
    # @return a new hash
    def symbolize_keys(hash)
      (hash || {}).each_with_object({}) do |(key, value), result|
        new_key = key.is_a?(String) ? key.to_sym : key
        result[new_key] = value.is_a?(Hash) ? symbolize_keys(value) : value
      end
    end
  end
end

lib/autoheathen/email_processor.rb

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
require 'mail'
2+
require 'yaml'
3+
require 'logger'
4+
require 'haml'
5+
require 'filemagic/ext'
6+
require_relative 'config'
7+
require 'heathen'
8+
9+
module AutoHeathen
10+
class EmailProcessor
11+
include AutoHeathen::Config
12+
13+
# The only email headers that are kept when a mail is forwarded onward to LEG_wikilex
14+
ONWARD_HEADERS = ['Date','From','To','Subject','Content-Type','Content-Transfer-Encoding','Mime-Version']
15+
16+
attr_reader :cfg, :logger
17+
18+
# Builds the processor from layered configuration: built-in defaults, then the
# optional YAML config file, then the cfg hash (which wins).
# @param cfg a hash of configuration settings (defaults shown):
#   deliver:       true        If false, email will not actually be sent (useful for testing)
#   language:      'en'        Language passed to the converter for each attachment
#   from:          'autoheathen'  Who to say the response email is from
#   cc_blacklist:  nil         Addresses to excise from the CC list of response mails
#                              - used to avoid infinite loops in autoheathen
#   email:         nil         Email to send response to
#   verbose:       false       If true the logger level is DEBUG, otherwise INFO
#   mail_host:     'localhost' Mail relay host for outgoing mail
#   mail_port:     25          Mail relay port
#   logger:        nil         Optional logger object (a null logger is used when absent)
#   text_template: 'config/autoheathen.text.haml'  Template for text part of response email
#   html_template: 'config/autoheathen.html.haml'  Template for HTML part of response email
# @param config_file optional YAML file whose settings override the defaults
def initialize cfg={}, config_file=nil
  defaults = {
    deliver: true,
    language: 'en',
    from: 'autoheathen',
    cc_blacklist: nil,
    email: nil,
    verbose: false,
    mail_host: 'localhost',
    mail_port: 25,
    logger: nil,
    text_template: 'config/autoheathen.text.haml',
    html_template: 'config/autoheathen.html.haml',
  }
  @cfg = load_config( defaults, config_file, cfg )

  @logger = @cfg[:logger] || Logger.new(nil)
  # Note: this also adjusts the level of a logger supplied by the caller
  @logger.level = if @cfg[:verbose]
                    Logger::DEBUG
                  else
                    Logger::INFO
                  end
end
47+
48+
# Return-to-sender variant of #process: the results are addressed to whatever
# the email reports as its From value.
def process_rts email
  sender = email.from
  process(email, sender, true)
end
51+
52+
# Converts each attachment of the given email with Heathen and delivers the results.
# @param email   the mail message object (queried for from, subject and attachments)
# @param mail_to the address the results are delivered to
# @param is_rts  if true the results go out via deliver_rts, otherwise via deliver_onward
# @return nil when the email has no attachments; otherwise an array with one hash per
#   attachment, holding :orig_filename, :orig_content, :filename, :content and :error
#   (:error is false on success, or the failure message when conversion failed)
def process email, mail_to, is_rts=false
  unless email.has_attachments?
    logger.info "From: #{email.from} Subject: (#{email.subject}) Files: no attachments"
    return
  end

  logger.info "From: #{email.from} Subject: (#{email.subject}) Files: #{email.attachments.map(&:filename).join(',')}"

  # Convert the attachments; a failure is recorded against its document rather than raised
  documents = email.attachments.map do |attachment|
    input_source = nil # remains nil if the failure happens before decoding
    begin
      converter = Heathen::Converter.new( logger: logger )
      language = @cfg[:language]
      input_source = attachment.body.decoded
      action = get_action input_source.content_type
      logger.info " convert #{attachment.filename} using action: #{action}"
      data = converter.convert action, input_source, language
      converted_filename = Heathen::Filename.suggest attachment.filename, data.mime_type
      { orig_filename: attachment.filename, orig_content: input_source, filename: converted_filename, content: data, error: false }
    rescue StandardError => e
      { orig_filename: attachment.filename, orig_content: input_source, filename: nil, content: nil, error: e.message }
    end
  end

  # Deliver the results
  is_rts ? deliver_rts(email, documents, mail_to) : deliver_onward(email, documents, mail_to)

  # Summarise the processing
  logger.info "Results of conversion"
  documents.each do |doc|
    summary = if doc[:content].nil?
                " #{doc[:orig_filename]} was not converted (#{doc[:error]}) "
              else
                " #{doc[:orig_filename]} was converted successfully"
              end
    logger.info summary
  end

  documents
end
106+
107+
# Forwards the original email to mail_to, with the converted documents
# replacing the original attachments. The email object is modified in place.
# @param email     the original mail message
# @param documents the per-attachment result hashes built by #process
# @param mail_to   the address to forward the email to
def deliver_onward email, documents, mail_to
  logger.info "Sending response mail to #{mail_to}"
  email.cc [] # No CCing, just send to the recipient
  email.to mail_to
  # A message may have no Subject header at all, in which case subject is nil
  subject = email.subject.to_s
  email.subject "#{'Fwd: ' unless subject.start_with? 'Fwd:'}#{subject}"
  email.return_path email.from unless email.return_path
  # something weird goes on with Sharepoint, where the doc is dropped on the floor
  # so, remove any offending headers
  email.message_id = nil # make sure of message_id too
  good_headers = ONWARD_HEADERS.map(&:downcase)
  # Collect the names first so the header is not modified while being iterated
  header_names = email.header.map(&:name)
  header_names.each do |name|
    email.header[name] = nil unless good_headers.include? name.downcase
  end
  email.received = nil # make sure of received
  # replace attachments with converted files
  email.parts.delete_if(&:attachment?)
  documents.each do |doc|
    if doc[:content]
      email.add_file filename: doc[:filename], content: doc[:content]
    else # preserve non-converted attachments when forwarding
      email.add_file filename: doc[:orig_filename], content: doc[:orig_content]
    end
  end
  email.delivery_method :smtp, address: @cfg[:mail_host], port: @cfg[:mail_port]
  deliver email
end
137+
138+
# Sends the converted documents back in a newly built response mail whose
# text and HTML parts are rendered from the configured Haml templates.
# Documents that could not be converted are not attached (the templates
# receive the full documents list).
# @param email     the original mail message
# @param documents the per-attachment result hashes built by #process
# @param mail_to   the address to send the response to
def deliver_rts email, documents, mail_to
  logger.info "Sending response mail to #{mail_to}"
  mail = Mail.new
  mail.from @cfg[:from]
  mail.to mail_to
  # CCs to the original email will get a copy of the converted files as well.
  # Array() guards against an email without a To header and against a
  # blacklist configured as a single address instead of a list.
  if email.cc
    mail.cc( Array(email.cc) - Array(email.to) - Array(@cfg[:cc_blacklist]) ) # Prevent autoheathen infinite loop!
  end
  # Don't prepend yet another Re: (subject is nil when the header is absent)
  subject = email.subject.to_s
  mail.subject "#{'Re: ' unless subject.start_with? 'Re:'}#{subject}"
  # Construct received path
  # TODO: is this in the right order?
  #rcv = "by localhost(autoheathen); #{Time.now.strftime '%a, %d %b %Y %T %z'}"
  #[email.received,rcv].flatten.each { |rec| mail.received rec.to_s }
  mail.return_path email.return_path if email.return_path
  mail.header['X-Received'] = email.header['X-Received'] if email.header['X-Received']
  documents.each do |doc|
    next if doc[:content].nil?
    mail.add_file filename: doc[:filename], content: doc[:content]
  end
  # The part-building blocks below cannot see this object's methods or
  # instance variables, so hand them over as local variables.
  cfg = @cfg
  me = self
  mail.text_part do
    s = Haml::Engine.new( me.read_file cfg[:text_template] ).render(Object.new, to: mail_to, documents: documents, cfg: cfg)
    body s
  end
  mail.html_part do
    content_type 'text/html; charset=UTF-8'
    s = Haml::Engine.new( me.read_file cfg[:html_template] ).render(Object.new, to: mail_to, documents: documents, cfg: cfg)
    body s
  end
  mail.delivery_method :smtp, address: @cfg[:mail_host], port: @cfg[:mail_port]
  deliver mail
end
172+
173+
# Single choke point for sending mail, so that actual delivery can be stubbed
# out in RSpec. Sends the mail only when the :deliver setting is truthy;
# either way a debug line records what happened (or would have happened).
def deliver mail
  unless @cfg[:deliver]
    return logger.debug "Files would have been emailed to #{mail.to}, but #{self.class.name} is configured not to"
  end

  mail.deliver!
  logger.debug "Files were emailed to #{mail.to}"
end
182+
183+
# Reads a file and returns its content. The filename is tried as given first;
# if no such file exists it is resolved against the project base directory
# (three levels above this source file).
# @param filename path of the file to read
# @return the file content as a String
# @raise Errno::ENOENT if the file exists in neither location
def read_file filename
  return File.read(filename) if File.exist?(filename)

  # Plain File methods are used so this does not depend on 'pathname' being loaded
  project_root = File.expand_path('../../..', File.realpath(__FILE__))
  File.read File.expand_path(filename, project_root)
end
191+
192+
# Returns the conversion action ('ocr' or 'pdf') for the given content type.
# Anything from the first ';' onwards (e.g. a charset parameter) is ignored,
# as are surrounding whitespace and letter case, since MIME type names are
# case-insensitive. Any image/* type maps to 'ocr'.
# @param content_type the MIME content type string (nil is treated as empty)
# @return the action name
# @raise RuntimeError if there is no conversion action for the content type
def get_action content_type
  ct = content_type.to_s.sub(/;.*/m, '').strip.downcase
  actions = {
    'application/pdf' => 'ocr',
    'text/html' => 'pdf',
    'application/zip' => 'pdf',
    'application/msword' => 'pdf',
    'application/vnd.oasis.opendocument.text' => 'pdf',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'pdf',
    'application/vnd.ms-excel' => 'pdf',
    'application/vnd.ms-office' => 'pdf',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'pdf',
    'application/vnd.ms-powerpoint' => 'pdf',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'pdf',
  }
  op = actions[ct]
  op = 'ocr' if op.nil? && ct.start_with?('image/')
  raise "Conversion from #{ct} is not supported" unless op
  op
end
213+
214+
end
215+
end
216+
217+

lib/heathen/task.rb

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,34 +17,37 @@ module Heathen
1717
class Task
1818
class << self
1919
def tasks
20-
@tasks ||= []
20+
@tasks ||= {}
2121
end
2222

2323
# Registers a code block to be run for the given action and mime type.
2424
def register action, mime_type_pattern, &block
25-
tasks << { action: action, mime_type_pattern: mime_type_pattern, proc: block }
25+
tasks[task_key(action,mime_type_pattern)] = { action: action, mime_type_pattern: mime_type_pattern, proc: block }
2626
end
2727

2828
# Finds a registered task suitable for the given action and mime type (note, the first
2929
# suitable one will be selected).
3030
def find action, mime_type
31-
tasks.each do |task|
31+
tasks.each do |k,task|
3232
if task[:action] == action && mime_type =~ %r[#{task[:mime_type_pattern]}]
3333
return task
3434
end
3535
end
3636
raise TaskNotFound.new action, mime_type
3737
end
3838

39-
# Performs a task (generally called in a
40-
#def perform action, job
41-
#task = find action, job.mime_type
42-
#task[:proc].call job
43-
#end
39+
# Deletes any tasks that are keyed for the given action (and optional mime_type pattern,
# as specified in register). With no mime_type, every task of that action is deleted.
# Keys are compared as literal text from the start of the key, so pattern characters
# in a mime type (such as '+' or '.*') match themselves and an action never matches
# another action that merely ends with the same name.
# Used for testing purposes
def clear action, mime_type=nil
  prefix = task_key(action, mime_type)
  tasks.keys.each do |key|
    tasks.delete key if key.start_with? prefix
  end
end
4446

45-
# Deletes all registered tasks
46-
def clear
47-
@tasks = []
47+
protected
48+
49+
# Builds the string key for an action / mime type pattern pair: the two
# values joined by ' -- ' (a nil mime_type contributes an empty string).
def task_key action, mime_type
  [action, mime_type].join(' -- ')
end
4952
end
5053
end
@@ -74,3 +77,7 @@ def clear
7477
libreoffice 'oo'
7578
end
7679

80+
# support legacy method
81+
Heathen::Task.register 'doc', '.*' do
82+
perform_task 'microsoft'
83+
end
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
%p You submitted a document conversion request. Here are the results:
2+
%br
3+
- documents.each do |doc|
4+
- if doc[:content].nil?
5+
%p #{doc[:orig_filename]} was not converted (#{doc[:error]})
6+
- else
7+
%p #{doc[:orig_filename]} was converted successfully
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
:plain
2+
You submitted a document conversion request. Here are the results:
3+
4+
- documents.each do |doc|
5+
- if doc[:content].nil?
6+
#{doc[:orig_filename]} was not converted (#{doc[:error]})
7+
- else
8+
#{doc[:orig_filename]} was converted successfully

0 commit comments

Comments
 (0)