Skip to content

Commit

Permalink
Fixed a bug with db inserts and emojis. Filters now instantiated with…
Browse files Browse the repository at this point in the history
… filtered words/phrase accessible.
  • Loading branch information
aantix committed Aug 31, 2015
1 parent edefaf7 commit 6afebc1
Show file tree
Hide file tree
Showing 15 changed files with 190 additions and 82 deletions.
4 changes: 4 additions & 0 deletions config/database.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@ development:
username: root
password:
pool: 25
encoding: utf8mb4
collation: utf8mb4_unicode_ci

test:
adapter: mysql2
database: followup_test
username: root
password:
encoding: utf8mb4
collation: utf8mb4_unicode_ci

production:
pool: 5
Expand Down
4 changes: 2 additions & 2 deletions db/migrate/20140807065935_devise_create_users.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ class DeviseCreateUsers < ActiveRecord::Migration
def change
create_table(:users) do |t|
## Database authenticatable
t.string :email, null: false, default: ""
t.string :email, null: false, default: "", limit: 128
t.string :encrypted_password, null: false, default: ""
t.string :image_url

## Recoverable
t.string :reset_password_token
t.string :reset_password_token, limit: 128
t.datetime :reset_password_sent_at

## Rememberable
Expand Down
6 changes: 6 additions & 0 deletions db/migrate/20150826175033_add_filter_data_to_emails.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Adds filter bookkeeping columns to the emails table.
class AddFilterDataToEmails < ActiveRecord::Migration
  # Adds `filtered` (boolean, default true) positioned after
  # `email_thread_id`, and `filtered_message` (string) positioned after
  # `filtered`.
  def change
    change_table :emails do |t|
      t.boolean :filtered, default: true, after: :email_thread_id
      t.string :filtered_message, after: :filtered
    end
  end
end
44 changes: 23 additions & 21 deletions db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 20150820184101) do
ActiveRecord::Schema.define(version: 20150826175033) do

create_table "email_profile_images", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci" do |t|
create_table "email_profile_images", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci" do |t|
t.string "email", limit: 255
t.string "url", limit: 255
t.string "image", limit: 255
Expand All @@ -22,7 +22,7 @@
t.datetime "updated_at", null: false
end

create_table "email_threads", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci" do |t|
create_table "email_threads", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci" do |t|
t.integer "user_id", limit: 4
t.string "thread_id", limit: 255
t.datetime "last_email_at"
Expand All @@ -33,26 +33,28 @@
t.index ["deleted_at"], name: "index_email_threads_on_deleted_at", using: :btree
end

create_table "emails", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci" do |t|
t.integer "email_thread_id", limit: 4
t.string "message_id", limit: 255
t.string "from_email", limit: 255
t.string "from_name", limit: 255
t.string "to_email", limit: 255
t.string "to_name", limit: 255
t.string "subject", limit: 255
t.text "plain_body", limit: 65535
t.text "html_body", limit: 65535
t.string "content_type", limit: 255
create_table "emails", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci" do |t|
t.integer "email_thread_id", limit: 4
t.boolean "filtered", default: true
t.string "filtered_message", limit: 255
t.string "message_id", limit: 255
t.string "from_email", limit: 255
t.string "from_name", limit: 255
t.string "to_email", limit: 255
t.string "to_name", limit: 255
t.string "subject", limit: 255
t.text "plain_body", limit: 65535
t.text "html_body", limit: 65535
t.string "content_type", limit: 255
t.datetime "received_on"
t.integer "questions_count", limit: 4, default: 0
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.integer "questions_count", limit: 4, default: 0
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.datetime "deleted_at"
t.index ["deleted_at"], name: "index_emails_on_deleted_at", using: :btree
end

create_table "questions", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci" do |t|
create_table "questions", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci" do |t|
t.integer "email_id", limit: 4
t.text "question", limit: 65535
t.datetime "created_at", null: false
Expand All @@ -61,11 +63,11 @@
t.index ["deleted_at"], name: "index_questions_on_deleted_at", using: :btree
end

create_table "users", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci" do |t|
t.string "email", limit: 255, default: "", null: false
create_table "users", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci" do |t|
t.string "email", limit: 128, default: "", null: false
t.string "encrypted_password", limit: 255, default: "", null: false
t.string "image_url", limit: 255
t.string "reset_password_token", limit: 255
t.string "reset_password_token", limit: 128
t.datetime "reset_password_sent_at"
t.datetime "remember_created_at"
t.integer "sign_in_count", limit: 4, default: 0, null: false
Expand Down
87 changes: 48 additions & 39 deletions lib/mail/adapters/gmail_adapter.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module Mail
module Adapters
class GmailAdapter
class GmailAdapter < MailAdapter
include Retryable

attr_reader :user, :total_cached_messages, :full_filter_messages
Expand All @@ -19,11 +19,6 @@ def messages
query_message_count

filter_full_messages if filter_meta_messages

puts "-------------------------------------------"
puts "total_saves = #{@total_saves}"
puts "total_failed_saves = #{@total_failed_saves.inspect}"
puts "-------------------------------------------"
end

private
Expand Down Expand Up @@ -75,43 +70,47 @@ def filter_full_messages
end

def query_message_count
result = label_list(id: 'INBOX')
result = label_list(id: 'INBOX')
@total_count = result.data.messages_total
end

# Percentage of the inbox that has been cached so far, rounded to the
# nearest whole number.
#
# Returns 0 when the inbox total is zero or has not been set: dividing by
# 0.0 would yield NaN or Infinity, and Float#round raises FloatDomainError
# for both.
def percentage_complete
  total = @total_count.to_f
  return 0 if total.zero?

  ((total_cached_messages / total) * 100).round
end

def full_filter
Google::APIClient::BatchRequest.new do |message|
payload = message.data.payload rescue return

if payload.parts.any?
message_json = JSON.parse(message.data.to_json)

plain = plain_message(message_json)
html = html_message(message_json)
def save_message(message, plain_body, html_body, filtered = false, filter_message = nil)
msg = Mail::EmailMessage.new(thread_id: message.data.thread_id,
message_id: message.data.id,

unless Mail::Filters::BodyFilter.filtered?(html) || Mail::Filters::BodyFilter.filtered?(plain)
to_name: to(message).display_name,
to_email: to(message).address,

msg = Mail::EmailMessage.new(thread_id: message.data.thread_id,
message_id: message.data.id,
from_name: from(message).display_name,
from_email: from(message).address,

to_name: to(message).display_name,
to_email: to(message).address,
subject: find_header('Subject', message),
received_on: find_header('Date', message),

from_name: from(message).display_name,
from_email: from(message).address,
plain_body: plain_body,
html_body: html_body,

subject: find_header('Subject', message),
received_on: find_header('Date', message),
filtered: filtered,
filter_message: filter_message)

plain_body: plain,
html_body: html)
save_message!(user, msg)

Mail::Adapters::MailAdapter.save_message!(user, msg)
end

def full_filter
Google::APIClient::BatchRequest.new do |message|
payload = message.data.payload rescue nil
if payload.present? && payload.parts.any?
filter = body_filtering(message)

if body_filtering(message).filtered?
save_message(message, filter.html_body, filter.plain_body, true, filter.message)
else
save_message(message, filter.html_body, filter.plain_body)
end
end
end
Expand All @@ -120,17 +119,31 @@ def full_filter
def meta_filter
msg = Struct.new(:id)
Google::APIClient::BatchRequest.new do |message|
unless filtered?(message)
filter = meta_filtering(message)

if filter.filtered?
save_message(message, nil, nil, true, filter.message)
else
full_filter_messages << msg.new(message.data.id)
end
end
end

def filtered?(message)
EmailThread.exists?(thread_id: message.data.thread_id) ||
Mail::Filters::FromFilter.filtered?(from(message).address, user.email) ||
Mail::Filters::HeaderFilter.filtered?(message.data.payload.headers) ||
Mail::Filters::SubjectFilter.filtered?(find_header('Subject', message))
# Builds the metadata-level filtering object for +message+ from the sender
# address, the current user's email, the payload headers and the Subject
# header.
def meta_filtering(message)
  sender_address = from(message).address
  owner_email = user.email
  payload_headers = message.data.payload.headers
  subject = find_header('Subject', message)

  Mail::Filters::MetaFiltering.new(sender_address, owner_email, payload_headers, subject)
end

# Builds the body-level filtering object for +message+.
#
# The message is converted to its parsed-JSON form once and that structure is
# handed to both the plain-text and the HTML extractor, rather than
# serializing and parsing the same message twice.
def body_filtering(message)
  json = message_json(message)
  plain = plain_message(json)
  html = html_message(json)

  Mail::Filters::BodyFiltering.new(html, plain)
end

# Round-trips the message's data through JSON so callers receive the plain
# structures produced by JSON.parse instead of the original data object.
def message_json(message)
  serialized = message.data.to_json
  JSON.parse(serialized)
end

def html_message(json)
Expand All @@ -146,10 +159,6 @@ def decode_message(messages)
Base64.urlsafe_decode64 message['body']['data']
end

def extract_body(content_type, body)
Mail::Adapters::MailAdapter.extract_body(content_type, body)
end

def message_for(type, json)
collection = []
json.each do |message_part|
Expand Down Expand Up @@ -190,7 +199,7 @@ def filtered_message_list(filter)
end

def find_header header_name, message
message.data.payload.headers.find { |h| h.name.strip.downcase == header_name.downcase }.value.scrub
(message.data.payload.headers.find { |h| h.name.strip.downcase == header_name.downcase } || Struct.new(:value).new).value
end

def from(message)
Expand Down
4 changes: 2 additions & 2 deletions lib/mail/adapters/mail_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def initialize(user, adapter = nil)
@adapter = (adapter || default_adapter).new(user)
end

def self.save_message!(user, message)
def save_message!(user, message)
thread = user.email_threads.find_or_create_by(thread_id: message.thread_id)

return if thread.destroyed?
Expand All @@ -28,7 +28,7 @@ def self.save_message!(user, message)
plain_body: message.plain_body
end

def self.extract_body(content_type, quoted_body)
# Runs the executable at CLAW_PATH on an email body and returns the "reply"
# entry of the JSON document it prints on stdout.
#
# content_type - passed to the executable as its first argument.
# quoted_body  - body text; HTML-escaped and passed as the second argument.
#
# The process is spawned from an argument array, so no shell is involved:
# quotes, `$`, backticks or backslashes inside an email are delivered to the
# executable verbatim and can no longer be interpreted as shell syntax.
# NOTE(review): this assumes CLAW_PATH names a single executable rather than
# a command line with embedded arguments — confirm.
#
# Raises JSON::ParserError when the output is empty or not valid JSON.
def extract_body(content_type, quoted_body)
  command = [CLAW_PATH, content_type.to_s, Rack::Utils.escape_html(quoted_body)]
  response = IO.popen(command, &:read)
  JSON.parse(response)["reply"]
end
Expand Down
5 changes: 4 additions & 1 deletion lib/mail/email_message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class EmailMessage
attr_reader :to_name, :to_email
attr_reader :from_name, :from_email
attr_reader :html_body, :plain_body, :headers
attr_reader :received_on
attr_reader :received_on, :filtered, :filtered_message

def initialize(properties)
@thread_id = properties[:thread_id]
Expand All @@ -27,6 +27,9 @@ def initialize(properties)

@headers = properties[:headers]
@received_on = properties[:received_on]

@filtered = properties[:filtered]
@filtered_message = properties[:filtered_message]
end
end
end
32 changes: 24 additions & 8 deletions lib/mail/filters/base_filter.rb
Original file line number Diff line number Diff line change
@@ -1,20 +1,36 @@
module Mail
module Filters
class BaseFilter
def self.blacklisted_words?(blacklisted_phrases, text)
blacklisted_phrases.detect{|phrase| text =~ /#{phrase}/i}
attr_reader :filter
attr_reader :message

# Starts the filter in its "nothing matched" state: no matched entry and a
# default explanatory message.
def initialize
@filter = nil
@message = "No filtering occurred"
end

# Finds the first entry of +blacklisted_phrases+ that matches +text+ (each
# phrase is interpolated into a case-insensitive regexp) and passes the
# outcome to #filtered, whose return value is returned.
def blacklisted_words?(blacklisted_phrases, text)
  offending_phrase = blacklisted_phrases.detect do |candidate|
    pattern = /#{candidate}/i
    text =~ pattern
  end

  filtered(offending_phrase)
end

def self.blacklisted_key_value?(blacklisted_keys, headers)
blacklisted_keys.detect do |(k, v)|
def blacklisted_key_value?(blacklisted_keys, headers)
filtered(blacklisted_keys.detect do |(k, v)|
headers.keys.include?(k) && (header_hash[k] == v || v == :any)
end
end)
end

def self.blacklisted_name_value?(blacklisted_keys, headers)
blacklisted_keys.detect do |(k, v)|
def blacklisted_name_value?(blacklisted_keys, headers)
filtered(blacklisted_keys.detect do |(k, v)|
headers.any?{|h| h.name == k && (v == :any || h.value == v)}
end
end)
end

private

# Records the outcome of a blacklist check on the filter itself.
#
# result - the blacklist entry that matched, or nil/false when none did.
#
# The match is stored in @filter and, when there is one, a description is
# stored in @message. A bare `filter = ...` / `message = ...` would only
# create method-local variables and leave the instance state untouched.
#
# Returns result unchanged so predicate callers keep its truthiness.
def filtered(result)
  @filter = result
  @message = "#{self.class}: '#{result}' detected" if result
  result
end
end
end
Expand Down
19 changes: 19 additions & 0 deletions lib/mail/filters/base_filtering.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
module Mail
  module Filters
    # Aggregates several filter objects and reports whether any of them
    # rejects the message under inspection.
    #
    # The filter list is read through #filters; nothing in this class
    # populates it. NOTE(review): presumably subclasses assign @filters in
    # their own initializers — confirm.
    class BaseFiltering
      attr_reader :filters
      attr_accessor :message

      def initialize
      end

      # Finds the first filter whose #filtered? is truthy and copies that
      # filter's message into #message (nil when no filter matched).
      #
      # A missing filter list is treated as empty.
      #
      # Returns the matching filter, or nil when no filter matched.
      def filtered?
        invoked_filter = Array(filters).detect { |filter| filter.filtered? }
        # The explicit receiver is required: a bare `message = ...` would
        # create a local variable instead of calling the accessor.
        self.message = invoked_filter && invoked_filter.message

        invoked_filter
      end
    end
  end
end
12 changes: 9 additions & 3 deletions lib/mail/filters/body_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,22 @@ class BodyFilter < BaseFilter
BLACKLISTED_LINKS = ['Privacy', 'Support', 'Blog', 'Legal', 'Terms', 'Terms of Use', 'Facebook', 'Twitter', 'Click Here',
'Help Center', 'Security']

def self.filtered?(body)
attr_reader :body

def initialize(body)
@body = body
end

def filtered?
blacklisted_words?(BLACKLISTED_PHRASES, body) # || too_many_links?(body)
end

private
def self.too_many_links?(body)
def too_many_links?(body)
link_count(body) > MAX_LINKS
end

def self.link_count(body)
def link_count(body)
doc = Nokogiri::HTML(body, 'utf-8')
doc.search("a").reject do |a|
BLACKLISTED_LINKS.any?{|bl| a.text =~ /#{bl}/i}
Expand Down
Loading

0 comments on commit 6afebc1

Please sign in to comment.