-
Notifications
You must be signed in to change notification settings - Fork 3
/
email2db.rb
executable file
·148 lines (121 loc) · 3.66 KB
/
email2db.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env ruby
require 'net/imap'
require 'mongo'
require 'date'
include Mongo
############################################
# This script copies emails from gmail and
# inserts them into a mongo db
# modified from: http://wonko.com/post/ruby_script_to_sync_email_from_any_imap_server_to_gmail
############################################
# Mail server connection info.
SOURCE_HOST = 'imap.gmail.com'
SOURCE_PORT = 993
SOURCE_SSL = true
# Folder to read: Gmail's "All Mail", so every message is considered.
FOLDER = '[Gmail]/All Mail'
# Search window, formatted as IMAP dates (e.g. "3-Jan-2024").
# The window ends tomorrow: the IMAP BEFORE search key excludes the given
# day, so using tomorrow keeps today's messages in range.
$date_end = (Date.today + 1).strftime("%-d-%b-%Y")
# The window starts 51 days back, deliberately longer than one month: if a
# user responded to an older email, that email should be in the db too.
$date_start = (Date.today - 51).strftime("%-d-%b-%Y")
# Maximum number of messages to select at once.
UID_BLOCK_SIZE = 1024
# Running count of messages copied into the db (incremented by run).
$synced = 0
#---------------- Utility Methods -----------------------#
# Fetches the given UIDs in slices of at most UID_BLOCK_SIZE and yields every
# fetched item to the caller's block.
#
# server - object responding to uid_fetch(uids, *args); run passes its
#          Net::IMAP connection.
# uids   - Array of message UIDs to fetch.
# args   - extra arguments forwarded unchanged to server.uid_fetch.
#
# Returns nil.
def uid_fetch_block(server, uids, *args)
  uids.each_slice(UID_BLOCK_SIZE) do |slice|
    # Some net/imap versions answer nil (not an empty list) when no message
    # in the slice matched; Array() turns that into "nothing to yield"
    # instead of a NoMethodError on nil.
    Array(server.uid_fetch(slice, *args)).each { |data| yield data }
  end
  nil
end
# Converts every element of ary to its String form.
#
# ary - a collection responding to map, or nil/false.
#
# Returns an Array of Strings; an empty Array when ary is nil or false.
def s_ary(ary)
  return [] unless ary

  ary.map(&:to_s)
end
#---------------- Mongo Methods -----------------------#
# Opens the Mongo connection on localhost:27017 and stores the database handle
# in $db and the collection handle in $coll, then drops the collection so each
# run starts from an empty one.
#
# Returns whatever $coll.drop returns.
def init_mongo
  client = MongoClient.new("localhost", 27017)
  # Database and collection are looked up by name on the fresh connection.
  $db = client.db("my_email_response_rate")
  $coll = $db.collection("email_collection")
  # Clean out whatever a previous run left behind.
  $coll.drop
end
# Inserts one message document into the collection held in $coll.
#
# jsmsg - the document to store; run passes the Hash built by msg_to_json.
#
# Returns whatever $coll.insert returns.
def insert_msg(jsmsg)
  $coll.insert(jsmsg)
end
#---------------- Mail Methods -----------------------#
# Simplified string form for IMAP addresses: only the mailbox and host parts
# are used, joined as "mailbox@host".
class Net::IMAP::Address
  def to_s
    "#{mailbox}@#{host}"
  end
end
# Debug helper: writes the sequence number, the UID / INTERNALDATE / FLAGS
# attributes and then each envelope field of a fetched message to stdout,
# one puts call per value.
#
# msg - object exposing seqno and an attr Hash that holds an 'ENVELOPE' entry.
#
# Returns nil.
def print_msg(msg)
  attrs = msg.attr
  envelope = attrs['ENVELOPE']

  puts msg.seqno
  # puts attrs['RFC822']
  %w[UID INTERNALDATE FLAGS].each { |key| puts attrs[key] }
  %w[date subject from to cc bcc in_reply_to message_id].each do |field|
    puts envelope.public_send(field)
  end
  nil
end
# Flattens a fetched message into a plain Hash with String keys.
#
# msg - object exposing seqno and an attr Hash with 'UID', 'INTERNALDATE',
#       'FLAGS' and 'ENVELOPE' entries.
#
# Address lists (from/to/cc/bcc) go through s_ary, so they become Arrays of
# Strings and a missing list becomes []. 'is_reply' is true when the subject
# has a line starting with "Re:" (case-insensitive) and false otherwise,
# including when there is no subject.
#
# Returns the Hash.
def msg_to_json(msg)
  attrs = msg.attr
  envelope = attrs['ENVELOPE']
  subject = envelope.subject
  is_reply = subject ? !subject.match(/^Re:/i).nil? : false

  {
    'seqno'        => msg.seqno,
    'uid'          => attrs['UID'],
    'internaldate' => attrs['INTERNALDATE'],
    'flags'        => attrs['FLAGS'],
    'date'         => envelope.date,
    'subject'      => subject,
    'from'         => s_ary(envelope.from),
    'to'           => s_ary(envelope.to),
    'cc'           => s_ary(envelope.cc),
    'bcc'          => s_ary(envelope.bcc),
    'in_reply_to'  => envelope.in_reply_to,
    'message_id'   => envelope.message_id,
    'is_reply'     => is_reply
  }
end
# Copies message metadata from the IMAP server into the db.
#
# Connects to SOURCE_HOST, logs in with SOURCE_USER / SOURCE_PASS, opens FOLDER
# read-only, searches for messages between $date_start and $date_end, and
# inserts the msg_to_json form of each one via insert_msg, incrementing
# $synced per message.
#
# The connection is always disconnected on the way out, including when an
# error is raised part-way through.
#
# Raises whatever the IMAP or db calls raise. A failure to open FOLDER is
# reported on stdout and then re-raised, because no search can succeed without
# a selected mailbox.
def run
  # puts 'Connecting...'
  source = Net::IMAP.new(SOURCE_HOST, SOURCE_PORT, SOURCE_SSL)
  begin
    # puts 'Logging in...'
    source.login(SOURCE_USER, SOURCE_PASS)

    # Open All Mail folder in read-only mode.
    begin
      source.examine(FOLDER)
    rescue => e
      puts "Error: select failed: #{e}"
      raise
    end

    # Loop through all messages in the source folder.
    # uids = source.uid_search(['ALL'])
    uids = source.uid_search(['SINCE', $date_start, 'BEFORE', $date_end])
    # puts "Found #{uids.length} messages"

    # uid_fetch_block issues no fetch at all for an empty uid list.
    uid_fetch_block(source, uids, ['UID', 'ENVELOPE', 'FLAGS', 'INTERNALDATE']) do |msg|
      insert_msg(msg_to_json(msg))
      $synced += 1
    end

    source.close
    source.logout
    # puts "Finished. Message counts: #{$synced} copied to db"
  ensure
    # Release the socket even if login, search, fetch or insert raised.
    source.disconnect unless source.disconnected?
  end
end
## Setup
# Both credentials are required positional arguments. A usage error is
# reported on stderr with a non-zero exit status so callers can detect it.
abort "Usage: ruby email2db.rb gmail_username gmail_password" if ARGV.length < 2

# Credentials read by run when logging in.
SOURCE_USER = ARGV[0]
SOURCE_PASS = ARGV[1]

# Reset the collection first, then copy the messages.
init_mongo
run