-
Notifications
You must be signed in to change notification settings - Fork 4
/
functions.py
217 lines (173 loc) · 5.64 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# -*- coding: utf-8 -*-
import os, re, time, tldextract, urllib2, json, socket, csv, operator
from ftplib import FTP
from datetime import datetime
from settings import ftp_settings
attempt_to_resolve_hostname_from_ip = True # try to get hostname from IP address?
# GET CURRENT TIME IN MILLISECONDS
def millis():
    """Return the current Unix time as an integer number of milliseconds."""
    now_ms = time.time() * 1000
    return int(round(now_ms))
# CENTER AND RIGHT ALIGN TEXT IN TERMINAL
def center_text(s):
    """Center *s* in the current terminal, padding both sides with spaces.

    The width is read from `stty size`, so this requires an attached tty.
    Uneven padding puts the extra space on the right.
    """
    cols = int(os.popen('stty size', 'r').read().split()[1])
    left = (cols - len(s)) // 2  # same floor division the original performs on ints
    right = cols - left - len(s)
    return ' ' * left + s + ' ' * right
def r_align(s, col_width):
    """Right-align *s* within a field of *col_width* characters.

    Strings longer than *col_width* are returned unpadded (never truncated).
    """
    padding = ' ' * (col_width - len(s))
    return padding + s
# SORT FILES FOR UPLOAD
def sort_files(log_filename):
    """Build the two sorted export files from the raw log at *log_filename*.

    Writes:
      AllServers_URL.csv  -- data rows sorted ascending by URL (column 3)
      AllServers_DESC.csv -- data rows in reverse log order (the log is
                             appended in ascending count order, so reversing
                             it yields count-descending)
    Both outputs start with the log's header line.
    """
    # read header + data rows once, closing the handle (original leaked it)
    with open(log_filename) as f:
        data = csv.reader(f, delimiter=',')
        header_row = next(data, None)
        rows = list(data)
    headers = ','.join(header_row) + '\n'
    # sort by URL
    # bug fix: the original consumed the header with next() AND skipped the
    # first sorted row in the loop, silently dropping one data row
    url_sorted = sorted(rows, key=operator.itemgetter(3), reverse=False)
    with open('AllServers_URL.csv', 'w') as f:
        f.write(headers)
        for line in url_sorted:
            f.write(','.join(line) + '\n')
    # reverse file to sort by count (descending)
    # bug fix: the original reversed ALL raw lines, so the header line ended
    # up duplicated at the bottom of the DESC file; skip it here
    with open(log_filename) as f:
        raw_lines = f.readlines()[1:]
    raw_lines.reverse()
    with open('AllServers_DESC.csv', 'w') as f:
        f.write(headers)
        f.writelines(raw_lines)
# UPLOAD FILE TO SERVER
# connects each time to avoid timeout and other
# issues for long FTP connections
def upload():
    """Upload the three AllServers CSV exports to the FTP server
    configured in settings.ftp_settings.

    A fresh connection is opened per call to avoid timeouts on
    long-lived FTP sessions.
    """
    ftp = FTP(ftp_settings['ftp_address'])
    ftp.login(ftp_settings['username'], ftp_settings['password'])
    ftp.cwd(ftp_settings['directory'])
    # extensions sent in line (ASCII) mode; everything else goes binary
    text_exts = ('.txt', '.htm', '.html', '.css', '.js', '.php', '.csv')
    files_to_upload = ['AllServers_ASC.csv', 'AllServers_DESC.csv', 'AllServers_URL.csv']
    for name in files_to_upload:
        ext = os.path.splitext(name)[1]
        # bug fix: the original opened these files without ever closing them
        if ext.lower() in text_exts:
            with open(name) as fh:
                ftp.storlines('STOR ' + name, fh)
        else:
            with open(name, 'rb') as fh:
                ftp.storbinary('STOR ' + name, fh, 1024)
    ftp.quit()
# LOG DATA TO CSV FILE
# saves count, date, time, url
def log_data(count, url, parts, location):
    """Append one observation row to AllServers.csv.

    The row is: count, date, time, url, the joined *parts* tuple,
    then the joined *location* tuple.
    """
    stamp = datetime.now()
    date_s = stamp.strftime('%m-%d-%Y')
    time_s = stamp.strftime('%H-%M-%S.%f')
    fields = (str(count), date_s, time_s, url, ','.join(parts), ','.join(location))
    with open('AllServers.csv', 'a') as output:
        output.write(','.join(fields) + '\n')
# GET LOCATION FROM URL
# geolocates a hostname/IP via the freegeoip.net JSON API
def get_location(url):
    """Return location data for *url* as a 7-tuple of strings:
    (ip, country, region, city, zip_code, latitude, longitude).

    Best-effort: on any failure (network error, bad JSON, missing key)
    a tuple of seven empty strings is returned instead of raising.
    NOTE(review): freegeoip.net has since been shut down, so this call
    presumably always falls through to the empty tuple now -- confirm
    and migrate to a live geolocation service.
    """
    try:
        j = json.load(urllib2.urlopen('http://freegeoip.net/json/' + url))
        ip = str(j['ip'])
        country = str(j['country_name'])
        region = str(j['region_name'])
        city = str(j['city'])
        zip_code = str(j['zip_code'])
        lat = str(j['latitude'])
        lon = str(j['longitude'])
        location = (ip, country, region, city, zip_code, lat, lon)
    except:
        # deliberate catch-all: geolocation is optional best-effort data
        location = ('', '', '', '', '', '', '')
    return location
# IS THIS AN IP ADDRESS?
# checks url to see if it is an IP address
# returns True/False, (ip/address, extra bits)
def is_ip_address(url):
    """Classify *url* as an IP address or a hostname.

    Returns (True, (ip, extra)) when *url* is a dotted IP address, where
    *extra* is any trailing text after the address ('' if none).
    Returns (False, (name, '')) otherwise. When the module flag
    attempt_to_resolve_hostname_from_ip is set, a plain IP is first
    reverse-resolved and, on success, reported as a hostname.
    """
    # standard IP address?
    try:
        socket.inet_aton(url)
        # attempt to resolve IP to hostname
        if attempt_to_resolve_hostname_from_ip:
            try:
                host = socket.gethostbyaddr(url)
                # bug fix: the original returned a bare string here instead
                # of the (value, extra) pair every other branch returns
                return False, (host[0], '')
            except socket.error:
                # resolution failed; fall through and report the raw IP
                pass
        return True, (url, '')
    except socket.error:
        pass
    # IP address with extra bits on it (e.g. '1.2.3.4.https')?
    ip_with_extras = re.match(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\.(.*?)$', url)
    if ip_with_extras is not None:
        return True, (ip_with_extras.group(1), ip_with_extras.group(2))
    # not an IP address
    else:
        return False, (url, '')
# SPLIT URL
# returns tuple with url split into components
# (subdomain, domain, tld, rest)
def split_url(url, not_really_tlds):
    """Split *url* into a (subdomain, domain, tld, rest) tuple of strings.

    *not_really_tlds* is an iterable of suffixes (e.g. protocol-ish bits)
    that look like TLDs but aren't; any found in *url* is removed and the
    last one found is returned as *rest*.
    """
    # few edge cases, hard-coded :(
    if url == 'nexus.stevens.edu.s.imap':
        return ('nexus', 'stevens', 'edu', 's.imap')
    elif url == 'perfora.net.s.imap':
        return ('', 'perfora', 'net', 's.imap')
    # set "rest" to blank, will be set below if exists
    rest = ''
    # check for non-TLDs (like ftp)
    # set "rest" to that value
    # NOTE(review): substring match + replace can also fire mid-hostname;
    # presumably the entries only ever occur as suffixes -- confirm
    for non in not_really_tlds:
        if non in url:
            url = url.replace(non, '')
            if url.startswith('.'):
                url = url[1:]
            rest = non
    # my mobile devices?
    # NOTE(review): matches 'jeffthonsiphone' but reports the domain as
    # 'jeffthompsonsiphone' -- one of the two spellings looks like a typo;
    # confirm which hostname actually appears in the capture
    if 'jeffthonsiphone' in url:
        matches = re.match(r'jeffthonsiphone\.([a-z].*?)\.(.*?)$', url)
        return ('', 'jeffthompsonsiphone', matches.group(1), matches.group(2))
    if 'jeffs-ipad' in url:
        matches = re.match(r'jeffs-ipad\.([a-z].*?)\.(.*?)$', url)
        return ('', 'jeffs-ipad', matches.group(1), matches.group(2))
    # is this an IP address?
    # (skip the IP check if the url includes some basic real TLDs)
    real_tlds = [ 'com', 'net', 'org' ]
    if any(('.' + ext) in url for ext in real_tlds) == False:
        ip_addr, parts = is_ip_address(url)
        if ip_addr:
            return('', parts[0], '', parts[1])
    # ok? split url with tldextract (uses the public-suffix list)
    u = tldextract.extract(url)
    return (u.subdomain, u.domain, u.suffix, rest)
# PARSE URL FROM TCPDUMP
# takes raw response from tcpdump and returns a url
# if not valid, returns None
def parse_url(row, local_domain, group_email_urls, compare_stored_urls, previous_urls):
    """Extract a cleaned-up URL from one line of tcpdump output.

    Returns the URL string, or None when the line should be skipped:
    non-TCP traffic, malformed/short lines, our own *local_domain*,
    known noise tokens, or (when *compare_stored_urls* is set) a URL
    already present in *previous_urls*. *previous_urls* must be a set
    and is updated in place.
    """
    row = row.strip()
    # skip non-HTTP traffic
    if 'tcp' not in row:
        return None
    tokens = row.split()
    # bug fix: the original indexed [2] before any guard, raising
    # IndexError on short/malformed lines
    if len(tokens) < 3:
        return None
    url = tokens[2]
    # ignore ourselves
    if local_domain in url.lower():
        return None
    # simplify email URLs, if specified
    # (also ignore the insane # of Stevens sub-IPs... 🙄)
    if group_email_urls and 'perfora.net' in url:
        # strip the numeric sub-host suffix: 'perfora.net.12345' -> 'perfora.net'
        # bug fix: the original pattern '[perfora\.net\.][0-9]+' was a
        # character CLASS, stripping digits after ANY of those characters
        url = re.sub(r'(perfora\.net)\.[0-9]+', r'\1', url)
    if group_email_urls and '155.246.200.20' in url:
        return None
    # ignore 'link' and 'output' (always the first 2 URLs...)
    if url == 'link' or url == 'output':
        return None
    # format nicely: drop protocol suffix and trailing colon
    url = re.sub(r'\.https?', '', url)
    url = re.sub(r':$', '', url)
    # have we already seen this URL?
    if compare_stored_urls:
        if url in previous_urls:
            return None
        else:
            previous_urls.add(url)
    # done
    return url