diff --git a/DocumentationAndEarlyVersions/SortCSVTest.py b/DocumentationAndEarlyVersions/SortCSVTest.py
new file mode 100644
index 0000000..ac8674f
--- /dev/null
+++ b/DocumentationAndEarlyVersions/SortCSVTest.py
@@ -0,0 +1,24 @@
+
+import csv, operator
+
+# early test of the sort step: writes AllServers_URL.csv (rows ordered by
+# the url column) and AllServers_DESC.csv (rows in reverse file order)
+log_filename = '../AllServers.csv'
+
+# read once so the log is closed again before anything is written
+with open(log_filename, 'rb') as f:
+    rows = list(csv.reader(f, delimiter=','))
+# split off the header so it is neither sorted nor reversed; blank lines drop
+headers, rows = rows[:1], [r for r in rows[1:] if r]
+
+def write_rows(filename, ordered):
+    # csv.writer re-quotes fields that contain commas, unlike ','.join
+    with open(filename, 'wb') as f:
+        csv.writer(f, lineterminator='\n').writerows(headers + ordered)
+
+# sort by URL (column 3); every data row is written, including the first
+write_rows('AllServers_URL.csv', sorted(rows, key=operator.itemgetter(3)))
+
+print '- sorting by count'
+# reverse only the data rows; the header stays on the first line
+write_rows('AllServers_DESC.csv', rows[::-1])
\ No newline at end of file
diff --git a/functions.py b/functions.py
index 0fb27db..11183f8 100644
--- a/functions.py
+++ b/functions.py
@@ -1,7 +1,9 @@
+# -*- coding: utf-8 -*-
-import os, re, time, tldextract, urllib2, json, socket
+import os, re, time, tldextract, urllib2, json, socket, csv, operator
from ftplib import FTP
from datetime import datetime
+from settings import ftp_settings
attempt_to_resolve_hostname_from_ip = True # try to get hostname from IP address?
@@ -23,24 +25,50 @@ def r_align(s, col_width):
return (' ' * (col_width-len(s))) + s
+# SORT FILES FOR UPLOAD
+# writes AllServers_URL.csv (rows ordered by the url column) and
+# AllServers_DESC.csv (rows in reverse file order); both keep the
+# header row on top and are rewritten from scratch on every call
+def sort_files(log_filename):
+    # read once so the log is closed again before anything is written
+    with open(log_filename, 'rb') as f:
+        rows = list(csv.reader(f, delimiter=','))
+    # split off the header so it is neither sorted nor reversed;
+    # blank lines are dropped, and an empty log yields empty outputs
+    headers, rows = rows[:1], [r for r in rows[1:] if r]
+
+    outputs = (
+        # sort by URL (column 3); every data row is kept
+        ('AllServers_URL.csv', sorted(rows, key=operator.itemgetter(3))),
+        # reverse data rows to sort by count (descending)
+        ('AllServers_DESC.csv', rows[::-1]),
+    )
+    for filename, ordered in outputs:
+        with open(filename, 'wb') as f:
+            # csv.writer re-quotes fields with commas, unlike ','.join
+            csv.writer(f, lineterminator='\n').writerows(headers + ordered)
+
+
# UPLOAD FILE TO SERVER
# connects each time to avoid timeout and other
# issues for long FTP connections
-def upload(file, settings):
- ftp_address = settings['ftp_address']
- username = settings['username']
- password = settings['password']
- directory = settings['directory']
+def upload():
+    ftp_address, directory = ftp_settings['ftp_address'], ftp_settings['directory']
+    username, password = ftp_settings['username'], ftp_settings['password']
ftp = FTP(ftp_address)
ftp.login(username, password)
ftp.cwd(directory)
- ext = os.path.splitext(file)[1]
- if ext.lower() in ('.txt', '.htm', '.html', '.css', '.js', '.php', '.csv'):
- ftp.storlines('STOR ' + file, open(file))
- else:
- ftp.storbinary('STOR ' + file, open(file, 'rb'), 1024)
+    text_exts = ('.txt', '.htm', '.html', '.css', '.js', '.php', '.csv')
+    for name in ('AllServers_ASC.csv', 'AllServers_DESC.csv', 'AllServers_URL.csv'):
+        is_text = os.path.splitext(name)[1].lower() in text_exts
+        # 'with' closes each local file even when the transfer raises
+        with open(name, 'r' if is_text else 'rb') as fh:
+            if is_text:
+                ftp.storlines('STOR ' + name, fh)
+            else:
+                ftp.storbinary('STOR ' + name, fh, 1024)
ftp.quit()
diff --git a/header.php b/header.php
index c596b04..e04638f 100644
--- a/header.php
+++ b/header.php
@@ -57,10 +57,10 @@
' . PHP_EOL;
- echo ' ' . PHP_EOL;
- }
+ // if (performing) {
+ // echo '' . PHP_EOL;
+ // echo ' ' . PHP_EOL;
+ // }
?>
diff --git a/index.php b/index.php
index 9fe822b..1064a7c 100644
--- a/index.php
+++ b/index.php
@@ -9,25 +9,28 @@
$rows = array();
$by = array();
- $handle = fopen('AllServers.csv', 'r');
+ # load file based on sorting option
+ # by url
+ if (isset($_GET['sort']) && $_GET['sort'] == 'url') {
+ $handle = fopen('AllServers_URL.csv', 'r');
+ }
+
+ # by count: oldest to newest
+ else if (isset($_GET['sort']) && $_GET['sort'] == 'count') {
+ $handle = fopen('AllServers_ASC.csv', 'r');
+ }
+
+ # default: count, newest to oldest
+ else {
+ $handle = fopen('AllServers_DESC.csv', 'r');
+ }
+
$row = fgetcsv($handle, 1024, ','); // skip header
while (($row = fgetcsv($handle, 1024, ',')) !== FALSE) {
- if ($_GET['sort'] == 'url') {
- $by[] = $row[5];
- } else if ($_GET['sort'] == 'count') {
- $by[] = $row[0];
- }
$rows[] = $row;
}
fclose($handle);
- # sort it (if no sorting listed, reverse newest-to-oldest)
- if (isset($_GET['sort'])) {
- array_multisort($by, $rows);
- } else {
- array_multisort($rows, SORT_DESC);
- }
-
# display
foreach($rows as $row) {
$count = number_format($row[0]);
diff --git a/tcpdump.py b/tcpdump.py
index 3429064..3d0d798 100644
--- a/tcpdump.py
+++ b/tcpdump.py
@@ -16,7 +16,6 @@
# REQUIRED IMPORTS
-from settings import ftp_settings
from functions import *
import os, socket, sys, pwd, glob, grp, time
from threading import Thread
@@ -29,7 +28,12 @@
group_email_urls = True # combine 'perfora.net' urls?
col_width = 10 # width of the "count" column
upload_interval = 10 * 1000 # how often to re-upload index.php
-log_filename = 'AllServers.csv'
+skip_some_urls = True # ignore some URL/IPs?
+
+log_filename = 'AllServers_ASC.csv'
+
+# some IP/URLs to ignore
+skip_urls = [ '155.246.200.18.', 'jeff-thompson.home', 'jeff-thompsons-iphone', 'jeffs-ipad.local.' ]
# tlds that aren't really tlds
not_really_tlds = [ 'imap', 'imaps', 'ftp' ]
@@ -104,7 +108,7 @@
sys.stdout.flush()
previous_urls = set()
try:
- with open('AllServers.csv') as f:
+ with open(log_filename) as f:
for i, line in enumerate(f):
if i == 0:
continue
@@ -114,7 +118,7 @@
previous_urls.add(data[3].strip())
count = int(data[0])
except:
- with open('AllServers.csv', 'a') as f:
+ with open(log_filename, 'a') as f:
f.write('count,date,time,url,subdomain,domain,tld,rest,ip,country,region,city,zip_code,lat,lon' + '\n')
start_time = datetime.now().strftime('%B %d, %Y at %H:%M:%S')
@@ -161,24 +165,42 @@
compare_stored_urls, previous_urls)
if url == None:
continue
+
+ # skip internal Stevens IPs, etc?
+ # messy and nasty, I know...
+ if skip_some_urls:
+ skip = False
+ for u in skip_urls:
+ if u in url:
+ skip = True
+ break
+ if skip:
+ continue
+
+ # all good? what'd we get?
print FG_CYAN + r_align(str(count), col_width) + ' ' + BOLD + url + ALL_OFF
# split URL into parts
url_parts = split_url(url, not_really_tlds)
# get location for address
- location = get_location(url)
+ if '155.246.200.18.' in url:
+ location = ('', '', '', '', '', '', '') # ignore internal Stevens IP
+ else:
+ location = get_location(url)
# log to file
log_data(count, url, url_parts, location)
- # update_html(count, url, url_parts)
- # enough time passed? upload
+ # enough time passed? create sorted CSV files and upload
if millis() > prev_millis + upload_interval:
+ sort_files(log_filename)
+
+ # upload all three files
print '\n' + center_text('[ uploading... ]'),
sys.stdout.flush()
try:
- t = Thread(target=upload, args=(log_filename, ftp_settings))
+ t = Thread(target=upload, args=())
t.start()
t.join()
print CURSOR_UP + CLEAR_LINE + BOLD + center_text('[ uploading... DONE! ]') + ALL_OFF
@@ -196,12 +218,14 @@
print 'Final upload: ' + BOLD + FG_CYAN,
sys.stdout.flush()
try:
- t = Thread(target=upload, args=(log_filename, ftp_settings))
+ sort_files(log_filename)
+ t = Thread(target=upload, args=())
t.start()
t.join()
print 'Done' + ALL_OFF
- except:
+ except Exception, e:
print 'Error uploading' + ALL_OFF
+ # print str(e)
# close it
print 'Disconnecting FTP: ' + BOLD + FG_CYAN + 'Closed' + ALL_OFF