From b11af99c65a460da2df0c5b26e25a1a5dd768ec0 Mon Sep 17 00:00:00 2001
From: Jeff Thompson
Date: Fri, 5 Feb 2016 12:29:32 -0500
Subject: [PATCH] Misc updates, cleanups, and tweaks

---
 DocumentationAndEarlyVersions/SortCSVTest.py | 24 ++++++++++
 functions.py                                 | 50 +++++++++++++++-----
 header.php                                   |  8 ++--
 index.php                                    | 29 +++++++-----
 tcpdump.py                                   | 44 +++++++++++++----
 5 files changed, 117 insertions(+), 38 deletions(-)
 create mode 100644 DocumentationAndEarlyVersions/SortCSVTest.py

diff --git a/DocumentationAndEarlyVersions/SortCSVTest.py b/DocumentationAndEarlyVersions/SortCSVTest.py
new file mode 100644
index 0000000..ac8674f
--- /dev/null
+++ b/DocumentationAndEarlyVersions/SortCSVTest.py
@@ -0,0 +1,24 @@
+
+import csv, operator
+
+log_filename = '../AllServers.csv'
+
+data = csv.reader(open(log_filename), delimiter=',')
+headers = ','.join(next(data, None)) + '\n'
+
+url_sorted = sorted(data, key=operator.itemgetter(3), reverse=False)
+with open('AllServers_URL.csv', 'w') as f:
+    f.write(headers)
+    for i, line in enumerate(url_sorted):
+        if i==0:
+            header = line
+            continue
+        f.write(','.join(line) + '\n')
+
+print '- sorting by count'
+with open(log_filename) as f:
+    s = f.readlines()
+    s.reverse()
+with open('AllServers_DESC.csv', 'w') as f:
+    f.write(headers)
+    f.writelines(s)
\ No newline at end of file
diff --git a/functions.py b/functions.py
index 0fb27db..11183f8 100644
--- a/functions.py
+++ b/functions.py
@@ -1,7 +1,9 @@
+# -*- coding: utf-8 -*-
 
-import os, re, time, tldextract, urllib2, json, socket
+import os, re, time, tldextract, urllib2, json, socket, csv, operator
 from ftplib import FTP
 from datetime import datetime
+from settings import ftp_settings
 
 attempt_to_resolve_hostname_from_ip = True    # try to get hostname from IP address?
 
@@ -23,24 +25,50 @@ def r_align(s, col_width):
     return (' ' * (col_width-len(s))) + s
 
 
+# SORT FILES FOR UPLOAD
+def sort_files(log_filename):
+    data = csv.reader(open(log_filename), delimiter=',')
+    headers = ','.join(next(data, None)) + '\n'
+
+    # sort by URL
+    url_sorted = sorted(data, key=operator.itemgetter(3), reverse=False)
+    with open('AllServers_URL.csv', 'w') as f:
+        f.write(headers)
+        for i, line in enumerate(url_sorted):
+            if i==0:
+                header = line
+                continue
+            f.write(','.join(line) + '\n')
+
+    # reverse file to sort by count (descending)
+    with open(log_filename) as f:
+        s = f.readlines()
+        s.reverse()
+    with open('AllServers_DESC.csv', 'w') as f:
+        f.write(headers)
+        f.writelines(s)
+
+
 # UPLOAD FILE TO SERVER
 # connects each time to avoid timeout and other
 # issues for long FTP connections
-def upload(file, settings):
-    ftp_address = settings['ftp_address']
-    username = settings['username']
-    password = settings['password']
-    directory = settings['directory']
+def upload():
+    ftp_address = ftp_settings['ftp_address']
+    username = ftp_settings['username']
+    password = ftp_settings['password']
+    directory = ftp_settings['directory']
     ftp = FTP(ftp_address)
     ftp.login(username, password)
     ftp.cwd(directory)
-    ext = os.path.splitext(file)[1]
-    if ext.lower() in ('.txt', '.htm', '.html', '.css', '.js', '.php', '.csv'):
-        ftp.storlines('STOR ' + file, open(file))
-    else:
-        ftp.storbinary('STOR ' + file, open(file, 'rb'), 1024)
+    files_to_upload = [ 'AllServers_ASC.csv', 'AllServers_DESC.csv', 'AllServers_URL.csv' ]
+    for f in files_to_upload:
+        ext = os.path.splitext(f)[1]
+        if ext.lower() in ('.txt', '.htm', '.html', '.css', '.js', '.php', '.csv'):
+            ftp.storlines('STOR ' + f, open(f))
+        else:
+            ftp.storbinary('STOR ' + f, open(f, 'rb'), 1024)
     ftp.quit()
 
 
diff --git a/header.php b/header.php
index c596b04..e04638f 100644
--- a/header.php
+++ b/header.php
@@ -57,10 +57,10 @@
-    if (performing) {
-        echo '' . PHP_EOL;
-        echo '    ' . PHP_EOL;
-    }
+    // if (performing) {
+    // echo '' . PHP_EOL;
+    // echo ' ' . PHP_EOL;
+    // }
 ?>
diff --git a/index.php b/index.php
index 9fe822b..1064a7c 100644
--- a/index.php
+++ b/index.php
@@ -9,25 +9,28 @@
 
     $rows = array();
     $by = array();
-    $handle = fopen('AllServers.csv', 'r');
+    # load file based on sorting option
+    # by url
+    if (isset($_GET['sort']) && $_GET['sort'] == 'url') {
+        $handle = fopen('AllServers_URL.csv', 'r');
+    }
+
+    # by count: oldest to newest
+    else if (isset($_GET['sort']) && $_GET['sort'] == 'count') {
+        $handle = fopen('AllServers_ASC.csv', 'r');
+    }
+
+    # default: count, newest to oldest
+    else {
+        $handle = fopen('AllServers_DESC.csv', 'r');
+    }
+
     $row = fgetcsv($handle, 1024, ','); // skip header
     while (($row = fgetcsv($handle, 1024, ',')) !== FALSE) {
-        if ($_GET['sort'] == 'url') {
-            $by[] = $row[5];
-        } else if ($_GET['sort'] == 'count') {
-            $by[] = $row[0];
-        }
         $rows[] = $row;
     }
     fclose($handle);
 
-    # sort it (if no sorting listed, reverse newest-to-oldest)
-    if (isset($_GET['sort'])) {
-        array_multisort($by, $rows);
-    } else {
-        array_multisort($rows, SORT_DESC);
-    }
-
     # display
     foreach($rows as $row) {
         $count = number_format($row[0]);
diff --git a/tcpdump.py b/tcpdump.py
index 3429064..3d0d798 100644
--- a/tcpdump.py
+++ b/tcpdump.py
@@ -16,7 +16,6 @@
 
 # REQUIRED IMPORTS
-from settings import ftp_settings
 from functions import *
 import os, socket, sys, pwd, glob, grp, time
 from threading import Thread
 
@@ -29,7 +28,12 @@
 group_email_urls = True       # combine 'perfora.net' urls?
 col_width = 10                # width of the "count" column
 upload_interval = 10 * 1000   # how often to re-upload index.php
-log_filename = 'AllServers.csv'
+skip_some_urls = True         # ignore some URL/IPs?
+
+log_filename = 'AllServers_ASC.csv'
+
+# some IP/URLs to ignore
+skip_urls = [ '155.246.200.18.', 'jeff-thompson.home', 'jeff-thompsons-iphone', 'jeffs-ipad.local.' ]
 
 # tlds that aren't really tlds
 not_really_tlds = [ 'imap', 'imaps', 'ftp' ]
@@ -104,7 +108,7 @@
 sys.stdout.flush()
 previous_urls = set()
 try:
-    with open('AllServers.csv') as f:
+    with open(log_filename) as f:
         for i, line in enumerate(f):
             if i == 0:
                 continue
@@ -114,7 +118,7 @@
             previous_urls.add(data[3].strip())
             count = int(data[0])
 except:
-    with open('AllServers.csv', 'a') as f:
+    with open(log_filename, 'a') as f:
         f.write('count,date,time,url,subdomain,domain,tld,rest,ip,country,region,city,zip_code,lat,lon' + '\n')
 
 start_time = datetime.now().strftime('%B %d, %Y at %H:%M:%S')
@@ -161,24 +165,42 @@
                       compare_stored_urls, previous_urls)
     if url == None:
         continue
+
+    # skip internal Stevens IPs, etc?
+    # messy and nasty, I know...
+    if skip_some_urls:
+        skip = False
+        for u in skip_urls:
+            if u in url:
+                skip = True
+                break
+        if skip:
+            continue
+
+    # all good? what'd we get?
     print FG_CYAN + r_align(str(count), col_width) + ' ' + BOLD + url + ALL_OFF
 
     # split URL into parts
     url_parts = split_url(url, not_really_tlds)
 
     # get location for address
-    location = get_location(url)
+    if '155.246.200.18.' in url:
+        location = ('', '', '', '', '', '', '')   # ignore internal Stevens IP
+    else:
+        location = get_location(url)
 
     # log to file
     log_data(count, url, url_parts, location)
 
-    # update_html(count, url, url_parts)
-    # enough time passed? upload
+    # enough time passed? create sorted CSV files and upload
     if millis() > prev_millis + upload_interval:
+        sort_files(log_filename)
+
+        # upload all three files
         print '\n' + center_text('[ uploading... ]'),
         sys.stdout.flush()
         try:
-            t = Thread(target=upload, args=(log_filename, ftp_settings))
+            t = Thread(target=upload, args=())
             t.start()
             t.join()
             print CURSOR_UP + CLEAR_LINE + BOLD + center_text('[ uploading... DONE! ]') + ALL_OFF
@@ -196,12 +218,14 @@
     print 'Final upload: ' + BOLD + FG_CYAN,
     sys.stdout.flush()
     try:
-        t = Thread(target=upload, args=(log_filename, ftp_settings))
+        sort_files(log_filename)
+        t = Thread(target=upload, args=())
         t.start()
         t.join()
         print 'Done' + ALL_OFF
-    except:
+    except Exception, e:
         print 'Error uploading' + ALL_OFF
+        # print str(e)
 
     # close it
     print 'Disconnecting FTP: ' + BOLD + FG_CYAN + 'Closed' + ALL_OFF
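
Note (not part of the patch): upload() now reads its FTP credentials from the
ftp_settings dict imported from settings.py, a file that is not included in
this commit, presumably so credentials stay out of the repo. A minimal sketch
of the assumed structure, placeholder values only:

    # settings.py, assumed layout with placeholder values (not part of this patch)
    ftp_settings = {
        'ftp_address': 'ftp.example.com',   # FTP host that upload() connects to
        'username':    'your-username',     # FTP login
        'password':    'your-password',     # FTP password
        'directory':   '/path/on/server',   # remote directory the CSVs land in
    }

With a settings.py like that in place, the sort-then-upload path added to
tcpdump.py can also be run by hand (Python 2, with the repo's dependencies
such as tldextract installed):

    # regenerate the sorted CSVs from the live log, then FTP all three
    from functions import sort_files, upload
    sort_files('AllServers_ASC.csv')   # writes AllServers_URL.csv and AllServers_DESC.csv
    upload()                           # sends the ASC, DESC, and URL CSVs listed in files_to_upload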