Misc updates, cleanups, and tweaks
jeffThompson committed Feb 5, 2016
1 parent 93acb27 commit b11af99
Showing 5 changed files with 117 additions and 38 deletions.
24 changes: 24 additions & 0 deletions DocumentationAndEarlyVersions/SortCSVTest.py
@@ -0,0 +1,24 @@

import csv, operator

log_filename = '../AllServers.csv'

# read the log, pulling the header row off first
data = csv.reader(open(log_filename), delimiter=',')
headers = ','.join(next(data, None)) + '\n'

# sort the remaining rows by URL (column 3); the header
# was already consumed above, so every row here is data
url_sorted = sorted(data, key=operator.itemgetter(3), reverse=False)
with open('AllServers_URL.csv', 'w') as f:
    f.write(headers)
    for line in url_sorted:
        f.write(','.join(line) + '\n')

# reverse the data rows to sort by count (descending)
print '- sorting by count'
with open(log_filename) as f:
    s = f.readlines()[1:]    # skip the header line, stored above
s.reverse()
with open('AllServers_DESC.csv', 'w') as f:
    f.write(headers)
    f.writelines(s)
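
A note on the sort above: operator.itemgetter(3) compares the URL column as plain strings, which is what this test wants. Sorting by the count column directly would need an int() cast, since as strings '9' sorts after '10'. A minimal sketch of that variant, assuming the same CSV layout (count in column 0):

import csv

with open('../AllServers.csv') as f:
    data = csv.reader(f, delimiter=',')
    headers = next(data, None)
    # cast to int so counts compare numerically, largest first
    by_count = sorted(data, key=lambda row: int(row[0]), reverse=True)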
50 changes: 39 additions & 11 deletions functions.py
@@ -1,7 +1,9 @@
# -*- coding: utf-8 -*-

-import os, re, time, tldextract, urllib2, json, socket
+import os, re, time, tldextract, urllib2, json, socket, csv, operator
from ftplib import FTP
from datetime import datetime
+from settings import ftp_settings

attempt_to_resolve_hostname_from_ip = True # try to get hostname from IP address?

@@ -23,24 +25,50 @@ def r_align(s, col_width):
    return (' ' * (col_width-len(s))) + s


+# SORT FILES FOR UPLOAD
+def sort_files(log_filename):
+    data = csv.reader(open(log_filename), delimiter=',')
+    headers = ','.join(next(data, None)) + '\n'
+
+    # sort by URL (column 3); the header row was already consumed above
+    url_sorted = sorted(data, key=operator.itemgetter(3), reverse=False)
+    with open('AllServers_URL.csv', 'w') as f:
+        f.write(headers)
+        for line in url_sorted:
+            f.write(','.join(line) + '\n')
+
+    # reverse the data rows to sort by count (descending)
+    with open(log_filename) as f:
+        s = f.readlines()[1:]    # skip the header line, stored above
+    s.reverse()
+    with open('AllServers_DESC.csv', 'w') as f:
+        f.write(headers)
+        f.writelines(s)


# UPLOAD FILE TO SERVER
# connects each time to avoid timeout and other
# issues for long FTP connections
-def upload(file, settings):
-    ftp_address = settings['ftp_address']
-    username = settings['username']
-    password = settings['password']
-    directory = settings['directory']
+def upload():
+    ftp_address = ftp_settings['ftp_address']
+    username = ftp_settings['username']
+    password = ftp_settings['password']
+    directory = ftp_settings['directory']

    ftp = FTP(ftp_address)
    ftp.login(username, password)
    ftp.cwd(directory)

-    ext = os.path.splitext(file)[1]
-    if ext.lower() in ('.txt', '.htm', '.html', '.css', '.js', '.php', '.csv'):
-        ftp.storlines('STOR ' + file, open(file))
-    else:
-        ftp.storbinary('STOR ' + file, open(file, 'rb'), 1024)
+    files_to_upload = [ 'AllServers_ASC.csv', 'AllServers_DESC.csv', 'AllServers_URL.csv' ]
+    for f in files_to_upload:
+        ext = os.path.splitext(f)[1]
+        if ext.lower() in ('.txt', '.htm', '.html', '.css', '.js', '.php', '.csv'):
+            ftp.storlines('STOR ' + f, open(f))
+        else:
+            ftp.storbinary('STOR ' + f, open(f, 'rb'), 1024)
    ftp.quit()


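The reworked upload() reconnects for every batch instead of holding one FTP session open across the month-long run, which sidesteps the idle-timeout problems the comment mentions. A minimal sketch of the same pattern in isolation; the host and credentials below are placeholders, not values from settings.py:

from ftplib import FTP

def upload_once(filename):
    # fresh connection per upload so an idle session can't time out
    ftp = FTP('ftp.example.com')     # placeholder host
    ftp.login('user', 'password')    # placeholder credentials
    with open(filename, 'rb') as f:
        # CSV is a text format, so ASCII mode (storlines) applies;
        # storbinary is the fallback for anything non-text
        ftp.storlines('STOR ' + filename, f)
    ftp.quit()
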
8 changes: 4 additions & 4 deletions header.php
@@ -57,10 +57,10 @@
<meta property="og:description" content="A month-long performance documenting every unique server from which my computer attempts to download a file.">

<?php
-	if (performing) {
-		echo '<!-- auto-refresh while performance is running -->' . PHP_EOL;
-		echo '	<meta http-equiv="refresh" content="' . $refresh_rate . '">' . PHP_EOL;
-	}
+	// if (performing) {
+	// 	echo '<!-- auto-refresh while performance is running -->' . PHP_EOL;
+	// 	echo '	<meta http-equiv="refresh" content="' . $refresh_rate . '">' . PHP_EOL;
+	// }
?>

<!-- styles, Inconsolata, and FontAwesome -->
29 changes: 16 additions & 13 deletions index.php
@@ -9,25 +9,28 @@
$rows = array();
$by = array();

-$handle = fopen('AllServers.csv', 'r');
+# load file based on sorting option
+# by url
+if (isset($_GET['sort']) && $_GET['sort'] == 'url') {
+	$handle = fopen('AllServers_URL.csv', 'r');
+}
+
+# by count: oldest to newest
+else if (isset($_GET['sort']) && $_GET['sort'] == 'count') {
+	$handle = fopen('AllServers_ASC.csv', 'r');
+}
+
+# default: count, newest to oldest
+else {
+	$handle = fopen('AllServers_DESC.csv', 'r');
+}

+$row = fgetcsv($handle, 1024, ','); // skip header
while (($row = fgetcsv($handle, 1024, ',')) !== FALSE) {
-	if ($_GET['sort'] == 'url') {
-		$by[] = $row[5];
-	} else if ($_GET['sort'] == 'count') {
-		$by[] = $row[0];
-	}
	$rows[] = $row;
}
fclose($handle);

-# sort it (if no sorting listed, reverse newest-to-oldest)
-if (isset($_GET['sort'])) {
-	array_multisort($by, $rows);
-} else {
-	array_multisort($rows, SORT_DESC);
-}

# display
foreach($rows as $row) {
	$count = number_format($row[0]);
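The page no longer sorts per request: each ?sort= value maps to a CSV that sort_files() has already written, so PHP only has to open the right file. The same dispatch as a minimal Python sketch (pick_log is a hypothetical helper, not part of the repo):

def pick_log(sort=None):
    # map the query value to a pre-sorted file;
    # the default is count, newest to oldest
    files = {
        'url':   'AllServers_URL.csv',
        'count': 'AllServers_ASC.csv',
    }
    return files.get(sort, 'AllServers_DESC.csv')

Sorting once at write time keeps each page load to a single file read.
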
44 changes: 34 additions & 10 deletions tcpdump.py
@@ -16,7 +16,6 @@


# REQUIRED IMPORTS
-from settings import ftp_settings
from functions import *
import os, socket, sys, pwd, glob, grp, time
from threading import Thread
@@ -29,7 +28,12 @@
group_email_urls = True # combine 'perfora.net' urls?
col_width = 10 # width of the "count" column
upload_interval = 10 * 1000 # how often to re-upload index.php
-log_filename = 'AllServers.csv'
+skip_some_urls = True # ignore some URL/IPs?
+
+log_filename = 'AllServers_ASC.csv'
+
+# some IP/URLs to ignore
+skip_urls = [ '155.246.200.18.', 'jeff-thompson.home', 'jeff-thompsons-iphone', 'jeffs-ipad.local.' ]

# tlds that aren't really tlds
not_really_tlds = [ 'imap', 'imaps', 'ftp' ]
@@ -104,7 +108,7 @@
sys.stdout.flush()
previous_urls = set()
try:
-    with open('AllServers.csv') as f:
+    with open(log_filename) as f:
        for i, line in enumerate(f):
            if i == 0:
                continue
@@ -114,7 +118,7 @@
            previous_urls.add(data[3].strip())
            count = int(data[0])
except:
-    with open('AllServers.csv', 'a') as f:
+    with open(log_filename, 'a') as f:
        f.write('count,date,time,url,subdomain,domain,tld,rest,ip,country,region,city,zip_code,lat,lon' + '\n')
start_time = datetime.now().strftime('%B %d, %Y at %H:%M:%S')

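The try/except above means "resume counts from the existing log, or create it with a header on a fresh start", but a bare except: also swallows parse errors in an existing file. A sketch of the narrower form of the same resume-or-create logic; in Python 2 a missing file raises IOError:

try:
    open(log_filename).close()    # just probing for existence
except IOError:
    # no log yet: create it with the CSV header
    with open(log_filename, 'a') as f:
        f.write('count,date,time,url,subdomain,domain,tld,rest,ip,country,region,city,zip_code,lat,lon' + '\n')
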
@@ -161,24 +165,42 @@
                    compare_stored_urls, previous_urls)
    if url == None:
        continue
+
+    # skip internal Stevens IPs, etc?
+    # messy and nasty, I know...
+    if skip_some_urls:
+        skip = False
+        for u in skip_urls:
+            if u in url:
+                skip = True
+                break
+        if skip:
+            continue

    # all good? what'd we get?
    print FG_CYAN + r_align(str(count), col_width) + ' ' + BOLD + url + ALL_OFF

    # split URL into parts
    url_parts = split_url(url, not_really_tlds)

    # get location for address
-    location = get_location(url)
+    if '155.246.200.18.' in url:
+        location = ('', '', '', '', '', '', '')    # ignore internal Stevens IP
+    else:
+        location = get_location(url)

    # log to file
    log_data(count, url, url_parts, location)
    # update_html(count, url, url_parts)

-    # enough time passed? upload
+    # enough time passed? create sorted CSV files and upload
    if millis() > prev_millis + upload_interval:
+        sort_files(log_filename)
+
+        # upload all three files
        print '\n' + center_text('[ uploading... ]'),
        sys.stdout.flush()
        try:
-            t = Thread(target=upload, args=(log_filename, ftp_settings))
+            t = Thread(target=upload, args=())
            t.start()
            t.join()
            print CURSOR_UP + CLEAR_LINE + BOLD + center_text('[ uploading... DONE! ]') + ALL_OFF
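
One quirk worth noting: start() followed immediately by join() blocks until upload() returns, so the thread adds no concurrency here, and an exception raised inside it never reaches the surrounding try/except. If capture were meant to continue during an upload, the join would go; a sketch of that fire-and-forget variant (an assumption, not what the script does):

t = Thread(target=upload)
t.daemon = True    # a hung upload won't block program exit
t.start()          # no join(): capture keeps running meanwhile
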
@@ -196,12 +218,14 @@
print 'Final upload: ' + BOLD + FG_CYAN,
sys.stdout.flush()
try:
-    t = Thread(target=upload, args=(log_filename, ftp_settings))
+    sort_files(log_filename)
+    t = Thread(target=upload, args=())
    t.start()
    t.join()
    print 'Done' + ALL_OFF
-except:
+except Exception, e:
    print 'Error uploading' + ALL_OFF
+    # print str(e)

# close it
print 'Disconnecting FTP: ' + BOLD + FG_CYAN + 'Closed' + ALL_OFF
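The skip filter the code itself calls "messy and nasty" is a flag-and-break loop; any() expresses the same substring test in one line and short-circuits the same way. A minimal sketch, with a hypothetical captured URL:

skip_urls = [ '155.246.200.18.', 'jeff-thompson.home', 'jeff-thompsons-iphone', 'jeffs-ipad.local.' ]

url = 'jeff-thompsons-iphone.local.'    # hypothetical input
if any(u in url for u in skip_urls):
    print 'skipping ' + url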
