From e047d8e72aec8775898a34465e69f85ad0614d96 Mon Sep 17 00:00:00 2001 From: Arian Date: Wed, 28 Jun 2017 19:38:59 -0400 Subject: [PATCH 01/75] purevpn and ipvanish, getting ip addresses of vpns sometimes lead to an Error, try except added --- centinel/vpn/ipvanish.py | 8 ++++++-- centinel/vpn/purevpn.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/centinel/vpn/ipvanish.py b/centinel/vpn/ipvanish.py index 632360d..e5af504 100644 --- a/centinel/vpn/ipvanish.py +++ b/centinel/vpn/ipvanish.py @@ -57,8 +57,12 @@ def create_config_files(directory): for line in lines: if line.startswith('remote'): hostname = line.split(' ')[1] - ip = socket.gethostbyname(hostname) - break + # added because gethostbyname will fail on some hostnames + try: + ip = socket.gethostbyname(hostname) + break + except socket.gaierror: + continue if len(ip) > 0: new_path = os.path.join(directory, ip + '.ovpn') diff --git a/centinel/vpn/purevpn.py b/centinel/vpn/purevpn.py index 756a82f..a945ac2 100644 --- a/centinel/vpn/purevpn.py +++ b/centinel/vpn/purevpn.py @@ -56,8 +56,12 @@ def create_config_files(directory): for line in lines: if line.startswith('remote'): hostname = line.split(' ')[1] - ip = socket.gethostbyname(hostname) - break + # added because gethostbyname will fail on some hostnames + try: + ip = socket.gethostbyname(hostname) + break + except socket.gaierror: + continue if len(ip) > 0: new_path = os.path.join(directory, ip + '.ovpn') From 1565476cca85aa20fcbe0a91ce0da2f17d7b5d62 Mon Sep 17 00:00:00 2001 From: Arian Date: Fri, 30 Jun 2017 14:40:58 -0400 Subject: [PATCH 02/75] adding log info when gethostname fails --- centinel/vpn/ipvanish.py | 1 + centinel/vpn/purevpn.py | 1 + 2 files changed, 2 insertions(+) diff --git a/centinel/vpn/ipvanish.py b/centinel/vpn/ipvanish.py index e5af504..3ad8d66 100644 --- a/centinel/vpn/ipvanish.py +++ b/centinel/vpn/ipvanish.py @@ -62,6 +62,7 @@ def create_config_files(directory): ip = socket.gethostbyname(hostname) break except socket.gaierror: + logging.info("Hostname %s failed" %hostname) continue if len(ip) > 0: diff --git a/centinel/vpn/purevpn.py b/centinel/vpn/purevpn.py index a945ac2..79ec005 100644 --- a/centinel/vpn/purevpn.py +++ b/centinel/vpn/purevpn.py @@ -61,6 +61,7 @@ def create_config_files(directory): ip = socket.gethostbyname(hostname) break except socket.gaierror: + logging.info("Hostname %s failed" %hostname) continue if len(ip) > 0: From 4e5af1fb360bcd45e2789fcdfc076ee1b41cc5e0 Mon Sep 17 00:00:00 2001 From: Arian Date: Fri, 30 Jun 2017 18:43:18 -0400 Subject: [PATCH 03/75] HMA vpn structure fixed to fetch updated config file --- centinel/vpn/hma.py | 135 +++++++++++++++++++++++++++++--------------- 1 file changed, 91 insertions(+), 44 deletions(-) diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index ed19267..4f41483 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -5,56 +5,103 @@ import os import requests import sys +import shutil +import logging +import socket +import zipfile +import urllib2 - +def unzip(source_filename, dest_dir): + with zipfile.ZipFile(source_filename) as zf: + zf.extractall(dest_dir) + def create_config_files(directory): - """Create all available VPN configuration files in the given directory + """ + Initialize directory ready for vpn walker + :param directory: the path where you want this to happen + :return: + """ + config_zip_url = "https://hidemyass.com/vpn-config/vpn-configs.zip" + + if not os.path.exists(directory): + os.makedirs(directory) + + logging.info("Starting to download hma config file zip") + + zip_response = urllib2.urlopen(config_zip_url) + zip_content = zip_response.read() + zip_path = os.path.join(directory, '../vpn-configs.zip') - Note: I am basically just following along with what their script - client does + with open(zip_path,'w') as f: + f.write(zip_content) + logging.info("Extracting zip file") + unzip(zip_path, os.path.join(directory, '../')) + + ca_url = "https://vpn.hidemyass.com/vpn-config/keys/ca.crt" + hmauserauth_url = "https://vpn.hidemyass.com/vpn-config/keys/hmauser.crt" + hmauserkey_url = "https://vpn.hidemyass.com/vpn-config/keys/hmauser.key" + + ca_response = urllib2.urlopen(ca_url) + ca_content = ca_response.read() + with open(os.path.join(directory, '../ca.crt'), 'w') as f: + f.write(ca_content) - """ - # get the config file template - template_url = ("https://securenetconnection.com/vpnconfig/" - "openvpn-template.ovpn") - resp = requests.get(template_url) - resp.raise_for_status() - template = resp.content - - # get the available servers and create a config file for each server - server_url = ("https://securenetconnection.com/vpnconfig/" - "servers-cli.php") - resp = requests.get(server_url) - resp.raise_for_status() - servers = resp.content.split("\n") + response_userauth = urllib2.urlopen(hmauserauth_url) + userauth_content = response_userauth.read() + with open(os.path.join(directory, '../hmauser.key'), 'w') as f: + f.write(userauth_content) - if not os.path.exists(directory): - os.makedirs(directory) - with open(os.path.join(directory, "servers.txt"), 'w') as f: - f.write(resp.content) - - for server_line in servers: - if server_line.strip() == "": - continue - server_line = server_line.split("|") - try: - ip, desc, country, udp_sup, tcp_sup = server_line - except ValueError: - ip, desc, country, udp_sup, tcp_sup, no_rand = server_line - with open(os.path.join(directory, ip + ".ovpn"), 'w') as file_o: - file_o.write(template) - # create tcp if available, else udp - tcp_sup = tcp_sup.strip() - if tcp_sup: - port, proto = 443, "tcp" - else: - port, proto = 53, "udp" - file_o.write("remote {0} {1}\n".format(ip, port)) - file_o.write("proto {0}\n".format(proto)) - # add automatic dns server update - file_o.write("up /etc/openvpn/update-resolv-conf\n") - file_o.write("down /etc/openvpn/update-resolv-conf\n") + response_userkey = urllib2.urlopen(hmauserkey_url) + userkey_content = response_userkey.read() + with open(os.path.join(directory, '../hmauser.key'), 'w') as f: + f.write(userkey_content) + + # remove zip file + os.remove(zip_path) + + # move all config files to /vpns + orig_path = os.path.join(directory, '../TCP') + + server_country = {} + for filename in os.listdir(orig_path): + if filename.endswith('.ovpn'): + country = filename.split('.')[0] + file_path = os.path.join(orig_path, filename) + lines = [line.rstrip('\n') for line in open(file_path)] + + ip = "" + for line in lines: + if line.startswith('remote'): + hostname = line.split(' ')[1] + try: + ip = socket.gethostbyname(hostname) + break + except socket.gaierror: + logging.info("Hostname %s failed" %hostname) + continue + if len(ip) > 0: + new_path = os.path.join(directory, ip + '.ovpn') + shutil.copyfile(file_path, new_path) + server_country[ip] = country + + # remove extracted folder + shutil.rmtree(os.path.join(directory, '../TCP')) + shutil.rmtree(os.path.join(directory, '../UDP')) + + # add dns update options to each file + logging.info("Appending DNS update options") + for filename in os.listdir(directory): + file_path = os.path.join(directory, filename) + with open(file_path, 'a') as f: + f.write('\n') + f.write('up /etc/openvpn/update-resolv-conf\n') + f.write('down /etc/openvpn/update-resolv-conf\n') + + print os.path.join(directory, 'servers.txt'), len(server_country) + with open(os.path.join(directory, 'servers.txt'), 'w') as f: + for ip in server_country: + f.write('|'.join([ip, server_country[ip]]) + '\n') if __name__ == "__main__": if len(sys.argv) != 2: From b94f1cc87ef99dc44456dea51a4d333f0f774306 Mon Sep 17 00:00:00 2001 From: arian Date: Fri, 7 Jul 2017 14:36:57 -0700 Subject: [PATCH 04/75] fixed fetching the configs file for ipvanish --- centinel/vpn/ipvanish.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/centinel/vpn/ipvanish.py b/centinel/vpn/ipvanish.py index 3ad8d66..391b7ef 100644 --- a/centinel/vpn/ipvanish.py +++ b/centinel/vpn/ipvanish.py @@ -6,7 +6,7 @@ import sys import urllib import zipfile - +import urllib2 def unzip(source_filename, dest_dir): with zipfile.ZipFile(source_filename) as zf: @@ -26,13 +26,16 @@ def create_config_files(directory): os.makedirs(directory) logging.info("Starting to download IPVanish config file zip") - url_opener = urllib.URLopener() + zip_response = urllib2.urlopen(config_zip_url) + zip_content = zip_response.read() zip_path = os.path.join(directory, '../configs.zip') unzip_path = os.path.join(directory, '../unzipped') + if not os.path.exists(unzip_path): os.makedirs(unzip_path) + with open(zip_path, 'w') as f: + f.write(zip_content) - url_opener.retrieve(config_zip_url, zip_path) logging.info("Extracting zip file") unzip(zip_path, unzip_path) From c925cd429c2961151078d1527d50eeed5b5eba5a Mon Sep 17 00:00:00 2001 From: arian Date: Tue, 11 Jul 2017 12:56:29 -0700 Subject: [PATCH 05/75] log messages modified for all vpn proxies --- centinel/vpn/hma.py | 2 +- centinel/vpn/ipvanish.py | 2 +- centinel/vpn/purevpn.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index 4f41483..ba7f65d 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -77,7 +77,7 @@ def create_config_files(directory): ip = socket.gethostbyname(hostname) break except socket.gaierror: - logging.info("Hostname %s failed" %hostname) + logging.info("Failed to resolve %s" %hostname) continue if len(ip) > 0: new_path = os.path.join(directory, ip + '.ovpn') diff --git a/centinel/vpn/ipvanish.py b/centinel/vpn/ipvanish.py index 391b7ef..1d1083f 100644 --- a/centinel/vpn/ipvanish.py +++ b/centinel/vpn/ipvanish.py @@ -65,7 +65,7 @@ def create_config_files(directory): ip = socket.gethostbyname(hostname) break except socket.gaierror: - logging.info("Hostname %s failed" %hostname) + logging.info("Failed to resolve %s" %hostname) continue if len(ip) > 0: diff --git a/centinel/vpn/purevpn.py b/centinel/vpn/purevpn.py index 79ec005..69df519 100644 --- a/centinel/vpn/purevpn.py +++ b/centinel/vpn/purevpn.py @@ -61,7 +61,7 @@ def create_config_files(directory): ip = socket.gethostbyname(hostname) break except socket.gaierror: - logging.info("Hostname %s failed" %hostname) + logging.info("Failed to resolve %s" %hostname) continue if len(ip) > 0: From e63de0235133d0a35988023c3cdfbe3b37b002c4 Mon Sep 17 00:00:00 2001 From: Arian Date: Tue, 11 Jul 2017 13:08:00 -0700 Subject: [PATCH 06/75] log messages fixed --- centinel/vpn/hma.py | 1 + centinel/vpn/ipvanish.py | 1 + centinel/vpn/purevpn.py | 1 + 3 files changed, 3 insertions(+) diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index ba7f65d..2e8f773 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -59,6 +59,7 @@ def create_config_files(directory): # remove zip file os.remove(zip_path) + # move all config files to /vpns orig_path = os.path.join(directory, '../TCP') diff --git a/centinel/vpn/ipvanish.py b/centinel/vpn/ipvanish.py index 1d1083f..3e7b86e 100644 --- a/centinel/vpn/ipvanish.py +++ b/centinel/vpn/ipvanish.py @@ -41,6 +41,7 @@ def create_config_files(directory): # remove zip file os.remove(zip_path) + # copy ca and key to root path shutil.copyfile(os.path.join(unzip_path, 'ca.ipvanish.com.crt'), os.path.join(directory, '../ca.ipvanish.com.crt')) diff --git a/centinel/vpn/purevpn.py b/centinel/vpn/purevpn.py index 69df519..7fbbeca 100644 --- a/centinel/vpn/purevpn.py +++ b/centinel/vpn/purevpn.py @@ -33,6 +33,7 @@ def create_config_files(directory): # remove zip file os.remove(zip_path) + # copy ca and key to root path shutil.copyfile(os.path.join(directory, '../Linux OpenVPN Updated files', 'ca.crt'), os.path.join(directory, '../ca.crt')) From 871181f4900222766d6e0b0195739d1ca6a5e905 Mon Sep 17 00:00:00 2001 From: arian Date: Tue, 11 Jul 2017 18:21:13 -0700 Subject: [PATCH 07/75] Initial changes for sanity check module added --- centinel/vpn/cli.py | 56 ++++++++++- centinel/vpn/country_module.py | 102 +++++++++++++++++++++ centinel/vpn/hma.py | 2 +- centinel/vpn/ipvanish.py | 2 +- centinel/vpn/probe.py | 163 +++++++++++++++++++++++++++++++++ centinel/vpn/purevpn.py | 2 +- 6 files changed, 323 insertions(+), 4 deletions(-) create mode 100644 centinel/vpn/country_module.py create mode 100644 centinel/vpn/probe.py diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 80a1f3c..b93c44d 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -22,6 +22,9 @@ import centinel.vpn.purevpn as purevpn import centinel.vpn.vpngate as vpngate +import country_module as convertor +import probe as probe + PID_FILE = "/tmp/centinel.lock" @@ -241,10 +244,27 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # and use it to geolocate and fetch experiments before connecting # to VPN. vpn_address, extension = os.path.splitext(filename) + lines = [line.rstrip('\n') for line in open(centinel_config)] + + # get country for this vpn + country_in_config = "" + # reading the server.txt file in vpns folder + for line in lines: + if "country" in line: + (key, country_in_config) = line.split(': ') + country_in_config = country_in_config.replace('\"','').replace(',','') + + country = None try: + # we still might need some info from the Maximind query meta = centinel.backend.get_meta(config.params, vpn_address) + + # send country name to be converted to alpha2 code + if(len(country_in_config) > 2): + meta['country'] = convertor.country_to_a2(country_in_config) + # some vpn config files already contain the alpha2 code (length == 2) if 'country' in meta: country = meta['country'] except: @@ -257,6 +277,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # try setting the VPN info (IP and country) to get appropriate # experiemnts and input data. try: + logging.info("country is %s" % country) centinel.backend.set_vpn_info(config.params, vpn_address, country) except Exception as exp: logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) @@ -294,8 +315,28 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, json.dump(sched_info, f, indent=2) f.truncate() + # before starting the vpn do the sanity check + # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well + sanity_path = os.path.join(directory,'../sanitycheck') + if not os.path.exists(sanity_path): + os.makedirs(sanity_path) + + # fetch the list of RIPE anchors + anchors = probe.get_anchor_list(sanity_path) + + logging.info("Anchors list fetched") + # sending ping to the anchors + ping_result = probe.perform_probe(sanity_path, vpn_provider,vpn_provider,country,anchors) + + # have to do this sanity check if timestamp is a certain value, needs changing + timestamp = time.time() + ping_result['timestamp'] = timestamp + + #Shinyoung, you can add the sanity check module here + logging.info("%s: Starting VPN." % filename) + vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) @@ -393,8 +434,19 @@ def create_config_files(directory): :param directory: """ logging.info("Starting to create config files from openvpn files") - + server_country = {} vpn_dir = return_abs_path(directory, "vpns") + + # read servers.txt to find the country associated with the ip + with open (vpn_dir+ '/servers.txt') as server_file: + servers = server_file.readlines() + + for server_line in servers: + server_line = (server_line.split('|')) + server_country[server_line[0]] = server_line[1].replace('\n','') + + + conf_dir = return_abs_path(directory, "configs") os.mkdir(conf_dir) home_dirs = return_abs_path(directory, "home") @@ -424,6 +476,8 @@ def create_config_files(directory): configuration.params['server']['verify'] = True configuration.params['experiments']['tcpdump_params'] = ["-i", "tun0"] + configuration.params['country'] = server_country[filename.replace('.ovpn','')] + conf_file = os.path.join(conf_dir, filename) configuration.write_out_config(conf_file) diff --git a/centinel/vpn/country_module.py b/centinel/vpn/country_module.py new file mode 100644 index 0000000..6d6cfa4 --- /dev/null +++ b/centinel/vpn/country_module.py @@ -0,0 +1,102 @@ +import geonamescache +from difflib import SequenceMatcher +from geopy.geocoders import Nominatim +from string import digits +import logging + + +def find_in_states(us_states, country): + """ + Given a country check if it is actually a US State + :param us_states: a list of us states + :param country: the country that we want to get its alpha code + :return: + """ + for state in us_states: + if(country in state): + return 'US' + return None + +def manual_check(country): + """ + Some of the country names have spelling errors, + This function manually fixes those + + :param country: the country that we want to get its alpha code + :return the alpha2 country codes: + """ + if(country == "Angula"): + return 'AO' + if(country == "Bosnia"): + return 'BA' + if(country == "UAE"): + return 'AE' + if(country == "LosAngeles"): + return 'US' + if(country == "Virgin Islands (British)"): + return 'VI' + if(country == "Korea"): + return 'KR' + if(country == "PitcairnIslands"): + return 'PN' + if(country == "RepublicofSingapore"): + return 'SG' + if(country == "USA"): + return 'US' + if(country == "Coted`Ivoire"): + return 'CI' + if(country == "Congo"): + return 'CD' + if(country == "Palestine"): + return 'PS' + if(country == "RepublicofDjibouti"): + return 'DJ' + return None + +def country_to_a2(country): + """ + This function converts country names to their alpha2 codes + :param country: the country that we want to get its alpha code + :return the alpha2 country codes: + """ + gc = geonamescache.GeonamesCache() + countries = gc.get_countries() + us_states = gc.get_us_states_by_names() + + # creating a dict between country name and alpha2 codes + countries_dict = {} + for item in countries: + countries_dict[countries[item]['name']] = item + countries_dict['United States of America'] = 'US' + countries_dict['Deutschland'] = 'DE' + countries_dict['UK'] = 'GB' + + if ',' in country: + country = country.split(',')[0] + iso2 = countries_dict.get(country) + if (iso2 != None): + return iso2 + else: + iso2 = find_in_states(us_states,country) + if(iso2 == None): + iso2 = manual_check(country) + if(iso2 == None): + for known_country in countries_dict: + if(SequenceMatcher(None, country, known_country).ratio()>0.70): + iso2 = countries_dict.get(known_country) + return iso2 + else: + iso2 = None + if (iso2 == None): + try: + # for removing numbers from country/city names + country = country.translate(None, digits) + geolocator = Nominatim() + location = geolocator.geocode(country) + location = (location.address).split(',') + iso2 = (countries_dict.get(location[len(location)-1].strip())) + except: + # no mapping found + return None + + return iso2 diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index 2e8f773..6442e73 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -78,7 +78,7 @@ def create_config_files(directory): ip = socket.gethostbyname(hostname) break except socket.gaierror: - logging.info("Failed to resolve %s" %hostname) + logging.exception("Failed to resolve %s" %hostname) continue if len(ip) > 0: new_path = os.path.join(directory, ip + '.ovpn') diff --git a/centinel/vpn/ipvanish.py b/centinel/vpn/ipvanish.py index 3e7b86e..fdf4021 100644 --- a/centinel/vpn/ipvanish.py +++ b/centinel/vpn/ipvanish.py @@ -66,7 +66,7 @@ def create_config_files(directory): ip = socket.gethostbyname(hostname) break except socket.gaierror: - logging.info("Failed to resolve %s" %hostname) + logging.exception("Failed to resolve %s" %hostname) continue if len(ip) > 0: diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py new file mode 100644 index 0000000..cc802da --- /dev/null +++ b/centinel/vpn/probe.py @@ -0,0 +1,163 @@ +import os +import sys +import csv +import logging +import pickle +import time +import subprocess +import multiprocessing as mp +import numpy as np +from urllib import urlopen +from bs4 import BeautifulSoup +from geopy.distance import vincenty +from geopy.geocoders import Nominatim + +#-d vpn_providers/ipvanish/ -u auth_file --crt-file ca.ipvanish.com.crt + + +def get_anchor_list(directory): + """Get a list of all RIPE Anchors + :return: anchors [hostname]:dict() "probe" + "city" + "country" + "ip" + "asn" + """ + logging.info("Starting to fetch RIPE anchors") + landmark_path = os.path.join(directory,"landmarks_list.pickle") + try: + with open(landmark_path, "r") as f: + anchors = pickle.load(f) + return anchors + + except: + try: + # sys.stderr.write("Retrieving landmark list...") + logging.info("landmarks_list pickle is not available, starting to fetch it") + anchors = dict() + try: + ## you can get "RIPE_anchor_list.csv" by crawling RIPE first page of anchors (table) + ripe_path = os.path.join(directory,'RIPE_anchor_list.csv') + with open(ripe_path, "r") as f: + reader = csv.reader(f) + for row in reader: + if row[0] == 'Hostname': + continue + anchors[row[0]] = {'probe': row[1], 'city': row[3], 'country': row[4], 'ip': str(), 'asn': str()} + except: + logging.info("RIPE_anchor list is not available, starting to fetch it") + # parsing ripe anchor website + reload(sys) + sys.setdefaultencoding('utf-8') + + + html = urlopen('https://atlas.ripe.net/anchors/list/').read() + soup = BeautifulSoup(html,"html.parser") + ripe_records = (soup.find_all('tr')) + all_records = [] + for record in ripe_records: + columns = record.find_all('td') + rec = [] + for column in columns: + soup_column = BeautifulSoup(str(column),"html.parser") + rec.append('\"' + soup_column.td.text.strip().replace('\n','') + '\"') + if(len(rec) > 0): + all_records.append(rec) + ripe_path = os.path.join(directory,'RIPE_anchor_list.csv') + with open(ripe_path,'w') as f: + f.write('Hostname,Probe,Company,City,Country,Capabilities\n') + for sublist in all_records: + for item in sublist: + f.write(item + ',') + f.write('\n') + logging.info("Creating RIPE_anchor list") + with open(ripe_path, "r") as f: + reader = csv.reader(f) + for row in reader: + if row[0] == 'Hostname': + continue + anchors[row[0]] = {'probe': row[1], 'city': row[3], 'country': row[4], 'ip': str(), 'asn': str()} + + + logging.info("Finished extracting RIPE anchors from file.") + count = 0 + for key, value in anchors.iteritems(): + count += 1 + logging.info("Retrieving anchor %s, %s/%s" % (value['probe'], count, len(anchors))) + url = 'https://atlas.ripe.net/probes/' + str(value['probe']) + '/#!tab-network/' + try: + html = urlopen(url).read() + soup = BeautifulSoup(html,"html.parser") + for script in soup(["script", "style"]): + script.extract() + text = soup.get_text() + lines = (line.strip() for line in text.splitlines()) + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + text = '\n'.join(chunk for chunk in chunks if chunk) + s_text = text.encode('utf-8').split('\n') + index = s_text.index("Internet Address") + anchors[key]['ip'] = str(s_text[index+1]) + anchors[key]['asn'] = str(s_text[s_text.index("ASN")+1]) + except: + logging.exception("Connection reset by Peer on %s" % (url)) + with open(landmark_path, "w") as f: + pickle.dump(anchors, f) + return anchors + except (TypeError, ValueError, UnicodeError) as e: + sys.exit(1) + + + +def send_ping(param): + this_host, ip = param + logging.info("Pinging (%s, %s)" % (this_host, ip)) + times = dict() + ping = subprocess.Popen(["ping", "-c", "10", "-i", "0.3", ip], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, error = ping.communicate() + output = out.split('\n') + this_delays = list() + for i in output: + try: + this_delays.append(i.split('time=')[1]) + except: + continue + times[this_host] = this_delays + return times + + +def perform_probe(sanity_directory,vpn_provider, target_name, target_cnt, anchors): + """Send ping 10 times to landmarks and choose the minimum + :return: times [host] = list() + """ + logging.info("Start Probing") + + pickle_path = os.path.join(sanity_directory,'pickle') + if not os.path.exists(pickle_path): + os.makedirs(pickle_path) + + times = dict() + s_time = time.time() + results = [] + process_num = 6 + pool = mp.Pool(processes=process_num) + results.append(pool.map(send_ping, [(this_host, Param['ip']) for this_host, Param in anchors.iteritems()])) + for output in results[0]: + for key, value in output.iteritems(): + if key not in times: + times[key] = list() + for this in value: + times[key].append(this) + e_time = time.time() + logging.info(e_time - s_time) + pool.close() + pool.join() + final = {target_name: dict()} + final[target_name]['pings'] = times + final[target_name]['cnt'] = target_cnt + logging.info("Creating pickle file") + with open(pickle_path + '/' + target_name + '-' + target_cnt + '.pickle', 'w') as f: + pickle.dump(final, f) + logging.info("Pickle file successfully created.") + return final diff --git a/centinel/vpn/purevpn.py b/centinel/vpn/purevpn.py index 7fbbeca..39ca374 100644 --- a/centinel/vpn/purevpn.py +++ b/centinel/vpn/purevpn.py @@ -62,7 +62,7 @@ def create_config_files(directory): ip = socket.gethostbyname(hostname) break except socket.gaierror: - logging.info("Failed to resolve %s" %hostname) + logging.exception("Failed to resolve %s" %hostname) continue if len(ip) > 0: From 347fa654a4d21fa294a41513619f27f3670018d4 Mon Sep 17 00:00:00 2001 From: arian Date: Thu, 13 Jul 2017 16:20:43 -0700 Subject: [PATCH 08/75] sanity check must be done before the reduce asn list --- centinel/vpn/cli.py | 105 ++++++++++++++++++++++++++++++++++++------ centinel/vpn/probe.py | 4 +- 2 files changed, 94 insertions(+), 15 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index b93c44d..3b33908 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -156,8 +156,84 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, config.parse_config(centinel_config) vp_ip = os.path.splitext(filename)[0] + vpn_config = os.path.join(vpn_dir, filename) + centinel_config = os.path.join(conf_dir, filename) + + # assuming that each VPN config file has a name like: + # [ip-address].ovpn, we can extract IP address from filename + # and use it to geolocate and fetch experiments before connecting + # to VPN. + vpn_address, extension = os.path.splitext(filename) + lines = [line.rstrip('\n') for line in open(centinel_config)] + + # get country for this vpn + country_in_config = "" + # reading the server.txt file in vpns folder + for line in lines: + if "country" in line: + (key, country_in_config) = line.split(': ') + country_in_config = country_in_config.replace('\"','').replace(',','') + + + country = None + + + try: meta = centinel.backend.get_meta(config.params, vp_ip) + # send country name to be converted to alpha2 code + if(len(country_in_config) > 2): + meta['country'] = convertor.country_to_a2(country_in_config) + # some vpn config files already contain the alpha2 code (length == 2) + if 'country' in meta: + country = meta['country'] + + # try setting the VPN info (IP and country) to get appropriate + # experiemnts and input data. + try: + logging.info("country is %s" % country) + centinel.backend.set_vpn_info(config.params, vpn_address, country) + except Exception as exp: + logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) + + # sanity check + # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well + sanity_path = os.path.join(directory,'../sanitycheck') + if not os.path.exists(sanity_path): + os.makedirs(sanity_path) + + # fetch the list of RIPE anchors + anchors = probe.get_anchor_list(sanity_path) + + logging.info("Anchors list fetched") + logging.info("%s: Starting VPN." % filename) + + + vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, + crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) + + vpn.start() + if not vpn.started: + logging.error("%s: Failed to start VPN!" % filename) + vpn.stop() + time.sleep(5) + continue + + + # sending ping to the anchors + ping_result = probe.perform_probe(sanity_path, vpn_provider,vpn_provider,country,anchors) + + # have to do this sanity check if timestamp is a certain value, needs changing + timestamp = time.time() + ping_result['timestamp'] = timestamp + + #Shinyoung, you can add the sanity check module here + + logging.info("%s: Stopping VPN." % filename) + vpn.stop() + time.sleep(5) + + if 'country' in meta and 'as_number' in meta \ and meta['country'] and meta['as_number']: country_asn = '_'.join([meta['country'], meta['as_number']]) @@ -317,23 +393,14 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # before starting the vpn do the sanity check # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well - sanity_path = os.path.join(directory,'../sanitycheck') - if not os.path.exists(sanity_path): - os.makedirs(sanity_path) + # sanity_path = os.path.join(directory,'../sanitycheck') + # if not os.path.exists(sanity_path): + # os.makedirs(sanity_path) # fetch the list of RIPE anchors - anchors = probe.get_anchor_list(sanity_path) - - logging.info("Anchors list fetched") - # sending ping to the anchors - ping_result = probe.perform_probe(sanity_path, vpn_provider,vpn_provider,country,anchors) - - # have to do this sanity check if timestamp is a certain value, needs changing - timestamp = time.time() - ping_result['timestamp'] = timestamp - - #Shinyoung, you can add the sanity check module here + # anchors = probe.get_anchor_list(sanity_path) + # logging.info("Anchors list fetched") logging.info("%s: Starting VPN." % filename) @@ -347,6 +414,16 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, time.sleep(5) continue + + # sending ping to the anchors + # ping_result = probe.perform_probe(sanity_path, vpn_provider,vpn_provider,country,anchors) + + # have to do this sanity check if timestamp is a certain value, needs changing + # timestamp = time.time() + # ping_result['timestamp'] = timestamp + + + logging.info("%s: Running Centinel." % filename) try: client = centinel.client.Client(config.params, vpn_provider) diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index cc802da..0072165 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -157,7 +157,9 @@ def perform_probe(sanity_directory,vpn_provider, target_name, target_cnt, anchor final[target_name]['pings'] = times final[target_name]['cnt'] = target_cnt logging.info("Creating pickle file") - with open(pickle_path + '/' + target_name + '-' + target_cnt + '.pickle', 'w') as f: + # putting time as a part of the filename + time_unique = time.time() + with open(pickle_path + '/' + target_name + '-' + target_cnt + '-' + str(time_unique) + '.pickle', 'w') as f: pickle.dump(final, f) logging.info("Pickle file successfully created.") return final From 3164ade5fbde52d3a779d63f50d6db8b136f4b52 Mon Sep 17 00:00:00 2001 From: shicho Date: Sat, 15 Jul 2017 16:44:28 -0400 Subject: [PATCH 09/75] add arg for sanity check --- centinel/vpn/cli.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 3b33908..120232d 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -41,6 +41,10 @@ def parse_args(): parser.add_argument('--key-direction', '-k', dest='key_direction', default=None, help=("Key direction for tls auth, must specify when " "tls-auth is used")) + parser.add_argument('--geo-sanity-check', dest='sanity_check', + action="store_true", default=False, + help=("Run sanity check module to remove lying VP servers " + "from our vantage point list")) parser.add_argument('--reduce-endpoint', dest='reduce_vp', action="store_true", default=False, help="Reduce the number of vantage points by only connect to " @@ -89,7 +93,7 @@ def parse_args(): def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, - exclude_list, shuffle_lists, vm_num, vm_index, reduce_vp): + exclude_list, shuffle_lists, vm_num, vm_index, reduce_vp, sanity_check): """ For each VPN, check if there are experiments and scan with it if necessary @@ -145,6 +149,10 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, else: logging.warning("Cannot determine VPN provider!") + # geolocation sanity check + if sanity_check: + pass + # reduce size of list if reduce_vp is true if reduce_vp: logging.info("Reducing list size. Original size: %d" % len(conf_list)) @@ -662,7 +670,8 @@ def _run(): crt_file=args.crt_file, tls_auth=args.tls_auth, key_direction=args.key_direction, exclude_list=args.exclude_list, shuffle_lists=args.shuffle_lists, vm_num=args.vm_num, - vm_index=args.vm_index, reduce_vp=args.reduce_vp) + vm_index=args.vm_index, reduce_vp=args.reduce_vp, + sanity_check=args.sanity_check) if __name__ == "__main__": run() From eb26a355c210ed4e062431943a57f0640d02fd74 Mon Sep 17 00:00:00 2001 From: shicho Date: Sat, 15 Jul 2017 23:51:00 -0400 Subject: [PATCH 10/75] move code under sanity check arg --- centinel/vpn/cli.py | 159 ++++++++++++++++++++++++-------------------- 1 file changed, 87 insertions(+), 72 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 120232d..12fe9f6 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -151,96 +151,110 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # geolocation sanity check if sanity_check: - pass - - # reduce size of list if reduce_vp is true - if reduce_vp: - logging.info("Reducing list size. Original size: %d" % len(conf_list)) - country_asn_set = set() - reduced_conf_set = set() + sanity_checked_set = set() for filename in conf_list: centinel_config = os.path.join(conf_dir, filename) config = centinel.config.Configuration() config.parse_config(centinel_config) vp_ip = os.path.splitext(filename)[0] - vpn_config = os.path.join(vpn_dir, filename) - centinel_config = os.path.join(conf_dir, filename) - - # assuming that each VPN config file has a name like: - # [ip-address].ovpn, we can extract IP address from filename - # and use it to geolocate and fetch experiments before connecting - # to VPN. - vpn_address, extension = os.path.splitext(filename) - lines = [line.rstrip('\n') for line in open(centinel_config)] - - # get country for this vpn - country_in_config = "" - # reading the server.txt file in vpns folder - for line in lines: - if "country" in line: - (key, country_in_config) = line.split(': ') - country_in_config = country_in_config.replace('\"','').replace(',','') - - - country = None - - - + vpn_config = os.path.join(vpn_dir, filename) + centinel_config = os.path.join(conf_dir, filename) + # assuming that each VPN config file has a name like: + # [ip-address].ovpn, we can extract IP address from filename + # and use it to geolocate and fetch experiments before connecting + # to VPN. + vpn_address, extension = os.path.splitext(filename) + lines = [line.rstrip('\n') for line in open(centinel_config)] + + # get country for this vpn + country_in_config = "" + # reading the server.txt file in vpns folder + for line in lines: + if "country" in line: + (key, country_in_config) = line.split(': ') + country_in_config = country_in_config.replace('\"', '').replace(',', '') + + country = None try: meta = centinel.backend.get_meta(config.params, vp_ip) - # send country name to be converted to alpha2 code - if(len(country_in_config) > 2): - meta['country'] = convertor.country_to_a2(country_in_config) - # some vpn config files already contain the alpha2 code (length == 2) + # send country name to be converted to alpha2 code + if (len(country_in_config) > 2): + meta['country'] = convertor.country_to_a2(country_in_config) + # some vpn config files already contain the alpha2 code (length == 2) if 'country' in meta: country = meta['country'] - - # try setting the VPN info (IP and country) to get appropriate - # experiemnts and input data. - try: - logging.info("country is %s" % country) - centinel.backend.set_vpn_info(config.params, vpn_address, country) - except Exception as exp: - logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) - - # sanity check - # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well - sanity_path = os.path.join(directory,'../sanitycheck') - if not os.path.exists(sanity_path): - os.makedirs(sanity_path) - - # fetch the list of RIPE anchors - anchors = probe.get_anchor_list(sanity_path) - logging.info("Anchors list fetched") - logging.info("%s: Starting VPN." % filename) + # try setting the VPN info (IP and country) to get appropriate + # experiemnts and input data. + try: + logging.info("country is %s" % country) + centinel.backend.set_vpn_info(config.params, vpn_address, country) + except Exception as exp: + logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) + + # sanity check + # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well + sanity_path = os.path.join(directory, '../sanitycheck') + if not os.path.exists(sanity_path): + os.makedirs(sanity_path) + + # fetch the list of RIPE anchors + anchors = probe.get_anchor_list(sanity_path) + + logging.info("Anchors list fetched") + logging.info("%s: Starting VPN." % filename) + + vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, + crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) + + vpn.start() + if not vpn.started: + logging.error("%s: Failed to start VPN!" % filename) + vpn.stop() + time.sleep(5) + continue + + # sending ping to the anchors + ping_result = probe.perform_probe(sanity_path, vpn_provider, vpn_provider, country, anchors) + # have to do this sanity check if timestamp is a certain value, needs changing + timestamp = time.time() + ping_result['timestamp'] = timestamp - vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, - crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) + # Shinyoung, you can add the sanity check module here - vpn.start() - if not vpn.started: - logging.error("%s: Failed to start VPN!" % filename) - vpn.stop() - time.sleep(5) - continue + logging.info("%s: Stopping VPN." % filename) + vpn.stop() + time.sleep(5) + except: + logging.warning("Failed to geolocate %s" % vp_ip) - # sending ping to the anchors - ping_result = probe.perform_probe(sanity_path, vpn_provider,vpn_provider,country,anchors) - - # have to do this sanity check if timestamp is a certain value, needs changing - timestamp = time.time() - ping_result['timestamp'] = timestamp - #Shinyoung, you can add the sanity check module here - - logging.info("%s: Stopping VPN." % filename) - vpn.stop() - time.sleep(5) + # reduce size of list if reduce_vp is true + if reduce_vp: + logging.info("Reducing list size. Original size: %d" % len(conf_list)) + country_asn_set = set() + reduced_conf_set = set() + for filename in conf_list: + centinel_config = os.path.join(conf_dir, filename) + config = centinel.config.Configuration() + config.parse_config(centinel_config) + vp_ip = os.path.splitext(filename)[0] + + # get country for this vpn + country_in_config = "" + # reading the server.txt file in vpns folder + for line in lines: + if "country" in line: + (key, country_in_config) = line.split(': ') + country_in_config = country_in_config.replace('\"', '').replace(',', '') + try: + meta = centinel.backend.get_meta(config.params, vp_ip) + if (len(country_in_config) > 2): + meta['country'] = convertor.country_to_a2(country_in_config) if 'country' in meta and 'as_number' in meta \ and meta['country'] and meta['as_number']: @@ -258,6 +272,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, conf_list = list(reduced_conf_set) logging.info("List size reduced. New size: %d" % len(conf_list)) + # sort file list to ensure the same filename sequence in each VM conf_list = sorted(conf_list) From 845995c549968adc0406844bcd1b31d22820e3be Mon Sep 17 00:00:00 2001 From: shicho Date: Sun, 16 Jul 2017 00:01:05 -0400 Subject: [PATCH 11/75] add geo-sanity check module --- centinel/vpn/geosanity.py | 272 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 centinel/vpn/geosanity.py diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py new file mode 100644 index 0000000..c20d20e --- /dev/null +++ b/centinel/vpn/geosanity.py @@ -0,0 +1,272 @@ +""" Class for sanity check for vpn location""" +import datetime +import logging +import pickle +from geopandas import * +from geopy.distance import vincenty +from geopy.geocoders import Nominatim +import pyproj +import functools +from shapely.ops import transform as sh_transform +from shapely.geometry import Point, Polygon, box as Box + + + +def sanity_check(proxy_id, iso_cnt, ping_results, anchors_gps, map): + """ + :param proxy_id:(str) + :param iso_cnt:(str) + :param ping_results:(dict) {anchors: [pings]) + :param anchors_gps:(dict) {anchors: (lat, long)} + :param map:(dataframe) + :return: + """ + checker = Checker(proxy_id, iso_cnt) + # points = checker.check_ping_results(results, anchors_gps) + points = checker.check_ping_results(ping_results, anchors_gps) + if len(points) == 0: + logging.debug("No valid ping results for %s" % proxy_id) + return -1 + circles = checker.get_anchors_region(points) + proxy_region = checker.get_vpn_region(map) + if proxy_region.empty: + logging.debug("Fail to get proxy region: %s" % iso_cnt) + return -1 + results = checker.check_overlap(proxy_region, circles) + return checker.is_valid(results) + # time_now = str(datetime.datetime.now()).split(' ')[0] + # with open("results_" + proxy_id + "_" + time_now + ".pickle", "w") as f: + # pickle.dump(results, f) + +def load_map_from_shapefile(shapefile): + """ + Load all countries from shapefile + (e.g., shapefile = 'map/ne_10m_admin_0_countries.shp') + """ + temp = GeoDataFrame.from_file(shapefile) + map = temp[['ISO_A2', 'NAME', 'SUBREGION', 'geometry']] + return map + + +def get_gps_of_anchors(anchors): + """ + Get gps of all anchors + Note: geopy library has a limitation for query in a certain time. + While testing, better to store the query results so that we can reduce the number of query. + """ + anchors_gps = dict() + count = 0 + try: + with open("gps_of_anchors.pickle", "r") as f: + anchors_gps = pickle.load(f) + except: + for anchor, item in anchors.iteritems(): + count += 1 + logging.debug( + "Retrieving... %s(%s/%s): %s" % (anchor, count, len(anchors), item['city'] + ' ' + item['country'])) + geolocator = Nominatim() + location = geolocator.geocode(item['city'] + ' ' + item['country']) + if location == None: + location = geolocator.geocode(item['country']) + if location == None: + logging.debug("Fail to read gps of %s" %anchor) + anchors_gps[anchor] = (location.latitude, location.longitude) + with open("gps_of_anchors.pickle", "w") as f: + pickle.dump(anchors_gps, f) + return anchors_gps + + +class Checker: + def __init__(self, proxy_id, iso): + self.proxy_id = proxy_id + self.iso = iso + self.gps = self._get_gps_of_proxy() + + def get_vpn_region(self, map): + """ + Get a region of given iso country + """ + region = map[map.ISO_A2 == self.iso].geometry + if region.empty: + logging.info("Fail to read country region: %s" % self.iso) + return None + df = geopandas.GeoDataFrame({'geometry': region}) + df.crs = {'init': 'epsg:4326'} + return df + + def _get_gps_of_proxy(self): + """ Return vp's gps + """ + vpn_gps = tuple() + try: + geolocator = Nominatim() + location = geolocator.geocode(self.iso) + if location == None: + logging.debug("Fail to get gps of location %s" %self.iso) + return None + vpn_gps = (location.latitude, location.longitude) + except: + logging.debug("Fail to get gps of proxy") + return vpn_gps + + def _disk(self, x, y, radius): + return Point(x, y).buffer(radius) + + def get_anchors_region(self, points): + """ Get anchors region + (referred from zack's paper & code Todo: add LICENSE?) + https://github.com/zackw/active-geolocator + Note that pyproj takes distances in meters & lon/lat order. + + """ + wgs_proj = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") + ## Sort based on distance. if there is no distance, then sort with min delay + if points[0][0] != 0: + points.sort(key=lambda tup: tup[0]) #closest to the proxy + else: + points.sort(key=lambda tup: tup[1]) #order of min time + circles = list() + count = 0 + for dist, min_delay, lat, lon, radi in points: + count += 1 + # create azimuthal equidistant projector for each anchors + aeqd = pyproj.Proj(proj='aeqd', ellps='WGS84', datum='WGS84', + lat_0=lat, lon_0=lon) + try: + # draw a disk (center = long/lat, radius) + disk = sh_transform( + functools.partial(pyproj.transform, aeqd, wgs_proj), + self._disk(0, 0, radi * 1000)) # km ---> m + north, south, west, east = 90., -90., -180, 180 + boundary = np.array(disk.boundary) + i = 0 + while i < boundary.shape[0] - 1: + if abs(boundary[i + 1, 0] - boundary[i, 0]) > 180: + pole = south if boundary[i, 1] < 0 else north + west = west if boundary[i, 0] < 0 else east + east = east if boundary[i, 0] < 0 else west + boundary = np.insert(boundary, i + 1, [ + [west, boundary[i, 1]], + [west, pole], + [east, pole], + [east, boundary[i + 1, 1]] + ], axis=0) + i += 5 + else: + i += 1 + disk = Polygon(boundary).buffer(0) + + # In the case of the generated disk is too large + origin = Point(lon, lat) + if not disk.contains(origin): + df1 = geopandas.GeoDataFrame({'geometry': [Box(-180., -90., 180., 90.)]}) + df2 = geopandas.GeoDataFrame({'geometry': [disk]}) + df3 = geopandas.overlay(df1, df2, how='difference') + disk = df3.geometry[0] + assert disk.is_valid + assert disk.contains(origin) + circles.append((lat, lon, radi, disk)) + except Exception as e: + logging.debug("Fail to get a circle %s" %self.proxy_id) + return circles + + def check_overlap(self, proxy_region, circles): + """ Check overlap between proxy region and anchors' region. + If there is an overlap check how much they are overlapped, + otherwise, check how far the distance is from a proxy. + :return results(list): if True: the percentage of overlapped area to a country + False: the distance (km) between a country and expected range + """ + results = list() + for lat, lon, radi, this_circle in circles: + df_anchor = geopandas.GeoDataFrame({'geometry': [this_circle]}) + overlap = geopandas.overlay(proxy_region, df_anchor, how="intersection") + if overlap.empty: + aeqd = pyproj.Proj(proj='aeqd', ellps='WGS84', datum='WGS84', + lat_0=lat, lon_0=lon) + wgs_proj = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") ##4326 -- 2d + ## country + azimu_cnt = sh_transform( + functools.partial(pyproj.transform, wgs_proj, aeqd), + proxy_region.geometry.item()) + ## min_distance + azimu_anchor = self._disk(0, 0, radi * 1000) #km ---> m + gap = azimu_anchor.distance(azimu_cnt) / 1000 #km + results.append((False, gap)) + else: + ## area + area_cnt = proxy_region['geometry'].area#/10**6 #km/sqr + area_cnt = sum(area_cnt.tolist()) + area_overlap = overlap['geometry'].area#/10**6 #km/sqr + area_overlap = sum(area_overlap.tolist()) + stack = area_overlap/area_cnt + results.append((True, stack)) + return results + + def _calculate_radius(self, time_ms): + """ + (the number got from zack's paper & code) + Network cable's propagation speed: around 2/3c = 199,862 km/s + + processing & queueing delay --> maximum speed: 153,000 km/s (0.5104 c) + """ + C = 299792 # km/s + speed = np.multiply(0.5104, C) + second = time_ms/float(1000) + dist_km = np.multiply(speed, second) + return dist_km + + def check_ping_results(self, results, anchors_gps): + """ + Because the equator circumference is 40,074.275km. + the range cannot be farther than 20,037.135km. + If there are anomalies pings (<3.0ms or >130.0ms), remove. + Otherwise, return latitude and longitude of vps, radius derived from ping delay. + Return points(list): (lat, lon, radius) + Todo: points (distance, lat, long, radius) + """ + points = list() + for anchor, pings in results.iteritems(): + valid_pings = list() + for this in pings: + # remove anomalies + ping = float(this.split(' ')[0]) + owtt = ping/2.0 + if float(owtt) >= 3.0 and float(owtt) <= 130.0: + valid_pings.append(owtt) + if len(valid_pings) == 0: + logging.debug("no valid pings results of anchor %s" %anchor) + continue + min_delay = min(valid_pings) + radi = self._calculate_radius(min_delay) + if anchor not in anchors_gps: + logging.debug("no gps for anchor %s" %anchor) + continue + # calculate the distance(km) between proxy and anchor + distance = 0 + if len(self.gps) != 0: + distance = vincenty(anchors_gps[anchor], self.gps).km + points.append((distance, min_delay, anchors_gps[anchor][0], anchors_gps[anchor][1], radi)) + if len(points) == 0: + logging.debug("no valid pings results") + return [] + return points + + def is_valid(self, results): + """ + Need reasonable threshold to answer the validation of location + For now, we say it is valid if 90% of 30 nearest anchors are True + """ + total = 0 + count_valid = 0 + limit = 30 + for valid, aux in results: + total += 1 + if valid: + count_valid += 1 + if total == limit: + break + frac = count_valid/float(limit) + if frac >= 0.9: + return True + else: + return False \ No newline at end of file From 1dcc04576f5d302a99f11ce4aea666074ba089d8 Mon Sep 17 00:00:00 2001 From: shicho Date: Sun, 16 Jul 2017 00:53:33 -0400 Subject: [PATCH 12/75] combine sanity module and cli.py --- centinel/vpn/cli.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 12fe9f6..e3fec83 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -24,6 +24,7 @@ import country_module as convertor import probe as probe +import geosanity as san PID_FILE = "/tmp/centinel.lock" @@ -152,6 +153,10 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # geolocation sanity check if sanity_check: sanity_checked_set = set() + # get a world map from shapefile + shapefile = 'map/ne_10m_admin_0_countries.shp' + map = san.load_map_from_shapefile(shapefile) + for filename in conf_list: centinel_config = os.path.join(conf_dir, filename) config = centinel.config.Configuration() @@ -201,6 +206,8 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # fetch the list of RIPE anchors anchors = probe.get_anchor_list(sanity_path) + # get anchor's gps + anchors_gps = san.get_gps_of_anchors(anchors) logging.info("Anchors list fetched") logging.info("%s: Starting VPN." % filename) @@ -223,6 +230,9 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, ping_result['timestamp'] = timestamp # Shinyoung, you can add the sanity check module here + tag = sanity_check(vp_ip, country, ping_result[vpn_provider]['pings'], anchors_gps, map) + if tag: + sanity_checked_set.add(filename) logging.info("%s: Stopping VPN." % filename) vpn.stop() @@ -231,6 +241,9 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, except: logging.warning("Failed to geolocate %s" % vp_ip) + conf_list = list(sanity_checked_set) + logging.info("List size after sanity check. New size: %d" %len(conf_list)) + # reduce size of list if reduce_vp is true if reduce_vp: From 739b509fbdb7c7714c43466cff22cd2b2cc8dd3e Mon Sep 17 00:00:00 2001 From: shicho Date: Sun, 16 Jul 2017 01:27:59 -0400 Subject: [PATCH 13/75] get anchors before for loop --- centinel/vpn/cli.py | 24 ++++++++++++------------ centinel/vpn/geosanity.py | 10 ++++++---- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index e3fec83..0cbe0ba 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -153,8 +153,19 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # geolocation sanity check if sanity_check: sanity_checked_set = set() + # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well + sanity_path = os.path.join(directory, '../sanitycheck') + if not os.path.exists(sanity_path): + os.makedirs(sanity_path) + # fetch the list of RIPE anchors + anchors = probe.get_anchor_list(sanity_path) + logging.info("Anchors list fetched") + # get anchor's gps + anchors_gps = san.get_gps_of_anchors(anchors, sanity_path) + logging.info("Anchors gps fetched") # get a world map from shapefile - shapefile = 'map/ne_10m_admin_0_countries.shp' + # Todo: download a shapefile from server + shapefile = os.path.join(sanity_path, '/ne_10m_admin_0_countries.shp') map = san.load_map_from_shapefile(shapefile) for filename in conf_list: @@ -199,17 +210,6 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) # sanity check - # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well - sanity_path = os.path.join(directory, '../sanitycheck') - if not os.path.exists(sanity_path): - os.makedirs(sanity_path) - - # fetch the list of RIPE anchors - anchors = probe.get_anchor_list(sanity_path) - # get anchor's gps - anchors_gps = san.get_gps_of_anchors(anchors) - - logging.info("Anchors list fetched") logging.info("%s: Starting VPN." % filename) vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index c20d20e..774fcc1 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -1,6 +1,7 @@ """ Class for sanity check for vpn location""" import datetime import logging +import os import pickle from geopandas import * from geopy.distance import vincenty @@ -48,12 +49,13 @@ def load_map_from_shapefile(shapefile): return map -def get_gps_of_anchors(anchors): +def get_gps_of_anchors(anchors, directory): """ Get gps of all anchors Note: geopy library has a limitation for query in a certain time. While testing, better to store the query results so that we can reduce the number of query. """ + logging.info("Starting to get RIPE anchors' gps") anchors_gps = dict() count = 0 try: @@ -62,16 +64,16 @@ def get_gps_of_anchors(anchors): except: for anchor, item in anchors.iteritems(): count += 1 - logging.debug( + logging.info( "Retrieving... %s(%s/%s): %s" % (anchor, count, len(anchors), item['city'] + ' ' + item['country'])) geolocator = Nominatim() location = geolocator.geocode(item['city'] + ' ' + item['country']) if location == None: location = geolocator.geocode(item['country']) if location == None: - logging.debug("Fail to read gps of %s" %anchor) + logging.info("Fail to read gps of %s" %anchor) anchors_gps[anchor] = (location.latitude, location.longitude) - with open("gps_of_anchors.pickle", "w") as f: + with open(os.path.join(directory, "gps_of_anchors.pickle"), "w") as f: pickle.dump(anchors_gps, f) return anchors_gps From 4c2071919527256ab72c299097a89db1c08f7f39 Mon Sep 17 00:00:00 2001 From: shicho Date: Sun, 16 Jul 2017 03:14:33 -0400 Subject: [PATCH 14/75] some update --- centinel/vpn/cli.py | 10 +++++----- centinel/vpn/geosanity.py | 32 ++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 0cbe0ba..d1cb669 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -165,7 +165,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, logging.info("Anchors gps fetched") # get a world map from shapefile # Todo: download a shapefile from server - shapefile = os.path.join(sanity_path, '/ne_10m_admin_0_countries.shp') + shapefile = sanity_path + "/ne_10m_admin_0_countries.shp" map = san.load_map_from_shapefile(shapefile) for filename in conf_list: @@ -223,14 +223,14 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, continue # sending ping to the anchors - ping_result = probe.perform_probe(sanity_path, vpn_provider, vpn_provider, country, anchors) + ping_result = probe.perform_probe(sanity_path, vpn_provider, vp_ip, country, anchors) # have to do this sanity check if timestamp is a certain value, needs changing timestamp = time.time() - ping_result['timestamp'] = timestamp + ping_result['timestamp'] = timestamp #Todo: # Shinyoung, you can add the sanity check module here - tag = sanity_check(vp_ip, country, ping_result[vpn_provider]['pings'], anchors_gps, map) + tag = san.sanity_check(vp_ip, country, ping_result[vp_ip]['pings'], anchors_gps, map, sanity_path) if tag: sanity_checked_set.add(filename) @@ -239,7 +239,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, time.sleep(5) except: - logging.warning("Failed to geolocate %s" % vp_ip) + logging.warning("Failed to sanity check %s" % vp_ip) conf_list = list(sanity_checked_set) logging.info("List size after sanity check. New size: %d" %len(conf_list)) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index 774fcc1..12b8e23 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -2,6 +2,7 @@ import datetime import logging import os +import time import pickle from geopandas import * from geopy.distance import vincenty @@ -13,7 +14,7 @@ -def sanity_check(proxy_id, iso_cnt, ping_results, anchors_gps, map): +def sanity_check(proxy_id, iso_cnt, ping_results, anchors_gps, map, directory): """ :param proxy_id:(str) :param iso_cnt:(str) @@ -22,16 +23,16 @@ def sanity_check(proxy_id, iso_cnt, ping_results, anchors_gps, map): :param map:(dataframe) :return: """ - checker = Checker(proxy_id, iso_cnt) + checker = Checker(proxy_id, iso_cnt, directory) # points = checker.check_ping_results(results, anchors_gps) points = checker.check_ping_results(ping_results, anchors_gps) if len(points) == 0: - logging.debug("No valid ping results for %s" % proxy_id) + logging.info("No valid ping results for %s" % proxy_id) return -1 circles = checker.get_anchors_region(points) proxy_region = checker.get_vpn_region(map) if proxy_region.empty: - logging.debug("Fail to get proxy region: %s" % iso_cnt) + logging.info("Fail to get proxy region: %s" % iso_cnt) return -1 results = checker.check_overlap(proxy_region, circles) return checker.is_valid(results) @@ -44,6 +45,7 @@ def load_map_from_shapefile(shapefile): Load all countries from shapefile (e.g., shapefile = 'map/ne_10m_admin_0_countries.shp') """ + logging.info("Loading a shapefile for the world map") temp = GeoDataFrame.from_file(shapefile) map = temp[['ISO_A2', 'NAME', 'SUBREGION', 'geometry']] return map @@ -79,15 +81,17 @@ def get_gps_of_anchors(anchors, directory): class Checker: - def __init__(self, proxy_id, iso): + def __init__(self, proxy_id, iso, path): self.proxy_id = proxy_id self.iso = iso self.gps = self._get_gps_of_proxy() + self.path = path def get_vpn_region(self, map): """ Get a region of given iso country """ + logging.info("Getting vpn region from a map") region = map[map.ISO_A2 == self.iso].geometry if region.empty: logging.info("Fail to read country region: %s" % self.iso) @@ -104,11 +108,11 @@ def _get_gps_of_proxy(self): geolocator = Nominatim() location = geolocator.geocode(self.iso) if location == None: - logging.debug("Fail to get gps of location %s" %self.iso) + logging.info("Fail to get gps of location %s" %self.iso) return None vpn_gps = (location.latitude, location.longitude) except: - logging.debug("Fail to get gps of proxy") + logging.info("Fail to get gps of proxy") return vpn_gps def _disk(self, x, y, radius): @@ -121,6 +125,7 @@ def get_anchors_region(self, points): Note that pyproj takes distances in meters & lon/lat order. """ + logging.info("Starting to draw anchors region") wgs_proj = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") ## Sort based on distance. if there is no distance, then sort with min delay if points[0][0] != 0: @@ -179,6 +184,7 @@ def check_overlap(self, proxy_region, circles): :return results(list): if True: the percentage of overlapped area to a country False: the distance (km) between a country and expected range """ + logging.info("Starting to check overlap") results = list() for lat, lon, radi, this_circle in circles: df_anchor = geopandas.GeoDataFrame({'geometry': [this_circle]}) @@ -203,6 +209,15 @@ def check_overlap(self, proxy_region, circles): area_overlap = sum(area_overlap.tolist()) stack = area_overlap/area_cnt results.append((True, stack)) + + pickle_path = os.path.join(self.path, 'sanity') + if not os.path.exists(pickle_path): + os.makedirs(pickle_path) + time_unique = time.time() + with open(pickle_path + '/' + self.proxy_id + '-' + self.iso + '-' + str(time_unique) + '.pickle', 'w') as f: + pickle.dump(results, f) + logging.info("Pickle file successfully created.") + return results def _calculate_radius(self, time_ms): @@ -224,8 +239,8 @@ def check_ping_results(self, results, anchors_gps): If there are anomalies pings (<3.0ms or >130.0ms), remove. Otherwise, return latitude and longitude of vps, radius derived from ping delay. Return points(list): (lat, lon, radius) - Todo: points (distance, lat, long, radius) """ + logging.info("Starting checking ping results") points = list() for anchor, pings in results.iteritems(): valid_pings = list() @@ -258,6 +273,7 @@ def is_valid(self, results): Need reasonable threshold to answer the validation of location For now, we say it is valid if 90% of 30 nearest anchors are True """ + logging.info("checking validation") total = 0 count_valid = 0 limit = 30 From f8857c9c1ec8cbe54165c68cb824cef754a2b7ec Mon Sep 17 00:00:00 2001 From: shicho Date: Mon, 17 Jul 2017 08:58:35 -0400 Subject: [PATCH 15/75] decouple ping and sanity check --- centinel/vpn/cli.py | 32 ++++++++++++++++++++++---------- centinel/vpn/probe.py | 2 +- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index d1cb669..d172f06 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -12,6 +12,7 @@ import signal import dns.resolver import json +import pickle import centinel.backend import centinel.client @@ -193,19 +194,20 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, country = None try: - meta = centinel.backend.get_meta(config.params, vp_ip) + # meta = centinel.backend.get_meta(config.params, vp_ip) # send country name to be converted to alpha2 code if (len(country_in_config) > 2): - meta['country'] = convertor.country_to_a2(country_in_config) + # meta['country'] = convertor.country_to_a2(country_in_config) + country = convertor.country_to_a2(country_in_config) # some vpn config files already contain the alpha2 code (length == 2) - if 'country' in meta: - country = meta['country'] + # if 'country' in meta: + # country = meta['country'] # try setting the VPN info (IP and country) to get appropriate # experiemnts and input data. try: logging.info("country is %s" % country) - centinel.backend.set_vpn_info(config.params, vpn_address, country) + # centinel.backend.set_vpn_info(config.params, vpn_address, country) except Exception as exp: logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) @@ -227,12 +229,8 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # have to do this sanity check if timestamp is a certain value, needs changing timestamp = time.time() - ping_result['timestamp'] = timestamp #Todo: + ping_result['timestamp'] = timestamp #Todo: ?? - # Shinyoung, you can add the sanity check module here - tag = san.sanity_check(vp_ip, country, ping_result[vp_ip]['pings'], anchors_gps, map, sanity_path) - if tag: - sanity_checked_set.add(filename) logging.info("%s: Stopping VPN." % filename) vpn.stop() @@ -241,9 +239,23 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, except: logging.warning("Failed to sanity check %s" % vp_ip) + # sanity check + pickle_path = os.path.join(sanity_path, 'pings') + file_lists = os.listdir(pickle_path) + if file_lists: + for this_file in file_lists: + vp_ip = this_file.split('-')[0] + country = this_file.split('-')[1] + with open(os.path.join(pickle_path, this_file), 'r') as f: + ping_result = pickle.load(f) + tag = san.sanity_check(vp_ip, country, ping_result[vp_ip]['pings'], anchors_gps, map, + sanity_path) + if tag: + sanity_checked_set.add(this_file) conf_list = list(sanity_checked_set) logging.info("List size after sanity check. New size: %d" %len(conf_list)) + # return 0 # reduce size of list if reduce_vp is true if reduce_vp: diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index 0072165..fd74fff 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -133,7 +133,7 @@ def perform_probe(sanity_directory,vpn_provider, target_name, target_cnt, anchor """ logging.info("Start Probing") - pickle_path = os.path.join(sanity_directory,'pickle') + pickle_path = os.path.join(sanity_directory,'pings') if not os.path.exists(pickle_path): os.makedirs(pickle_path) From 0ffedcc9d8af3bb7248b3404752fcc8b3dd286b3 Mon Sep 17 00:00:00 2001 From: shicho Date: Tue, 18 Jul 2017 20:07:40 -0400 Subject: [PATCH 16/75] store sanity results as a file --- centinel/vpn/cli.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index d172f06..2a423ec 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -153,7 +153,6 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # geolocation sanity check if sanity_check: - sanity_checked_set = set() # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well sanity_path = os.path.join(directory, '../sanitycheck') if not os.path.exists(sanity_path): @@ -240,6 +239,8 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, logging.warning("Failed to sanity check %s" % vp_ip) # sanity check + failed_sanity_check = set() + sanity_checked_set = set() pickle_path = os.path.join(sanity_path, 'pings') file_lists = os.listdir(pickle_path) if file_lists: @@ -252,6 +253,17 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, sanity_path) if tag: sanity_checked_set.add(this_file) + else: + failed_sanity_check.add(this_file) + with open(os.path.join(sanity_path, 'results_of_sanity_check.txt'), 'w') as f: + f.write("Pass\n") + for this_file in sanity_checked_set: + vp_ip = this_file.split('-')[0] + f.write(vp_ip + '\n') + f.write("Fail\n") + for this_file in failed_sanity_check: + vp_ip = this_file.split('-')[0] + f.write(vp_ip + '\n') conf_list = list(sanity_checked_set) logging.info("List size after sanity check. New size: %d" %len(conf_list)) From f8d0182ae148e176beb475883cf667ca7f3256a9 Mon Sep 17 00:00:00 2001 From: shicho Date: Tue, 18 Jul 2017 21:17:13 -0400 Subject: [PATCH 17/75] add logging info & LICENSE --- LICENSE | 21 +++++++++++++++++++++ centinel/vpn/cli.py | 43 +++++++++++++++++++++++++------------------ 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/LICENSE b/LICENSE index 35cb1c5..0685e7a 100644 --- a/LICENSE +++ b/LICENSE @@ -21,3 +21,24 @@ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +Copyright (c) 2016 Zack Weinberg + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 2a423ec..1fc6cfe 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -28,7 +28,9 @@ import geosanity as san PID_FILE = "/tmp/centinel.lock" - +log_file = 'log_vpn.log' +logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s", + filename=log_file ) def parse_args(): parser = argparse.ArgumentParser() @@ -193,20 +195,20 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, country = None try: - # meta = centinel.backend.get_meta(config.params, vp_ip) + meta = centinel.backend.get_meta(config.params, vp_ip) # send country name to be converted to alpha2 code if (len(country_in_config) > 2): - # meta['country'] = convertor.country_to_a2(country_in_config) - country = convertor.country_to_a2(country_in_config) + meta['country'] = convertor.country_to_a2(country_in_config) + # country = convertor.country_to_a2(country_in_config) # some vpn config files already contain the alpha2 code (length == 2) - # if 'country' in meta: - # country = meta['country'] + if 'country' in meta: + country = meta['country'] # try setting the VPN info (IP and country) to get appropriate # experiemnts and input data. try: logging.info("country is %s" % country) - # centinel.backend.set_vpn_info(config.params, vpn_address, country) + centinel.backend.set_vpn_info(config.params, vpn_address, country) except Exception as exp: logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) @@ -236,25 +238,30 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, time.sleep(5) except: - logging.warning("Failed to sanity check %s" % vp_ip) + logging.warning("Failed to send pings from %s" % vp_ip) # sanity check failed_sanity_check = set() sanity_checked_set = set() + vp_ip = 'unknown' pickle_path = os.path.join(sanity_path, 'pings') file_lists = os.listdir(pickle_path) if file_lists: for this_file in file_lists: - vp_ip = this_file.split('-')[0] - country = this_file.split('-')[1] - with open(os.path.join(pickle_path, this_file), 'r') as f: - ping_result = pickle.load(f) - tag = san.sanity_check(vp_ip, country, ping_result[vp_ip]['pings'], anchors_gps, map, - sanity_path) - if tag: - sanity_checked_set.add(this_file) - else: - failed_sanity_check.add(this_file) + try: + vp_ip = this_file.split('-')[0] + country = this_file.split('-')[1] + with open(os.path.join(pickle_path, this_file), 'r') as f: + ping_result = pickle.load(f) + tag = san.sanity_check(vp_ip, country, ping_result[vp_ip]['pings'], anchors_gps, map, + sanity_path) + if tag: + sanity_checked_set.add(this_file) + else: + failed_sanity_check.add(this_file) + except: + logging.warning("Failed to sanity check %s" % vp_ip) + with open(os.path.join(sanity_path, 'results_of_sanity_check.txt'), 'w') as f: f.write("Pass\n") for this_file in sanity_checked_set: From 579b557b0d997e449c34badbb9640e1a2f4291de Mon Sep 17 00:00:00 2001 From: shicho Date: Tue, 18 Jul 2017 21:55:00 -0400 Subject: [PATCH 18/75] add timestamp for RIPE anchors monthly update --- centinel/vpn/cli.py | 3 +- centinel/vpn/geosanity.py | 19 +++-- centinel/vpn/probe.py | 158 +++++++++++++++++++------------------- 3 files changed, 96 insertions(+), 84 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 1fc6cfe..aa74999 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -262,7 +262,8 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, except: logging.warning("Failed to sanity check %s" % vp_ip) - with open(os.path.join(sanity_path, 'results_of_sanity_check.txt'), 'w') as f: + time_unique = time.time() + with open(os.path.join(sanity_path, 'results-of-sanity-check'+str(time_unique)+'.txt'), 'w') as f: f.write("Pass\n") for this_file in sanity_checked_set: vp_ip = this_file.split('-')[0] diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index 12b8e23..ebce67a 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -1,9 +1,9 @@ """ Class for sanity check for vpn location""" -import datetime import logging import os import time import pickle +from datetime import timedelta from geopandas import * from geopy.distance import vincenty from geopy.geocoders import Nominatim @@ -61,10 +61,19 @@ def get_gps_of_anchors(anchors, directory): anchors_gps = dict() count = 0 try: - with open("gps_of_anchors.pickle", "r") as f: + with open(os.path.join(directory, "gps_of_anchors.pickle"), "r") as f: anchors_gps = pickle.load(f) + if 'timestamp' in anchors_gps: + if time.time() - anchors_gps['timestamp'] <= timedelta(days=30): + return anchors_gps + else: + return anchors except: - for anchor, item in anchors.iteritems(): + logging.info("gps_of_anchors.pickle is not existed") + for anchor, item in anchors.iteritems(): + if anchor == 'timestamp': + anchors_gps['timestamp'] = item + else: count += 1 logging.info( "Retrieving... %s(%s/%s): %s" % (anchor, count, len(anchors), item['city'] + ' ' + item['country'])) @@ -75,8 +84,8 @@ def get_gps_of_anchors(anchors, directory): if location == None: logging.info("Fail to read gps of %s" %anchor) anchors_gps[anchor] = (location.latitude, location.longitude) - with open(os.path.join(directory, "gps_of_anchors.pickle"), "w") as f: - pickle.dump(anchors_gps, f) + with open(os.path.join(directory, "gps_of_anchors.pickle"), "w") as f: + pickle.dump(anchors_gps, f) return anchors_gps diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index fd74fff..b67b90c 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -6,11 +6,9 @@ import time import subprocess import multiprocessing as mp -import numpy as np +from datetime import timedelta from urllib import urlopen from bs4 import BeautifulSoup -from geopy.distance import vincenty -from geopy.geocoders import Nominatim #-d vpn_providers/ipvanish/ -u auth_file --crt-file ca.ipvanish.com.crt @@ -28,85 +26,89 @@ def get_anchor_list(directory): try: with open(landmark_path, "r") as f: anchors = pickle.load(f) - return anchors - + if 'timestamp' in anchors: + if time.time() - anchors['timestamp'] <= timedelta(days=30): + return anchors + else: + try: os.remove(os.path.join(directory, 'RIPE_anchor_list.csv')) + except: pass + else: return anchors except: + logging.info("landmarks_list.pickle is not existed") + try: + # sys.stderr.write("Retrieving landmark list...") + logging.info("landmarks_list pickle is not available, starting to fetch it") + anchors = dict() + timestamp = time.time() + anchors['timestamp'] = timestamp try: - # sys.stderr.write("Retrieving landmark list...") - logging.info("landmarks_list pickle is not available, starting to fetch it") - anchors = dict() - try: - ## you can get "RIPE_anchor_list.csv" by crawling RIPE first page of anchors (table) - ripe_path = os.path.join(directory,'RIPE_anchor_list.csv') - with open(ripe_path, "r") as f: - reader = csv.reader(f) - for row in reader: - if row[0] == 'Hostname': - continue - anchors[row[0]] = {'probe': row[1], 'city': row[3], 'country': row[4], 'ip': str(), 'asn': str()} - except: - logging.info("RIPE_anchor list is not available, starting to fetch it") - # parsing ripe anchor website - reload(sys) - sys.setdefaultencoding('utf-8') + ## you can get "RIPE_anchor_list.csv" by crawling RIPE first page of anchors (table) + ripe_path = os.path.join(directory,'RIPE_anchor_list.csv') + with open(ripe_path, "r") as f: + reader = csv.reader(f) + for row in reader: + if row[0] == 'Hostname': + continue + anchors[row[0]] = {'probe': row[1], 'city': row[3], 'country': row[4], 'ip': str(), 'asn': str()} + except: + logging.info("RIPE_anchor list is not available, starting to fetch it") + # parsing ripe anchor website + reload(sys) + sys.setdefaultencoding('utf-8') + html = urlopen('https://atlas.ripe.net/anchors/list/').read() + soup = BeautifulSoup(html,"html.parser") + ripe_records = (soup.find_all('tr')) + all_records = [] + for record in ripe_records: + columns = record.find_all('td') + rec = [] + for column in columns: + soup_column = BeautifulSoup(str(column),"html.parser") + rec.append('\"' + soup_column.td.text.strip().replace('\n','') + '\"') + if(len(rec) > 0): + all_records.append(rec) + ripe_path = os.path.join(directory,'RIPE_anchor_list.csv') + with open(ripe_path,'w') as f: + f.write('Hostname,Probe,Company,City,Country,Capabilities\n') + for sublist in all_records: + for item in sublist: + f.write(item + ',') + f.write('\n') + logging.info("Creating RIPE_anchor list") + with open(ripe_path, "r") as f: + reader = csv.reader(f) + for row in reader: + if row[0] == 'Hostname': + continue + anchors[row[0]] = {'probe': row[1], 'city': row[3], 'country': row[4], 'ip': str(), 'asn': str()} - html = urlopen('https://atlas.ripe.net/anchors/list/').read() + logging.info("Finished extracting RIPE anchors from file.") + count = 0 + for key, value in anchors.iteritems(): + count += 1 + logging.info("Retrieving anchor %s, %s/%s" % (value['probe'], count, len(anchors))) + url = 'https://atlas.ripe.net/probes/' + str(value['probe']) + '/#!tab-network/' + try: + html = urlopen(url).read() soup = BeautifulSoup(html,"html.parser") - ripe_records = (soup.find_all('tr')) - all_records = [] - for record in ripe_records: - columns = record.find_all('td') - rec = [] - for column in columns: - soup_column = BeautifulSoup(str(column),"html.parser") - rec.append('\"' + soup_column.td.text.strip().replace('\n','') + '\"') - if(len(rec) > 0): - all_records.append(rec) - ripe_path = os.path.join(directory,'RIPE_anchor_list.csv') - with open(ripe_path,'w') as f: - f.write('Hostname,Probe,Company,City,Country,Capabilities\n') - for sublist in all_records: - for item in sublist: - f.write(item + ',') - f.write('\n') - logging.info("Creating RIPE_anchor list") - with open(ripe_path, "r") as f: - reader = csv.reader(f) - for row in reader: - if row[0] == 'Hostname': - continue - anchors[row[0]] = {'probe': row[1], 'city': row[3], 'country': row[4], 'ip': str(), 'asn': str()} - - - logging.info("Finished extracting RIPE anchors from file.") - count = 0 - for key, value in anchors.iteritems(): - count += 1 - logging.info("Retrieving anchor %s, %s/%s" % (value['probe'], count, len(anchors))) - url = 'https://atlas.ripe.net/probes/' + str(value['probe']) + '/#!tab-network/' - try: - html = urlopen(url).read() - soup = BeautifulSoup(html,"html.parser") - for script in soup(["script", "style"]): - script.extract() - text = soup.get_text() - lines = (line.strip() for line in text.splitlines()) - chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) - text = '\n'.join(chunk for chunk in chunks if chunk) - s_text = text.encode('utf-8').split('\n') - index = s_text.index("Internet Address") - anchors[key]['ip'] = str(s_text[index+1]) - anchors[key]['asn'] = str(s_text[s_text.index("ASN")+1]) - except: - logging.exception("Connection reset by Peer on %s" % (url)) - with open(landmark_path, "w") as f: - pickle.dump(anchors, f) - return anchors - except (TypeError, ValueError, UnicodeError) as e: - sys.exit(1) - - + for script in soup(["script", "style"]): + script.extract() + text = soup.get_text() + lines = (line.strip() for line in text.splitlines()) + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + text = '\n'.join(chunk for chunk in chunks if chunk) + s_text = text.encode('utf-8').split('\n') + index = s_text.index("Internet Address") + anchors[key]['ip'] = str(s_text[index+1]) + anchors[key]['asn'] = str(s_text[s_text.index("ASN")+1]) + except: + logging.exception("Connection reset by Peer on %s" % (url)) + with open(landmark_path, "w") as f: + pickle.dump(anchors, f) + return anchors + except (TypeError, ValueError, UnicodeError) as e: + sys.exit(1) def send_ping(param): this_host, ip = param @@ -127,7 +129,7 @@ def send_ping(param): return times -def perform_probe(sanity_directory,vpn_provider, target_name, target_cnt, anchors): +def perform_probe(sanity_directory, vpn_provider, target_name, target_cnt, anchors): """Send ping 10 times to landmarks and choose the minimum :return: times [host] = list() """ From ba2dfa2c48d09b95bff0455d810d8b564d332128 Mon Sep 17 00:00:00 2001 From: shicho Date: Wed, 19 Jul 2017 11:46:18 -0400 Subject: [PATCH 19/75] fix to deal with -1 in sanity check --- centinel/vpn/cli.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index aa74999..7bacbf2 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -169,7 +169,6 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # Todo: download a shapefile from server shapefile = sanity_path + "/ne_10m_admin_0_countries.shp" map = san.load_map_from_shapefile(shapefile) - for filename in conf_list: centinel_config = os.path.join(conf_dir, filename) config = centinel.config.Configuration() @@ -232,7 +231,6 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, timestamp = time.time() ping_result['timestamp'] = timestamp #Todo: ?? - logging.info("%s: Stopping VPN." % filename) vpn.stop() time.sleep(5) @@ -243,6 +241,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # sanity check failed_sanity_check = set() sanity_checked_set = set() + error_sanity_check = set() vp_ip = 'unknown' pickle_path = os.path.join(sanity_path, 'pings') file_lists = os.listdir(pickle_path) @@ -255,7 +254,9 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, ping_result = pickle.load(f) tag = san.sanity_check(vp_ip, country, ping_result[vp_ip]['pings'], anchors_gps, map, sanity_path) - if tag: + if tag == -1: + error_sanity_check.add(this_file) + elif tag == True: sanity_checked_set.add(this_file) else: failed_sanity_check.add(this_file) @@ -272,6 +273,10 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, for this_file in failed_sanity_check: vp_ip = this_file.split('-')[0] f.write(vp_ip + '\n') + f.write("Error\n") + for this_file in error_sanity_check: + vp_ip = this_file.split('-')[0] + f.write(vp_ip + '\n') conf_list = list(sanity_checked_set) logging.info("List size after sanity check. New size: %d" %len(conf_list)) From 35924fe409b63600c8e78720cb9c7c37868f96a8 Mon Sep 17 00:00:00 2001 From: shicho Date: Thu, 20 Jul 2017 20:08:35 -0400 Subject: [PATCH 20/75] fix minor errors --- centinel/vpn/cli.py | 27 ++++++++++++--------------- centinel/vpn/geosanity.py | 18 +++++++----------- centinel/vpn/probe.py | 29 +++++++++++++++++++++-------- 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 7bacbf2..c14ae1d 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -28,9 +28,9 @@ import geosanity as san PID_FILE = "/tmp/centinel.lock" -log_file = 'log_vpn.log' -logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s", - filename=log_file ) +# log_file = 'log_vpn.log' +# logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s", +# filename=log_file ) def parse_args(): parser = argparse.ArgumentParser() @@ -255,28 +255,25 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, tag = san.sanity_check(vp_ip, country, ping_result[vp_ip]['pings'], anchors_gps, map, sanity_path) if tag == -1: - error_sanity_check.add(this_file) + error_sanity_check.add(vp_ip + '-' + country) elif tag == True: - sanity_checked_set.add(this_file) + sanity_checked_set.add(vp_ip + '-' + country) else: - failed_sanity_check.add(this_file) + failed_sanity_check.add(vp_ip + '-' + country) except: logging.warning("Failed to sanity check %s" % vp_ip) time_unique = time.time() with open(os.path.join(sanity_path, 'results-of-sanity-check'+str(time_unique)+'.txt'), 'w') as f: f.write("Pass\n") - for this_file in sanity_checked_set: - vp_ip = this_file.split('-')[0] - f.write(vp_ip + '\n') + for server in sanity_checked_set: + f.write(server + '\n') f.write("Fail\n") - for this_file in failed_sanity_check: - vp_ip = this_file.split('-')[0] - f.write(vp_ip + '\n') + for server in failed_sanity_check: + f.write(server + '\n') f.write("Error\n") - for this_file in error_sanity_check: - vp_ip = this_file.split('-')[0] - f.write(vp_ip + '\n') + for server in error_sanity_check: + f.write(server + '\n') conf_list = list(sanity_checked_set) logging.info("List size after sanity check. New size: %d" %len(conf_list)) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index ebce67a..e6f9777 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -29,6 +29,9 @@ def sanity_check(proxy_id, iso_cnt, ping_results, anchors_gps, map, directory): if len(points) == 0: logging.info("No valid ping results for %s" % proxy_id) return -1 + logging.info("[%s] has %s valid anchors' results (valid pings) from %s anchors" + %(proxy_id, len(points), len(ping_results))) + circles = checker.get_anchors_region(points) proxy_region = checker.get_vpn_region(map) if proxy_region.empty: @@ -63,17 +66,10 @@ def get_gps_of_anchors(anchors, directory): try: with open(os.path.join(directory, "gps_of_anchors.pickle"), "r") as f: anchors_gps = pickle.load(f) - if 'timestamp' in anchors_gps: - if time.time() - anchors_gps['timestamp'] <= timedelta(days=30): - return anchors_gps - else: - return anchors except: logging.info("gps_of_anchors.pickle is not existed") - for anchor, item in anchors.iteritems(): - if anchor == 'timestamp': - anchors_gps['timestamp'] = item - else: + + for anchor, item in anchors.iteritems(): count += 1 logging.info( "Retrieving... %s(%s/%s): %s" % (anchor, count, len(anchors), item['city'] + ' ' + item['country'])) @@ -84,8 +80,8 @@ def get_gps_of_anchors(anchors, directory): if location == None: logging.info("Fail to read gps of %s" %anchor) anchors_gps[anchor] = (location.latitude, location.longitude) - with open(os.path.join(directory, "gps_of_anchors.pickle"), "w") as f: - pickle.dump(anchors_gps, f) + with open(os.path.join(directory, "gps_of_anchors.pickle"), "w") as f: + pickle.dump(anchors_gps, f) return anchors_gps diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index b67b90c..181dd7e 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -26,12 +26,24 @@ def get_anchor_list(directory): try: with open(landmark_path, "r") as f: anchors = pickle.load(f) - if 'timestamp' in anchors: - if time.time() - anchors['timestamp'] <= timedelta(days=30): - return anchors + if 'timestamp' in anchors.keys(): + if (time.time() - anchors['timestamp']) <= timedelta(days=30).total_seconds(): + return anchors['anchors'] else: - try: os.remove(os.path.join(directory, 'RIPE_anchor_list.csv')) - except: pass + logging.info("List of anchors is expired.") + try: + file_path = os.path.join(directory, 'landmarks_list.pickle') + if os.path.isfile(file_path): + os.remove(file_path) + file_path = os.path.join(directory, 'RIPE_anchor_list.csv') + if os.path.isfile(file_path): + os.remove(file_path) + file_path = os.path.join(directory, 'gps_of_anchors.pickle') + if os.path.isfile(file_path): + os.remove(file_path) + except: + logging.info("Fail to delete expired files of anchors.") + pass else: return anchors except: logging.info("landmarks_list.pickle is not existed") @@ -39,8 +51,6 @@ def get_anchor_list(directory): # sys.stderr.write("Retrieving landmark list...") logging.info("landmarks_list pickle is not available, starting to fetch it") anchors = dict() - timestamp = time.time() - anchors['timestamp'] = timestamp try: ## you can get "RIPE_anchor_list.csv" by crawling RIPE first page of anchors (table) ripe_path = os.path.join(directory,'RIPE_anchor_list.csv') @@ -85,6 +95,7 @@ def get_anchor_list(directory): logging.info("Finished extracting RIPE anchors from file.") count = 0 + for key, value in anchors.iteritems(): count += 1 logging.info("Retrieving anchor %s, %s/%s" % (value['probe'], count, len(anchors))) @@ -104,8 +115,10 @@ def get_anchor_list(directory): anchors[key]['asn'] = str(s_text[s_text.index("ASN")+1]) except: logging.exception("Connection reset by Peer on %s" % (url)) + timestamp = time.time() + ripe_anchors = {'timestamp': timestamp, 'anchors': anchors} with open(landmark_path, "w") as f: - pickle.dump(anchors, f) + pickle.dump(ripe_anchors, f) return anchors except (TypeError, ValueError, UnicodeError) as e: sys.exit(1) From 84d5630e5a0461cb4e58c8fa6648bd42d5516141 Mon Sep 17 00:00:00 2001 From: arian Date: Tue, 1 Aug 2017 22:09:28 -0400 Subject: [PATCH 21/75] added downloader for map file --- centinel/vpn/cli.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index c14ae1d..34369ae 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -13,6 +13,11 @@ import dns.resolver import json import pickle +import urllib2 +import zipfile +import requests +import StringIO + import centinel.backend import centinel.client @@ -96,6 +101,7 @@ def parse_args(): return parser.parse_args() + def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, exclude_list, shuffle_lists, vm_num, vm_index, reduce_vp, sanity_check): """ @@ -163,11 +169,20 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, anchors = probe.get_anchor_list(sanity_path) logging.info("Anchors list fetched") # get anchor's gps - anchors_gps = san.get_gps_of_anchors(anchors, sanity_path) - logging.info("Anchors gps fetched") + #anchors_gps = san.get_gps_of_anchors(anchors, sanity_path) + #logging.info("Anchors gps fetched") # get a world map from shapefile # Todo: download a shapefile from server + shapefile = sanity_path + "/ne_10m_admin_0_countries.shp" + if not os.path.exists(shapefile): + logging.info("Shape file does not exist, Downloading from server") + shapefile_url = 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip' + logging.info("Starting to download map shape file zip") + r = requests.get(shapefile_url, stream=True) + z = zipfile.ZipFile(StringIO.StringIO(r.content)) + z.extractall(sanity_path) + map = san.load_map_from_shapefile(shapefile) for filename in conf_list: centinel_config = os.path.join(conf_dir, filename) From 93716abaef2367a65b826d759e5ec49123a695e7 Mon Sep 17 00:00:00 2001 From: arian Date: Tue, 1 Aug 2017 22:27:41 -0400 Subject: [PATCH 22/75] try block added for fetching map shape file --- centinel/vpn/cli.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 34369ae..078e040 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -179,9 +179,12 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, logging.info("Shape file does not exist, Downloading from server") shapefile_url = 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip' logging.info("Starting to download map shape file zip") - r = requests.get(shapefile_url, stream=True) - z = zipfile.ZipFile(StringIO.StringIO(r.content)) - z.extractall(sanity_path) + try: + r = requests.get(shapefile_url, stream=True) + z = zipfile.ZipFile(StringIO.StringIO(r.content)) + z.extractall(sanity_path) + except Exception as exp: + logging.error("Could not fetch map file : %s" %str(exp)) map = san.load_map_from_shapefile(shapefile) for filename in conf_list: From 9f766345698d0d37611700c50ff086f8e63e8c83 Mon Sep 17 00:00:00 2001 From: arian Date: Tue, 1 Aug 2017 22:36:51 -0400 Subject: [PATCH 23/75] map shape file download (completed) --- centinel/vpn/cli.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 078e040..faff59d 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -169,11 +169,9 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, anchors = probe.get_anchor_list(sanity_path) logging.info("Anchors list fetched") # get anchor's gps - #anchors_gps = san.get_gps_of_anchors(anchors, sanity_path) - #logging.info("Anchors gps fetched") + anchors_gps = san.get_gps_of_anchors(anchors, sanity_path) + logging.info("Anchors gps fetched") # get a world map from shapefile - # Todo: download a shapefile from server - shapefile = sanity_path + "/ne_10m_admin_0_countries.shp" if not os.path.exists(shapefile): logging.info("Shape file does not exist, Downloading from server") @@ -183,6 +181,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, r = requests.get(shapefile_url, stream=True) z = zipfile.ZipFile(StringIO.StringIO(r.content)) z.extractall(sanity_path) + logging.info("Map shape file downloaded") except Exception as exp: logging.error("Could not fetch map file : %s" %str(exp)) From fcb26e026ce4400cc480f67179f45f22fe641774 Mon Sep 17 00:00:00 2001 From: arian Date: Wed, 2 Aug 2017 14:51:02 -0400 Subject: [PATCH 24/75] fixed a bug where servers.txt would be in the configs folder --- centinel/vpn/cli.py | 55 ++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index faff59d..8ee3aa9 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -616,34 +616,33 @@ def create_config_files(directory): home_dirs = return_abs_path(directory, "home") os.mkdir(home_dirs) for filename in os.listdir(vpn_dir): - configuration = centinel.config.Configuration() - # setup the directories - home_dir = os.path.join(home_dirs, filename) - os.mkdir(home_dir) - configuration.params['user']['centinel_home'] = home_dir - exp_dir = os.path.join(home_dir, "experiments") - os.mkdir(exp_dir) - configuration.params['dirs']['experiments_dir'] = exp_dir - data_dir = os.path.join(home_dir, "data") - os.mkdir(data_dir) - configuration.params['dirs']['data_dir'] = data_dir - res_dir = os.path.join(home_dir, "results") - os.mkdir(res_dir) - configuration.params['dirs']['results_dir'] = res_dir - - log_file = os.path.join(home_dir, "centinel.log") - configuration.params['log']['log_file'] = log_file - login_file = os.path.join(home_dir, "login") - configuration.params['server']['login_file'] = login_file - configuration.params['user']['is_vpn'] = True - - configuration.params['server']['verify'] = True - configuration.params['experiments']['tcpdump_params'] = ["-i", "tun0"] - - configuration.params['country'] = server_country[filename.replace('.ovpn','')] - - conf_file = os.path.join(conf_dir, filename) - configuration.write_out_config(conf_file) + if('server' not in filename): + configuration = centinel.config.Configuration() + # setup the directories + home_dir = os.path.join(home_dirs, filename) + os.mkdir(home_dir) + configuration.params['user']['centinel_home'] = home_dir + exp_dir = os.path.join(home_dir, "experiments") + os.mkdir(exp_dir) + configuration.params['dirs']['experiments_dir'] = exp_dir + data_dir = os.path.join(home_dir, "data") + os.mkdir(data_dir) + configuration.params['dirs']['data_dir'] = data_dir + res_dir = os.path.join(home_dir, "results") + os.mkdir(res_dir) + configuration.params['dirs']['results_dir'] = res_dir + + log_file = os.path.join(home_dir, "centinel.log") + configuration.params['log']['log_file'] = log_file + login_file = os.path.join(home_dir, "login") + configuration.params['server']['login_file'] = login_file + configuration.params['user']['is_vpn'] = True + + configuration.params['server']['verify'] = True + configuration.params['experiments']['tcpdump_params'] = ["-i", "tun0"] + configuration.params['country'] = server_country[filename.replace('.ovpn','')] + conf_file = os.path.join(conf_dir, filename) + configuration.write_out_config(conf_file) def experiments_available(config): From 5fada31dc439e8ff6ee41cbc841830d67bcf83ff Mon Sep 17 00:00:00 2001 From: arian Date: Wed, 2 Aug 2017 15:08:21 -0400 Subject: [PATCH 25/75] fixed the previous bug completeley. purevpn had server in the name of its configs --- centinel/vpn/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 8ee3aa9..20cf2a2 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -616,7 +616,7 @@ def create_config_files(directory): home_dirs = return_abs_path(directory, "home") os.mkdir(home_dirs) for filename in os.listdir(vpn_dir): - if('server' not in filename): + if('servers' not in filename): configuration = centinel.config.Configuration() # setup the directories home_dir = os.path.join(home_dirs, filename) From 53ad69c082315d2a8341a7cd4f94a0c2ae4e3b70 Mon Sep 17 00:00:00 2001 From: arian Date: Wed, 2 Aug 2017 15:27:27 -0400 Subject: [PATCH 26/75] hostname resolution doesnt happen in the create configs anymore --- centinel/vpn/hma.py | 24 ++++++++++++------------ centinel/vpn/ipvanish.py | 26 +++++++++++++------------- centinel/vpn/purevpn.py | 26 +++++++++++++------------- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index 6442e73..64d0d35 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -70,20 +70,20 @@ def create_config_files(directory): file_path = os.path.join(orig_path, filename) lines = [line.rstrip('\n') for line in open(file_path)] - ip = "" + hostname = "" for line in lines: if line.startswith('remote'): hostname = line.split(' ')[1] - try: - ip = socket.gethostbyname(hostname) - break - except socket.gaierror: - logging.exception("Failed to resolve %s" %hostname) - continue - if len(ip) > 0: - new_path = os.path.join(directory, ip + '.ovpn') + # try: + # ip = socket.gethostbyname(hostname) + # break + # except socket.gaierror: + # logging.exception("Failed to resolve %s" %hostname) + # continue + if len(hostname) > 0: + new_path = os.path.join(directory, hostname + '.ovpn') shutil.copyfile(file_path, new_path) - server_country[ip] = country + server_country[hostname] = country # remove extracted folder shutil.rmtree(os.path.join(directory, '../TCP')) @@ -101,8 +101,8 @@ def create_config_files(directory): print os.path.join(directory, 'servers.txt'), len(server_country) with open(os.path.join(directory, 'servers.txt'), 'w') as f: - for ip in server_country: - f.write('|'.join([ip, server_country[ip]]) + '\n') + for hostname in server_country: + f.write('|'.join([hostname, server_country[hostname]]) + '\n') if __name__ == "__main__": if len(sys.argv) != 2: diff --git a/centinel/vpn/ipvanish.py b/centinel/vpn/ipvanish.py index fdf4021..d7ba94e 100644 --- a/centinel/vpn/ipvanish.py +++ b/centinel/vpn/ipvanish.py @@ -57,29 +57,29 @@ def create_config_files(directory): lines = [line.rstrip('\n') for line in open(file_path)] # get ip address for this vpn - ip = "" + hostname = "" for line in lines: if line.startswith('remote'): hostname = line.split(' ')[1] # added because gethostbyname will fail on some hostnames - try: - ip = socket.gethostbyname(hostname) - break - except socket.gaierror: - logging.exception("Failed to resolve %s" %hostname) - continue - - if len(ip) > 0: - new_path = os.path.join(directory, ip + '.ovpn') + # try: + # ip = socket.gethostbyname(hostname) + # break + # except socket.gaierror: + # logging.exception("Failed to resolve %s" %hostname) + # continue + + if len(hostname) > 0: + new_path = os.path.join(directory, hostname + '.ovpn') shutil.copyfile(file_path, new_path) - server_country[ip] = country + server_country[hostname] = country else: logging.warn("Unable to resolve hostname and remove %s" % filename) os.remove(file_path) with open(os.path.join(directory, 'servers.txt'), 'w') as f: - for ip in server_country: - f.write('|'.join([ip, server_country[ip]]) + '\n') + for hostname in server_country: + f.write('|'.join([hostname, server_country[hostname]]) + '\n') # remove extracted folder shutil.rmtree(unzip_path) diff --git a/centinel/vpn/purevpn.py b/centinel/vpn/purevpn.py index 39ca374..c4ecdc8 100644 --- a/centinel/vpn/purevpn.py +++ b/centinel/vpn/purevpn.py @@ -53,22 +53,22 @@ def create_config_files(directory): lines = [line.rstrip('\n') for line in open(file_path)] # get ip address for this vpn - ip = "" + hostname = "" for line in lines: if line.startswith('remote'): hostname = line.split(' ')[1] # added because gethostbyname will fail on some hostnames - try: - ip = socket.gethostbyname(hostname) - break - except socket.gaierror: - logging.exception("Failed to resolve %s" %hostname) - continue - - if len(ip) > 0: - new_path = os.path.join(directory, ip + '.ovpn') + # try: + # ip = socket.gethostbyname(hostname) + # break + # except socket.gaierror: + # logging.exception("Failed to resolve %s" %hostname) + # continue + + if len(hostname) > 0: + new_path = os.path.join(directory, hostname + '.ovpn') shutil.copyfile(file_path, new_path) - server_country[ip] = country + server_country[hostname] = country # remove extracted folder shutil.rmtree(os.path.join(directory, '../Linux OpenVPN Updated files')) @@ -84,8 +84,8 @@ def create_config_files(directory): print os.path.join(directory, 'servers.txt'), len(server_country) with open(os.path.join(directory, 'servers.txt'), 'w') as f: - for ip in server_country: - f.write('|'.join([ip, server_country[ip]]) + '\n') + for hostname in server_country: + f.write('|'.join([hostname, server_country[hostname]]) + '\n') if __name__ == "__main__": From ff3c8d7fd3b9a671be48570866614ab1f5127463 Mon Sep 17 00:00:00 2001 From: arian Date: Wed, 2 Aug 2017 16:39:49 -0400 Subject: [PATCH 27/75] name resolution happens at sanity check instead of creating vpn config files --- centinel/vpn/cli.py | 48 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 20cf2a2..3ce5acd 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -17,6 +17,7 @@ import zipfile import requests import StringIO +import socket import centinel.backend @@ -190,7 +191,16 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, centinel_config = os.path.join(conf_dir, filename) config = centinel.config.Configuration() config.parse_config(centinel_config) - vp_ip = os.path.splitext(filename)[0] + # get ip address of hostnames + hostname = os.path.splitext(filename)[0] + vp_ip = "unknown" + try: + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) + continue + + vpn_config = os.path.join(vpn_dir, filename) centinel_config = os.path.join(conf_dir, filename) @@ -198,6 +208,8 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # [ip-address].ovpn, we can extract IP address from filename # and use it to geolocate and fetch experiments before connecting # to VPN. + # filename is [hostname].ovpn, we resolved the hostname to ip + # using socket.gethostbyname() vpn_address, extension = os.path.splitext(filename) lines = [line.rstrip('\n') for line in open(centinel_config)] @@ -224,7 +236,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # experiemnts and input data. try: logging.info("country is %s" % country) - centinel.backend.set_vpn_info(config.params, vpn_address, country) + centinel.backend.set_vpn_info(config.params, vp_ip, country) except Exception as exp: logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) @@ -305,7 +317,13 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, centinel_config = os.path.join(conf_dir, filename) config = centinel.config.Configuration() config.parse_config(centinel_config) - vp_ip = os.path.splitext(filename)[0] + hostname = os.path.splitext(filename)[0] + vp_ip = "unknown" + try: + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) + continue # get country for this vpn country_in_config = "" @@ -402,11 +420,23 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, config = centinel.config.Configuration() config.parse_config(centinel_config) + + # assuming that each VPN config file has a name like: # [ip-address].ovpn, we can extract IP address from filename # and use it to geolocate and fetch experiments before connecting # to VPN. - vpn_address, extension = os.path.splitext(filename) + # filename is [hostname].ovpn, we resolved the hostname to ip + # using socket.gethostbyname() + hostname = os.path.splitext(filename) + vp_ip = "unknown" + try: + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) + continue + +# vpn_address, extension = os.path.splitext(filename) lines = [line.rstrip('\n') for line in open(centinel_config)] # get country for this vpn @@ -422,7 +452,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, try: # we still might need some info from the Maximind query meta = centinel.backend.get_meta(config.params, - vpn_address) + vp_ip) # send country name to be converted to alpha2 code if(len(country_in_config) > 2): @@ -431,7 +461,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, if 'country' in meta: country = meta['country'] except: - logging.exception("%s: Failed to geolocate %s" % (filename, vpn_address)) + logging.exception("%s: Failed to geolocate %s" % (filename, vp_ip)) if country and exclude_list and country in exclude_list: logging.info("%s: Skipping this server (%s)" % (filename, country)) @@ -441,7 +471,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # experiemnts and input data. try: logging.info("country is %s" % country) - centinel.backend.set_vpn_info(config.params, vpn_address, country) + centinel.backend.set_vpn_info(config.params, vp_ip, country) except Exception as exp: logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) @@ -454,7 +484,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, if not experiments_available(config.params): logging.info("%s: No experiments available." % filename) try: - centinel.backend.set_vpn_info(config.params, vpn_address, country) + centinel.backend.set_vpn_info(config.params, vp_ip, country) except Exception as exp: logging.exception("Failed to set VPN info: %s" % exp) continue @@ -534,7 +564,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # try setting the VPN info (IP and country) to the correct address # after sync is over. try: - centinel.backend.set_vpn_info(config.params, vpn_address, country) + centinel.backend.set_vpn_info(config.params, vp_ip, country) except Exception as exp: logging.exception("Failed to set VPN info: %s" % exp) From 7ced45f6863d67bb5c89607de469dc5e3199a025 Mon Sep 17 00:00:00 2001 From: arian Date: Wed, 2 Aug 2017 16:45:04 -0400 Subject: [PATCH 28/75] hostname resolution happens at sanity check instead of when creating vpn config files --- centinel/vpn/cli.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 3ce5acd..843b9a7 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -451,8 +451,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, country = None try: # we still might need some info from the Maximind query - meta = centinel.backend.get_meta(config.params, - vp_ip) + meta = centinel.backend.get_meta(config.params, vp_ip) # send country name to be converted to alpha2 code if(len(country_in_config) > 2): From ab29b8112afcb9d9ae8bdb988c5f1b2006e0493a Mon Sep 17 00:00:00 2001 From: arian Date: Mon, 7 Aug 2017 12:23:06 -0400 Subject: [PATCH 29/75] hash file of ovpn files added --- centinel/vpn/hma.py | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index 64d0d35..91a2b45 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -10,6 +10,31 @@ import socket import zipfile import urllib2 +import pickle +import hashlib + +def hash_file(filename): + """ + This function returns the SHA-1 hash + of the file passed into it + """ + + # make a hash object + h = hashlib.sha1() + + # open file for reading in binary mode + with open(filename,'rb') as file: + + # loop till the end of the file + chunk = 0 + while chunk != b'': + # read only 1024 bytes at a time + chunk = file.read(1024) + h.update(chunk) + + # return the hex representation of digest + return h.hexdigest() + def unzip(source_filename, dest_dir): with zipfile.ZipFile(source_filename) as zf: @@ -62,7 +87,7 @@ def create_config_files(directory): # move all config files to /vpns orig_path = os.path.join(directory, '../TCP') - + config_dict = {} server_country = {} for filename in os.listdir(orig_path): if filename.endswith('.ovpn'): @@ -97,7 +122,13 @@ def create_config_files(directory): f.write('\n') f.write('up /etc/openvpn/update-resolv-conf\n') f.write('down /etc/openvpn/update-resolv-conf\n') - + message = hash_file(file_path) + config_dict[filename] = message + print(config_dict) + output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') + pickle.dump(config_dict, output) + output.close() + print os.path.join(directory, 'servers.txt'), len(server_country) with open(os.path.join(directory, 'servers.txt'), 'w') as f: From 6941a610e9c08b5ea18f6d0a0c0bd26e793e0f5f Mon Sep 17 00:00:00 2001 From: arian Date: Mon, 7 Aug 2017 12:36:32 -0400 Subject: [PATCH 30/75] update configs parameter added, reading configs pickle file added to HMA update configs --- centinel/vpn/cli.py | 14 +++++++++++++- centinel/vpn/hma.py | 21 +++++++++++++++++++-- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 843b9a7..b5c9234 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -63,6 +63,9 @@ def parse_args(): g1.add_argument('--create-hma-configs', dest='create_HMA', action="store_true", help='Create the openvpn config files for HMA') + g1.add_argument('--update-hma-configs', dest='update_HMA', + action="store_true", + help='Update the openvpn config files for HMA') g1.add_argument('--create-ipvanish-configs', dest='create_IPVANISH', action='store_true', help='Create the openvpn config files for IPVanish') @@ -83,7 +86,7 @@ def parse_args(): g2 = parser.add_mutually_exclusive_group(required=True) g2.add_argument('--directory', '-d', dest='directory', help='Directory with experiments, config files, etc.') - create_conf_help = ('Create configuration files for the given ' + create_conf_help = ('Create/Update configuration files for the given ' 'openvpn config files so that we can treat each ' 'one as a client. The argument should be a ' 'directory with a subdirectory called openvpn ' @@ -91,6 +94,10 @@ def parse_args(): g2.add_argument('--create-config', '-c', help=create_conf_help, dest='create_conf_dir') + g2.add_argument('--update-config', '-z', help=create_conf_help, + dest='update_conf_dir') + + # following args are used to support splitting clients among multiple VMs # each running vpn walker will use this to decide which portion of vpn # endpoints it should include @@ -765,6 +772,11 @@ def _run(): vpngate.create_config_files(vpngate_dir) # create the config files for the openvpn config files create_config_files(args.create_conf_dir) + + elif args.update_conf_dir: + if args.update_HMA: + hma_dir = return_abs_path(args.update_conf_dir, 'vpns') + hma.update_config_files(hma_dir) else: # sanity check tls_auth and key_direction if (args.tls_auth is not None and args.key_direction is None) or \ diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index 91a2b45..f085e25 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -39,7 +39,24 @@ def hash_file(filename): def unzip(source_filename, dest_dir): with zipfile.ZipFile(source_filename) as zf: zf.extractall(dest_dir) - + + +def update_config_files(directory): + """ + Update directory for vpn walker + :param directory: + :return: + """ + logging.info("Update HMA Configs") + + # read python dict back from the file + pkl_file = open(os.path.join(directory,'../config_hash.pkl'), 'rb') + config_dict = pickle.load(pkl_file) + pkl_file.close() + print(config_dict) + + + def create_config_files(directory): """ Initialize directory ready for vpn walker @@ -124,7 +141,7 @@ def create_config_files(directory): f.write('down /etc/openvpn/update-resolv-conf\n') message = hash_file(file_path) config_dict[filename] = message - print(config_dict) + # print(config_dict) output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') pickle.dump(config_dict, output) output.close() From 044e6d62ef1831d5e434f1d03b0f862afa40a325 Mon Sep 17 00:00:00 2001 From: arian Date: Mon, 7 Aug 2017 13:39:53 -0400 Subject: [PATCH 31/75] determine the update/add/delete vpn's in hma by comparing with the config hash --- centinel/vpn/hma.py | 97 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 3 deletions(-) diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index f085e25..8fd87f3 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -47,13 +47,104 @@ def update_config_files(directory): :param directory: :return: """ + updated_vpn_path = os.path.join(directory, '../updated_vpns') + print(updated_vpn_path) + if not os.path.exists(updated_vpn_path): + os.makedirs(updated_vpn_path) + logging.info("Update HMA Configs") # read python dict back from the file pkl_file = open(os.path.join(directory,'../config_hash.pkl'), 'rb') - config_dict = pickle.load(pkl_file) + old_config_dict = pickle.load(pkl_file) pkl_file.close() - print(config_dict) +# print(config_dict) + + config_zip_url = "https://hidemyass.com/vpn-config/vpn-configs.zip" + + logging.info("Starting to download hma config file zip") + + zip_response = urllib2.urlopen(config_zip_url) + zip_content = zip_response.read() + zip_path = os.path.join(directory, '../vpn-configs.zip') + + with open(zip_path,'w') as f: + f.write(zip_content) + logging.info("Extracting zip file") + unzip(zip_path, os.path.join(directory, '../')) + + # remove zip file + os.remove(zip_path) + + server_country = {} + new_config_dict = {} + + # move all config files to /vpns + orig_path = os.path.join(directory, '../TCP') + config_dict = {} + server_country = {} + for filename in os.listdir(orig_path): + if filename.endswith('.ovpn'): + country = filename.split('.')[0] + file_path = os.path.join(orig_path, filename) + lines = [line.rstrip('\n') for line in open(file_path)] + + hostname = "" + for line in lines: + if line.startswith('remote'): + hostname = line.split(' ')[1] + if len(hostname) > 0: + new_path = os.path.join(updated_vpn_path, hostname + '.ovpn') + shutil.copyfile(file_path, new_path) + server_country[hostname] = country + + # remove extracted folder + shutil.rmtree(os.path.join(directory, '../TCP')) + shutil.rmtree(os.path.join(directory, '../UDP')) + + # add dns update options to each file + logging.info("Appending DNS update options") + for filename in os.listdir(updated_vpn_path): + file_path = os.path.join(updated_vpn_path, filename) + with open(file_path, 'a') as f: + f.write('\n') + f.write('up /etc/openvpn/update-resolv-conf\n') + f.write('down /etc/openvpn/update-resolv-conf\n') + message = hash_file(file_path) + new_config_dict[filename] = message + + delete_list = [] + update_list = [] + # delete and update + for vp in old_config_dict: + found_vpn_flag = 0 + for newvp in new_config_dict: + if(vp == newvp): + found_vpn_flag = 1 + if(old_config_dict[vp] != new_config_dict[newvp]): +# print('vpn update'+ str(vp)) + update_list.append(vp) + else: +# print('no update needed') + continue + if found_vpn_flag == 0: + delete_list.append(vp) + # new additions + print('vp\'s to be added: ' , set(new_config_dict.keys()) - set(old_config_dict.keys())) + print('vp\'s to be deleted: ' , delete_list) + print('vp\'s to be updated: ', update_list) + + # print(config_dict) + # output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') + # pickle.dump(config_dict, output) + # output.close() + # + # + # print os.path.join(directory, 'servers.txt'), len(server_country) + # with open(os.path.join(directory, 'servers.txt'), 'w') as f: + # for hostname in server_country: + # f.write('|'.join([hostname, server_country[hostname]]) + '\n') + @@ -139,9 +230,9 @@ def create_config_files(directory): f.write('\n') f.write('up /etc/openvpn/update-resolv-conf\n') f.write('down /etc/openvpn/update-resolv-conf\n') + # print(config_dict) message = hash_file(file_path) config_dict[filename] = message - # print(config_dict) output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') pickle.dump(config_dict, output) output.close() From d519f41b72f6f89a76f6077bcc78c38c7820bac6 Mon Sep 17 00:00:00 2001 From: arian Date: Mon, 7 Aug 2017 13:48:07 -0400 Subject: [PATCH 32/75] new hash config file and server.txt file is written --- centinel/vpn/hma.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index 8fd87f3..63b96ab 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -134,16 +134,16 @@ def update_config_files(directory): print('vp\'s to be deleted: ' , delete_list) print('vp\'s to be updated: ', update_list) - # print(config_dict) - # output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') - # pickle.dump(config_dict, output) - # output.close() - # - # - # print os.path.join(directory, 'servers.txt'), len(server_country) - # with open(os.path.join(directory, 'servers.txt'), 'w') as f: - # for hostname in server_country: - # f.write('|'.join([hostname, server_country[hostname]]) + '\n') +# print(new_config_dict) + output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') + pickle.dump(new_config_dict, output) + output.close() + + + print os.path.join(directory, 'servers.txt'), len(server_country) + with open(os.path.join(directory, 'servers.txt'), 'w') as f: + for hostname in server_country: + f.write('|'.join([hostname, server_country[hostname]]) + '\n') From 71bdd6c58c678a73f19449190f1ba7d3b7e1d329 Mon Sep 17 00:00:00 2001 From: arian Date: Mon, 7 Aug 2017 13:55:09 -0400 Subject: [PATCH 33/75] add vpn config list implemented and all list of vp changes is returned --- centinel/vpn/hma.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index 63b96ab..dc5dea8 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -130,7 +130,9 @@ def update_config_files(directory): if found_vpn_flag == 0: delete_list.append(vp) # new additions - print('vp\'s to be added: ' , set(new_config_dict.keys()) - set(old_config_dict.keys())) + add_list = [] + add_list.extend((set(new_config_dict.keys()) - set(old_config_dict.keys()))) + print('vp\'s to be added: ' , add_list) print('vp\'s to be deleted: ' , delete_list) print('vp\'s to be updated: ', update_list) @@ -145,7 +147,7 @@ def update_config_files(directory): for hostname in server_country: f.write('|'.join([hostname, server_country[hostname]]) + '\n') - + return [delete_list, update_list, add_list] def create_config_files(directory): From db54733b6fddfb0aecbf7b8f323ad11f1394f104 Mon Sep 17 00:00:00 2001 From: arian Date: Mon, 7 Aug 2017 19:49:45 -0400 Subject: [PATCH 34/75] remove/update/add vpn configs added in cli.py --- centinel/vpn/cli.py | 49 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index b5c9234..2f97471 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -18,6 +18,7 @@ import requests import StringIO import socket +import shutil import centinel.backend @@ -619,6 +620,48 @@ def signal_handler(signal, frame): sys.exit(0) +def update_config_files(directory, vp_list): + """ + For each VPN file in directory/vpns update its configuration if needed + :param directory: + :param vp_list: the list of vp updates/deletes/additions + :return: + """ + + logging.info("Starting to update config files") + server_country = {} + vpn_dir = return_abs_path(directory, "vpns") + print(vpn_dir) + new_vpn_dir = return_abs_path(directory, "updated_vpns") + + # read servers.txt to find the country associated with the ip + with open (vpn_dir+ '/servers.txt') as server_file: + servers = server_file.readlines() + + for server_line in servers: + server_line = (server_line.split('|')) + server_country[server_line[0]] = server_line[1].replace('\n','') + + conf_dir = return_abs_path(directory, "configs") + home_dirs = return_abs_path(directory, "home") + + # remove vps + for vp in vp_list[0]: + os.remove(os.path.join(directory,"vpns/"+vp)) + shutil.rmtree(os.path.join(directory,"home/"+vp)) + os.remove(os.path.join(directory,"configs/"+vp)) + + # update vps + for vp in vp_list[1]: + print('in update') + os.remove(os.path.join(directory,"configs/"+vp)) + os.remove(os.path.join(directory,"vpns/"+vp)) + shutil.copyfile(os.path.join(directory,"updated_vpns/"+vp), os.path.join(directory,"vpns/"+vp)) + # add vp + for vp in vp_list[2]: + print(os.path.join(directory,"vpns/"+vp)) + shutil.copyfile(os.path.join(directory,"updated_vpns/"+vp), os.path.join(directory,"vpns/"+vp)) + def create_config_files(directory): """ For each VPN file in directory/vpns, create a new configuration @@ -776,7 +819,11 @@ def _run(): elif args.update_conf_dir: if args.update_HMA: hma_dir = return_abs_path(args.update_conf_dir, 'vpns') - hma.update_config_files(hma_dir) + vp_list = hma.update_config_files(hma_dir) + update_config_files(args.update_conf_dir, vp_list) + + # add new ones + else: # sanity check tls_auth and key_direction if (args.tls_auth is not None and args.key_direction is None) or \ From 0898652ddfca17b1948d11b45f08cb7c2391e7d4 Mon Sep 17 00:00:00 2001 From: arian Date: Mon, 7 Aug 2017 21:43:29 -0400 Subject: [PATCH 35/75] new configs created for new vps and in case the filenames are the same, only the vpn file is updated not the config --- centinel/vpn/cli.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 2f97471..7400b72 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -654,13 +654,39 @@ def update_config_files(directory, vp_list): # update vps for vp in vp_list[1]: print('in update') - os.remove(os.path.join(directory,"configs/"+vp)) os.remove(os.path.join(directory,"vpns/"+vp)) shutil.copyfile(os.path.join(directory,"updated_vpns/"+vp), os.path.join(directory,"vpns/"+vp)) # add vp for vp in vp_list[2]: print(os.path.join(directory,"vpns/"+vp)) shutil.copyfile(os.path.join(directory,"updated_vpns/"+vp), os.path.join(directory,"vpns/"+vp)) + configuration = centinel.config.Configuration() + # setup the directories + home_dir = os.path.join(home_dirs, vp) + os.mkdir(home_dir) + configuration.params['user']['centinel_home'] = home_dir + exp_dir = os.path.join(home_dir, "experiments") + os.mkdir(exp_dir) + configuration.params['dirs']['experiments_dir'] = exp_dir + data_dir = os.path.join(home_dir, "data") + os.mkdir(data_dir) + configuration.params['dirs']['data_dir'] = data_dir + res_dir = os.path.join(home_dir, "results") + os.mkdir(res_dir) + configuration.params['dirs']['results_dir'] = res_dir + + log_file = os.path.join(home_dir, "centinel.log") + configuration.params['log']['log_file'] = log_file + login_file = os.path.join(home_dir, "login") + configuration.params['server']['login_file'] = login_file + configuration.params['user']['is_vpn'] = True + + configuration.params['server']['verify'] = True + configuration.params['experiments']['tcpdump_params'] = ["-i", "tun0"] + configuration.params['country'] = server_country[vp.replace('.ovpn','')] + conf_file = os.path.join(conf_dir, vp) + configuration.write_out_config(conf_file) + def create_config_files(directory): """ From 41595a427f76135da2c803d0fc77391347c58287 Mon Sep 17 00:00:00 2001 From: Seyed Arian Akhavan Niaki Date: Wed, 9 Aug 2017 11:12:01 -0400 Subject: [PATCH 36/75] fixed a bug where hostname was a tuple of filename and extension rather than filename --- centinel/vpn/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 7400b72..7f9e03e 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -434,9 +434,9 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # [ip-address].ovpn, we can extract IP address from filename # and use it to geolocate and fetch experiments before connecting # to VPN. - # filename is [hostname].ovpn, we resolved the hostname to ip + # filename is [OBhostname].ovpn, we resolved the hostname to ip # using socket.gethostbyname() - hostname = os.path.splitext(filename) + hostname = os.path.splitext(filename)[0] vp_ip = "unknown" try: vp_ip = socket.gethostbyname(hostname) From bed8b1d45520d33eadaba21b612677246e8a2baa Mon Sep 17 00:00:00 2001 From: Seyed Arian Akhavan Niaki Date: Wed, 9 Aug 2017 11:57:03 -0400 Subject: [PATCH 37/75] removing new vpn configs folder when update is finished --- centinel/vpn/hma.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index dc5dea8..941ce25 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -147,6 +147,8 @@ def update_config_files(directory): for hostname in server_country: f.write('|'.join([hostname, server_country[hostname]]) + '\n') + shutil.rmtree(os.path.join(directory, '../updated_vpns')) + return [delete_list, update_list, add_list] From 0c98f71ab4848d4d230d8e71954759dd425adde2 Mon Sep 17 00:00:00 2001 From: Seyed Arian Akhavan Niaki Date: Wed, 9 Aug 2017 14:28:28 -0400 Subject: [PATCH 38/75] update configs for ipvanish added --- centinel/vpn/ipvanish.py | 138 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 136 insertions(+), 2 deletions(-) diff --git a/centinel/vpn/ipvanish.py b/centinel/vpn/ipvanish.py index d7ba94e..338598a 100644 --- a/centinel/vpn/ipvanish.py +++ b/centinel/vpn/ipvanish.py @@ -7,11 +7,136 @@ import urllib import zipfile import urllib2 +import pickle +import hashlib + +def hash_file(filename): + """ + This function returns the SHA-1 hash + of the file passed into it + """ + + # make a hash object + h = hashlib.sha1() + + # open file for reading in binary mode + with open(filename,'rb') as file: + + # loop till the end of the file + chunk = 0 + while chunk != b'': + # read only 1024 bytes at a time + chunk = file.read(1024) + h.update(chunk) + + # return the hex representation of digest + return h.hexdigest() + def unzip(source_filename, dest_dir): with zipfile.ZipFile(source_filename) as zf: zf.extractall(dest_dir) +def update_config_files(directory): + """ + Update directory for vpn walker + :param directory: + :return: + """ + updated_vpn_path = os.path.join(directory, '../updated_vpn') + if not os.path.exists(updated_vpn_path): + os.makedirs(updated_vpn_path) + + logging.info("Update Ipvanish Configs") + + # read python dict back from file + pkl_file = open(os.path.join(directory, '../config_hash.pkl'), 'rb') + old_config_dict = pickle.load(pkl_file) + pkl_file.close() + + config_zip_url = "http://www.ipvanish.com/software/configs/configs.zip" + + logging.info("Starting to download IPVanish config file zip") + + zip_response = urllib2.urlopen(config_zip_url) + zip_content = zip_response.read() + zip_path = os.path.join(directory, '../configs.zip') + unzip_path = os.path.join(directory, '../unzipped') + + if not os.path.exists(unzip_path): + os.makedirs(unzip_path) + with open(zip_path, 'w') as f: + f.write(zip_content) + + logging.info("Extracting zip file") + unzip(zip_path, unzip_path) + + # remove zip file + os.remove(zip_path) + + server_country = {} + new_config_dict = {} + + for filename in os.listdir(unzip_path): + if filename.endswith('.ovpn'): + country = filename.split('-')[1] + + file_path = os.path.join(unzip_path, filename) + lines = [line.rstrip('\n') for line in open(file_path)] + + # get ip address for this vpn + hostname = "" + for line in lines: + if line.startswith('remote'): + hostname = line.split(' ')[1] + if len(hostname) > 0: + new_path = os.path.join(directory, hostname + '.ovpn') + shutil.copyfile(file_path, new_path) + server_country[hostname] = country + else: + logging.warn("Unable to resolve hostname and remove %s" % filename) + os.remove(file_path) + + for filename in os.listdir(updated_vpn_path): + file_path = os.path.jpin(updated_vpn_path, filename) + message = hash_file(file_path) + new_config_dict[filename] = message + + delete_list = [] + update_list = [] + # delete and update + for vp in old_config_dict: + found_vpn_flag = 0 + for newvp in new_config_dict: + if(vp == newvp): + found_vpn_flag = 1 + if(old_config_dict[vp] != new_config_dict[newvp]): + update_list.append(vp) + else: + continue + if found_vpn_flag == 0: + delete_list.append(vp) + # new additions + add_list = [] + add_list.expand((set(new_config_dict.keys()) - set(old_config_dict.keys()))) + print('vp\'s to be added: ', add_list) + print('vp\'s tp be deleted: ', delete_list) + print('vp\'s to be updated: ', update_list) + + output = open(os.path.jpin(directory, '../config_hash.pkl'), 'wb') + pickle.dump(new_config_dict, output) + output.close() + + print os.path.join(directory, 'servers.txt'), len(server_country) + with open(os.path.join(directory, 'servers.txt'), 'w') as f: + for hostname in server_country: + f.write('|'.join([hostname, server_country[hostname]]) + '\n') + + # remove extracted folder + shutil.rmtree(unzip_path) + + return [delete_list, update_list, add_list] + def create_config_files(directory): """ @@ -47,7 +172,7 @@ def create_config_files(directory): os.path.join(directory, '../ca.ipvanish.com.crt')) # move all config files to /vpns - + config_dict = {} server_country = {} for filename in os.listdir(unzip_path): if filename.endswith('.ovpn'): @@ -66,7 +191,7 @@ def create_config_files(directory): # ip = socket.gethostbyname(hostname) # break # except socket.gaierror: - # logging.exception("Failed to resolve %s" %hostname) + # logging.exception("Failed to resolve %s" %hostname) # continue if len(hostname) > 0: @@ -77,6 +202,15 @@ def create_config_files(directory): logging.warn("Unable to resolve hostname and remove %s" % filename) os.remove(file_path) + # writing pickle file of ovpn configs + for filename in os.listdir(directory): + file_path = os.path.join(directory, filename) + message = hash_file(file_path) + config_dict[filename] = message + + output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') + pickle.dump(config_dict, output) + output.close() with open(os.path.join(directory, 'servers.txt'), 'w') as f: for hostname in server_country: f.write('|'.join([hostname, server_country[hostname]]) + '\n') From e6e2556fc4f71a2365aba712ad252e27abd8f1da Mon Sep 17 00:00:00 2001 From: Seyed Arian Akhavan Niaki Date: Wed, 9 Aug 2017 15:37:22 -0400 Subject: [PATCH 39/75] updated vpns directory shouldn't be deleted in the vpnprovider.py code --- centinel/vpn/hma.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index 941ce25..78c95fa 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -147,8 +147,7 @@ def update_config_files(directory): for hostname in server_country: f.write('|'.join([hostname, server_country[hostname]]) + '\n') - shutil.rmtree(os.path.join(directory, '../updated_vpns')) - + return [delete_list, update_list, add_list] From 2a089dcb9a0e324b84011f7ffe93307850d05601 Mon Sep 17 00:00:00 2001 From: Seyed Arian Akhavan Niaki Date: Wed, 9 Aug 2017 15:38:37 -0400 Subject: [PATCH 40/75] ipvanish update vpn config merged with cli.py --- centinel/vpn/cli.py | 6 ++++++ centinel/vpn/ipvanish.py | 15 ++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 7f9e03e..ebdb581 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -70,6 +70,9 @@ def parse_args(): g1.add_argument('--create-ipvanish-configs', dest='create_IPVANISH', action='store_true', help='Create the openvpn config files for IPVanish') + g1.add_argument('--update-ipvanish-configs', dest='update_IPVANISH', + action="store_true", + help='Update the openvpn config files for IPVANISH') g1.add_argument('--create-purevpn-configs', dest='create_PUREVPN', action='store_true', help='Create the openvpn config files for PureVPN') @@ -846,6 +849,9 @@ def _run(): if args.update_HMA: hma_dir = return_abs_path(args.update_conf_dir, 'vpns') vp_list = hma.update_config_files(hma_dir) + if args.update_IPVANISH: + ipvanish_dir = return_abs_path(args.update_conf_dir, 'vpns') + vp_list = ipvanish.update_config_files(ipvanish_dir) update_config_files(args.update_conf_dir, vp_list) # add new ones diff --git a/centinel/vpn/ipvanish.py b/centinel/vpn/ipvanish.py index 338598a..7260ec3 100644 --- a/centinel/vpn/ipvanish.py +++ b/centinel/vpn/ipvanish.py @@ -43,7 +43,7 @@ def update_config_files(directory): :param directory: :return: """ - updated_vpn_path = os.path.join(directory, '../updated_vpn') + updated_vpn_path = os.path.join(directory, '../updated_vpns') if not os.path.exists(updated_vpn_path): os.makedirs(updated_vpn_path) @@ -54,6 +54,7 @@ def update_config_files(directory): old_config_dict = pickle.load(pkl_file) pkl_file.close() + config_zip_url = "http://www.ipvanish.com/software/configs/configs.zip" logging.info("Starting to download IPVanish config file zip") @@ -71,6 +72,7 @@ def update_config_files(directory): logging.info("Extracting zip file") unzip(zip_path, unzip_path) + # remove zip file os.remove(zip_path) @@ -90,7 +92,7 @@ def update_config_files(directory): if line.startswith('remote'): hostname = line.split(' ')[1] if len(hostname) > 0: - new_path = os.path.join(directory, hostname + '.ovpn') + new_path = os.path.join(updated_vpn_path, hostname + '.ovpn') shutil.copyfile(file_path, new_path) server_country[hostname] = country else: @@ -98,8 +100,9 @@ def update_config_files(directory): os.remove(file_path) for filename in os.listdir(updated_vpn_path): - file_path = os.path.jpin(updated_vpn_path, filename) + file_path = os.path.join(updated_vpn_path, filename) message = hash_file(file_path) + # print(filename, message) new_config_dict[filename] = message delete_list = [] @@ -118,12 +121,12 @@ def update_config_files(directory): delete_list.append(vp) # new additions add_list = [] - add_list.expand((set(new_config_dict.keys()) - set(old_config_dict.keys()))) + add_list.extend((set(new_config_dict.keys()) - set(old_config_dict.keys()))) print('vp\'s to be added: ', add_list) print('vp\'s tp be deleted: ', delete_list) print('vp\'s to be updated: ', update_list) - output = open(os.path.jpin(directory, '../config_hash.pkl'), 'wb') + output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') pickle.dump(new_config_dict, output) output.close() @@ -135,6 +138,7 @@ def update_config_files(directory): # remove extracted folder shutil.rmtree(unzip_path) + return [delete_list, update_list, add_list] @@ -206,6 +210,7 @@ def create_config_files(directory): for filename in os.listdir(directory): file_path = os.path.join(directory, filename) message = hash_file(file_path) + # print filename, message config_dict[filename] = message output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') From 8866f0fa316195a0b3103e766ef6f3844dd56b89 Mon Sep 17 00:00:00 2001 From: shicho Date: Fri, 11 Aug 2017 23:45:13 -0400 Subject: [PATCH 41/75] optimize the sanity checker --- centinel/vpn/cli.py | 69 ++++++++++++++++------------ centinel/vpn/geosanity.py | 96 ++++++++++++++++++++++----------------- 2 files changed, 94 insertions(+), 71 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index ebdb581..a761ffa 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -19,7 +19,7 @@ import StringIO import socket import shutil - +import multiprocessing import centinel.backend import centinel.client @@ -173,6 +173,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # geolocation sanity check if sanity_check: + # start_time = time.time() # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well sanity_path = os.path.join(directory, '../sanitycheck') if not os.path.exists(sanity_path): @@ -193,10 +194,9 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, r = requests.get(shapefile_url, stream=True) z = zipfile.ZipFile(StringIO.StringIO(r.content)) z.extractall(sanity_path) - logging.info("Map shape file downloaded") + logging.info("Map shape file downloaded") except Exception as exp: - logging.error("Could not fetch map file : %s" %str(exp)) - + logging.error("Could not fetch map file : %s" % str(exp)) map = san.load_map_from_shapefile(shapefile) for filename in conf_list: centinel_config = os.path.join(conf_dir, filename) @@ -206,24 +206,20 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, hostname = os.path.splitext(filename)[0] vp_ip = "unknown" try: - vp_ip = socket.gethostbyname(hostname) - except Exception as exp: - logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) - continue - - - + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" % (hostname, str(exp))) + continue vpn_config = os.path.join(vpn_dir, filename) centinel_config = os.path.join(conf_dir, filename) # assuming that each VPN config file has a name like: # [ip-address].ovpn, we can extract IP address from filename # and use it to geolocate and fetch experiments before connecting # to VPN. - # filename is [hostname].ovpn, we resolved the hostname to ip - # using socket.gethostbyname() + # filename is [hostname].ovpn, we resolved the hostname to ip + # using socket.gethostbyname() vpn_address, extension = os.path.splitext(filename) lines = [line.rstrip('\n') for line in open(centinel_config)] - # get country for this vpn country_in_config = "" # reading the server.txt file in vpns folder @@ -231,7 +227,6 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, if "country" in line: (key, country_in_config) = line.split(': ') country_in_config = country_in_config.replace('\"', '').replace(',', '') - country = None try: meta = centinel.backend.get_meta(config.params, vp_ip) @@ -242,7 +237,6 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # some vpn config files already contain the alpha2 code (length == 2) if 'country' in meta: country = meta['country'] - # try setting the VPN info (IP and country) to get appropriate # experiemnts and input data. try: @@ -250,34 +244,26 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, centinel.backend.set_vpn_info(config.params, vp_ip, country) except Exception as exp: logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) - # sanity check logging.info("%s: Starting VPN." % filename) - vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) - vpn.start() if not vpn.started: logging.error("%s: Failed to start VPN!" % filename) vpn.stop() time.sleep(5) continue - # sending ping to the anchors ping_result = probe.perform_probe(sanity_path, vpn_provider, vp_ip, country, anchors) - # have to do this sanity check if timestamp is a certain value, needs changing timestamp = time.time() - ping_result['timestamp'] = timestamp #Todo: ?? - + ping_result['timestamp'] = timestamp # Todo: ?? logging.info("%s: Stopping VPN." % filename) vpn.stop() time.sleep(5) - except: logging.warning("Failed to send pings from %s" % vp_ip) - # sanity check failed_sanity_check = set() sanity_checked_set = set() @@ -286,6 +272,29 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, pickle_path = os.path.join(sanity_path, 'pings') file_lists = os.listdir(pickle_path) if file_lists: + num = 1 + try: + num = multiprocessing.cpu_count() + except (ImportError, NotImplementedError): + pass + count = 0 + pool = multiprocessing.Pool(processes=num) + for vp_ip, country, tag in pool.imap_unordered(san.run_checker, + ((this_file, anchors_gps, map, sanity_path, pickle_path) for + this_file in file_lists), + chunksize=1): + if tag == -1: + error_sanity_check.add(vp_ip + '-' + country) + elif tag == True: + sanity_checked_set.add(vp_ip + '-' + country) + else: + failed_sanity_check.add(vp_ip + '-' + country) + count += 1 + logging.info("Finishing.. (%s/%s)" % (count, len(file_lists))) + pool.terminate() + pool.join() + for worker in pool._pool: + assert not worker.is_alive() for this_file in file_lists: try: vp_ip = this_file.split('-')[0] @@ -302,9 +311,8 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, failed_sanity_check.add(vp_ip + '-' + country) except: logging.warning("Failed to sanity check %s" % vp_ip) - time_unique = time.time() - with open(os.path.join(sanity_path, 'results-of-sanity-check'+str(time_unique)+'.txt'), 'w') as f: + with open(os.path.join(sanity_path, 'results-of-sanity-check' + str(time_unique) + '.txt'), 'w') as f: f.write("Pass\n") for server in sanity_checked_set: f.write(server + '\n') @@ -315,9 +323,10 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, for server in error_sanity_check: f.write(server + '\n') conf_list = list(sanity_checked_set) - logging.info("List size after sanity check. New size: %d" %len(conf_list)) - - # return 0 + logging.info("List size after sanity check. New size: %d" % len(conf_list)) + # end_time = time.time() - start_time + # logging.info("Total elapsed time: %s" %end_time) + # # return 0 # reduce size of list if reduce_vp is true if reduce_vp: diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index e6f9777..76755cf 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -3,18 +3,22 @@ import os import time import pickle -from datetime import timedelta +import matplotlib +matplotlib.use('Agg') from geopandas import * from geopy.distance import vincenty from geopy.geocoders import Nominatim +from geopy.exc import GeocoderTimedOut import pyproj import functools +import pycountry from shapely.ops import transform as sh_transform from shapely.geometry import Point, Polygon, box as Box +def run_checker(args): + return sanity_check(*args) - -def sanity_check(proxy_id, iso_cnt, ping_results, anchors_gps, map, directory): +def sanity_check(this_file, anchors_gps, map, directory, pickle_path): """ :param proxy_id:(str) :param iso_cnt:(str) @@ -23,25 +27,35 @@ def sanity_check(proxy_id, iso_cnt, ping_results, anchors_gps, map, directory): :param map:(dataframe) :return: """ - checker = Checker(proxy_id, iso_cnt, directory) - # points = checker.check_ping_results(results, anchors_gps) - points = checker.check_ping_results(ping_results, anchors_gps) - if len(points) == 0: - logging.info("No valid ping results for %s" % proxy_id) - return -1 - logging.info("[%s] has %s valid anchors' results (valid pings) from %s anchors" - %(proxy_id, len(points), len(ping_results))) - - circles = checker.get_anchors_region(points) - proxy_region = checker.get_vpn_region(map) - if proxy_region.empty: - logging.info("Fail to get proxy region: %s" % iso_cnt) - return -1 - results = checker.check_overlap(proxy_region, circles) - return checker.is_valid(results) - # time_now = str(datetime.datetime.now()).split(' ')[0] - # with open("results_" + proxy_id + "_" + time_now + ".pickle", "w") as f: - # pickle.dump(results, f) + try: + start_time = time.time() + proxy_id = this_file.split('-')[0] + iso_cnt = this_file.split('-')[1] + tag = -1 + with open(os.path.join(pickle_path, this_file), 'r') as f: + ping_result = pickle.load(f) + ping_results = ping_result[proxy_id]['pings'] + checker = Checker(proxy_id, iso_cnt, directory) + # points = checker.check_ping_results(results, anchors_gps) + points = checker.check_ping_results(ping_results, anchors_gps) + if len(points) == 0: + logging.info("No valid ping results for %s" % proxy_id) + return proxy_id, iso_cnt, -1 + logging.info("[%s] has %s valid anchors' results (valid pings) from %s anchors" + %(proxy_id, len(points), len(ping_results))) + circles = checker.get_anchors_region(points) + proxy_region = checker.get_vpn_region(map) + if proxy_region.empty: + logging.info("[%s] Fail to get proxy region: %s" % (proxy_id, iso_cnt)) + return proxy_id, iso_cnt, -1 + results = checker.check_overlap(proxy_region, circles, this_file) + tag = checker.is_valid(results) + end_time = time.time() - start_time + logging.info("[%s] How long it takes: %s" % (this_file, end_time)) + except: + logging.warning("[%s] Failed to sanity check" % this_file) + return "N/A", "N/A", -1 + return proxy_id, iso_cnt, tag def load_map_from_shapefile(shapefile): """ @@ -50,10 +64,10 @@ def load_map_from_shapefile(shapefile): """ logging.info("Loading a shapefile for the world map") temp = GeoDataFrame.from_file(shapefile) + # print temp.dtypes.index map = temp[['ISO_A2', 'NAME', 'SUBREGION', 'geometry']] return map - def get_gps_of_anchors(anchors, directory): """ Get gps of all anchors @@ -68,23 +82,24 @@ def get_gps_of_anchors(anchors, directory): anchors_gps = pickle.load(f) except: logging.info("gps_of_anchors.pickle is not existed") - for anchor, item in anchors.iteritems(): count += 1 logging.info( "Retrieving... %s(%s/%s): %s" % (anchor, count, len(anchors), item['city'] + ' ' + item['country'])) geolocator = Nominatim() - location = geolocator.geocode(item['city'] + ' ' + item['country']) - if location == None: - location = geolocator.geocode(item['country']) - if location == None: - logging.info("Fail to read gps of %s" %anchor) - anchors_gps[anchor] = (location.latitude, location.longitude) + try: + location = geolocator.geocode(item['city'] + ' ' + item['country'], timeout=10) + if location == None: + location = geolocator.geocode(item['country'], timeout=10) + if location == None: + logging.info("Fail to read gps of %s" %anchor) + anchors_gps[anchor] = (location.latitude, location.longitude) + except GeocoderTimedOut as e: + logging.info("Error geocode failed: %s" %(e)) with open(os.path.join(directory, "gps_of_anchors.pickle"), "w") as f: pickle.dump(anchors_gps, f) return anchors_gps - class Checker: def __init__(self, proxy_id, iso, path): self.proxy_id = proxy_id @@ -96,10 +111,13 @@ def get_vpn_region(self, map): """ Get a region of given iso country """ - logging.info("Getting vpn region from a map") + # logging.info("Getting vpn region from a map") region = map[map.ISO_A2 == self.iso].geometry if region.empty: - logging.info("Fail to read country region: %s" % self.iso) + cnt = pycountry.countries.get(alpha2=self.iso) + region = map[map.NAME == cnt.name].geometry + if region.empty: + logging.info("Fail to read country region: %s (%s)" % (self.iso, cnt)) return None df = geopandas.GeoDataFrame({'geometry': region}) df.crs = {'init': 'epsg:4326'} @@ -116,8 +134,8 @@ def _get_gps_of_proxy(self): logging.info("Fail to get gps of location %s" %self.iso) return None vpn_gps = (location.latitude, location.longitude) - except: - logging.info("Fail to get gps of proxy") + except GeocoderTimedOut as e: + logging.info("Error geocode failed: %s" %(e)) return vpn_gps def _disk(self, x, y, radius): @@ -128,7 +146,6 @@ def get_anchors_region(self, points): (referred from zack's paper & code Todo: add LICENSE?) https://github.com/zackw/active-geolocator Note that pyproj takes distances in meters & lon/lat order. - """ logging.info("Starting to draw anchors region") wgs_proj = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") @@ -167,7 +184,6 @@ def get_anchors_region(self, points): else: i += 1 disk = Polygon(boundary).buffer(0) - # In the case of the generated disk is too large origin = Point(lon, lat) if not disk.contains(origin): @@ -182,7 +198,7 @@ def get_anchors_region(self, points): logging.debug("Fail to get a circle %s" %self.proxy_id) return circles - def check_overlap(self, proxy_region, circles): + def check_overlap(self, proxy_region, circles, ping_filename): """ Check overlap between proxy region and anchors' region. If there is an overlap check how much they are overlapped, otherwise, check how far the distance is from a proxy. @@ -214,15 +230,13 @@ def check_overlap(self, proxy_region, circles): area_overlap = sum(area_overlap.tolist()) stack = area_overlap/area_cnt results.append((True, stack)) - pickle_path = os.path.join(self.path, 'sanity') if not os.path.exists(pickle_path): os.makedirs(pickle_path) time_unique = time.time() - with open(pickle_path + '/' + self.proxy_id + '-' + self.iso + '-' + str(time_unique) + '.pickle', 'w') as f: + with open(pickle_path + '/' + ping_filename, 'w') as f: pickle.dump(results, f) logging.info("Pickle file successfully created.") - return results def _calculate_radius(self, time_ms): From bd7128b5f5a0c94591fd3ea267fa0a7be864e633 Mon Sep 17 00:00:00 2001 From: Seyed Arian Akhavan Niaki Date: Mon, 14 Aug 2017 10:45:25 -0400 Subject: [PATCH 42/75] update vpn parameter added for purevpn --- centinel/vpn/cli.py | 6 ++ centinel/vpn/hma.py | 2 +- centinel/vpn/purevpn.py | 133 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 139 insertions(+), 2 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index ebdb581..163363c 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -79,6 +79,9 @@ def parse_args(): g1.add_argument('--create-vpngate-configs', dest='create_VPNGATE', action='store_true', help='Create the openvpn config files for VPN Gate') + g1.add_argument('--update-purevpn-configs', dest='update_PUREVPN', + action="store_true", + help='Update the openvpn config files for PUREVPN') parser.add_argument('--shuffle', '-s', dest='shuffle_lists', action="store_true", default=False, help='Randomize the order of vantage points') @@ -852,6 +855,9 @@ def _run(): if args.update_IPVANISH: ipvanish_dir = return_abs_path(args.update_conf_dir, 'vpns') vp_list = ipvanish.update_config_files(ipvanish_dir) + if args.update_PUREVPN: + purevpn_dir = return_abs_path(args.update_conf_dir, 'vpns') + vp_list = purevpn.update_config_files(purevpn_dir) update_config_files(args.update_conf_dir, vp_list) # add new ones diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index 78c95fa..ad22019 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -79,7 +79,7 @@ def update_config_files(directory): server_country = {} new_config_dict = {} - # move all config files to /vpns + orig_path = os.path.join(directory, '../TCP') config_dict = {} server_country = {} diff --git a/centinel/vpn/purevpn.py b/centinel/vpn/purevpn.py index c4ecdc8..f5c97a1 100644 --- a/centinel/vpn/purevpn.py +++ b/centinel/vpn/purevpn.py @@ -5,12 +5,137 @@ import sys import urllib import zipfile +import pickle +import hashlib + + +def hash_file(filename): + """ + This function returns the SHA-1 hash + of the file passed into it + """ + + # make a hash object + h = hashlib.sha1() + + # open file for reading in binary mode + with open(filename,'rb') as file: + + # loop till the end of the file + chunk = 0 + while chunk != b'': + # read only 1024 bytes at a time + chunk = file.read(1024) + h.update(chunk) + + # return the hex representation of digest + return h.hexdigest() def unzip(source_filename, dest_dir): with zipfile.ZipFile(source_filename) as zf: zf.extractall(dest_dir) +def update_config_files(directory): + """ + Update directory for vpn walker + :param directory: + :return: + """ + updated_vpn_path = os.path.join(directory, '../updated_vpns') + if not os.path.exists(updated_vpn_path): + os.makedirs(updated_vpn_path) + + logging.info("Update Purevpn Confgis") + + # read python dict back from file + pkl_file = open(os.path.join(directory, '../config_hash.pkl'), 'rb') + old_config_dict = pickle.load(pkl_file) + pkl_file.close() + + config_zip_url = "https://s3-us-west-1.amazonaws.com/heartbleed/linux/linux-files.zip" + logging.info("Startin to download Purevpn config file zip") + + url_opener = urllib.URLopener() + zip_path = os.path.join(directory, '../linux_files.zip') + url_opener.retrieve(config_zip_url, zip_path) + logging.info("Extracting zip file") + unzip(zip_path, os.path.join(directory, '../')) + + # remove zip file + os.remove(zip_path) + + server_country = {} + new_config_dict = {} + + orig_path = os.path.join(directory, '../Linux OpenVPN Updated files/TCP') + for filename in os.listdir(orig_path): + if filename.endswith('.ovpn'): + country = filename.split('-')[0] + if '(V)' in country: + country = country[:country.find('(V)')] + + file_path = os.path.join(orig_path, filename) + lines = [line.rstrip('\n') for line in open(file_path)] + + # get ip address for this vpn + hostname = "" + for line in lines: + if line.startswith('remote'): + hostname = line.split(' ')[1] + + if len(hostname) > 0: + new_path = os.path.join(updated_vpn_path, hostname + '.ovpn') + # shutil.copyfile(file_path, new_path) + server_country[hostname] = country + + # remove extracted folder + shutil.rmtree(os.path.join(directory, '../Linux OpenVPN Updated files')) + + # add dns update options to each file + logging.info("Appending DNS update options") + for filename in os.listdir(updated_vpn_path): + file_path = os.path.join(updated_vpn_path, filename) + # with open(file_path, 'a') as f: + # f.write("\n") + # f.write("up /etc/openvpn/update-resolv-conf\n") + # f.write("down /etc/openvpn/update-resolv-conf\n") + message = hash_file(file_path) + new_config_dict[filename] = message + + delete_list = [] + update_list = [] + # delete and update + for vp in old_config_dict: + found_vpn_flag = 0 + for newvp in new_config_dict: + if(vp == newvp): + found_vpn_flag = 1 + if(old_config_dict[vp] != new_config_dict[newvp]): + update_list.append(vp) + else: + continue + if found_vpn_flag == 0: + delete_list.append(vp) + + # new additions + add_list = [] + add_list.extend((set(new_config_dict.keys()) - set(old_config_dict.keys()))) + print('vp\'s to be added: ', add_list) + print('vp\'s to be deleted: ', delete_list) + print('vp\'s to be updated: ', update_list) + + output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') + pickle.dump(new_config_dict, output) + output.close() + + + print os.path.join(directory, 'servers.txt'), len(server_country) + with open(os.path.join(directory, 'servers.txt'), 'w') as f: + for hostname in server_country: + f.write('|'.join([hostname, server_country[hostname]]) + '\n') + + return [delete_list, update_list, add_list] def create_config_files(directory): """ @@ -41,7 +166,7 @@ def create_config_files(directory): os.path.join(directory, '../Wdc.key')) # move all config files to /vpns orig_path = os.path.join(directory, '../Linux OpenVPN Updated files/TCP') - + config_dict = {} server_country = {} for filename in os.listdir(orig_path): if filename.endswith('.ovpn'): @@ -81,6 +206,12 @@ def create_config_files(directory): f.write("\n") f.write("up /etc/openvpn/update-resolv-conf\n") f.write("down /etc/openvpn/update-resolv-conf\n") + message = hash_file(file_path) + config_dict[filename] = message + + output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') + pickle.dump(config_dict, output) + output.close() print os.path.join(directory, 'servers.txt'), len(server_country) with open(os.path.join(directory, 'servers.txt'), 'w') as f: From 3f43cad7c6cc32ae2d4c7ff4786cde45c1d75172 Mon Sep 17 00:00:00 2001 From: Arian Niaki Date: Mon, 14 Aug 2017 10:57:35 -0400 Subject: [PATCH 43/75] adding return value to the update config function --- centinel/vpn/hma.py | 3 +-- centinel/vpn/ipvanish.py | 2 +- centinel/vpn/purevpn.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/centinel/vpn/hma.py b/centinel/vpn/hma.py index ad22019..d6be42e 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -45,7 +45,7 @@ def update_config_files(directory): """ Update directory for vpn walker :param directory: - :return: + :return a list of delete, update and added vps: """ updated_vpn_path = os.path.join(directory, '../updated_vpns') print(updated_vpn_path) @@ -58,7 +58,6 @@ def update_config_files(directory): pkl_file = open(os.path.join(directory,'../config_hash.pkl'), 'rb') old_config_dict = pickle.load(pkl_file) pkl_file.close() -# print(config_dict) config_zip_url = "https://hidemyass.com/vpn-config/vpn-configs.zip" diff --git a/centinel/vpn/ipvanish.py b/centinel/vpn/ipvanish.py index 7260ec3..d2f021a 100644 --- a/centinel/vpn/ipvanish.py +++ b/centinel/vpn/ipvanish.py @@ -41,7 +41,7 @@ def update_config_files(directory): """ Update directory for vpn walker :param directory: - :return: + :return a list of delete, update and added vps: """ updated_vpn_path = os.path.join(directory, '../updated_vpns') if not os.path.exists(updated_vpn_path): diff --git a/centinel/vpn/purevpn.py b/centinel/vpn/purevpn.py index f5c97a1..fef0f55 100644 --- a/centinel/vpn/purevpn.py +++ b/centinel/vpn/purevpn.py @@ -40,7 +40,7 @@ def update_config_files(directory): """ Update directory for vpn walker :param directory: - :return: + :return a list of delete, update and added vps: """ updated_vpn_path = os.path.join(directory, '../updated_vpns') if not os.path.exists(updated_vpn_path): From 3bc494653c00a6b6e68703934d81f5947cde99ad Mon Sep 17 00:00:00 2001 From: Arian Niaki Date: Mon, 14 Aug 2017 11:16:11 -0400 Subject: [PATCH 44/75] removing new vpns folder fetched for the update --- centinel/vpn/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 247f5c4..9676359 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -701,7 +701,7 @@ def update_config_files(directory, vp_list): configuration.params['country'] = server_country[vp.replace('.ovpn','')] conf_file = os.path.join(conf_dir, vp) configuration.write_out_config(conf_file) - + shutil.rmtree(new_vpn_dir) def create_config_files(directory): """ From 46cdd8bffacd6078f7cede9fb7ca65a48f88dc74 Mon Sep 17 00:00:00 2001 From: Arian Niaki Date: Mon, 14 Aug 2017 16:43:04 -0400 Subject: [PATCH 45/75] changes to add custom_meta to configs file added --- centinel/vpn/cli.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 9676359..4dc548d 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -703,7 +703,7 @@ def update_config_files(directory, vp_list): configuration.write_out_config(conf_file) shutil.rmtree(new_vpn_dir) -def create_config_files(directory): +def create_config_files(directory, provider): """ For each VPN file in directory/vpns, create a new configuration file and all the associated directories @@ -761,6 +761,19 @@ def create_config_files(directory): configuration.params['server']['verify'] = True configuration.params['experiments']['tcpdump_params'] = ["-i", "tun0"] configuration.params['country'] = server_country[filename.replace('.ovpn','')] + + hostname = os.path.splitext(filename)[0] + print('hostname is %s' %hostname) + vp_ip = "unknown" + try: + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) + continue + + configuration.params['custom_meta']['provider'] = provider + configuration.params['custom_meta']['hostname'] = hostname + configuration.params['custom_meta']['ip_address'] = vp_ip conf_file = os.path.join(conf_dir, filename) configuration.write_out_config(conf_file) @@ -840,32 +853,39 @@ def _run(): if args.vm_index < 1 or args.vm_index > args.vm_num: print "vm_index value cannot be negative or greater than vm_num!" return - + provider = "None" if args.create_conf_dir: if args.create_HMA: hma_dir = return_abs_path(args.create_conf_dir, 'vpns') + provider = 'hma' hma.create_config_files(hma_dir) elif args.create_IPVANISH: ipvanish_dir = return_abs_path(args.create_conf_dir, 'vpns') + provider = 'ipvanish' ipvanish.create_config_files(ipvanish_dir) elif args.create_PUREVPN: purevpn_dir = return_abs_path(args.create_conf_dir, 'vpns') + provider = 'purevpn' purevpn.create_config_files(purevpn_dir) elif args.create_VPNGATE: vpngate_dir = return_abs_path(args.create_conf_dir, 'vpns') + provider = 'vpngate' vpngate.create_config_files(vpngate_dir) # create the config files for the openvpn config files - create_config_files(args.create_conf_dir) + create_config_files(args.create_conf_dir, provider) elif args.update_conf_dir: if args.update_HMA: hma_dir = return_abs_path(args.update_conf_dir, 'vpns') + provider = 'hma' vp_list = hma.update_config_files(hma_dir) if args.update_IPVANISH: ipvanish_dir = return_abs_path(args.update_conf_dir, 'vpns') + provdier = 'ipvanish' vp_list = ipvanish.update_config_files(ipvanish_dir) if args.update_PUREVPN: purevpn_dir = return_abs_path(args.update_conf_dir, 'vpns') + provider = 'purevpn' vp_list = purevpn.update_config_files(purevpn_dir) update_config_files(args.update_conf_dir, vp_list) From cc93ca2bbc74d38f782529234a4c4b6e83b8f63c Mon Sep 17 00:00:00 2001 From: arian Date: Mon, 14 Aug 2017 23:14:52 -0400 Subject: [PATCH 46/75] custom meta successfully added in confgis for each vp --- centinel/config.py | 2 ++ centinel/vpn/cli.py | 10 +++------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/centinel/config.py b/centinel/config.py index aa35a4c..62e7357 100644 --- a/centinel/config.py +++ b/centinel/config.py @@ -79,6 +79,8 @@ def __init__(self): proxy['proxy'] = {proxy['proxy_type']: proxy['proxy_url']} self.params['proxy'] = proxy + self.params['custom_meta'] = {} + def parse_config(self, config_file): """ Given a configuration file, read in and interpret the results diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 4dc548d..eee800d 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -761,16 +761,12 @@ def create_config_files(directory, provider): configuration.params['server']['verify'] = True configuration.params['experiments']['tcpdump_params'] = ["-i", "tun0"] configuration.params['country'] = server_country[filename.replace('.ovpn','')] - - hostname = os.path.splitext(filename)[0] - print('hostname is %s' %hostname) - vp_ip = "unknown" - try: + hostname = os.path.splitext(filename)[0] + vp_ip = "unknown" + try: vp_ip = socket.gethostbyname(hostname) except Exception as exp: logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) - continue - configuration.params['custom_meta']['provider'] = provider configuration.params['custom_meta']['hostname'] = hostname configuration.params['custom_meta']['ip_address'] = vp_ip From e503b6cfb361da0f26a3377c29a1a541dc8b6296 Mon Sep 17 00:00:00 2001 From: shicho Date: Fri, 15 Sep 2017 16:44:00 -0400 Subject: [PATCH 47/75] fix anchor parsing part to get city, country info --- centinel/vpn/geosanity.py | 4 ++-- centinel/vpn/probe.py | 22 +++++++--------------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index 76755cf..e766f48 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -30,7 +30,7 @@ def sanity_check(this_file, anchors_gps, map, directory, pickle_path): try: start_time = time.time() proxy_id = this_file.split('-')[0] - iso_cnt = this_file.split('-')[1] + iso_cnt = this_file.split('-')[1] tag = -1 with open(os.path.join(pickle_path, this_file), 'r') as f: ping_result = pickle.load(f) @@ -92,7 +92,7 @@ def get_gps_of_anchors(anchors, directory): if location == None: location = geolocator.geocode(item['country'], timeout=10) if location == None: - logging.info("Fail to read gps of %s" %anchor) + logging.info("Fail to read gps of %s/%s" %(anchor, item['city'] + ' ' + item['country'])) anchors_gps[anchor] = (location.latitude, location.longitude) except GeocoderTimedOut as e: logging.info("Error geocode failed: %s" %(e)) diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index 181dd7e..ef05a60 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -9,10 +9,8 @@ from datetime import timedelta from urllib import urlopen from bs4 import BeautifulSoup - #-d vpn_providers/ipvanish/ -u auth_file --crt-file ca.ipvanish.com.crt - def get_anchor_list(directory): """Get a list of all RIPE Anchors :return: anchors [hostname]:dict() "probe" @@ -65,7 +63,6 @@ def get_anchor_list(directory): # parsing ripe anchor website reload(sys) sys.setdefaultencoding('utf-8') - html = urlopen('https://atlas.ripe.net/anchors/list/').read() soup = BeautifulSoup(html,"html.parser") ripe_records = (soup.find_all('tr')) @@ -75,16 +72,16 @@ def get_anchor_list(directory): rec = [] for column in columns: soup_column = BeautifulSoup(str(column),"html.parser") - rec.append('\"' + soup_column.td.text.strip().replace('\n','') + '\"') + # rec.append('\"' + soup_column.td.text.strip().replace('\n','') + '\"') + rec.append(soup_column.td.text.strip().replace('\n','')) if(len(rec) > 0): all_records.append(rec) ripe_path = os.path.join(directory,'RIPE_anchor_list.csv') with open(ripe_path,'w') as f: - f.write('Hostname,Probe,Company,City,Country,Capabilities\n') + csvwriter = csv.writer(f) + csvwriter.writerow(('Hostname','Probe','Company','City','Country')) for sublist in all_records: - for item in sublist: - f.write(item + ',') - f.write('\n') + csvwriter.writerow((sublist[0], sublist[1], sublist[3].split(' ')[0], sublist[4], sublist[5])) logging.info("Creating RIPE_anchor list") with open(ripe_path, "r") as f: reader = csv.reader(f) @@ -92,10 +89,8 @@ def get_anchor_list(directory): if row[0] == 'Hostname': continue anchors[row[0]] = {'probe': row[1], 'city': row[3], 'country': row[4], 'ip': str(), 'asn': str()} - logging.info("Finished extracting RIPE anchors from file.") count = 0 - for key, value in anchors.iteritems(): count += 1 logging.info("Retrieving anchor %s, %s/%s" % (value['probe'], count, len(anchors))) @@ -141,17 +136,14 @@ def send_ping(param): times[this_host] = this_delays return times - def perform_probe(sanity_directory, vpn_provider, target_name, target_cnt, anchors): """Send ping 10 times to landmarks and choose the minimum :return: times [host] = list() """ logging.info("Start Probing") - pickle_path = os.path.join(sanity_directory,'pings') if not os.path.exists(pickle_path): os.makedirs(pickle_path) - times = dict() s_time = time.time() results = [] @@ -174,7 +166,7 @@ def perform_probe(sanity_directory, vpn_provider, target_name, target_cnt, ancho logging.info("Creating pickle file") # putting time as a part of the filename time_unique = time.time() - with open(pickle_path + '/' + target_name + '-' + target_cnt + '-' + str(time_unique) + '.pickle', 'w') as f: + with open(pickle_path + '/' + vpn_provider + '-' + target_name + '-' + target_cnt + '-' + str(time_unique) + '.pickle', 'w') as f: pickle.dump(final, f) - logging.info("Pickle file successfully created.") + logging.info("Pickle file successfully created.") return final From b4e9ed180aac7e1558291587b3d0bdf63794bc78 Mon Sep 17 00:00:00 2001 From: shicho Date: Fri, 22 Sep 2017 14:56:39 -0400 Subject: [PATCH 48/75] fix problem: by stopping openvpn properly --- centinel/vpn/cli.py | 70 +++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index eee800d..7ad6108 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -35,9 +35,9 @@ import geosanity as san PID_FILE = "/tmp/centinel.lock" -# log_file = 'log_vpn.log' -# logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s", -# filename=log_file ) +log_file = 'log_vpn.log' +logging.basicConfig(format="%(asctime)s %(filename)s:%(lineno)d %(levelname)s: %(message)s", + filename=log_file ) def parse_args(): parser = argparse.ArgumentParser() @@ -231,43 +231,45 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, (key, country_in_config) = line.split(': ') country_in_config = country_in_config.replace('\"', '').replace(',', '') country = None + meta = centinel.backend.get_meta(config.params, vp_ip) + # send country name to be converted to alpha2 code + if (len(country_in_config) > 2): + meta['country'] = convertor.country_to_a2(country_in_config) + # country = convertor.country_to_a2(country_in_config) + # some vpn config files already contain the alpha2 code (length == 2) + if 'country' in meta: + country = meta['country'] + # try setting the VPN info (IP and country) to get appropriate + # experiemnts and input data. try: - meta = centinel.backend.get_meta(config.params, vp_ip) - # send country name to be converted to alpha2 code - if (len(country_in_config) > 2): - meta['country'] = convertor.country_to_a2(country_in_config) - # country = convertor.country_to_a2(country_in_config) - # some vpn config files already contain the alpha2 code (length == 2) - if 'country' in meta: - country = meta['country'] - # try setting the VPN info (IP and country) to get appropriate - # experiemnts and input data. - try: - logging.info("country is %s" % country) - centinel.backend.set_vpn_info(config.params, vp_ip, country) - except Exception as exp: - logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) - # sanity check - logging.info("%s: Starting VPN." % filename) - vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, - crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) - vpn.start() - if not vpn.started: - logging.error("%s: Failed to start VPN!" % filename) - vpn.stop() - time.sleep(5) - continue - # sending ping to the anchors - ping_result = probe.perform_probe(sanity_path, vpn_provider, vp_ip, country, anchors) - # have to do this sanity check if timestamp is a certain value, needs changing - timestamp = time.time() - ping_result['timestamp'] = timestamp # Todo: ?? - logging.info("%s: Stopping VPN." % filename) + logging.info("country is %s" % country) + centinel.backend.set_vpn_info(config.params, vp_ip, country) + except Exception as exp: + logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) + # sanity check + logging.info("%s: Starting VPN." % filename) + vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, + crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) + vpn.start() + if not vpn.started: + logging.error("%s: Failed to start VPN!" % filename) vpn.stop() time.sleep(5) + continue + # sending ping to the anchors + try: + ping_result = probe.perform_probe(sanity_path, vpn_provider, vp_ip, country, anchors) + # have to do this sanity check if timestamp is a certain value, needs changing + timestamp = time.time() + ping_result['timestamp'] = timestamp except: logging.warning("Failed to send pings from %s" % vp_ip) + logging.info("%s: Stopping VPN." % filename) + vpn.stop() + time.sleep(5) + # sanity check + # return 0 failed_sanity_check = set() sanity_checked_set = set() error_sanity_check = set() From 35164b52beba83d6be62ad34901d402d6068ce62 Mon Sep 17 00:00:00 2001 From: shicho Date: Tue, 24 Oct 2017 11:32:53 -0400 Subject: [PATCH 49/75] speed up probing --- centinel/vpn/probe.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index ef05a60..aa93f9b 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -9,7 +9,6 @@ from datetime import timedelta from urllib import urlopen from bs4 import BeautifulSoup -#-d vpn_providers/ipvanish/ -u auth_file --crt-file ca.ipvanish.com.crt def get_anchor_list(directory): """Get a list of all RIPE Anchors @@ -140,24 +139,28 @@ def perform_probe(sanity_directory, vpn_provider, target_name, target_cnt, ancho """Send ping 10 times to landmarks and choose the minimum :return: times [host] = list() """ - logging.info("Start Probing") + logging.info("Start Probing (%s)" %target_name) pickle_path = os.path.join(sanity_directory,'pings') if not os.path.exists(pickle_path): os.makedirs(pickle_path) times = dict() s_time = time.time() results = [] - process_num = 6 + process_num = 25 pool = mp.Pool(processes=process_num) - results.append(pool.map(send_ping, [(this_host, Param['ip']) for this_host, Param in anchors.iteritems()])) + results.append(pool.map(send_ping, [(this_host, Param['ip']) for this_host, Param in anchors['anchors'].iteritems()])) + _sum = 0 + _total = 0 for output in results[0]: + _total += 1 for key, value in output.iteritems(): + _sum += len(value) if key not in times: times[key] = list() for this in value: times[key].append(this) e_time = time.time() - logging.info(e_time - s_time) + logging.info("Finish Probing (%s): %s/10 (%sec)" %(target_name, _sum/float(_total), e_time-s_time)) pool.close() pool.join() final = {target_name: dict()} From f98dcfc461f53ec0bc08b47a6a296dd139943070 Mon Sep 17 00:00:00 2001 From: shicho Date: Tue, 24 Oct 2017 11:51:02 -0400 Subject: [PATCH 50/75] move getting gps part to probe.py --- centinel/vpn/cli.py | 2 +- centinel/vpn/geosanity.py | 32 -------------------------------- centinel/vpn/probe.py | 36 +++++++++++++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 34 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 7ad6108..d6b154d 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -185,7 +185,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, anchors = probe.get_anchor_list(sanity_path) logging.info("Anchors list fetched") # get anchor's gps - anchors_gps = san.get_gps_of_anchors(anchors, sanity_path) + anchors_gps = probe.get_gps_of_anchors(anchors, sanity_path) logging.info("Anchors gps fetched") # get a world map from shapefile shapefile = sanity_path + "/ne_10m_admin_0_countries.shp" diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index e766f48..ca5f647 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -68,38 +68,6 @@ def load_map_from_shapefile(shapefile): map = temp[['ISO_A2', 'NAME', 'SUBREGION', 'geometry']] return map -def get_gps_of_anchors(anchors, directory): - """ - Get gps of all anchors - Note: geopy library has a limitation for query in a certain time. - While testing, better to store the query results so that we can reduce the number of query. - """ - logging.info("Starting to get RIPE anchors' gps") - anchors_gps = dict() - count = 0 - try: - with open(os.path.join(directory, "gps_of_anchors.pickle"), "r") as f: - anchors_gps = pickle.load(f) - except: - logging.info("gps_of_anchors.pickle is not existed") - for anchor, item in anchors.iteritems(): - count += 1 - logging.info( - "Retrieving... %s(%s/%s): %s" % (anchor, count, len(anchors), item['city'] + ' ' + item['country'])) - geolocator = Nominatim() - try: - location = geolocator.geocode(item['city'] + ' ' + item['country'], timeout=10) - if location == None: - location = geolocator.geocode(item['country'], timeout=10) - if location == None: - logging.info("Fail to read gps of %s/%s" %(anchor, item['city'] + ' ' + item['country'])) - anchors_gps[anchor] = (location.latitude, location.longitude) - except GeocoderTimedOut as e: - logging.info("Error geocode failed: %s" %(e)) - with open(os.path.join(directory, "gps_of_anchors.pickle"), "w") as f: - pickle.dump(anchors_gps, f) - return anchors_gps - class Checker: def __init__(self, proxy_id, iso, path): self.proxy_id = proxy_id diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index aa93f9b..7c75b6d 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -6,6 +6,8 @@ import time import subprocess import multiprocessing as mp +from geopy.geocoders import Nominatim +from geopy.exc import GeocoderTimedOut from datetime import timedelta from urllib import urlopen from bs4 import BeautifulSoup @@ -117,6 +119,38 @@ def get_anchor_list(directory): except (TypeError, ValueError, UnicodeError) as e: sys.exit(1) +def get_gps_of_anchors(anchors, directory): + """ + Get gps of all anchors + Note: geopy library has a limitation for query in a certain time. + While testing, better to store the query results so that we can reduce the number of query. + """ + logging.info("Starting to get RIPE anchors' gps") + anchors_gps = dict() + count = 0 + try: + with open(os.path.join(directory, "gps_of_anchors.pickle"), "r") as f: + anchors_gps = pickle.load(f) + except: + logging.info("gps_of_anchors.pickle is not existed") + for anchor, item in anchors.iteritems(): + count += 1 + logging.info( + "Retrieving... %s(%s/%s): %s" % (anchor, count, len(anchors), item['city'] + ' ' + item['country'])) + geolocator = Nominatim() + try: + location = geolocator.geocode(item['city'] + ' ' + item['country'], timeout=10) + if location == None: + location = geolocator.geocode(item['country'], timeout=10) + if location == None: + logging.info("Fail to read gps of %s/%s" %(anchor, item['city'] + ' ' + item['country'])) + anchors_gps[anchor] = (location.latitude, location.longitude) + except GeocoderTimedOut as e: + logging.info("Error geocode failed: %s" %(e)) + with open(os.path.join(directory, "gps_of_anchors.pickle"), "w") as f: + pickle.dump(anchors_gps, f) + return anchors_gps + def send_ping(param): this_host, ip = param logging.info("Pinging (%s, %s)" % (this_host, ip)) @@ -160,7 +194,7 @@ def perform_probe(sanity_directory, vpn_provider, target_name, target_cnt, ancho for this in value: times[key].append(this) e_time = time.time() - logging.info("Finish Probing (%s): %s/10 (%sec)" %(target_name, _sum/float(_total), e_time-s_time)) + logging.info("Finish Probing (%s): average %s/10 (%sec)" %(target_name, _sum/float(_total), e_time-s_time)) pool.close() pool.join() final = {target_name: dict()} From cafc62c61b6b15e376b6d18305ef927dc33a90b5 Mon Sep 17 00:00:00 2001 From: shicho Date: Tue, 24 Oct 2017 14:53:19 -0400 Subject: [PATCH 51/75] use RIPE API to get an anchors list --- centinel/vpn/probe.py | 183 ++++++++++-------------------------------- 1 file changed, 41 insertions(+), 142 deletions(-) diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index 7c75b6d..66c9d3f 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -1,155 +1,53 @@ import os -import sys -import csv import logging import pickle import time import subprocess import multiprocessing as mp -from geopy.geocoders import Nominatim -from geopy.exc import GeocoderTimedOut from datetime import timedelta -from urllib import urlopen -from bs4 import BeautifulSoup +import requests +from urlparse import urljoin -def get_anchor_list(directory): - """Get a list of all RIPE Anchors - :return: anchors [hostname]:dict() "probe" - "city" - "country" - "ip" - "asn" +def retrieve_anchor_list(directory): + """ Retrieve anchor lists with RIPE API """ logging.info("Starting to fetch RIPE anchors") - landmark_path = os.path.join(directory,"landmarks_list.pickle") - try: + landmark_path = os.path.join(directory, "landmarks_list.pickle") + if os.path.isfile(landmark_path): with open(landmark_path, "r") as f: - anchors = pickle.load(f) - if 'timestamp' in anchors.keys(): - if (time.time() - anchors['timestamp']) <= timedelta(days=30).total_seconds(): - return anchors['anchors'] - else: - logging.info("List of anchors is expired.") - try: - file_path = os.path.join(directory, 'landmarks_list.pickle') - if os.path.isfile(file_path): - os.remove(file_path) - file_path = os.path.join(directory, 'RIPE_anchor_list.csv') - if os.path.isfile(file_path): - os.remove(file_path) - file_path = os.path.join(directory, 'gps_of_anchors.pickle') - if os.path.isfile(file_path): - os.remove(file_path) - except: - logging.info("Fail to delete expired files of anchors.") - pass - else: return anchors - except: - logging.info("landmarks_list.pickle is not existed") - try: - # sys.stderr.write("Retrieving landmark list...") - logging.info("landmarks_list pickle is not available, starting to fetch it") - anchors = dict() - try: - ## you can get "RIPE_anchor_list.csv" by crawling RIPE first page of anchors (table) - ripe_path = os.path.join(directory,'RIPE_anchor_list.csv') - with open(ripe_path, "r") as f: - reader = csv.reader(f) - for row in reader: - if row[0] == 'Hostname': - continue - anchors[row[0]] = {'probe': row[1], 'city': row[3], 'country': row[4], 'ip': str(), 'asn': str()} - except: - logging.info("RIPE_anchor list is not available, starting to fetch it") - # parsing ripe anchor website - reload(sys) - sys.setdefaultencoding('utf-8') - html = urlopen('https://atlas.ripe.net/anchors/list/').read() - soup = BeautifulSoup(html,"html.parser") - ripe_records = (soup.find_all('tr')) - all_records = [] - for record in ripe_records: - columns = record.find_all('td') - rec = [] - for column in columns: - soup_column = BeautifulSoup(str(column),"html.parser") - # rec.append('\"' + soup_column.td.text.strip().replace('\n','') + '\"') - rec.append(soup_column.td.text.strip().replace('\n','')) - if(len(rec) > 0): - all_records.append(rec) - ripe_path = os.path.join(directory,'RIPE_anchor_list.csv') - with open(ripe_path,'w') as f: - csvwriter = csv.writer(f) - csvwriter.writerow(('Hostname','Probe','Company','City','Country')) - for sublist in all_records: - csvwriter.writerow((sublist[0], sublist[1], sublist[3].split(' ')[0], sublist[4], sublist[5])) - logging.info("Creating RIPE_anchor list") - with open(ripe_path, "r") as f: - reader = csv.reader(f) - for row in reader: - if row[0] == 'Hostname': - continue - anchors[row[0]] = {'probe': row[1], 'city': row[3], 'country': row[4], 'ip': str(), 'asn': str()} - logging.info("Finished extracting RIPE anchors from file.") - count = 0 - for key, value in anchors.iteritems(): - count += 1 - logging.info("Retrieving anchor %s, %s/%s" % (value['probe'], count, len(anchors))) - url = 'https://atlas.ripe.net/probes/' + str(value['probe']) + '/#!tab-network/' - try: - html = urlopen(url).read() - soup = BeautifulSoup(html,"html.parser") - for script in soup(["script", "style"]): - script.extract() - text = soup.get_text() - lines = (line.strip() for line in text.splitlines()) - chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) - text = '\n'.join(chunk for chunk in chunks if chunk) - s_text = text.encode('utf-8').split('\n') - index = s_text.index("Internet Address") - anchors[key]['ip'] = str(s_text[index+1]) - anchors[key]['asn'] = str(s_text[s_text.index("ASN")+1]) - except: - logging.exception("Connection reset by Peer on %s" % (url)) - timestamp = time.time() - ripe_anchors = {'timestamp': timestamp, 'anchors': anchors} - with open(landmark_path, "w") as f: - pickle.dump(ripe_anchors, f) - return anchors - except (TypeError, ValueError, UnicodeError) as e: - sys.exit(1) + json_data = pickle.load(f) + if (time.time() - json_data['timestamp']) <= timedelta(days=30).total_seconds(): + return json_data['anchors'] + logging.info("landmarks_list pickle is not available or expired, starting to fetch it.") + s_time = time.time() + BASE_URL = 'https://atlas.ripe.net/api/v2' + query_url = BASE_URL + '/anchors/' + anchors = dict() + while True: + resp = requests.get(query_url) + temp = resp.json() + for this in temp['results']: + assert this['geometry']['type'] == "Point" + anchor_name = this['fqdn'].split('.')[0].strip() + anchors[anchor_name] = {'aid': this["id"], + 'pid': this["probe"], + 'ip_v4': this["ip_v4"], + 'asn_v4': this["as_v4"], + 'longitude': this["geometry"]["coordinates"][0], + 'latitude': this["geometry"]["coordinates"][1], + 'country': this["country"], + 'city': this["city"]} + next_url = temp.get("next") + if next_url is None: + break + query_url = urljoin(query_url, next_url) + ripe_anchors = {'timestamp': time.time(), 'anchors': anchors} + with open(landmark_path, "w") as f: + pickle.dump(ripe_anchors, f) + e_time = time.time() + logging.info("Finishing to fetch RIPE anchors (%s sec)" %(e_time-s_time)) + return anchors -def get_gps_of_anchors(anchors, directory): - """ - Get gps of all anchors - Note: geopy library has a limitation for query in a certain time. - While testing, better to store the query results so that we can reduce the number of query. - """ - logging.info("Starting to get RIPE anchors' gps") - anchors_gps = dict() - count = 0 - try: - with open(os.path.join(directory, "gps_of_anchors.pickle"), "r") as f: - anchors_gps = pickle.load(f) - except: - logging.info("gps_of_anchors.pickle is not existed") - for anchor, item in anchors.iteritems(): - count += 1 - logging.info( - "Retrieving... %s(%s/%s): %s" % (anchor, count, len(anchors), item['city'] + ' ' + item['country'])) - geolocator = Nominatim() - try: - location = geolocator.geocode(item['city'] + ' ' + item['country'], timeout=10) - if location == None: - location = geolocator.geocode(item['country'], timeout=10) - if location == None: - logging.info("Fail to read gps of %s/%s" %(anchor, item['city'] + ' ' + item['country'])) - anchors_gps[anchor] = (location.latitude, location.longitude) - except GeocoderTimedOut as e: - logging.info("Error geocode failed: %s" %(e)) - with open(os.path.join(directory, "gps_of_anchors.pickle"), "w") as f: - pickle.dump(anchors_gps, f) - return anchors_gps def send_ping(param): this_host, ip = param @@ -169,12 +67,13 @@ def send_ping(param): times[this_host] = this_delays return times + def perform_probe(sanity_directory, vpn_provider, target_name, target_cnt, anchors): """Send ping 10 times to landmarks and choose the minimum :return: times [host] = list() """ logging.info("Start Probing (%s)" %target_name) - pickle_path = os.path.join(sanity_directory,'pings') + pickle_path = os.path.join(sanity_directory, 'pings') if not os.path.exists(pickle_path): os.makedirs(pickle_path) times = dict() @@ -182,7 +81,7 @@ def perform_probe(sanity_directory, vpn_provider, target_name, target_cnt, ancho results = [] process_num = 25 pool = mp.Pool(processes=process_num) - results.append(pool.map(send_ping, [(this_host, Param['ip']) for this_host, Param in anchors['anchors'].iteritems()])) + results.append(pool.map(send_ping, [(this_host, Param['ip']) for this_host, Param in anchors.iteritems()])) _sum = 0 _total = 0 for output in results[0]: From 52437ebec957542ab9afc71dfd20a1b385ca5cef Mon Sep 17 00:00:00 2001 From: shicho Date: Tue, 24 Oct 2017 14:56:52 -0400 Subject: [PATCH 52/75] change cli.py to run ripe api --- centinel/vpn/cli.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index d6b154d..4c94a8d 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -177,16 +177,13 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # geolocation sanity check if sanity_check: # start_time = time.time() - # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well + # create a directory to store the RIPE anchor list in it so other vpns could use it as well sanity_path = os.path.join(directory, '../sanitycheck') if not os.path.exists(sanity_path): os.makedirs(sanity_path) # fetch the list of RIPE anchors - anchors = probe.get_anchor_list(sanity_path) + anchors = probe.retrieve_anchor_list(sanity_path) logging.info("Anchors list fetched") - # get anchor's gps - anchors_gps = probe.get_gps_of_anchors(anchors, sanity_path) - logging.info("Anchors gps fetched") # get a world map from shapefile shapefile = sanity_path + "/ne_10m_admin_0_countries.shp" if not os.path.exists(shapefile): @@ -285,7 +282,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, count = 0 pool = multiprocessing.Pool(processes=num) for vp_ip, country, tag in pool.imap_unordered(san.run_checker, - ((this_file, anchors_gps, map, sanity_path, pickle_path) for + ((this_file, anchors, map, sanity_path, pickle_path) for this_file in file_lists), chunksize=1): if tag == -1: From 653bc62957acf3d98d8e1ab2c2c6015fff2f8e6d Mon Sep 17 00:00:00 2001 From: shicho Date: Tue, 24 Oct 2017 18:08:15 -0400 Subject: [PATCH 53/75] clean up probe part --- centinel/vpn/cli.py | 59 +++++++++++---------------------------- centinel/vpn/geosanity.py | 20 ++++++++++++- centinel/vpn/probe.py | 21 ++++++-------- 3 files changed, 44 insertions(+), 56 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 4c94a8d..0ce1356 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -13,10 +13,6 @@ import dns.resolver import json import pickle -import urllib2 -import zipfile -import requests -import StringIO import socket import shutil import multiprocessing @@ -176,7 +172,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # geolocation sanity check if sanity_check: - # start_time = time.time() + start_time = time.time() # create a directory to store the RIPE anchor list in it so other vpns could use it as well sanity_path = os.path.join(directory, '../sanitycheck') if not os.path.exists(sanity_path): @@ -184,55 +180,32 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # fetch the list of RIPE anchors anchors = probe.retrieve_anchor_list(sanity_path) logging.info("Anchors list fetched") - # get a world map from shapefile - shapefile = sanity_path + "/ne_10m_admin_0_countries.shp" - if not os.path.exists(shapefile): - logging.info("Shape file does not exist, Downloading from server") - shapefile_url = 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip' - logging.info("Starting to download map shape file zip") - try: - r = requests.get(shapefile_url, stream=True) - z = zipfile.ZipFile(StringIO.StringIO(r.content)) - z.extractall(sanity_path) - logging.info("Map shape file downloaded") - except Exception as exp: - logging.error("Could not fetch map file : %s" % str(exp)) - map = san.load_map_from_shapefile(shapefile) for filename in conf_list: + vpn_config = os.path.join(vpn_dir, filename) centinel_config = os.path.join(conf_dir, filename) config = centinel.config.Configuration() config.parse_config(centinel_config) # get ip address of hostnames hostname = os.path.splitext(filename)[0] - vp_ip = "unknown" try: vp_ip = socket.gethostbyname(hostname) except Exception as exp: logging.exception("Failed to resolve %s : %s" % (hostname, str(exp))) continue - vpn_config = os.path.join(vpn_dir, filename) - centinel_config = os.path.join(conf_dir, filename) - # assuming that each VPN config file has a name like: - # [ip-address].ovpn, we can extract IP address from filename - # and use it to geolocate and fetch experiments before connecting - # to VPN. - # filename is [hostname].ovpn, we resolved the hostname to ip - # using socket.gethostbyname() - vpn_address, extension = os.path.splitext(filename) - lines = [line.rstrip('\n') for line in open(centinel_config)] + # check if vp_ip is changed (when compared to ip in config file) + # if not changed, then we can use the current results of ping + sanity check + # otherwise, send ping again. # get country for this vpn + with open(centinel_config) as fc: + json_data = json.load(fc) country_in_config = "" - # reading the server.txt file in vpns folder - for line in lines: - if "country" in line: - (key, country_in_config) = line.split(': ') - country_in_config = country_in_config.replace('\"', '').replace(',', '') + if 'country' in json_data: + country_in_config = json_data['country'] country = None meta = centinel.backend.get_meta(config.params, vp_ip) # send country name to be converted to alpha2 code if (len(country_in_config) > 2): meta['country'] = convertor.country_to_a2(country_in_config) - # country = convertor.country_to_a2(country_in_config) # some vpn config files already contain the alpha2 code (length == 2) if 'country' in meta: country = meta['country'] @@ -243,7 +216,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, centinel.backend.set_vpn_info(config.params, vp_ip, country) except Exception as exp: logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) - # sanity check + # send pings logging.info("%s: Starting VPN." % filename) vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) @@ -255,18 +228,18 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, continue # sending ping to the anchors try: - ping_result = probe.perform_probe(sanity_path, vpn_provider, vp_ip, country, anchors) - # have to do this sanity check if timestamp is a certain value, needs changing - timestamp = time.time() - ping_result['timestamp'] = timestamp + ping_result = probe.perform_probe(sanity_path, vpn_provider, vp_ip, hostname, country, anchors) except: logging.warning("Failed to send pings from %s" % vp_ip) logging.info("%s: Stopping VPN." % filename) vpn.stop() time.sleep(5) + return 0 # sanity check # return 0 + # get a world map from shapefile + map = san.load_map_from_shapefile(sanity_path) failed_sanity_check = set() sanity_checked_set = set() error_sanity_check = set() @@ -326,8 +299,8 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, f.write(server + '\n') conf_list = list(sanity_checked_set) logging.info("List size after sanity check. New size: %d" % len(conf_list)) - # end_time = time.time() - start_time - # logging.info("Total elapsed time: %s" %end_time) + end_time = time.time() - start_time + logging.info("Total elapsed time: %s" %end_time) # # return 0 # reduce size of list if reduce_vp is true diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index ca5f647..b71b9a7 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -14,6 +14,12 @@ import pycountry from shapely.ops import transform as sh_transform from shapely.geometry import Point, Polygon, box as Box +import urllib2 +import zipfile +import requests +import StringIO + + def run_checker(args): return sanity_check(*args) @@ -57,12 +63,24 @@ def sanity_check(this_file, anchors_gps, map, directory, pickle_path): return "N/A", "N/A", -1 return proxy_id, iso_cnt, tag -def load_map_from_shapefile(shapefile): +def load_map_from_shapefile(sanity_path): """ Load all countries from shapefile (e.g., shapefile = 'map/ne_10m_admin_0_countries.shp') """ logging.info("Loading a shapefile for the world map") + shapefile = sanity_path + "/ne_10m_admin_0_countries.shp" + if not os.path.exists(shapefile): + logging.info("Shape file does not exist, Downloading from server") + shapefile_url = 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip' + logging.info("Starting to download map shape file zip") + try: + r = requests.get(shapefile_url, stream=True) + z = zipfile.ZipFile(StringIO.StringIO(r.content)) + z.extractall(sanity_path) + logging.info("Map shape file downloaded") + except Exception as exp: + logging.error("Could not fetch map file : %s" % str(exp)) temp = GeoDataFrame.from_file(shapefile) # print temp.dtypes.index map = temp[['ISO_A2', 'NAME', 'SUBREGION', 'geometry']] diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index 66c9d3f..ab93f14 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -68,12 +68,12 @@ def send_ping(param): return times -def perform_probe(sanity_directory, vpn_provider, target_name, target_cnt, anchors): +def perform_probe(sanity_directory, vpn_provider, target_ip, hostname, target_cnt, anchors): """Send ping 10 times to landmarks and choose the minimum :return: times [host] = list() """ - logging.info("Start Probing (%s)" %target_name) - pickle_path = os.path.join(sanity_directory, 'pings') + logging.info("Start Probing [%s(%s)]" %(hostname, target_ip)) + pickle_path = os.path.join(sanity_directory, 'pings/' + vpn_provider) if not os.path.exists(pickle_path): os.makedirs(pickle_path) times = dict() @@ -81,7 +81,7 @@ def perform_probe(sanity_directory, vpn_provider, target_name, target_cnt, ancho results = [] process_num = 25 pool = mp.Pool(processes=process_num) - results.append(pool.map(send_ping, [(this_host, Param['ip']) for this_host, Param in anchors.iteritems()])) + results.append(pool.map(send_ping, [(this_host, Param['ip_v4']) for this_host, Param in anchors.iteritems()])) _sum = 0 _total = 0 for output in results[0]: @@ -93,16 +93,13 @@ def perform_probe(sanity_directory, vpn_provider, target_name, target_cnt, ancho for this in value: times[key].append(this) e_time = time.time() - logging.info("Finish Probing (%s): average %s/10 (%sec)" %(target_name, _sum/float(_total), e_time-s_time)) + logging.info("Finish Probing [%s(%s)]: average succeeded pings=%.2f/10 (%.2fsec)" + %(hostname, target_ip, _sum/float(_total), e_time - s_time)) pool.close() pool.join() - final = {target_name: dict()} - final[target_name]['pings'] = times - final[target_name]['cnt'] = target_cnt + final = {hostname: {'pings': times, 'cnt': target_cnt, 'ip_v4': target_ip}} logging.info("Creating pickle file") - # putting time as a part of the filename - time_unique = time.time() - with open(pickle_path + '/' + vpn_provider + '-' + target_name + '-' + target_cnt + '-' + str(time_unique) + '.pickle', 'w') as f: + with open(pickle_path+'/'+vpn_provider+'-'+hostname+'-'+target_ip+'-'+target_cnt+'.pickle', 'w') as f: pickle.dump(final, f) logging.info("Pickle file successfully created.") - return final + return final \ No newline at end of file From 2f7b845b984f50aefe785a33c0a4c561aba1e334 Mon Sep 17 00:00:00 2001 From: shicho Date: Wed, 25 Oct 2017 16:17:22 -0400 Subject: [PATCH 54/75] clean up sanity check part --- centinel/vpn/cli.py | 98 ++++++++++++++------------------------- centinel/vpn/geosanity.py | 45 +++++++++--------- centinel/vpn/probe.py | 4 +- 3 files changed, 59 insertions(+), 88 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 0ce1356..1fab686 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -9,13 +9,15 @@ import os import time import sys +import csv import signal import dns.resolver import json import pickle import socket import shutil -import multiprocessing +import datetime +import multiprocessing as mp import centinel.backend import centinel.client @@ -173,15 +175,12 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # geolocation sanity check if sanity_check: start_time = time.time() - # create a directory to store the RIPE anchor list in it so other vpns could use it as well sanity_path = os.path.join(directory, '../sanitycheck') if not os.path.exists(sanity_path): os.makedirs(sanity_path) - # fetch the list of RIPE anchors anchors = probe.retrieve_anchor_list(sanity_path) logging.info("Anchors list fetched") for filename in conf_list: - vpn_config = os.path.join(vpn_dir, filename) centinel_config = os.path.join(conf_dir, filename) config = centinel.config.Configuration() config.parse_config(centinel_config) @@ -195,6 +194,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # check if vp_ip is changed (when compared to ip in config file) # if not changed, then we can use the current results of ping + sanity check # otherwise, send ping again. + # get country for this vpn with open(centinel_config) as fc: json_data = json.load(fc) @@ -216,7 +216,9 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, centinel.backend.set_vpn_info(config.params, vp_ip, country) except Exception as exp: logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) - # send pings + + # start openvpn + vpn_config = os.path.join(vpn_dir, filename) logging.info("%s: Starting VPN." % filename) vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) @@ -228,80 +230,48 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, continue # sending ping to the anchors try: - ping_result = probe.perform_probe(sanity_path, vpn_provider, vp_ip, hostname, country, anchors) + probe.perform_probe(sanity_path, vpn_provider, vp_ip, hostname, country, anchors) except: logging.warning("Failed to send pings from %s" % vp_ip) logging.info("%s: Stopping VPN." % filename) vpn.stop() time.sleep(5) - return 0 # sanity check - # return 0 - # get a world map from shapefile + pickle_path = os.path.join(sanity_path, 'pings/' + vpn_provider) map = san.load_map_from_shapefile(sanity_path) - failed_sanity_check = set() - sanity_checked_set = set() - error_sanity_check = set() - vp_ip = 'unknown' - pickle_path = os.path.join(sanity_path, 'pings') file_lists = os.listdir(pickle_path) if file_lists: - num = 1 try: - num = multiprocessing.cpu_count() + num = mp.cpu_count() except (ImportError, NotImplementedError): + num = 1 pass - count = 0 - pool = multiprocessing.Pool(processes=num) - for vp_ip, country, tag in pool.imap_unordered(san.run_checker, - ((this_file, anchors, map, sanity_path, pickle_path) for - this_file in file_lists), - chunksize=1): - if tag == -1: - error_sanity_check.add(vp_ip + '-' + country) - elif tag == True: - sanity_checked_set.add(vp_ip + '-' + country) - else: - failed_sanity_check.add(vp_ip + '-' + country) - count += 1 - logging.info("Finishing.. (%s/%s)" % (count, len(file_lists))) - pool.terminate() + pool = mp.Pool(processes=num) + results = [] + results.append(pool.map(san.sanity_check, [(this_file, anchors, map, sanity_path, pickle_path) + for this_file in file_lists])) + pool.close() pool.join() - for worker in pool._pool: - assert not worker.is_alive() - for this_file in file_lists: - try: - vp_ip = this_file.split('-')[0] - country = this_file.split('-')[1] - with open(os.path.join(pickle_path, this_file), 'r') as f: - ping_result = pickle.load(f) - tag = san.sanity_check(vp_ip, country, ping_result[vp_ip]['pings'], anchors_gps, map, - sanity_path) - if tag == -1: - error_sanity_check.add(vp_ip + '-' + country) - elif tag == True: - sanity_checked_set.add(vp_ip + '-' + country) - else: - failed_sanity_check.add(vp_ip + '-' + country) - except: - logging.warning("Failed to sanity check %s" % vp_ip) - time_unique = time.time() - with open(os.path.join(sanity_path, 'results-of-sanity-check' + str(time_unique) + '.txt'), 'w') as f: - f.write("Pass\n") - for server in sanity_checked_set: - f.write(server + '\n') - f.write("Fail\n") - for server in failed_sanity_check: - f.write(server + '\n') - f.write("Error\n") - for server in error_sanity_check: - f.write(server + '\n') - conf_list = list(sanity_checked_set) - logging.info("List size after sanity check. New size: %d" % len(conf_list)) + new_conf_list = [] + result_path = os.path.join(sanity_path, 'results/' + vpn_provider) + if not os.path.exists(result_path): + os.makedirs(result_path) + current_time = datetime.date.today().strftime("%Y-%m-%d") + with open(os.path.join(result_path, vpn_provider + '-' + current_time + '.csv'), 'w') as f: + writer = csv.writer(f) + writer.writerow(('proxy_name', 'country', 'truth')) + for output in results: + for proxy_name, iso_cnt, tag in output: + if tag == True: + new_conf_list.append(proxy_name + '.ovpn') + writer.writerow((proxy_name, iso_cnt, tag)) + logging.info("List size after sanity check. New size: %d" % len(new_conf_list)) + conf_list = new_conf_list + end_time = time.time() - start_time - logging.info("Total elapsed time: %s" %end_time) - # # return 0 + logging.info("Finished sanity check: total elapsed time (%.2f)" %end_time) + # reduce size of list if reduce_vp is true if reduce_vp: diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index b71b9a7..3b0f858 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -24,7 +24,7 @@ def run_checker(args): return sanity_check(*args) -def sanity_check(this_file, anchors_gps, map, directory, pickle_path): +def sanity_check(args): """ :param proxy_id:(str) :param iso_cnt:(str) @@ -33,27 +33,27 @@ def sanity_check(this_file, anchors_gps, map, directory, pickle_path): :param map:(dataframe) :return: """ + this_file, anchors, map, sanity_path, pickle_path = args try: start_time = time.time() - proxy_id = this_file.split('-')[0] - iso_cnt = this_file.split('-')[1] - tag = -1 with open(os.path.join(pickle_path, this_file), 'r') as f: - ping_result = pickle.load(f) - ping_results = ping_result[proxy_id]['pings'] - checker = Checker(proxy_id, iso_cnt, directory) - # points = checker.check_ping_results(results, anchors_gps) - points = checker.check_ping_results(ping_results, anchors_gps) + json_data = pickle.load(f) + proxy_name = json_data.keys()[0] + iso_cnt = json_data[proxy_name]['cnt'] + pings = json_data[proxy_name]['pings'] + provider =json_data[proxy_name]['vpn_provider'] + checker = Checker(proxy_name, iso_cnt, sanity_path, provider) + points = checker.check_ping_results(pings, anchors) if len(points) == 0: - logging.info("No valid ping results for %s" % proxy_id) - return proxy_id, iso_cnt, -1 + logging.info("No valid ping results for %s" % proxy_name) + return proxy_name, iso_cnt, -1 logging.info("[%s] has %s valid anchors' results (valid pings) from %s anchors" - %(proxy_id, len(points), len(ping_results))) + %(proxy_name, len(points), len(pings))) circles = checker.get_anchors_region(points) proxy_region = checker.get_vpn_region(map) if proxy_region.empty: - logging.info("[%s] Fail to get proxy region: %s" % (proxy_id, iso_cnt)) - return proxy_id, iso_cnt, -1 + logging.info("[%s] Fail to get proxy region: %s" % (proxy_name, iso_cnt)) + return proxy_name, iso_cnt, -1 results = checker.check_overlap(proxy_region, circles, this_file) tag = checker.is_valid(results) end_time = time.time() - start_time @@ -61,7 +61,7 @@ def sanity_check(this_file, anchors_gps, map, directory, pickle_path): except: logging.warning("[%s] Failed to sanity check" % this_file) return "N/A", "N/A", -1 - return proxy_id, iso_cnt, tag + return proxy_name, iso_cnt, tag def load_map_from_shapefile(sanity_path): """ @@ -69,7 +69,7 @@ def load_map_from_shapefile(sanity_path): (e.g., shapefile = 'map/ne_10m_admin_0_countries.shp') """ logging.info("Loading a shapefile for the world map") - shapefile = sanity_path + "/ne_10m_admin_0_countries.shp" + shapefile = os.path.join(sanity_path, "ne_10m_admin_0_countries.shp") if not os.path.exists(shapefile): logging.info("Shape file does not exist, Downloading from server") shapefile_url = 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip' @@ -87,7 +87,8 @@ def load_map_from_shapefile(sanity_path): return map class Checker: - def __init__(self, proxy_id, iso, path): + def __init__(self, proxy_id, iso, path, vpn_provider): + self.vpn_provider = vpn_provider self.proxy_id = proxy_id self.iso = iso self.gps = self._get_gps_of_proxy() @@ -216,11 +217,10 @@ def check_overlap(self, proxy_region, circles, ping_filename): area_overlap = sum(area_overlap.tolist()) stack = area_overlap/area_cnt results.append((True, stack)) - pickle_path = os.path.join(self.path, 'sanity') + pickle_path = os.path.join(self.path, 'sanity/'+self.vpn_provider) if not os.path.exists(pickle_path): os.makedirs(pickle_path) - time_unique = time.time() - with open(pickle_path + '/' + ping_filename, 'w') as f: + with open(os.path.join(pickle_path, ping_filename), 'w') as f: pickle.dump(results, f) logging.info("Pickle file successfully created.") return results @@ -265,9 +265,10 @@ def check_ping_results(self, results, anchors_gps): continue # calculate the distance(km) between proxy and anchor distance = 0 + anchor_gps = (anchors_gps[anchor]['latitude'], anchors_gps[anchor]['longitude']) if len(self.gps) != 0: - distance = vincenty(anchors_gps[anchor], self.gps).km - points.append((distance, min_delay, anchors_gps[anchor][0], anchors_gps[anchor][1], radi)) + distance = vincenty(anchor_gps, self.gps).km + points.append((distance, min_delay, anchor_gps[0], anchor_gps[1], radi)) if len(points) == 0: logging.debug("no valid pings results") return [] diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index ab93f14..bed3e7b 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -97,9 +97,9 @@ def perform_probe(sanity_directory, vpn_provider, target_ip, hostname, target_cn %(hostname, target_ip, _sum/float(_total), e_time - s_time)) pool.close() pool.join() - final = {hostname: {'pings': times, 'cnt': target_cnt, 'ip_v4': target_ip}} + final = {hostname: {'pings': times, 'cnt': target_cnt, 'ip_v4': target_ip, + 'timestamp': time.time(), 'vpn_provider': vpn_provider}} logging.info("Creating pickle file") with open(pickle_path+'/'+vpn_provider+'-'+hostname+'-'+target_ip+'-'+target_cnt+'.pickle', 'w') as f: pickle.dump(final, f) logging.info("Pickle file successfully created.") - return final \ No newline at end of file From 13c32c0c6084edb886dec96bac2e08c98e2a8dc4 Mon Sep 17 00:00:00 2001 From: shicho Date: Wed, 25 Oct 2017 16:31:51 -0400 Subject: [PATCH 55/75] move sending pings to probe.py --- centinel/vpn/cli.py | 74 ++++++------------------------------------- centinel/vpn/probe.py | 74 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 80 insertions(+), 68 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 1fab686..d4fa6fc 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -13,7 +13,6 @@ import signal import dns.resolver import json -import pickle import socket import shutil import datetime @@ -180,63 +179,9 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, os.makedirs(sanity_path) anchors = probe.retrieve_anchor_list(sanity_path) logging.info("Anchors list fetched") - for filename in conf_list: - centinel_config = os.path.join(conf_dir, filename) - config = centinel.config.Configuration() - config.parse_config(centinel_config) - # get ip address of hostnames - hostname = os.path.splitext(filename)[0] - try: - vp_ip = socket.gethostbyname(hostname) - except Exception as exp: - logging.exception("Failed to resolve %s : %s" % (hostname, str(exp))) - continue - # check if vp_ip is changed (when compared to ip in config file) - # if not changed, then we can use the current results of ping + sanity check - # otherwise, send ping again. - - # get country for this vpn - with open(centinel_config) as fc: - json_data = json.load(fc) - country_in_config = "" - if 'country' in json_data: - country_in_config = json_data['country'] - country = None - meta = centinel.backend.get_meta(config.params, vp_ip) - # send country name to be converted to alpha2 code - if (len(country_in_config) > 2): - meta['country'] = convertor.country_to_a2(country_in_config) - # some vpn config files already contain the alpha2 code (length == 2) - if 'country' in meta: - country = meta['country'] - # try setting the VPN info (IP and country) to get appropriate - # experiemnts and input data. - try: - logging.info("country is %s" % country) - centinel.backend.set_vpn_info(config.params, vp_ip, country) - except Exception as exp: - logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) - - # start openvpn - vpn_config = os.path.join(vpn_dir, filename) - logging.info("%s: Starting VPN." % filename) - vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, - crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) - vpn.start() - if not vpn.started: - logging.error("%s: Failed to start VPN!" % filename) - vpn.stop() - time.sleep(5) - continue - # sending ping to the anchors - try: - probe.perform_probe(sanity_path, vpn_provider, vp_ip, hostname, country, anchors) - except: - logging.warning("Failed to send pings from %s" % vp_ip) - logging.info("%s: Stopping VPN." % filename) - vpn.stop() - time.sleep(5) - + # send pings + probe.start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, + key_direction, sanity_path, vpn_provider, anchors) # sanity check pickle_path = os.path.join(sanity_path, 'pings/' + vpn_provider) map = san.load_map_from_shapefile(sanity_path) @@ -249,8 +194,8 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, pass pool = mp.Pool(processes=num) results = [] - results.append(pool.map(san.sanity_check, [(this_file, anchors, map, sanity_path, pickle_path) - for this_file in file_lists])) + results.append(pool.map(san.sanity_check, + [(this_file, anchors, map, sanity_path, pickle_path) for this_file in file_lists])) pool.close() pool.join() new_conf_list = [] @@ -268,7 +213,6 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, writer.writerow((proxy_name, iso_cnt, tag)) logging.info("List size after sanity check. New size: %d" % len(new_conf_list)) conf_list = new_conf_list - end_time = time.time() - start_time logging.info("Finished sanity check: total elapsed time (%.2f)" %end_time) @@ -285,10 +229,10 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, hostname = os.path.splitext(filename)[0] vp_ip = "unknown" try: - vp_ip = socket.gethostbyname(hostname) - except Exception as exp: - logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) - continue + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) + continue # get country for this vpn country_in_config = "" diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index bed3e7b..2e3576e 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -1,13 +1,19 @@ import os -import logging -import pickle import time +import json +import pickle +import socket +import logging +import requests import subprocess import multiprocessing as mp from datetime import timedelta -import requests from urlparse import urljoin +import country_module as convertor +import centinel.backend +import centinel.vpn.openvpn as openvpn + def retrieve_anchor_list(directory): """ Retrieve anchor lists with RIPE API """ @@ -103,3 +109,65 @@ def perform_probe(sanity_directory, vpn_provider, target_ip, hostname, target_cn with open(pickle_path+'/'+vpn_provider+'-'+hostname+'-'+target_ip+'-'+target_cnt+'.pickle', 'w') as f: pickle.dump(final, f) logging.info("Pickle file successfully created.") + + +def start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, + key_direction, sanity_path, vpn_provider, anchors): + """ Run vpn_walk to get pings from proxy to anchors + """ + for filename in conf_list: + centinel_config = os.path.join(conf_dir, filename) + config = centinel.config.Configuration() + config.parse_config(centinel_config) + # get ip address of hostnames + hostname = os.path.splitext(filename)[0] + try: + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" % (hostname, str(exp))) + continue + # check if vp_ip is changed (when compared to ip in config file) + # if not changed, then we can use the current results of ping + sanity check + # otherwise, send ping again. + + # get country for this vpn + with open(centinel_config) as fc: + json_data = json.load(fc) + country_in_config = "" + if 'country' in json_data: + country_in_config = json_data['country'] + country = None + meta = centinel.backend.get_meta(config.params, vp_ip) + # send country name to be converted to alpha2 code + if (len(country_in_config) > 2): + meta['country'] = convertor.country_to_a2(country_in_config) + # some vpn config files already contain the alpha2 code (length == 2) + if 'country' in meta: + country = meta['country'] + # try setting the VPN info (IP and country) to get appropriate + # experiemnts and input data. + try: + logging.info("country is %s" % country) + centinel.backend.set_vpn_info(config.params, vp_ip, country) + except Exception as exp: + logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) + + # start openvpn + vpn_config = os.path.join(vpn_dir, filename) + logging.info("%s: Starting VPN." % filename) + vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, + crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) + vpn.start() + if not vpn.started: + logging.error("%s: Failed to start VPN!" % filename) + vpn.stop() + time.sleep(5) + continue + # sending ping to the anchors + try: + perform_probe(sanity_path, vpn_provider, vp_ip, hostname, country, anchors) + except: + logging.warning("Failed to send pings from %s" % vp_ip) + logging.info("%s: Stopping VPN." % filename) + vpn.stop() + time.sleep(5) From ef52c1ae40d65f63b318ed02a66039d1feebe610 Mon Sep 17 00:00:00 2001 From: shicho Date: Wed, 25 Oct 2017 21:38:46 -0400 Subject: [PATCH 56/75] remove unnecessary config addition part --- centinel/vpn/cli.py | 15 ++++++--------- centinel/vpn/probe.py | 25 +++++-------------------- 2 files changed, 11 insertions(+), 29 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index d4fa6fc..3b0fbb1 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -227,26 +227,23 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, config = centinel.config.Configuration() config.parse_config(centinel_config) hostname = os.path.splitext(filename)[0] - vp_ip = "unknown" try: vp_ip = socket.gethostbyname(hostname) except Exception as exp: logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) continue - # get country for this vpn + with open(centinel_config) as fc: + json_data = json.load(fc) country_in_config = "" - # reading the server.txt file in vpns folder - for line in lines: - if "country" in line: - (key, country_in_config) = line.split(': ') - country_in_config = country_in_config.replace('\"', '').replace(',', '') + if 'country' in json_data: + country_in_config = json_data['country'] try: meta = centinel.backend.get_meta(config.params, vp_ip) + # send country name to be converted to alpha2 code if (len(country_in_config) > 2): - meta['country'] = convertor.country_to_a2(country_in_config) - + meta['country'] = convertor.country_to_a2(country_in_config) if 'country' in meta and 'as_number' in meta \ and meta['country'] and meta['as_number']: country_asn = '_'.join([meta['country'], meta['as_number']]) diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index 2e3576e..f07b678 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -115,6 +115,7 @@ def start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, key_direction, sanity_path, vpn_provider, anchors): """ Run vpn_walk to get pings from proxy to anchors """ + start_time = time.time() for filename in conf_list: centinel_config = os.path.join(conf_dir, filename) config = centinel.config.Configuration() @@ -126,35 +127,17 @@ def start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, except Exception as exp: logging.exception("Failed to resolve %s : %s" % (hostname, str(exp))) continue - # check if vp_ip is changed (when compared to ip in config file) - # if not changed, then we can use the current results of ping + sanity check - # otherwise, send ping again. - # get country for this vpn with open(centinel_config) as fc: json_data = json.load(fc) country_in_config = "" if 'country' in json_data: country_in_config = json_data['country'] - country = None - meta = centinel.backend.get_meta(config.params, vp_ip) - # send country name to be converted to alpha2 code if (len(country_in_config) > 2): - meta['country'] = convertor.country_to_a2(country_in_config) - # some vpn config files already contain the alpha2 code (length == 2) - if 'country' in meta: - country = meta['country'] - # try setting the VPN info (IP and country) to get appropriate - # experiemnts and input data. - try: - logging.info("country is %s" % country) - centinel.backend.set_vpn_info(config.params, vp_ip, country) - except Exception as exp: - logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) - + country = convertor.country_to_a2(country_in_config) # start openvpn vpn_config = os.path.join(vpn_dir, filename) - logging.info("%s: Starting VPN." % filename) + logging.info("%s: Starting VPN. (%s)" %(filename, country)) vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) vpn.start() @@ -171,3 +154,5 @@ def start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, logging.info("%s: Stopping VPN." % filename) vpn.stop() time.sleep(5) + end_time = time.time() - start_time + logging.info("Finished all probing: %.2fsec" %(end_time)) \ No newline at end of file From c92ad07f69d6145d454bbc021670de8ac659647f Mon Sep 17 00:00:00 2001 From: shicho Date: Fri, 27 Oct 2017 15:19:09 -0400 Subject: [PATCH 57/75] collect info for analysis --- centinel/vpn/geosanity.py | 61 ++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index 3b0f858..f39665e 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -2,6 +2,7 @@ import logging import os import time +import csv import pickle import matplotlib matplotlib.use('Agg') @@ -14,16 +15,10 @@ import pycountry from shapely.ops import transform as sh_transform from shapely.geometry import Point, Polygon, box as Box -import urllib2 import zipfile import requests import StringIO - - -def run_checker(args): - return sanity_check(*args) - def sanity_check(args): """ :param proxy_id:(str) @@ -42,22 +37,23 @@ def sanity_check(args): iso_cnt = json_data[proxy_name]['cnt'] pings = json_data[proxy_name]['pings'] provider =json_data[proxy_name]['vpn_provider'] - checker = Checker(proxy_name, iso_cnt, sanity_path, provider) + proxy_ip = json_data[proxy_name]['ip_v4'] + checker = Checker(proxy_name, iso_cnt, sanity_path, provider, proxy_ip) points = checker.check_ping_results(pings, anchors) if len(points) == 0: logging.info("No valid ping results for %s" % proxy_name) return proxy_name, iso_cnt, -1 - logging.info("[%s] has %s valid anchors' results (valid pings) from %s anchors" + logging.info("[%s] has %s valid pings from %s anchors" %(proxy_name, len(points), len(pings))) circles = checker.get_anchors_region(points) proxy_region = checker.get_vpn_region(map) if proxy_region.empty: logging.info("[%s] Fail to get proxy region: %s" % (proxy_name, iso_cnt)) return proxy_name, iso_cnt, -1 - results = checker.check_overlap(proxy_region, circles, this_file) + results = checker.check_overlap(proxy_region, circles, this_file, anchors) tag = checker.is_valid(results) end_time = time.time() - start_time - logging.info("[%s] How long it takes: %s" % (this_file, end_time)) + logging.info("[%s] sanity check takes for %.2fms" % (proxy_name, end_time)) except: logging.warning("[%s] Failed to sanity check" % this_file) return "N/A", "N/A", -1 @@ -87,12 +83,13 @@ def load_map_from_shapefile(sanity_path): return map class Checker: - def __init__(self, proxy_id, iso, path, vpn_provider): + def __init__(self, proxy_id, iso, path, vpn_provider, ip): self.vpn_provider = vpn_provider self.proxy_id = proxy_id self.iso = iso self.gps = self._get_gps_of_proxy() self.path = path + self.ip = ip def get_vpn_region(self, map): """ @@ -134,7 +131,7 @@ def get_anchors_region(self, points): https://github.com/zackw/active-geolocator Note that pyproj takes distances in meters & lon/lat order. """ - logging.info("Starting to draw anchors region") + # logging.info("Starting to draw anchors region") wgs_proj = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") ## Sort based on distance. if there is no distance, then sort with min delay if points[0][0] != 0: @@ -143,7 +140,7 @@ def get_anchors_region(self, points): points.sort(key=lambda tup: tup[1]) #order of min time circles = list() count = 0 - for dist, min_delay, lat, lon, radi in points: + for dist, min_delay, lat, lon, radi, anchor_name in points: count += 1 # create azimuthal equidistant projector for each anchors aeqd = pyproj.Proj(proj='aeqd', ellps='WGS84', datum='WGS84', @@ -180,21 +177,21 @@ def get_anchors_region(self, points): disk = df3.geometry[0] assert disk.is_valid assert disk.contains(origin) - circles.append((lat, lon, radi, disk)) + circles.append((lat, lon, radi, disk, anchor_name, dist, min_delay)) except Exception as e: logging.debug("Fail to get a circle %s" %self.proxy_id) return circles - def check_overlap(self, proxy_region, circles, ping_filename): + def check_overlap(self, proxy_region, circles, ping_filename, anchors): """ Check overlap between proxy region and anchors' region. If there is an overlap check how much they are overlapped, otherwise, check how far the distance is from a proxy. :return results(list): if True: the percentage of overlapped area to a country False: the distance (km) between a country and expected range """ - logging.info("Starting to check overlap") + # logging.info("Starting to check overlap") results = list() - for lat, lon, radi, this_circle in circles: + for lat, lon, radi, this_circle, anchor_name, distance, min_delay in circles: df_anchor = geopandas.GeoDataFrame({'geometry': [this_circle]}) overlap = geopandas.overlay(proxy_region, df_anchor, how="intersection") if overlap.empty: @@ -208,21 +205,38 @@ def check_overlap(self, proxy_region, circles, ping_filename): ## min_distance azimu_anchor = self._disk(0, 0, radi * 1000) #km ---> m gap = azimu_anchor.distance(azimu_cnt) / 1000 #km - results.append((False, gap)) + results.append({'anchor_name': anchor_name, 'distanct': distance, 'proxy_name': self.proxy_id, + 'min_delay': min_delay, 'truth': False, 'extra': gap, 'anchor_gps': (lat, lon), + 'anchor_ip': anchors[anchor_name]['ip_v4'], 'radius': radi, 'proxy_ip': self.ip, + 'anchor_cnt': (anchors[anchor_name]['city'], anchors[anchor_name]['country']), + 'proxy_country': self.iso}) else: ## area area_cnt = proxy_region['geometry'].area#/10**6 #km/sqr area_cnt = sum(area_cnt.tolist()) area_overlap = overlap['geometry'].area#/10**6 #km/sqr area_overlap = sum(area_overlap.tolist()) - stack = area_overlap/area_cnt - results.append((True, stack)) + overlapped = area_overlap/area_cnt + results.append({'anchor_name': anchor_name, 'distance': distance, 'proxy_name': self.proxy_id, + 'min_delay': min_delay, 'truth': True, 'extra': overlapped, 'anchor_gps': (lat, lon), + 'anchor_ip': anchors[anchor_name]['ip_v4'], 'radius': radi, 'proxy_ip': self.ip, + 'anchor_cnt': (anchors[anchor_name]['city'], anchors[anchor_name]['country']), + 'proxy_country': self.iso}) pickle_path = os.path.join(self.path, 'sanity/'+self.vpn_provider) if not os.path.exists(pickle_path): os.makedirs(pickle_path) + with open(os.path.join(pickle_path, ping_filename+'.csv'), 'w') as f: + writer = csv.writer(f) + writer.writerow(('proxy_name','proxy_ip','proxy_country','truth','extra', + 'anchor_name','anchor_ip','anchor_cnt','anchor_gps','distance','min_delay','radius')) + for this in results: + writer.writerow((this['proxy_name'],this['proxy_ip'],this['proxy_country'], + this['truth'],this['extra'], + this['anchor_name'],this['anchor_ip'],this['anchor_cnt'], + this['anchor_gps'],this['distance'],this['min_delay'],this['radius'])) with open(os.path.join(pickle_path, ping_filename), 'w') as f: pickle.dump(results, f) - logging.info("Pickle file successfully created.") + # logging.info("Pickle file successfully created.") return results def _calculate_radius(self, time_ms): @@ -245,7 +259,6 @@ def check_ping_results(self, results, anchors_gps): Otherwise, return latitude and longitude of vps, radius derived from ping delay. Return points(list): (lat, lon, radius) """ - logging.info("Starting checking ping results") points = list() for anchor, pings in results.iteritems(): valid_pings = list() @@ -268,7 +281,7 @@ def check_ping_results(self, results, anchors_gps): anchor_gps = (anchors_gps[anchor]['latitude'], anchors_gps[anchor]['longitude']) if len(self.gps) != 0: distance = vincenty(anchor_gps, self.gps).km - points.append((distance, min_delay, anchor_gps[0], anchor_gps[1], radi)) + points.append((distance, min_delay, anchor_gps[0], anchor_gps[1], radi, anchor)) if len(points) == 0: logging.debug("no valid pings results") return [] @@ -279,7 +292,7 @@ def is_valid(self, results): Need reasonable threshold to answer the validation of location For now, we say it is valid if 90% of 30 nearest anchors are True """ - logging.info("checking validation") + # logging.info("checking validation") total = 0 count_valid = 0 limit = 30 From 8c98199e5de2d75abd1e99a965fb5ab9450913fd Mon Sep 17 00:00:00 2001 From: shicho Date: Fri, 27 Oct 2017 15:23:49 -0400 Subject: [PATCH 58/75] erro msq --- centinel/vpn/geosanity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index f39665e..c19a492 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -54,8 +54,8 @@ def sanity_check(args): tag = checker.is_valid(results) end_time = time.time() - start_time logging.info("[%s] sanity check takes for %.2fms" % (proxy_name, end_time)) - except: - logging.warning("[%s] Failed to sanity check" % this_file) + except Exception, e: + logging.warning("[%s] Failed to sanity check: %s" % (this_file, str(e))) return "N/A", "N/A", -1 return proxy_name, iso_cnt, tag From ff4348bcdd43b3ba792660416b656500d8e45187 Mon Sep 17 00:00:00 2001 From: shicho Date: Fri, 27 Oct 2017 16:06:21 -0400 Subject: [PATCH 59/75] fix minor errors --- centinel/vpn/geosanity.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index c19a492..caa46a9 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -204,8 +204,8 @@ def check_overlap(self, proxy_region, circles, ping_filename, anchors): proxy_region.geometry.item()) ## min_distance azimu_anchor = self._disk(0, 0, radi * 1000) #km ---> m - gap = azimu_anchor.distance(azimu_cnt) / 1000 #km - results.append({'anchor_name': anchor_name, 'distanct': distance, 'proxy_name': self.proxy_id, + gap = azimu_anchor.distance(azimu_cnt) / float(1000) #km + results.append({'anchor_name': anchor_name, 'distance': distance, 'proxy_name': self.proxy_id, 'min_delay': min_delay, 'truth': False, 'extra': gap, 'anchor_gps': (lat, lon), 'anchor_ip': anchors[anchor_name]['ip_v4'], 'radius': radi, 'proxy_ip': self.ip, 'anchor_cnt': (anchors[anchor_name]['city'], anchors[anchor_name]['country']), @@ -296,7 +296,9 @@ def is_valid(self, results): total = 0 count_valid = 0 limit = 30 - for valid, aux in results: + for this in results: + valid = this['truth'] + aux = this['extra'] total += 1 if valid: count_valid += 1 From ea8938f19dc4564e12444ccfc435343f9e547cda Mon Sep 17 00:00:00 2001 From: shicho Date: Thu, 19 Apr 2018 13:19:18 -0400 Subject: [PATCH 60/75] Fix indents --- centinel/vpn/cli.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 3b0fbb1..a9da512 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -192,6 +192,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, except (ImportError, NotImplementedError): num = 1 pass + # num = 10 pool = mp.Pool(processes=num) results = [] results.append(pool.map(san.sanity_check, @@ -216,7 +217,6 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, end_time = time.time() - start_time logging.info("Finished sanity check: total elapsed time (%.2f)" %end_time) - # reduce size of list if reduce_vp is true if reduce_vp: logging.info("Reducing list size. Original size: %d" % len(conf_list)) @@ -332,22 +332,22 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # [ip-address].ovpn, we can extract IP address from filename # and use it to geolocate and fetch experiments before connecting # to VPN. - # filename is [OBhostname].ovpn, we resolved the hostname to ip - # using socket.gethostbyname() + # filename is [OBhostname].ovpn, we resolved the hostname to ip + # using socket.gethostbyname() hostname = os.path.splitext(filename)[0] vp_ip = "unknown" try: - vp_ip = socket.gethostbyname(hostname) - except Exception as exp: - logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) - continue + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) + continue # vpn_address, extension = os.path.splitext(filename) lines = [line.rstrip('\n') for line in open(centinel_config)] # get country for this vpn country_in_config = "" - # reading the server.txt file in vpns folder + # reading the server.txt file in vpns folder for line in lines: if "country" in line: (key, country_in_config) = line.split(': ') @@ -356,13 +356,13 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, country = None try: - # we still might need some info from the Maximind query + # we still might need some info from the Maximind query meta = centinel.backend.get_meta(config.params, vp_ip) - # send country name to be converted to alpha2 code - if(len(country_in_config) > 2): - meta['country'] = convertor.country_to_a2(country_in_config) - # some vpn config files already contain the alpha2 code (length == 2) + # send country name to be converted to alpha2 code + if(len(country_in_config) > 2): + meta['country'] = convertor.country_to_a2(country_in_config) + # some vpn config files already contain the alpha2 code (length == 2) if 'country' in meta: country = meta['country'] except: @@ -375,7 +375,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # try setting the VPN info (IP and country) to get appropriate # experiemnts and input data. try: - logging.info("country is %s" % country) + logging.info("country is %s" % country) centinel.backend.set_vpn_info(config.params, vp_ip, country) except Exception as exp: logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) @@ -437,7 +437,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, continue - # sending ping to the anchors + # sending ping to the anchors # ping_result = probe.perform_probe(sanity_path, vpn_provider,vpn_provider,country,anchors) # have to do this sanity check if timestamp is a certain value, needs changing From 31e03d095470761d705d9f608d7f23993fc95afa Mon Sep 17 00:00:00 2001 From: shicho Date: Sun, 29 Apr 2018 02:06:22 -0400 Subject: [PATCH 61/75] Creat configs for other VP --- centinel/vpn/expressvpn.py | 0 centinel/vpn/totalvpn.py | 0 centinel/vpn/tunnelbear.py | 74 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+) create mode 100644 centinel/vpn/expressvpn.py create mode 100644 centinel/vpn/totalvpn.py create mode 100644 centinel/vpn/tunnelbear.py diff --git a/centinel/vpn/expressvpn.py b/centinel/vpn/expressvpn.py new file mode 100644 index 0000000..e69de29 diff --git a/centinel/vpn/totalvpn.py b/centinel/vpn/totalvpn.py new file mode 100644 index 0000000..e69de29 diff --git a/centinel/vpn/tunnelbear.py b/centinel/vpn/tunnelbear.py new file mode 100644 index 0000000..393bd5d --- /dev/null +++ b/centinel/vpn/tunnelbear.py @@ -0,0 +1,74 @@ +#-c /home/katja/project/new_centinel/centinel/vpn_configs/expressvpn --create-expressvpn-configs +import os +import shutil +import logging +import hashlib +import pickle + +def hash_file(filename): + """ + This function returns the SHA-1 hash + of the file passed into it + """ + # make a hash object + h = hashlib.sha1() + + # open file for reading in binary mode + with open(filename,'rb') as file: + + # loop till the end of the file + chunk = 0 + while chunk != b'': + # read only 1024 bytes at a time + chunk = file.read(1024) + h.update(chunk) + + # return the hex representation of digest + return h.hexdigest() + +def create_config_files(directory): + """ + Initialize directory ready for vpn walker + :param directory: the path where you want this to happen + :return: + """ + if not os.path.exists(directory): + os.makedirs(directory) + + #TODO: write a code to download credentials and config files from its site + orig_path = '/nfs/london/data2/shicho/proxy-configs-2018/ovpn.tbear-split' + server_country = {} + config_dict = {} + for filename in os.listdir(orig_path): + if filename.endswith('.ovpn'): + country = filename.split('-')[1] + file_path = os.path.join(orig_path, filename) + lines = [line.rstrip('\n') for line in open(file_path)] + + hostname = "" + for line in lines: + if line.startswith('remote'): + hostname = line.split(' ')[1] + if len(hostname) > 0: + new_path = os.path.join(directory, hostname + '.ovpn') + shutil.copyfile(file_path, new_path) + server_country[hostname] = country + + # add dns server + logging.info("Appending DNS update options") + for filename in os.listdir(directory): + file_path = os.path.join(directory, filename) + with open(file_path, 'a') as f: + f.write('\n') + f.write('up /etc/openvpn/update-resolv-conf\n') + f.write('down /etc/openvpn/update-resolv-conf\n') + message = hash_file(file_path) + config_dict[filename] = message + output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') + pickle.dump(config_dict, output) + output.close() + + print os.path.join(directory, 'servers.txt'), len(server_country) + with open(os.path.join(directory, 'servers.txt'), 'w') as f: + for hostname in server_country: + f.write('|'.join([hostname, server_country[hostname]]) + '\n') From 2b7e5dae06d2b17cecc88d5965e625d5e52c2d42 Mon Sep 17 00:00:00 2001 From: shicho Date: Sun, 29 Apr 2018 02:35:05 -0400 Subject: [PATCH 62/75] Fast enough not to store anchors lists --- centinel/vpn/probe.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index f07b678..936250f 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -14,25 +14,18 @@ import centinel.backend import centinel.vpn.openvpn as openvpn -def retrieve_anchor_list(directory): +def retrieve_anchor_list(): """ Retrieve anchor lists with RIPE API """ logging.info("Starting to fetch RIPE anchors") - landmark_path = os.path.join(directory, "landmarks_list.pickle") - if os.path.isfile(landmark_path): - with open(landmark_path, "r") as f: - json_data = pickle.load(f) - if (time.time() - json_data['timestamp']) <= timedelta(days=30).total_seconds(): - return json_data['anchors'] - logging.info("landmarks_list pickle is not available or expired, starting to fetch it.") s_time = time.time() BASE_URL = 'https://atlas.ripe.net/api/v2' query_url = BASE_URL + '/anchors/' anchors = dict() while True: resp = requests.get(query_url) - temp = resp.json() - for this in temp['results']: + resp = resp.json() + for this in resp['results']: assert this['geometry']['type'] == "Point" anchor_name = this['fqdn'].split('.')[0].strip() anchors[anchor_name] = {'aid': this["id"], @@ -43,18 +36,14 @@ def retrieve_anchor_list(directory): 'latitude': this["geometry"]["coordinates"][1], 'country': this["country"], 'city': this["city"]} - next_url = temp.get("next") + next_url = resp.get("next") if next_url is None: break query_url = urljoin(query_url, next_url) - ripe_anchors = {'timestamp': time.time(), 'anchors': anchors} - with open(landmark_path, "w") as f: - pickle.dump(ripe_anchors, f) e_time = time.time() logging.info("Finishing to fetch RIPE anchors (%s sec)" %(e_time-s_time)) return anchors - def send_ping(param): this_host, ip = param logging.info("Pinging (%s, %s)" % (this_host, ip)) @@ -130,11 +119,11 @@ def start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, # get country for this vpn with open(centinel_config) as fc: json_data = json.load(fc) - country_in_config = "" + country = "" if 'country' in json_data: - country_in_config = json_data['country'] - if (len(country_in_config) > 2): - country = convertor.country_to_a2(country_in_config) + country = json_data['country'] + if (len(country) > 2): + country = convertor.country_to_a2(country) # start openvpn vpn_config = os.path.join(vpn_dir, filename) logging.info("%s: Starting VPN. (%s)" %(filename, country)) From d7e0ad97662de3946d5f5fccc522bf5f2adca36a Mon Sep 17 00:00:00 2001 From: shicho Date: Sun, 29 Apr 2018 04:36:20 -0400 Subject: [PATCH 63/75] Subtract ping time from local to vp --- centinel/vpn/cli.py | 115 ++++++++++++++++++++---------------------- centinel/vpn/probe.py | 43 ++++++++++------ 2 files changed, 81 insertions(+), 77 deletions(-) mode change 100755 => 100644 centinel/vpn/cli.py diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py old mode 100755 new mode 100644 index a9da512..9b48759 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -34,7 +34,8 @@ PID_FILE = "/tmp/centinel.lock" log_file = 'log_vpn.log' logging.basicConfig(format="%(asctime)s %(filename)s:%(lineno)d %(levelname)s: %(message)s", - filename=log_file ) + filename=log_file) + def parse_args(): parser = argparse.ArgumentParser() @@ -101,7 +102,6 @@ def parse_args(): g2.add_argument('--update-config', '-z', help=create_conf_help, dest='update_conf_dir') - # following args are used to support splitting clients among multiple VMs # each running vpn walker will use this to decide which portion of vpn # endpoints it should include @@ -113,7 +113,6 @@ def parse_args(): return parser.parse_args() - def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, exclude_list, shuffle_lists, vm_num, vm_index, reduce_vp, sanity_check): """ @@ -177,7 +176,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, sanity_path = os.path.join(directory, '../sanitycheck') if not os.path.exists(sanity_path): os.makedirs(sanity_path) - anchors = probe.retrieve_anchor_list(sanity_path) + anchors = probe.retrieve_anchor_list() logging.info("Anchors list fetched") # send pings probe.start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, @@ -192,7 +191,6 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, except (ImportError, NotImplementedError): num = 1 pass - # num = 10 pool = mp.Pool(processes=num) results = [] results.append(pool.map(san.sanity_check, @@ -230,7 +228,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, try: vp_ip = socket.gethostbyname(hostname) except Exception as exp: - logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) + logging.exception("Failed to resolve %s : %s" %(hostname, str(exp))) continue # get country for this vpn with open(centinel_config) as fc: @@ -260,7 +258,6 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, conf_list = list(reduced_conf_set) logging.info("List size reduced. New size: %d" % len(conf_list)) - # sort file list to ensure the same filename sequence in each VM conf_list = sorted(conf_list) @@ -326,8 +323,6 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, config = centinel.config.Configuration() config.parse_config(centinel_config) - - # assuming that each VPN config file has a name like: # [ip-address].ovpn, we can extract IP address from filename # and use it to geolocate and fetch experiments before connecting @@ -339,10 +334,10 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, try: vp_ip = socket.gethostbyname(hostname) except Exception as exp: - logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) + logging.exception("Failed to resolve %s : %s" %(hostname, str(exp))) continue -# vpn_address, extension = os.path.splitext(filename) + # vpn_address, extension = os.path.splitext(filename) lines = [line.rstrip('\n') for line in open(centinel_config)] # get country for this vpn @@ -351,16 +346,15 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, for line in lines: if "country" in line: (key, country_in_config) = line.split(': ') - country_in_config = country_in_config.replace('\"','').replace(',','') - + country_in_config = country_in_config.replace('\"', '').replace(',', '') country = None try: - # we still might need some info from the Maximind query + # we still might need some info from the Maximind query meta = centinel.backend.get_meta(config.params, vp_ip) # send country name to be converted to alpha2 code - if(len(country_in_config) > 2): + if (len(country_in_config) > 2): meta['country'] = convertor.country_to_a2(country_in_config) # some vpn config files already contain the alpha2 code (length == 2) if 'country' in meta: @@ -414,18 +408,17 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, f.truncate() # before starting the vpn do the sanity check - # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well + # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well # sanity_path = os.path.join(directory,'../sanitycheck') # if not os.path.exists(sanity_path): # os.makedirs(sanity_path) - - # fetch the list of RIPE anchors + + # fetch the list of RIPE anchors # anchors = probe.get_anchor_list(sanity_path) # logging.info("Anchors list fetched") logging.info("%s: Starting VPN." % filename) - vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) @@ -437,10 +430,10 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, continue - # sending ping to the anchors + # sending ping to the anchors # ping_result = probe.perform_probe(sanity_path, vpn_provider,vpn_provider,country,anchors) - - # have to do this sanity check if timestamp is a certain value, needs changing + + # have to do this sanity check if timestamp is a certain value, needs changing # timestamp = time.time() # ping_result['timestamp'] = timestamp @@ -533,31 +526,31 @@ def update_config_files(directory, vp_list): new_vpn_dir = return_abs_path(directory, "updated_vpns") # read servers.txt to find the country associated with the ip - with open (vpn_dir+ '/servers.txt') as server_file: + with open(vpn_dir + '/servers.txt') as server_file: servers = server_file.readlines() for server_line in servers: server_line = (server_line.split('|')) - server_country[server_line[0]] = server_line[1].replace('\n','') + server_country[server_line[0]] = server_line[1].replace('\n', '') conf_dir = return_abs_path(directory, "configs") home_dirs = return_abs_path(directory, "home") # remove vps for vp in vp_list[0]: - os.remove(os.path.join(directory,"vpns/"+vp)) - shutil.rmtree(os.path.join(directory,"home/"+vp)) - os.remove(os.path.join(directory,"configs/"+vp)) + os.remove(os.path.join(directory, "vpns/" + vp)) + shutil.rmtree(os.path.join(directory, "home/" + vp)) + os.remove(os.path.join(directory, "configs/" + vp)) # update vps for vp in vp_list[1]: print('in update') - os.remove(os.path.join(directory,"vpns/"+vp)) - shutil.copyfile(os.path.join(directory,"updated_vpns/"+vp), os.path.join(directory,"vpns/"+vp)) + os.remove(os.path.join(directory, "vpns/" + vp)) + shutil.copyfile(os.path.join(directory, "updated_vpns/" + vp), os.path.join(directory, "vpns/" + vp)) # add vp for vp in vp_list[2]: - print(os.path.join(directory,"vpns/"+vp)) - shutil.copyfile(os.path.join(directory,"updated_vpns/"+vp), os.path.join(directory,"vpns/"+vp)) + print(os.path.join(directory, "vpns/" + vp)) + shutil.copyfile(os.path.join(directory, "updated_vpns/" + vp), os.path.join(directory, "vpns/" + vp)) configuration = centinel.config.Configuration() # setup the directories home_dir = os.path.join(home_dirs, vp) @@ -581,11 +574,12 @@ def update_config_files(directory, vp_list): configuration.params['server']['verify'] = True configuration.params['experiments']['tcpdump_params'] = ["-i", "tun0"] - configuration.params['country'] = server_country[vp.replace('.ovpn','')] + configuration.params['country'] = server_country[vp.replace('.ovpn', '')] conf_file = os.path.join(conf_dir, vp) configuration.write_out_config(conf_file) shutil.rmtree(new_vpn_dir) + def create_config_files(directory, provider): """ For each VPN file in directory/vpns, create a new configuration @@ -605,21 +599,19 @@ def create_config_files(directory, provider): vpn_dir = return_abs_path(directory, "vpns") # read servers.txt to find the country associated with the ip - with open (vpn_dir+ '/servers.txt') as server_file: + with open(vpn_dir + '/servers.txt') as server_file: servers = server_file.readlines() for server_line in servers: server_line = (server_line.split('|')) - server_country[server_line[0]] = server_line[1].replace('\n','') - - + server_country[server_line[0]] = server_line[1].replace('\n', '') conf_dir = return_abs_path(directory, "configs") os.mkdir(conf_dir) home_dirs = return_abs_path(directory, "home") os.mkdir(home_dirs) for filename in os.listdir(vpn_dir): - if('servers' not in filename): + if ('servers' not in filename): configuration = centinel.config.Configuration() # setup the directories home_dir = os.path.join(home_dirs, filename) @@ -643,16 +635,16 @@ def create_config_files(directory, provider): configuration.params['server']['verify'] = True configuration.params['experiments']['tcpdump_params'] = ["-i", "tun0"] - configuration.params['country'] = server_country[filename.replace('.ovpn','')] - hostname = os.path.splitext(filename)[0] - vp_ip = "unknown" - try: - vp_ip = socket.gethostbyname(hostname) - except Exception as exp: - logging.exception("Failed to resolve %s : %s" %(hostname,str(exp))) - configuration.params['custom_meta']['provider'] = provider - configuration.params['custom_meta']['hostname'] = hostname - configuration.params['custom_meta']['ip_address'] = vp_ip + configuration.params['country'] = server_country[filename.replace('.ovpn', '')] + hostname = os.path.splitext(filename)[0] + vp_ip = "unknown" + try: + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" % (hostname, str(exp))) + configuration.params['custom_meta']['provider'] = provider + configuration.params['custom_meta']['hostname'] = hostname + configuration.params['custom_meta']['ip_address'] = vp_ip conf_file = os.path.join(conf_dir, filename) configuration.write_out_config(conf_file) @@ -736,19 +728,19 @@ def _run(): if args.create_conf_dir: if args.create_HMA: hma_dir = return_abs_path(args.create_conf_dir, 'vpns') - provider = 'hma' + provider = 'hma' hma.create_config_files(hma_dir) elif args.create_IPVANISH: ipvanish_dir = return_abs_path(args.create_conf_dir, 'vpns') - provider = 'ipvanish' + provider = 'ipvanish' ipvanish.create_config_files(ipvanish_dir) elif args.create_PUREVPN: purevpn_dir = return_abs_path(args.create_conf_dir, 'vpns') - provider = 'purevpn' + provider = 'purevpn' purevpn.create_config_files(purevpn_dir) elif args.create_VPNGATE: vpngate_dir = return_abs_path(args.create_conf_dir, 'vpns') - provider = 'vpngate' + provider = 'vpngate' vpngate.create_config_files(vpngate_dir) # create the config files for the openvpn config files create_config_files(args.create_conf_dir, provider) @@ -756,19 +748,19 @@ def _run(): elif args.update_conf_dir: if args.update_HMA: hma_dir = return_abs_path(args.update_conf_dir, 'vpns') - provider = 'hma' + provider = 'hma' vp_list = hma.update_config_files(hma_dir) - if args.update_IPVANISH: - ipvanish_dir = return_abs_path(args.update_conf_dir, 'vpns') - provdier = 'ipvanish' - vp_list = ipvanish.update_config_files(ipvanish_dir) - if args.update_PUREVPN: - purevpn_dir = return_abs_path(args.update_conf_dir, 'vpns') - provider = 'purevpn' - vp_list = purevpn.update_config_files(purevpn_dir) + if args.update_IPVANISH: + ipvanish_dir = return_abs_path(args.update_conf_dir, 'vpns') + provdier = 'ipvanish' + vp_list = ipvanish.update_config_files(ipvanish_dir) + if args.update_PUREVPN: + purevpn_dir = return_abs_path(args.update_conf_dir, 'vpns') + provider = 'purevpn' + vp_list = purevpn.update_config_files(purevpn_dir) update_config_files(args.update_conf_dir, vp_list) - # add new ones + # add new ones else: # sanity check tls_auth and key_direction @@ -785,5 +777,6 @@ def _run(): vm_index=args.vm_index, reduce_vp=args.reduce_vp, sanity_check=args.sanity_check) + if __name__ == "__main__": run() diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index 936250f..c405256 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -1,13 +1,12 @@ import os import time import json -import pickle +import csv import socket import logging import requests import subprocess import multiprocessing as mp -from datetime import timedelta from urlparse import urljoin import country_module as convertor @@ -56,21 +55,21 @@ def send_ping(param): this_delays = list() for i in output: try: - this_delays.append(i.split('time=')[1]) + this_delays.append(float(i.split('time=')[1].split(' ms')[0])) except: continue times[this_host] = this_delays return times - -def perform_probe(sanity_directory, vpn_provider, target_ip, hostname, target_cnt, anchors): +def perform_probe(fname, vpn_provider, target_ip, hostname, target_cnt, anchors): """Send ping 10 times to landmarks and choose the minimum :return: times [host] = list() """ logging.info("Start Probing [%s(%s)]" %(hostname, target_ip)) - pickle_path = os.path.join(sanity_directory, 'pings/' + vpn_provider) - if not os.path.exists(pickle_path): - os.makedirs(pickle_path) + # ping from local to vpn + vp_ping = send_ping((hostname, target_ip)) + vp_min = min(vp_ping[hostname]) + # get to others times = dict() s_time = time.time() results = [] @@ -92,19 +91,31 @@ def perform_probe(sanity_directory, vpn_provider, target_ip, hostname, target_cn %(hostname, target_ip, _sum/float(_total), e_time - s_time)) pool.close() pool.join() - final = {hostname: {'pings': times, 'cnt': target_cnt, 'ip_v4': target_ip, - 'timestamp': time.time(), 'vpn_provider': vpn_provider}} - logging.info("Creating pickle file") - with open(pickle_path+'/'+vpn_provider+'-'+hostname+'-'+target_ip+'-'+target_cnt+'.pickle', 'w') as f: - pickle.dump(final, f) - logging.info("Pickle file successfully created.") - + # store results + # store as csv file: "vpn_provider, vp_name, vp_ip, vpn_cnt, all_keys()" + keys = sorted(anchors.keys()) + with open(fname, "a") as csv_file: + writer = csv.writer(csv_file) + line = [vpn_provider, hostname, target_ip, target_cnt] + for this_anchor in keys: + if len(times[this_anchor]) > 0: + ping_min = min(times[this_anchor]) + the_ping = (ping_min - vp_min) + else: + the_ping = None + line.append(the_ping) + writer.writerow(line) def start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, key_direction, sanity_path, vpn_provider, anchors): """ Run vpn_walk to get pings from proxy to anchors """ start_time = time.time() + ping_path = os.path.join(sanity_path, 'pings') + if not os.path.exists(ping_path): + os.makedirs(ping_path) + u_time = time.time() + fname = os.path.join(ping_path, 'pings_' + vpn_provider + '_' + str(u_time) + '.csv') for filename in conf_list: centinel_config = os.path.join(conf_dir, filename) config = centinel.config.Configuration() @@ -137,7 +148,7 @@ def start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, continue # sending ping to the anchors try: - perform_probe(sanity_path, vpn_provider, vp_ip, hostname, country, anchors) + perform_probe(fname, vpn_provider, vp_ip, hostname, country, anchors) except: logging.warning("Failed to send pings from %s" % vp_ip) logging.info("%s: Stopping VPN." % filename) From e40f20bac6bc11882fc39b026915b9030ee18fb0 Mon Sep 17 00:00:00 2001 From: shicho Date: Sun, 29 Apr 2018 04:51:01 -0400 Subject: [PATCH 64/75] Store online anchors list --- centinel/vpn/cli.py | 2 +- centinel/vpn/probe.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 9b48759..b077ec0 100644 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -176,7 +176,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, sanity_path = os.path.join(directory, '../sanitycheck') if not os.path.exists(sanity_path): os.makedirs(sanity_path) - anchors = probe.retrieve_anchor_list() + anchors = probe.retrieve_anchor_list(sanity_path) logging.info("Anchors list fetched") # send pings probe.start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index c405256..647ae16 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -2,6 +2,7 @@ import time import json import csv +import pickle import socket import logging import requests @@ -13,7 +14,7 @@ import centinel.backend import centinel.vpn.openvpn as openvpn -def retrieve_anchor_list(): +def retrieve_anchor_list(directory): """ Retrieve anchor lists with RIPE API """ logging.info("Starting to fetch RIPE anchors") @@ -41,6 +42,9 @@ def retrieve_anchor_list(): query_url = urljoin(query_url, next_url) e_time = time.time() logging.info("Finishing to fetch RIPE anchors (%s sec)" %(e_time-s_time)) + landmark_path = os.path.join(directory, "landmarks_list_" + str(time.time()) + ".pickle") + with open(landmark_path, "w") as f: + pickle.dump(anchors, f) return anchors def send_ping(param): From d83574bc4f9c15ac7f4ea2bfcfae8801e8ac92ac Mon Sep 17 00:00:00 2001 From: shicho Date: Mon, 30 Apr 2018 01:04:53 -0400 Subject: [PATCH 65/75] Move code to geosanity.py --- centinel/vpn/cli.py | 36 +++--------------------------------- centinel/vpn/geosanity.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index b077ec0..76a05e9 100644 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -9,14 +9,11 @@ import os import time import sys -import csv import signal import dns.resolver import json import socket import shutil -import datetime -import multiprocessing as mp import centinel.backend import centinel.client @@ -182,36 +179,9 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, probe.start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, key_direction, sanity_path, vpn_provider, anchors) # sanity check - pickle_path = os.path.join(sanity_path, 'pings/' + vpn_provider) - map = san.load_map_from_shapefile(sanity_path) - file_lists = os.listdir(pickle_path) - if file_lists: - try: - num = mp.cpu_count() - except (ImportError, NotImplementedError): - num = 1 - pass - pool = mp.Pool(processes=num) - results = [] - results.append(pool.map(san.sanity_check, - [(this_file, anchors, map, sanity_path, pickle_path) for this_file in file_lists])) - pool.close() - pool.join() - new_conf_list = [] - result_path = os.path.join(sanity_path, 'results/' + vpn_provider) - if not os.path.exists(result_path): - os.makedirs(result_path) - current_time = datetime.date.today().strftime("%Y-%m-%d") - with open(os.path.join(result_path, vpn_provider + '-' + current_time + '.csv'), 'w') as f: - writer = csv.writer(f) - writer.writerow(('proxy_name', 'country', 'truth')) - for output in results: - for proxy_name, iso_cnt, tag in output: - if tag == True: - new_conf_list.append(proxy_name + '.ovpn') - writer.writerow((proxy_name, iso_cnt, tag)) - logging.info("List size after sanity check. New size: %d" % len(new_conf_list)) - conf_list = new_conf_list + new_conf_list = san.start_sanity_check(sanity_path, vpn_provider, anchors) + logging.info("List size after sanity check. New size: %d" % len(new_conf_list)) + conf_list = new_conf_list end_time = time.time() - start_time logging.info("Finished sanity check: total elapsed time (%.2f)" %end_time) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index caa46a9..4a8a580 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -18,6 +18,38 @@ import zipfile import requests import StringIO +import datetime +import multiprocessing as mp + + +def start_sanity_check(sanity_path, vpn_provider, anchors): + pickle_path = os.path.join(sanity_path, 'pings/' + vpn_provider) + map = load_map_from_shapefile(sanity_path) + try: + num = mp.cpu_count() + except (ImportError, NotImplementedError): + num = 1 + pass + pool = mp.Pool(processes=num) + results = [] + results.append(pool.map(sanity_check, + [(this_file, anchors, map, sanity_path, pickle_path) for this_file in file_lists])) + pool.close() + pool.join() + new_conf_list = [] + result_path = os.path.join(sanity_path, 'results/' + vpn_provider) + if not os.path.exists(result_path): + os.makedirs(result_path) + current_time = datetime.date.today().strftime("%Y-%m-%d") + with open(os.path.join(result_path, vpn_provider + '-' + current_time + '.csv'), 'w') as f: + writer = csv.writer(f) + writer.writerow(('proxy_name', 'country', 'truth')) + for output in results: + for proxy_name, iso_cnt, tag in output: + if tag == True: + new_conf_list.append(proxy_name + '.ovpn') + writer.writerow((proxy_name, iso_cnt, tag)) + return new_conf_list def sanity_check(args): """ From b6202f5b1d3848be30e92a3ac8801bcbacd45b19 Mon Sep 17 00:00:00 2001 From: shicho Date: Mon, 30 Apr 2018 03:17:38 -0400 Subject: [PATCH 66/75] Add column name for ping results --- centinel/vpn/probe.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index 647ae16..943ec18 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -120,6 +120,14 @@ def start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, os.makedirs(ping_path) u_time = time.time() fname = os.path.join(ping_path, 'pings_' + vpn_provider + '_' + str(u_time) + '.csv') + keys = sorted(anchors.keys()) + with open(fname, "w") as f: + writer = csv.writer(f) + line = ['vpn_provider', 'vp_name', 'vp_ip', 'vpn_cnt'] + for k2 in keys: + line.append(k2) + writer.writerow(line) + for filename in conf_list: centinel_config = os.path.join(conf_dir, filename) config = centinel.config.Configuration() From 32291440bbe9446966fbf8cee3fc7436dadb023b Mon Sep 17 00:00:00 2001 From: shicho Date: Mon, 30 Apr 2018 04:51:52 -0400 Subject: [PATCH 67/75] Finish geosanity.py --- centinel/vpn/cli.py | 2 +- centinel/vpn/geosanity.py | 189 +++++++++++++++++++++----------------- 2 files changed, 108 insertions(+), 83 deletions(-) diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 76a05e9..e5adfda 100644 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -177,7 +177,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, logging.info("Anchors list fetched") # send pings probe.start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, - key_direction, sanity_path, vpn_provider, anchors) + key_direction, sanity_path, vpn_provider, anchors) # sanity check new_conf_list = san.start_sanity_check(sanity_path, vpn_provider, anchors) logging.info("List size after sanity check. New size: %d" % len(new_conf_list)) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index 4a8a580..518f1ca 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -22,8 +22,33 @@ import multiprocessing as mp +def read_ping_results_from_file(fname, ping_path, anchors): + vp_info = dict() + keys = sorted(anchors.keys()) + with open(os.path.join(ping_path, fname), 'r') as f: + reader = csv.reader(f) + for row in reader: + if row[0] == 'proxy_name': + keys = sorted(row[4:]) + continue + name = row[1] + vp_info[name] = dict() + vp_info[name]['vpn_provider'] = row[0] + vp_info[name]['ip_v4'] = row[2] + vp_info[name]['cnt'] = row[3] + vp_info[name]['pings'] = dict() + count = 4 + for key in keys: + vp_info[name]['pings'][key] = row[count] + count += 1 + return vp_info + def start_sanity_check(sanity_path, vpn_provider, anchors): - pickle_path = os.path.join(sanity_path, 'pings/' + vpn_provider) + ping_path = os.path.join(sanity_path, 'pings') + # get the recent ping results of the vpn provider + file_lists = {float(i.split('_')[2].split('.csv')[0]): i for i in os.listdir(ping_path) if vpn_provider in i} + fname = file_lists[max(file_lists.keys())] + vpn_pings = read_ping_results_from_file(fname, ping_path, anchors) map = load_map_from_shapefile(sanity_path) try: num = mp.cpu_count() @@ -33,22 +58,22 @@ def start_sanity_check(sanity_path, vpn_provider, anchors): pool = mp.Pool(processes=num) results = [] results.append(pool.map(sanity_check, - [(this_file, anchors, map, sanity_path, pickle_path) for this_file in file_lists])) + [(this, vpn_pings[this], anchors, map, sanity_path) for this in vpn_pings])) pool.close() pool.join() new_conf_list = [] - result_path = os.path.join(sanity_path, 'results/' + vpn_provider) + result_path = os.path.join(sanity_path, 'results') if not os.path.exists(result_path): os.makedirs(result_path) current_time = datetime.date.today().strftime("%Y-%m-%d") - with open(os.path.join(result_path, vpn_provider + '-' + current_time + '.csv'), 'w') as f: + with open(os.path.join(result_path, 'results_' + vpn_provider + '_' + current_time + '.csv'), 'w') as f: writer = csv.writer(f) - writer.writerow(('proxy_name', 'country', 'truth')) + writer.writerow(('vpn_provider', 'proxy_name', 'proxy_cnt', 'truth', 'proxy_ip')) for output in results: - for proxy_name, iso_cnt, tag in output: + for provider, proxy_name, iso_cnt, tag, ip in output: if tag == True: new_conf_list.append(proxy_name + '.ovpn') - writer.writerow((proxy_name, iso_cnt, tag)) + writer.writerow((provider, proxy_name, iso_cnt, tag, ip)) return new_conf_list def sanity_check(args): @@ -60,16 +85,13 @@ def sanity_check(args): :param map:(dataframe) :return: """ - this_file, anchors, map, sanity_path, pickle_path = args + proxy_name, vp_info, anchors, map, sanity_path = args + iso_cnt = vp_info['cnt'] + pings = vp_info['pings'] + provider = vp_info['vpn_provider'] + proxy_ip = vp_info['ip_v4'] try: start_time = time.time() - with open(os.path.join(pickle_path, this_file), 'r') as f: - json_data = pickle.load(f) - proxy_name = json_data.keys()[0] - iso_cnt = json_data[proxy_name]['cnt'] - pings = json_data[proxy_name]['pings'] - provider =json_data[proxy_name]['vpn_provider'] - proxy_ip = json_data[proxy_name]['ip_v4'] checker = Checker(proxy_name, iso_cnt, sanity_path, provider, proxy_ip) points = checker.check_ping_results(pings, anchors) if len(points) == 0: @@ -82,14 +104,13 @@ def sanity_check(args): if proxy_region.empty: logging.info("[%s] Fail to get proxy region: %s" % (proxy_name, iso_cnt)) return proxy_name, iso_cnt, -1 - results = checker.check_overlap(proxy_region, circles, this_file, anchors) - tag = checker.is_valid(results) + tag = checker.check_overlap(proxy_region, circles, anchors) end_time = time.time() - start_time logging.info("[%s] sanity check takes for %.2fms" % (proxy_name, end_time)) except Exception, e: - logging.warning("[%s] Failed to sanity check: %s" % (this_file, str(e))) - return "N/A", "N/A", -1 - return proxy_name, iso_cnt, tag + logging.warning("[%s/%s] Failed to sanity check: %s" % (provider, proxy_name, str(e))) + return provider, proxy_name, iso_cnt, -1, proxy_ip + return provider, proxy_name, iso_cnt, tag, proxy_ip def load_map_from_shapefile(sanity_path): """ @@ -165,11 +186,8 @@ def get_anchors_region(self, points): """ # logging.info("Starting to draw anchors region") wgs_proj = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") - ## Sort based on distance. if there is no distance, then sort with min delay - if points[0][0] != 0: - points.sort(key=lambda tup: tup[0]) #closest to the proxy - else: - points.sort(key=lambda tup: tup[1]) #order of min time + ## Sort based on distance. + points.sort(key=lambda tup: tup[0], reverse=True) # further to the proxy circles = list() count = 0 for dist, min_delay, lat, lon, radi, anchor_name in points: @@ -214,7 +232,7 @@ def get_anchors_region(self, points): logging.debug("Fail to get a circle %s" %self.proxy_id) return circles - def check_overlap(self, proxy_region, circles, ping_filename, anchors): + def check_overlap(self, proxy_region, circles, anchors): """ Check overlap between proxy region and anchors' region. If there is an overlap check how much they are overlapped, otherwise, check how far the distance is from a proxy. @@ -223,53 +241,64 @@ def check_overlap(self, proxy_region, circles, ping_filename, anchors): """ # logging.info("Starting to check overlap") results = list() + simple = True + claimed_cnt = True for lat, lon, radi, this_circle, anchor_name, distance, min_delay in circles: df_anchor = geopandas.GeoDataFrame({'geometry': [this_circle]}) overlap = geopandas.overlay(proxy_region, df_anchor, how="intersection") - if overlap.empty: - aeqd = pyproj.Proj(proj='aeqd', ellps='WGS84', datum='WGS84', - lat_0=lat, lon_0=lon) - wgs_proj = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") ##4326 -- 2d - ## country - azimu_cnt = sh_transform( - functools.partial(pyproj.transform, wgs_proj, aeqd), - proxy_region.geometry.item()) - ## min_distance - azimu_anchor = self._disk(0, 0, radi * 1000) #km ---> m - gap = azimu_anchor.distance(azimu_cnt) / float(1000) #km - results.append({'anchor_name': anchor_name, 'distance': distance, 'proxy_name': self.proxy_id, - 'min_delay': min_delay, 'truth': False, 'extra': gap, 'anchor_gps': (lat, lon), - 'anchor_ip': anchors[anchor_name]['ip_v4'], 'radius': radi, 'proxy_ip': self.ip, - 'anchor_cnt': (anchors[anchor_name]['city'], anchors[anchor_name]['country']), - 'proxy_country': self.iso}) + if simple: + if overlap.empty: + claimed_cnt = False + break else: - ## area - area_cnt = proxy_region['geometry'].area#/10**6 #km/sqr - area_cnt = sum(area_cnt.tolist()) - area_overlap = overlap['geometry'].area#/10**6 #km/sqr - area_overlap = sum(area_overlap.tolist()) - overlapped = area_overlap/area_cnt - results.append({'anchor_name': anchor_name, 'distance': distance, 'proxy_name': self.proxy_id, - 'min_delay': min_delay, 'truth': True, 'extra': overlapped, 'anchor_gps': (lat, lon), - 'anchor_ip': anchors[anchor_name]['ip_v4'], 'radius': radi, 'proxy_ip': self.ip, - 'anchor_cnt': (anchors[anchor_name]['city'], anchors[anchor_name]['country']), - 'proxy_country': self.iso}) - pickle_path = os.path.join(self.path, 'sanity/'+self.vpn_provider) - if not os.path.exists(pickle_path): - os.makedirs(pickle_path) - with open(os.path.join(pickle_path, ping_filename+'.csv'), 'w') as f: - writer = csv.writer(f) - writer.writerow(('proxy_name','proxy_ip','proxy_country','truth','extra', - 'anchor_name','anchor_ip','anchor_cnt','anchor_gps','distance','min_delay','radius')) - for this in results: - writer.writerow((this['proxy_name'],this['proxy_ip'],this['proxy_country'], - this['truth'],this['extra'], - this['anchor_name'],this['anchor_ip'],this['anchor_cnt'], - this['anchor_gps'],this['distance'],this['min_delay'],this['radius'])) - with open(os.path.join(pickle_path, ping_filename), 'w') as f: - pickle.dump(results, f) - # logging.info("Pickle file successfully created.") - return results + # When we wanna do further investigation + if overlap.empty: + aeqd = pyproj.Proj(proj='aeqd', ellps='WGS84', datum='WGS84', + lat_0=lat, lon_0=lon) + wgs_proj = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") ##4326 -- 2d + ## country + azimu_cnt = sh_transform( + functools.partial(pyproj.transform, wgs_proj, aeqd), + proxy_region.geometry.item()) + ## min_distance + azimu_anchor = self._disk(0, 0, radi * 1000) #km ---> m + gap = azimu_anchor.distance(azimu_cnt) / float(1000) #km + results.append({'anchor_name': anchor_name, 'distance': distance, 'proxy_name': self.proxy_id, + 'min_delay': min_delay, 'truth': False, 'extra': gap, 'anchor_gps': (lat, lon), + 'anchor_ip': anchors[anchor_name]['ip_v4'], 'radius': radi, 'proxy_ip': self.ip, + 'anchor_cnt': (anchors[anchor_name]['city'], anchors[anchor_name]['country']), + 'proxy_country': self.iso}) + else: + ## area + area_cnt = proxy_region['geometry'].area#/10**6 #km/sqr + area_cnt = sum(area_cnt.tolist()) + area_overlap = overlap['geometry'].area#/10**6 #km/sqr + area_overlap = sum(area_overlap.tolist()) + overlapped = area_overlap/area_cnt + results.append({'anchor_name': anchor_name, 'distance': distance, 'proxy_name': self.proxy_id, + 'min_delay': min_delay, 'truth': True, 'extra': overlapped, 'anchor_gps': (lat, lon), + 'anchor_ip': anchors[anchor_name]['ip_v4'], 'radius': radi, 'proxy_ip': self.ip, + 'anchor_cnt': (anchors[anchor_name]['city'], anchors[anchor_name]['country']), + 'proxy_country': self.iso}) + if not simple: + ping_filename = self.vpn_provider + '_' + self.proxy_id + '_' + str(time.time) + pickle_path = os.path.join(self.path, 'sanity/'+self.vpn_provider) + if not os.path.exists(pickle_path): + os.makedirs(pickle_path) + with open(os.path.join(pickle_path, ping_filename+'.csv'), 'w') as f: + writer = csv.writer(f) + writer.writerow(('proxy_name','proxy_ip','proxy_country','truth','extra', + 'anchor_name','anchor_ip','anchor_cnt','anchor_gps','distance','min_delay','radius')) + for this in results: + writer.writerow((this['proxy_name'],this['proxy_ip'],this['proxy_country'], + this['truth'],this['extra'], + this['anchor_name'],this['anchor_ip'],this['anchor_cnt'], + this['anchor_gps'],this['distance'],this['min_delay'],this['radius'])) + with open(os.path.join(pickle_path, ping_filename), 'w') as f: + pickle.dump(results, f) + # logging.info("Pickle file successfully created.") + claimed_cnt = self.is_valid(results) + return claimed_cnt def _calculate_radius(self, time_ms): """ @@ -292,19 +321,15 @@ def check_ping_results(self, results, anchors_gps): Return points(list): (lat, lon, radius) """ points = list() - for anchor, pings in results.iteritems(): - valid_pings = list() - for this in pings: - # remove anomalies - ping = float(this.split(' ')[0]) - owtt = ping/2.0 - if float(owtt) >= 3.0 and float(owtt) <= 130.0: - valid_pings.append(owtt) - if len(valid_pings) == 0: + for anchor, ping in results.iteritems(): + # remove anomalies + if ping == '': continue + ping = float(ping) + owtt = ping/2.0 + if owtt < 3.0 or owtt >= 130.0: logging.debug("no valid pings results of anchor %s" %anchor) continue - min_delay = min(valid_pings) - radi = self._calculate_radius(min_delay) + radi = self._calculate_radius(ping) if anchor not in anchors_gps: logging.debug("no gps for anchor %s" %anchor) continue @@ -313,7 +338,7 @@ def check_ping_results(self, results, anchors_gps): anchor_gps = (anchors_gps[anchor]['latitude'], anchors_gps[anchor]['longitude']) if len(self.gps) != 0: distance = vincenty(anchor_gps, self.gps).km - points.append((distance, min_delay, anchor_gps[0], anchor_gps[1], radi, anchor)) + points.append((distance, ping, anchor_gps[0], anchor_gps[1], radi, anchor)) if len(points) == 0: logging.debug("no valid pings results") return [] @@ -340,4 +365,4 @@ def is_valid(self, results): if frac >= 0.9: return True else: - return False \ No newline at end of file + return False From fa9f4234aa628f28dcc0f49288aa53bc3c09b9d3 Mon Sep 17 00:00:00 2001 From: shicho Date: Wed, 2 May 2018 11:10:08 -0400 Subject: [PATCH 68/75] Change fname of landmark --- centinel/vpn/probe.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index 943ec18..ea6b607 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -9,7 +9,7 @@ import subprocess import multiprocessing as mp from urlparse import urljoin - +import datetime import country_module as convertor import centinel.backend import centinel.vpn.openvpn as openvpn @@ -42,7 +42,8 @@ def retrieve_anchor_list(directory): query_url = urljoin(query_url, next_url) e_time = time.time() logging.info("Finishing to fetch RIPE anchors (%s sec)" %(e_time-s_time)) - landmark_path = os.path.join(directory, "landmarks_list_" + str(time.time()) + ".pickle") + current_time = datetime.date.today().strftime("%Y-%m-%d") + landmark_path = os.path.join(directory, "landmarks_list_" + str(current_time) + ".pickle") with open(landmark_path, "w") as f: pickle.dump(anchors, f) return anchors From a3c3d42748bee4651a361768206db763a86fa7a9 Mon Sep 17 00:00:00 2001 From: shicho Date: Sat, 5 May 2018 17:09:29 -0400 Subject: [PATCH 69/75] Store ping both to anchor and to proxy --- centinel/vpn/probe.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py index ea6b607..e8eb278 100644 --- a/centinel/vpn/probe.py +++ b/centinel/vpn/probe.py @@ -66,13 +66,13 @@ def send_ping(param): times[this_host] = this_delays return times -def perform_probe(fname, vpn_provider, target_ip, hostname, target_cnt, anchors): +def perform_probe(fname, vpn_provider, proxy_ip, hostname, target_cnt, anchors): """Send ping 10 times to landmarks and choose the minimum :return: times [host] = list() """ - logging.info("Start Probing [%s(%s)]" %(hostname, target_ip)) + logging.info("Start Probing [%s(%s)]" %(hostname, proxy_ip)) # ping from local to vpn - vp_ping = send_ping((hostname, target_ip)) + vp_ping = send_ping((hostname, proxy_ip)) vp_min = min(vp_ping[hostname]) # get to others times = dict() @@ -93,7 +93,7 @@ def perform_probe(fname, vpn_provider, target_ip, hostname, target_cnt, anchors) times[key].append(this) e_time = time.time() logging.info("Finish Probing [%s(%s)]: average succeeded pings=%.2f/10 (%.2fsec)" - %(hostname, target_ip, _sum/float(_total), e_time - s_time)) + %(hostname, proxy_ip, _sum/float(_total), e_time - s_time)) pool.close() pool.join() # store results @@ -101,11 +101,10 @@ def perform_probe(fname, vpn_provider, target_ip, hostname, target_cnt, anchors) keys = sorted(anchors.keys()) with open(fname, "a") as csv_file: writer = csv.writer(csv_file) - line = [vpn_provider, hostname, target_ip, target_cnt] + line = [vpn_provider, hostname, proxy_ip, target_cnt, e_time-s_time, vp_min] for this_anchor in keys: if len(times[this_anchor]) > 0: - ping_min = min(times[this_anchor]) - the_ping = (ping_min - vp_min) + the_ping = min(times[this_anchor]) else: the_ping = None line.append(the_ping) @@ -119,12 +118,12 @@ def start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, ping_path = os.path.join(sanity_path, 'pings') if not os.path.exists(ping_path): os.makedirs(ping_path) - u_time = time.time() - fname = os.path.join(ping_path, 'pings_' + vpn_provider + '_' + str(u_time) + '.csv') + current_time = datetime.date.today().strftime("%Y-%m-%d") + fname = os.path.join(ping_path, 'pings_' + vpn_provider + '_' + str(current_time) + '.csv') keys = sorted(anchors.keys()) with open(fname, "w") as f: writer = csv.writer(f) - line = ['vpn_provider', 'vp_name', 'vp_ip', 'vpn_cnt'] + line = ['vpn_provider', 'vp_name', 'vp_ip', 'vpn_cnt', 'time_taken', 'ping_to_vp'] for k2 in keys: line.append(k2) writer.writerow(line) From 3d3f0e1e040bfe3f8b2347f0a3e134ab6787d7bd Mon Sep 17 00:00:00 2001 From: shicho Date: Wed, 9 May 2018 11:36:38 -0400 Subject: [PATCH 70/75] Change vp_info format --- centinel/vpn/geosanity.py | 310 ++++++++++++++++++++++---------------- 1 file changed, 181 insertions(+), 129 deletions(-) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index 518f1ca..df219f6 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -22,34 +22,14 @@ import multiprocessing as mp -def read_ping_results_from_file(fname, ping_path, anchors): - vp_info = dict() - keys = sorted(anchors.keys()) - with open(os.path.join(ping_path, fname), 'r') as f: - reader = csv.reader(f) - for row in reader: - if row[0] == 'proxy_name': - keys = sorted(row[4:]) - continue - name = row[1] - vp_info[name] = dict() - vp_info[name]['vpn_provider'] = row[0] - vp_info[name]['ip_v4'] = row[2] - vp_info[name]['cnt'] = row[3] - vp_info[name]['pings'] = dict() - count = 4 - for key in keys: - vp_info[name]['pings'][key] = row[count] - count += 1 - return vp_info def start_sanity_check(sanity_path, vpn_provider, anchors): ping_path = os.path.join(sanity_path, 'pings') # get the recent ping results of the vpn provider - file_lists = {float(i.split('_')[2].split('.csv')[0]): i for i in os.listdir(ping_path) if vpn_provider in i} + file_lists = {i.split('_')[2].split('.csv')[0]: i for i in os.listdir(ping_path) if vpn_provider in i} fname = file_lists[max(file_lists.keys())] - vpn_pings = read_ping_results_from_file(fname, ping_path, anchors) - map = load_map_from_shapefile(sanity_path) + vpn_pings = Checker.read_ping_results_from_file(fname, ping_path, anchors) + map = Checker.load_map_from_shapefile(sanity_path) try: num = mp.cpu_count() except (ImportError, NotImplementedError): @@ -57,16 +37,17 @@ def start_sanity_check(sanity_path, vpn_provider, anchors): pass pool = mp.Pool(processes=num) results = [] - results.append(pool.map(sanity_check, - [(this, vpn_pings[this], anchors, map, sanity_path) for this in vpn_pings])) + results.append(pool.map(Checker.sanity_check, + [(this, vpn_pings[this], anchors, map, sanity_path) for this in vpn_pings])) pool.close() pool.join() new_conf_list = [] result_path = os.path.join(sanity_path, 'results') if not os.path.exists(result_path): os.makedirs(result_path) - current_time = datetime.date.today().strftime("%Y-%m-%d") - with open(os.path.join(result_path, 'results_' + vpn_provider + '_' + current_time + '.csv'), 'w') as f: + current_time = fname.split('_')[2].split('.csv')[0] + csv_name = 'results_' + vpn_provider + '_' + current_time + '.csv' + with open(os.path.join(result_path, csv_name), 'w') as f: writer = csv.writer(f) writer.writerow(('vpn_provider', 'proxy_name', 'proxy_cnt', 'truth', 'proxy_ip')) for output in results: @@ -76,64 +57,6 @@ def start_sanity_check(sanity_path, vpn_provider, anchors): writer.writerow((provider, proxy_name, iso_cnt, tag, ip)) return new_conf_list -def sanity_check(args): - """ - :param proxy_id:(str) - :param iso_cnt:(str) - :param ping_results:(dict) {anchors: [pings]) - :param anchors_gps:(dict) {anchors: (lat, long)} - :param map:(dataframe) - :return: - """ - proxy_name, vp_info, anchors, map, sanity_path = args - iso_cnt = vp_info['cnt'] - pings = vp_info['pings'] - provider = vp_info['vpn_provider'] - proxy_ip = vp_info['ip_v4'] - try: - start_time = time.time() - checker = Checker(proxy_name, iso_cnt, sanity_path, provider, proxy_ip) - points = checker.check_ping_results(pings, anchors) - if len(points) == 0: - logging.info("No valid ping results for %s" % proxy_name) - return proxy_name, iso_cnt, -1 - logging.info("[%s] has %s valid pings from %s anchors" - %(proxy_name, len(points), len(pings))) - circles = checker.get_anchors_region(points) - proxy_region = checker.get_vpn_region(map) - if proxy_region.empty: - logging.info("[%s] Fail to get proxy region: %s" % (proxy_name, iso_cnt)) - return proxy_name, iso_cnt, -1 - tag = checker.check_overlap(proxy_region, circles, anchors) - end_time = time.time() - start_time - logging.info("[%s] sanity check takes for %.2fms" % (proxy_name, end_time)) - except Exception, e: - logging.warning("[%s/%s] Failed to sanity check: %s" % (provider, proxy_name, str(e))) - return provider, proxy_name, iso_cnt, -1, proxy_ip - return provider, proxy_name, iso_cnt, tag, proxy_ip - -def load_map_from_shapefile(sanity_path): - """ - Load all countries from shapefile - (e.g., shapefile = 'map/ne_10m_admin_0_countries.shp') - """ - logging.info("Loading a shapefile for the world map") - shapefile = os.path.join(sanity_path, "ne_10m_admin_0_countries.shp") - if not os.path.exists(shapefile): - logging.info("Shape file does not exist, Downloading from server") - shapefile_url = 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip' - logging.info("Starting to download map shape file zip") - try: - r = requests.get(shapefile_url, stream=True) - z = zipfile.ZipFile(StringIO.StringIO(r.content)) - z.extractall(sanity_path) - logging.info("Map shape file downloaded") - except Exception as exp: - logging.error("Could not fetch map file : %s" % str(exp)) - temp = GeoDataFrame.from_file(shapefile) - # print temp.dtypes.index - map = temp[['ISO_A2', 'NAME', 'SUBREGION', 'geometry']] - return map class Checker: def __init__(self, proxy_id, iso, path, vpn_provider, ip): @@ -144,6 +67,94 @@ def __init__(self, proxy_id, iso, path, vpn_provider, ip): self.path = path self.ip = ip + @staticmethod + def read_ping_results_from_file(fname, ping_path, anchors): + vp_info = dict() + keys = sorted(anchors.keys()) + with open(os.path.join(ping_path, fname), 'r') as f: + reader = csv.reader(f) + for row in reader: + if row[0] == 'vpn_provider': + keys = row[6:] + continue + name = row[1] + vp_info[name] = dict() + vp_info[name]['vpn_provider'] = row[0] + vp_info[name]['ip_v4'] = row[2] + vp_info[name]['cnt'] = row[3] + vp_info[name]['time_taken'] = row[4] + vp_info[name]['ping_to_vp'] = row[5] + vp_info[name]['pings'] = dict() + count = 6 + for key in keys: + if row[count] == '': rtt = None + else: rtt = float(row[count]) + vp_info[name]['pings'][key] = rtt + count += 1 + return vp_info + + @staticmethod + def sanity_check(args): + """ + :param proxy_id:(str) + :param iso_cnt:(str) + :param ping_results:(dict) {anchors: [pings]) + :param anchors_gps:(dict) {anchors: (lat, long)} + :param map:(dataframe) + :return: + """ + proxy_name, vp_info, anchors, map, sanity_path = args + iso_cnt = vp_info['cnt'] + pings = vp_info['pings'] + provider = vp_info['vpn_provider'] + proxy_ip = vp_info['ip_v4'] + try: + start_time = time.time() + checker = Checker(proxy_name, iso_cnt, sanity_path, provider, proxy_ip) + points = checker.check_ping_results(pings, anchors) + if len(points) == 0: + logging.info("No valid ping results for %s" % proxy_name) + return proxy_name, iso_cnt, -1 + logging.info("[%s] has %s valid pings from %s anchors" + % (proxy_name, len(points), len(pings))) + # circles = checker.get_anchors_region(points) + proxy_region = checker.get_vpn_region(map) + if proxy_region.empty: + logging.info("[%s] Fail to get proxy region: %s" % (proxy_name, iso_cnt)) + return proxy_name, iso_cnt, -2 + # tag = checker.check_overlap(proxy_region, circles, anchors) + tag = checker.check_sol_violation(proxy_region, points) + end_time = time.time() - start_time + logging.info("[%s] sanity check takes for %.2fms" % (proxy_name, end_time)) + except Exception, e: + logging.warning("[%s/%s] Failed to sanity check: %s" % (provider, proxy_name, str(e))) + return provider, proxy_name, iso_cnt, -1, proxy_ip + return provider, proxy_name, iso_cnt, tag, proxy_ip + + @staticmethod + def load_map_from_shapefile(sanity_path): + """ + Load all countries from shapefile + (e.g., shapefile = 'map/ne_10m_admin_0_countries.shp') + """ + logging.info("Loading a shapefile for the world map") + shapefile = os.path.join(sanity_path, "ne_10m_admin_0_countries.shp") + if not os.path.exists(shapefile): + logging.info("Shape file does not exist, Downloading from server") + shapefile_url = 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip' + logging.info("Starting to download map shape file zip") + try: + r = requests.get(shapefile_url, stream=True) + z = zipfile.ZipFile(StringIO.StringIO(r.content)) + z.extractall(sanity_path) + logging.info("Map shape file downloaded") + except Exception as exp: + logging.error("Could not fetch map file : %s" % str(exp)) + temp = GeoDataFrame.from_file(shapefile) + # print temp.dtypes.index + map = temp[['ISO_A2', 'NAME', 'SUBREGION', 'geometry']] + return map + def get_vpn_region(self, map): """ Get a region of given iso country @@ -178,6 +189,91 @@ def _get_gps_of_proxy(self): def _disk(self, x, y, radius): return Point(x, y).buffer(radius) + def _calculate_radius(self, time_ms): + """ + (the number got from zack's paper & code) + Network cable's propagation speed: around 2/3c = 199,862 km/s + + processing & queueing delay --> maximum speed: 153,000 km/s (0.5104 c) + """ + C = 299792 # km/s + speed = np.multiply(0.5104, C) + second = time_ms/float(1000) + dist_km = np.multiply(speed, second) + return dist_km + + def check_ping_results(self, results, anchors_gps): + """ + Because the equator circumference is 40,074.275km. + the range cannot be farther than 20,037.135km. + If there are anomalies pings (<3.0ms or >130.0ms), remove. + Otherwise, return latitude and longitude of vps, radius derived from ping delay. + Return points(list): (lat, lon, radius) + """ + points = list() + for anchor, ping in results.iteritems(): + # remove anomalies + if ping == '': continue + ping = float(ping) + owtt = ping/2.0 + if owtt < 3.0 or owtt >= 250.0: + logging.debug("no valid pings results of anchor %s" %anchor) + continue + radi = self._calculate_radius(ping) + if anchor not in anchors_gps: + logging.debug("no gps for anchor %s" %anchor) + continue + # calculate the distance(km) between proxy and anchor + distance = 0 + anchor_gps = (anchors_gps[anchor]['latitude'], anchors_gps[anchor]['longitude']) + if len(self.gps) != 0: + distance = vincenty(anchor_gps, self.gps).km + points.append((distance, ping, anchor_gps[0], anchor_gps[1], radi, anchor)) + if len(points) == 0: + logging.debug("no valid pings results") + return [] + return points + + def _get_sol(self): + C = 299792 # km/s + speed = np.multiply(0.5104, C) + return speed + + def check_sol_violation(self, proxy_region, points): + """ + method 2: instead of checking overlap between proxy region and anchor regions, we check + the sol violation from the further anchors. Once we seen sol violation, we stop and + return truth=False. + """ + ## Sort based on distance. + points.sort(key=lambda tup: tup[0], reverse=True) # further to the proxy + claimed_cnt = True + for dist, min_delay, lat, lon, radi, anchor_name in points: + aeqd = pyproj.Proj(proj='aeqd', ellps='WGS84', datum='WGS84', + lat_0=lat, lon_0=lon) + wgs_proj = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") ##4326 -- 2d + + ## country + azimu_cnt = sh_transform( + functools.partial(pyproj.transform, wgs_proj, aeqd), + proxy_region.geometry.item()) + + ## min_distance + azimu_anchor = self._disk(0, 0, 1*1000) # km ---> m + min_dist = azimu_anchor.distance(azimu_cnt) / float(1000) # km + + # check the violation of speed of light + min_owtt = float(min_delay)/0.2 + speed = dist / min_owtt + min_speed = min_dist / min_owtt + sol = self._get_sol() + + logging.info("dist: %s, min_dist: %s, min_delay: %s, speed: %s, min_speed: %s, sol: %s" + %(dist, min_dist, min_delay, speed, min_speed, sol)) + if speed > sol: + claimed_cnt = False + break + return claimed_cnt + def get_anchors_region(self, points): """ Get anchors region (referred from zack's paper & code Todo: add LICENSE?) @@ -300,50 +396,6 @@ def check_overlap(self, proxy_region, circles, anchors): claimed_cnt = self.is_valid(results) return claimed_cnt - def _calculate_radius(self, time_ms): - """ - (the number got from zack's paper & code) - Network cable's propagation speed: around 2/3c = 199,862 km/s - + processing & queueing delay --> maximum speed: 153,000 km/s (0.5104 c) - """ - C = 299792 # km/s - speed = np.multiply(0.5104, C) - second = time_ms/float(1000) - dist_km = np.multiply(speed, second) - return dist_km - - def check_ping_results(self, results, anchors_gps): - """ - Because the equator circumference is 40,074.275km. - the range cannot be farther than 20,037.135km. - If there are anomalies pings (<3.0ms or >130.0ms), remove. - Otherwise, return latitude and longitude of vps, radius derived from ping delay. - Return points(list): (lat, lon, radius) - """ - points = list() - for anchor, ping in results.iteritems(): - # remove anomalies - if ping == '': continue - ping = float(ping) - owtt = ping/2.0 - if owtt < 3.0 or owtt >= 130.0: - logging.debug("no valid pings results of anchor %s" %anchor) - continue - radi = self._calculate_radius(ping) - if anchor not in anchors_gps: - logging.debug("no gps for anchor %s" %anchor) - continue - # calculate the distance(km) between proxy and anchor - distance = 0 - anchor_gps = (anchors_gps[anchor]['latitude'], anchors_gps[anchor]['longitude']) - if len(self.gps) != 0: - distance = vincenty(anchor_gps, self.gps).km - points.append((distance, ping, anchor_gps[0], anchor_gps[1], radi, anchor)) - if len(points) == 0: - logging.debug("no valid pings results") - return [] - return points - def is_valid(self, results): """ Need reasonable threshold to answer the validation of location From cea8adabea692cafb8217fd090250f4c58534422 Mon Sep 17 00:00:00 2001 From: shicho Date: Wed, 9 May 2018 13:15:30 -0400 Subject: [PATCH 71/75] Clean up codes for sanity check with speed --- centinel/vpn/geosanity.py | 119 +++++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 52 deletions(-) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index df219f6..692310d 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -18,11 +18,8 @@ import zipfile import requests import StringIO -import datetime import multiprocessing as mp - - def start_sanity_check(sanity_path, vpn_provider, anchors): ping_path = os.path.join(sanity_path, 'pings') # get the recent ping results of the vpn provider @@ -57,7 +54,6 @@ def start_sanity_check(sanity_path, vpn_provider, anchors): writer.writerow((provider, proxy_name, iso_cnt, tag, ip)) return new_conf_list - class Checker: def __init__(self, proxy_id, iso, path, vpn_provider, ip): self.vpn_provider = vpn_provider @@ -83,7 +79,7 @@ def read_ping_results_from_file(fname, ping_path, anchors): vp_info[name]['ip_v4'] = row[2] vp_info[name]['cnt'] = row[3] vp_info[name]['time_taken'] = row[4] - vp_info[name]['ping_to_vp'] = row[5] + vp_info[name]['ping_to_vp'] = float(row[5]) vp_info[name]['pings'] = dict() count = 6 for key in keys: @@ -93,6 +89,30 @@ def read_ping_results_from_file(fname, ping_path, anchors): count += 1 return vp_info + @staticmethod + def load_map_from_shapefile(sanity_path): + """ + Load all countries from shapefile + (e.g., shapefile = 'map/ne_10m_admin_0_countries.shp') + """ + logging.info("Loading a shapefile for the world map") + shapefile = os.path.join(sanity_path, "ne_10m_admin_0_countries.shp") + if not os.path.exists(shapefile): + logging.info("Shape file does not exist, Downloading from server") + shapefile_url = 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip' + logging.info("Starting to download map shape file zip") + try: + r = requests.get(shapefile_url, stream=True) + z = zipfile.ZipFile(StringIO.StringIO(r.content)) + z.extractall(sanity_path) + logging.info("Map shape file downloaded") + except Exception as exp: + logging.error("Could not fetch map file : %s" % str(exp)) + temp = GeoDataFrame.from_file(shapefile) + # print temp.dtypes.index + map = temp[['ISO_A2', 'NAME', 'SUBREGION', 'geometry']] + return map + @staticmethod def sanity_check(args): """ @@ -108,52 +128,47 @@ def sanity_check(args): pings = vp_info['pings'] provider = vp_info['vpn_provider'] proxy_ip = vp_info['ip_v4'] + ping_to_vp = vp_info['ping_to_vp'] try: start_time = time.time() checker = Checker(proxy_name, iso_cnt, sanity_path, provider, proxy_ip) - points = checker.check_ping_results(pings, anchors) + points = checker.check_ping_results(pings, anchors, ping_to_vp) if len(points) == 0: logging.info("No valid ping results for %s" % proxy_name) return proxy_name, iso_cnt, -1 logging.info("[%s] has %s valid pings from %s anchors" % (proxy_name, len(points), len(pings))) - # circles = checker.get_anchors_region(points) proxy_region = checker.get_vpn_region(map) if proxy_region.empty: - logging.info("[%s] Fail to get proxy region: %s" % (proxy_name, iso_cnt)) + logging.info("[%s] Failed to get proxy region: %s" % (proxy_name, iso_cnt)) return proxy_name, iso_cnt, -2 - # tag = checker.check_overlap(proxy_region, circles, anchors) - tag = checker.check_sol_violation(proxy_region, points) + # tag = checker._sanity_check_with_distance(points, proxy_region, anchors) + tag = checker._sanity_check_with_speed(points, proxy_region) end_time = time.time() - start_time logging.info("[%s] sanity check takes for %.2fms" % (proxy_name, end_time)) except Exception, e: logging.warning("[%s/%s] Failed to sanity check: %s" % (provider, proxy_name, str(e))) - return provider, proxy_name, iso_cnt, -1, proxy_ip + return provider, proxy_name, iso_cnt, -3, proxy_ip return provider, proxy_name, iso_cnt, tag, proxy_ip - @staticmethod - def load_map_from_shapefile(sanity_path): + def _sanity_check_with_distance(self, points, proxy_region, anchors): + """ Given the minimum rtt, + check the distance how far ping reply can go with sol from anchors. + If the distance is not overlapped with the claimed country, + then we consider it as a lied vp. """ - Load all countries from shapefile - (e.g., shapefile = 'map/ne_10m_admin_0_countries.shp') + circles = self.get_anchors_region(points) + tag = self.check_overlap(proxy_region, circles, anchors) + return tag + + def _sanity_check_with_speed(self, points, proxy_region): + """ Given the minimum rtt + and the shortest distance from anchor to the claimed country. + we calculated a speed of them. If the speed violates sol, + then we consider it as a lied vp. """ - logging.info("Loading a shapefile for the world map") - shapefile = os.path.join(sanity_path, "ne_10m_admin_0_countries.shp") - if not os.path.exists(shapefile): - logging.info("Shape file does not exist, Downloading from server") - shapefile_url = 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip' - logging.info("Starting to download map shape file zip") - try: - r = requests.get(shapefile_url, stream=True) - z = zipfile.ZipFile(StringIO.StringIO(r.content)) - z.extractall(sanity_path) - logging.info("Map shape file downloaded") - except Exception as exp: - logging.error("Could not fetch map file : %s" % str(exp)) - temp = GeoDataFrame.from_file(shapefile) - # print temp.dtypes.index - map = temp[['ISO_A2', 'NAME', 'SUBREGION', 'geometry']] - return map + tag = self.check_sol_violation(points, proxy_region) + return tag def get_vpn_region(self, map): """ @@ -189,19 +204,20 @@ def _get_gps_of_proxy(self): def _disk(self, x, y, radius): return Point(x, y).buffer(radius) - def _calculate_radius(self, time_ms): + def _calculate_radius(self, ping): """ (the number got from zack's paper & code) Network cable's propagation speed: around 2/3c = 199,862 km/s + processing & queueing delay --> maximum speed: 153,000 km/s (0.5104 c) """ + owtt_time = ping/float(2) C = 299792 # km/s speed = np.multiply(0.5104, C) - second = time_ms/float(1000) + second = owtt_time/float(1000) dist_km = np.multiply(speed, second) return dist_km - def check_ping_results(self, results, anchors_gps): + def check_ping_results(self, results, anchors_gps, ping_to_vp): """ Because the equator circumference is 40,074.275km. the range cannot be farther than 20,037.135km. @@ -212,13 +228,13 @@ def check_ping_results(self, results, anchors_gps): points = list() for anchor, ping in results.iteritems(): # remove anomalies - if ping == '': continue - ping = float(ping) - owtt = ping/2.0 - if owtt < 3.0 or owtt >= 250.0: - logging.debug("no valid pings results of anchor %s" %anchor) + if ping == None: continue + # get ping from vp to anchor + ping_vp_to_anchor = ping - ping_to_vp + if (ping_vp_to_anchor < 6.0) or (ping_vp_to_anchor >= 500.0): + logging.debug("ping anomalies of %s: %s" %(anchor, ping_vp_to_anchor)) continue - radi = self._calculate_radius(ping) + radi = self._calculate_radius(ping_vp_to_anchor) if anchor not in anchors_gps: logging.debug("no gps for anchor %s" %anchor) continue @@ -227,18 +243,17 @@ def check_ping_results(self, results, anchors_gps): anchor_gps = (anchors_gps[anchor]['latitude'], anchors_gps[anchor]['longitude']) if len(self.gps) != 0: distance = vincenty(anchor_gps, self.gps).km - points.append((distance, ping, anchor_gps[0], anchor_gps[1], radi, anchor)) - if len(points) == 0: - logging.debug("no valid pings results") - return [] + points.append((distance, ping_vp_to_anchor, anchor_gps[0], anchor_gps[1], radi, anchor)) return points def _get_sol(self): + """ Return speed of lights + """ C = 299792 # km/s speed = np.multiply(0.5104, C) return speed - def check_sol_violation(self, proxy_region, points): + def check_sol_violation(self, points, proxy_region): """ method 2: instead of checking overlap between proxy region and anchor regions, we check the sol violation from the further anchors. Once we seen sol violation, we stop and @@ -258,18 +273,18 @@ def check_sol_violation(self, proxy_region, points): proxy_region.geometry.item()) ## min_distance - azimu_anchor = self._disk(0, 0, 1*1000) # km ---> m + azimu_anchor = self._disk(0, 0, 1) # km ---> m min_dist = azimu_anchor.distance(azimu_cnt) / float(1000) # km # check the violation of speed of light - min_owtt = float(min_delay)/0.2 - speed = dist / min_owtt + # speed = dist / min_owtt + min_owtt = float(min_delay)/float(2) min_speed = min_dist / min_owtt sol = self._get_sol() - logging.info("dist: %s, min_dist: %s, min_delay: %s, speed: %s, min_speed: %s, sol: %s" - %(dist, min_dist, min_delay, speed, min_speed, sol)) - if speed > sol: + logging.info("[%s] min_dist: %s, min_owtt: %s, min_speed: %s, sol: %s" + %(anchor_name, min_dist, min_delay, min_speed, sol)) + if min_speed > sol: claimed_cnt = False break return claimed_cnt From 98ba2cbd3ea43c1f0d10a0c79967e56da11a1472 Mon Sep 17 00:00:00 2001 From: shicho Date: Wed, 9 May 2018 14:11:11 -0400 Subject: [PATCH 72/75] Can't pickle staticmethod --- centinel/vpn/geosanity.py | 80 ++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index 692310d..2ba20c8 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -34,7 +34,7 @@ def start_sanity_check(sanity_path, vpn_provider, anchors): pass pool = mp.Pool(processes=num) results = [] - results.append(pool.map(Checker.sanity_check, + results.append(pool.map(sanity_check, [(this, vpn_pings[this], anchors, map, sanity_path) for this in vpn_pings])) pool.close() pool.join() @@ -54,6 +54,46 @@ def start_sanity_check(sanity_path, vpn_provider, anchors): writer.writerow((provider, proxy_name, iso_cnt, tag, ip)) return new_conf_list + +def sanity_check(args): + """ + :param proxy_id:(str) + :param iso_cnt:(str) + :param ping_results:(dict) {anchors: [pings]) + :param anchors_gps:(dict) {anchors: (lat, long)} + :param map:(dataframe) + :return: + """ + proxy_name, vp_info, anchors, map, sanity_path = args + iso_cnt = vp_info['cnt'] + pings = vp_info['pings'] + provider = vp_info['vpn_provider'] + proxy_ip = vp_info['ip_v4'] + ping_to_vp = vp_info['ping_to_vp'] + try: + start_time = time.time() + checker = Checker(proxy_name, iso_cnt, sanity_path, provider, proxy_ip) + points = checker.check_ping_results(pings, anchors, ping_to_vp) + if len(points) == 0: + logging.info("No valid ping results for %s" % proxy_name) + return proxy_name, iso_cnt, -1 + logging.info("[%s] has %s valid pings from %s anchors" + % (proxy_name, len(points), len(pings))) + proxy_region = checker.get_vpn_region(map) + if proxy_region.empty: + logging.info("[%s] Failed to get proxy region: %s" % (proxy_name, iso_cnt)) + return proxy_name, iso_cnt, -2 + # tag = checker._sanity_check_with_distance(points, proxy_region, anchors) + tag = checker._sanity_check_with_speed(points, proxy_region) + end_time = time.time() - start_time + logging.info("[%s] sanity check takes for %.2fms" % (proxy_name, end_time)) + except Exception, e: + logging.warning("[%s/%s] Failed to sanity check: %s" % (provider, proxy_name, str(e))) + return provider, proxy_name, iso_cnt, -3, proxy_ip + return provider, proxy_name, iso_cnt, tag, proxy_ip + + + class Checker: def __init__(self, proxy_id, iso, path, vpn_provider, ip): self.vpn_provider = vpn_provider @@ -113,44 +153,6 @@ def load_map_from_shapefile(sanity_path): map = temp[['ISO_A2', 'NAME', 'SUBREGION', 'geometry']] return map - @staticmethod - def sanity_check(args): - """ - :param proxy_id:(str) - :param iso_cnt:(str) - :param ping_results:(dict) {anchors: [pings]) - :param anchors_gps:(dict) {anchors: (lat, long)} - :param map:(dataframe) - :return: - """ - proxy_name, vp_info, anchors, map, sanity_path = args - iso_cnt = vp_info['cnt'] - pings = vp_info['pings'] - provider = vp_info['vpn_provider'] - proxy_ip = vp_info['ip_v4'] - ping_to_vp = vp_info['ping_to_vp'] - try: - start_time = time.time() - checker = Checker(proxy_name, iso_cnt, sanity_path, provider, proxy_ip) - points = checker.check_ping_results(pings, anchors, ping_to_vp) - if len(points) == 0: - logging.info("No valid ping results for %s" % proxy_name) - return proxy_name, iso_cnt, -1 - logging.info("[%s] has %s valid pings from %s anchors" - % (proxy_name, len(points), len(pings))) - proxy_region = checker.get_vpn_region(map) - if proxy_region.empty: - logging.info("[%s] Failed to get proxy region: %s" % (proxy_name, iso_cnt)) - return proxy_name, iso_cnt, -2 - # tag = checker._sanity_check_with_distance(points, proxy_region, anchors) - tag = checker._sanity_check_with_speed(points, proxy_region) - end_time = time.time() - start_time - logging.info("[%s] sanity check takes for %.2fms" % (proxy_name, end_time)) - except Exception, e: - logging.warning("[%s/%s] Failed to sanity check: %s" % (provider, proxy_name, str(e))) - return provider, proxy_name, iso_cnt, -3, proxy_ip - return provider, proxy_name, iso_cnt, tag, proxy_ip - def _sanity_check_with_distance(self, points, proxy_region, anchors): """ Given the minimum rtt, check the distance how far ping reply can go with sol from anchors. From 11ec5ec72ce1c7ae062673193d7e34ec12ee30a9 Mon Sep 17 00:00:00 2001 From: shicho Date: Wed, 9 May 2018 16:54:30 -0400 Subject: [PATCH 73/75] Fix errors --- centinel/vpn/geosanity.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index 2ba20c8..f7b22dc 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -7,7 +7,7 @@ import matplotlib matplotlib.use('Agg') from geopandas import * -from geopy.distance import vincenty +from geopy.distance import great_circle from geopy.geocoders import Nominatim from geopy.exc import GeocoderTimedOut import pyproj @@ -48,10 +48,10 @@ def start_sanity_check(sanity_path, vpn_provider, anchors): writer = csv.writer(f) writer.writerow(('vpn_provider', 'proxy_name', 'proxy_cnt', 'truth', 'proxy_ip')) for output in results: - for provider, proxy_name, iso_cnt, tag, ip in output: - if tag == True: - new_conf_list.append(proxy_name + '.ovpn') - writer.writerow((provider, proxy_name, iso_cnt, tag, ip)) + provider, proxy_name, iso_cnt, tag, ip = output + if tag == True: + new_conf_list.append(proxy_name + '.ovpn') + writer.writerow((provider, proxy_name, iso_cnt, tag, ip)) return new_conf_list @@ -80,9 +80,9 @@ def sanity_check(args): logging.info("[%s] has %s valid pings from %s anchors" % (proxy_name, len(points), len(pings))) proxy_region = checker.get_vpn_region(map) - if proxy_region.empty: + if (not hasattr(proxy_region, 'empty')) or (proxy_region.empty): logging.info("[%s] Failed to get proxy region: %s" % (proxy_name, iso_cnt)) - return proxy_name, iso_cnt, -2 + return provider, proxy_name, iso_cnt, -2, proxy_ip # tag = checker._sanity_check_with_distance(points, proxy_region, anchors) tag = checker._sanity_check_with_speed(points, proxy_region) end_time = time.time() - start_time @@ -98,6 +98,8 @@ class Checker: def __init__(self, proxy_id, iso, path, vpn_provider, ip): self.vpn_provider = vpn_provider self.proxy_id = proxy_id + if iso == 'UK': iso = 'GB' + if iso == 'LA': iso = 'US' self.iso = iso self.gps = self._get_gps_of_proxy() self.path = path @@ -178,6 +180,7 @@ def get_vpn_region(self, map): """ # logging.info("Getting vpn region from a map") region = map[map.ISO_A2 == self.iso].geometry + cnt = '' if region.empty: cnt = pycountry.countries.get(alpha2=self.iso) region = map[map.NAME == cnt.name].geometry @@ -244,7 +247,7 @@ def check_ping_results(self, results, anchors_gps, ping_to_vp): distance = 0 anchor_gps = (anchors_gps[anchor]['latitude'], anchors_gps[anchor]['longitude']) if len(self.gps) != 0: - distance = vincenty(anchor_gps, self.gps).km + distance = great_circle(anchor_gps, self.gps).km points.append((distance, ping_vp_to_anchor, anchor_gps[0], anchor_gps[1], radi, anchor)) return points From 8768366f07b9a4b5ab620be2c6196dd7d921586f Mon Sep 17 00:00:00 2001 From: shicho Date: Thu, 10 May 2018 01:20:17 -0400 Subject: [PATCH 74/75] Handle geopy error and missed country code --- centinel/vpn/geosanity.py | 44 ++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index f7b22dc..fbcd8a6 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -48,10 +48,12 @@ def start_sanity_check(sanity_path, vpn_provider, anchors): writer = csv.writer(f) writer.writerow(('vpn_provider', 'proxy_name', 'proxy_cnt', 'truth', 'proxy_ip')) for output in results: - provider, proxy_name, iso_cnt, tag, ip = output - if tag == True: - new_conf_list.append(proxy_name + '.ovpn') - writer.writerow((provider, proxy_name, iso_cnt, tag, ip)) + if type(output) != list: + output = list(output) + for provider, proxy_name, iso_cnt, tag, ip in output: + if tag == True: + new_conf_list.append(proxy_name + '.ovpn') + writer.writerow((provider, proxy_name, iso_cnt, tag, ip)) return new_conf_list @@ -70,40 +72,41 @@ def sanity_check(args): provider = vp_info['vpn_provider'] proxy_ip = vp_info['ip_v4'] ping_to_vp = vp_info['ping_to_vp'] + if iso_cnt == '': + logging.info("Country code is missed for %s/%s" %(provider, proxy_name)) + return provider, proxy_name, iso_cnt, -1, proxy_ip try: start_time = time.time() checker = Checker(proxy_name, iso_cnt, sanity_path, provider, proxy_ip) points = checker.check_ping_results(pings, anchors, ping_to_vp) if len(points) == 0: logging.info("No valid ping results for %s" % proxy_name) - return proxy_name, iso_cnt, -1 + return provider, proxy_name, iso_cnt, -2, proxy_ip logging.info("[%s] has %s valid pings from %s anchors" % (proxy_name, len(points), len(pings))) proxy_region = checker.get_vpn_region(map) if (not hasattr(proxy_region, 'empty')) or (proxy_region.empty): logging.info("[%s] Failed to get proxy region: %s" % (proxy_name, iso_cnt)) - return provider, proxy_name, iso_cnt, -2, proxy_ip + return provider, proxy_name, iso_cnt, -3, proxy_ip # tag = checker._sanity_check_with_distance(points, proxy_region, anchors) tag = checker._sanity_check_with_speed(points, proxy_region) end_time = time.time() - start_time logging.info("[%s] sanity check takes for %.2fms" % (proxy_name, end_time)) except Exception, e: logging.warning("[%s/%s] Failed to sanity check: %s" % (provider, proxy_name, str(e))) - return provider, proxy_name, iso_cnt, -3, proxy_ip + return provider, proxy_name, iso_cnt, -4, proxy_ip return provider, proxy_name, iso_cnt, tag, proxy_ip - class Checker: def __init__(self, proxy_id, iso, path, vpn_provider, ip): self.vpn_provider = vpn_provider self.proxy_id = proxy_id - if iso == 'UK': iso = 'GB' - if iso == 'LA': iso = 'US' self.iso = iso self.gps = self._get_gps_of_proxy() self.path = path self.ip = ip + self.first_trial = True @staticmethod def read_ping_results_from_file(fname, ping_path, anchors): @@ -191,15 +194,32 @@ def get_vpn_region(self, map): df.crs = {'init': 'epsg:4326'} return df + def _handle_geo_name_error(self): + # TODO: ET, UK, LA, IL is not recognized by geolocator. + cnt = self.iso + if self.iso == 'ET': + cnt = 'Ethiopia' + if self.iso == 'UK': + self.iso = 'GB' + cnt = 'GB' + if self.iso == 'LA': + self.iso = 'US' + cnt = 'Los angeles' + if self.iso == 'IL': + self.iso = 'US' + cnt = 'Illinois' + return cnt + def _get_gps_of_proxy(self): """ Return vp's gps """ vpn_gps = tuple() + cnt = self._handle_geo_name_error() try: geolocator = Nominatim() - location = geolocator.geocode(self.iso) + location = geolocator.geocode(cnt, timeout=5) if location == None: - logging.info("Fail to get gps of location %s" %self.iso) + logging.info("Fail to get gps of location %s" %cnt) return None vpn_gps = (location.latitude, location.longitude) except GeocoderTimedOut as e: From 16e8f893f7ab0a99f4941f03cccd7099416cdbf1 Mon Sep 17 00:00:00 2001 From: shicho Date: Thu, 10 May 2018 01:31:04 -0400 Subject: [PATCH 75/75] Convert ms to second --- centinel/vpn/geosanity.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py index fbcd8a6..d32f3a1 100644 --- a/centinel/vpn/geosanity.py +++ b/centinel/vpn/geosanity.py @@ -236,7 +236,7 @@ def _calculate_radius(self, ping): + processing & queueing delay --> maximum speed: 153,000 km/s (0.5104 c) """ owtt_time = ping/float(2) - C = 299792 # km/s + C = 299792 # km/s speed = np.multiply(0.5104, C) second = owtt_time/float(1000) dist_km = np.multiply(speed, second) @@ -300,11 +300,14 @@ def check_sol_violation(self, points, proxy_region): ## min_distance azimu_anchor = self._disk(0, 0, 1) # km ---> m min_dist = azimu_anchor.distance(azimu_cnt) / float(1000) # km + if min_dist == 0: + min_dist = 0.0001 # check the violation of speed of light # speed = dist / min_owtt min_owtt = float(min_delay)/float(2) - min_speed = min_dist / min_owtt + min_owtt_sec = min_owtt/float(1000) # second + min_speed = min_dist/min_owtt_sec # km/s sol = self._get_sol() logging.info("[%s] min_dist: %s, min_owtt: %s, min_speed: %s, sol: %s"