diff --git a/LICENSE b/LICENSE index 35cb1c5..0685e7a 100644 --- a/LICENSE +++ b/LICENSE @@ -21,3 +21,24 @@ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +Copyright (c) 2016 Zack Weinberg + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/centinel/config.py b/centinel/config.py index aa35a4c..62e7357 100644 --- a/centinel/config.py +++ b/centinel/config.py @@ -79,6 +79,8 @@ def __init__(self): proxy['proxy'] = {proxy['proxy_type']: proxy['proxy_url']} self.params['proxy'] = proxy + self.params['custom_meta'] = {} + def parse_config(self, config_file): """ Given a configuration file, read in and interpret the results diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py old mode 100755 new mode 100644 index 80a1f3c..e5adfda --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -12,6 +12,8 @@ import signal import dns.resolver import json +import socket +import shutil import centinel.backend import centinel.client @@ -22,7 +24,14 @@ import centinel.vpn.purevpn as purevpn import centinel.vpn.vpngate as vpngate +import country_module as convertor +import probe as probe +import geosanity as san + PID_FILE = "/tmp/centinel.lock" +log_file = 'log_vpn.log' +logging.basicConfig(format="%(asctime)s %(filename)s:%(lineno)d %(levelname)s: %(message)s", + filename=log_file) def parse_args(): @@ -38,6 +47,10 @@ def parse_args(): parser.add_argument('--key-direction', '-k', dest='key_direction', default=None, help=("Key direction for tls auth, must specify when " "tls-auth is used")) + parser.add_argument('--geo-sanity-check', dest='sanity_check', + action="store_true", default=False, + help=("Run sanity check module to remove lying VP servers " + "from our vantage point list")) parser.add_argument('--reduce-endpoint', dest='reduce_vp', action="store_true", default=False, help="Reduce the number of vantage points by only connect to " @@ -46,15 +59,24 @@ def parse_args(): g1.add_argument('--create-hma-configs', dest='create_HMA', action="store_true", help='Create the openvpn config files for HMA') + g1.add_argument('--update-hma-configs', dest='update_HMA', + action="store_true", + help='Update the openvpn config files for HMA') g1.add_argument('--create-ipvanish-configs', dest='create_IPVANISH', action='store_true', help='Create the openvpn config files for 
IPVanish') + g1.add_argument('--update-ipvanish-configs', dest='update_IPVANISH', + action="store_true", + help='Update the openvpn config files for IPVANISH') g1.add_argument('--create-purevpn-configs', dest='create_PUREVPN', action='store_true', help='Create the openvpn config files for PureVPN') g1.add_argument('--create-vpngate-configs', dest='create_VPNGATE', action='store_true', help='Create the openvpn config files for VPN Gate') + g1.add_argument('--update-purevpn-configs', dest='update_PUREVPN', + action="store_true", + help='Update the openvpn config files for PUREVPN') parser.add_argument('--shuffle', '-s', dest='shuffle_lists', action="store_true", default=False, help='Randomize the order of vantage points') @@ -66,7 +88,7 @@ def parse_args(): g2 = parser.add_mutually_exclusive_group(required=True) g2.add_argument('--directory', '-d', dest='directory', help='Directory with experiments, config files, etc.') - create_conf_help = ('Create configuration files for the given ' + create_conf_help = ('Create/Update configuration files for the given ' 'openvpn config files so that we can treat each ' 'one as a client. The argument should be a ' 'directory with a subdirectory called openvpn ' @@ -74,6 +96,9 @@ def parse_args(): g2.add_argument('--create-config', '-c', help=create_conf_help, dest='create_conf_dir') + g2.add_argument('--update-config', '-z', help=create_conf_help, + dest='update_conf_dir') + # following args are used to support splitting clients among multiple VMs # each running vpn walker will use this to decide which portion of vpn # endpoints it should include @@ -86,7 +111,7 @@ def parse_args(): def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, - exclude_list, shuffle_lists, vm_num, vm_index, reduce_vp): + exclude_list, shuffle_lists, vm_num, vm_index, reduce_vp, sanity_check): """ For each VPN, check if there are experiments and scan with it if necessary @@ -142,6 +167,24 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, else: logging.warning("Cannot determine VPN provider!") + # geolocation sanity check + if sanity_check: + start_time = time.time() + sanity_path = os.path.join(directory, '../sanitycheck') + if not os.path.exists(sanity_path): + os.makedirs(sanity_path) + anchors = probe.retrieve_anchor_list(sanity_path) + logging.info("Anchors list fetched") + # send pings + probe.start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, + key_direction, sanity_path, vpn_provider, anchors) + # sanity check + new_conf_list = san.start_sanity_check(sanity_path, vpn_provider, anchors) + logging.info("List size after sanity check. New size: %d" % len(new_conf_list)) + conf_list = new_conf_list + end_time = time.time() - start_time + logging.info("Finished sanity check: total elapsed time (%.2f)" %end_time) + # reduce size of list if reduce_vp is true if reduce_vp: logging.info("Reducing list size. 
Original size: %d" % len(conf_list)) @@ -151,10 +194,24 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, centinel_config = os.path.join(conf_dir, filename) config = centinel.config.Configuration() config.parse_config(centinel_config) - vp_ip = os.path.splitext(filename)[0] + hostname = os.path.splitext(filename)[0] + try: + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" %(hostname, str(exp))) + continue + # get country for this vpn + with open(centinel_config) as fc: + json_data = json.load(fc) + country_in_config = "" + if 'country' in json_data: + country_in_config = json_data['country'] try: meta = centinel.backend.get_meta(config.params, vp_ip) + # send country name to be converted to alpha2 code + if (len(country_in_config) > 2): + meta['country'] = convertor.country_to_a2(country_in_config) if 'country' in meta and 'as_number' in meta \ and meta['country'] and meta['as_number']: country_asn = '_'.join([meta['country'], meta['as_number']]) @@ -240,15 +297,40 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # [ip-address].ovpn, we can extract IP address from filename # and use it to geolocate and fetch experiments before connecting # to VPN. - vpn_address, extension = os.path.splitext(filename) + # filename is [OBhostname].ovpn, we resolved the hostname to ip + # using socket.gethostbyname() + hostname = os.path.splitext(filename)[0] + vp_ip = "unknown" + try: + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" %(hostname, str(exp))) + continue + + # vpn_address, extension = os.path.splitext(filename) + lines = [line.rstrip('\n') for line in open(centinel_config)] + + # get country for this vpn + country_in_config = "" + # reading the server.txt file in vpns folder + for line in lines: + if "country" in line: + (key, country_in_config) = line.split(': ') + country_in_config = country_in_config.replace('\"', '').replace(',', '') + country = None try: - meta = centinel.backend.get_meta(config.params, - vpn_address) + # we still might need some info from the Maximind query + meta = centinel.backend.get_meta(config.params, vp_ip) + + # send country name to be converted to alpha2 code + if (len(country_in_config) > 2): + meta['country'] = convertor.country_to_a2(country_in_config) + # some vpn config files already contain the alpha2 code (length == 2) if 'country' in meta: country = meta['country'] except: - logging.exception("%s: Failed to geolocate %s" % (filename, vpn_address)) + logging.exception("%s: Failed to geolocate %s" % (filename, vp_ip)) if country and exclude_list and country in exclude_list: logging.info("%s: Skipping this server (%s)" % (filename, country)) @@ -257,7 +339,8 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # try setting the VPN info (IP and country) to get appropriate # experiemnts and input data. try: - centinel.backend.set_vpn_info(config.params, vpn_address, country) + logging.info("country is %s" % country) + centinel.backend.set_vpn_info(config.params, vp_ip, country) except Exception as exp: logging.exception("%s: Failed to set VPN info: %s" % (filename, exp)) @@ -270,7 +353,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, if not experiments_available(config.params): logging.info("%s: No experiments available." 
% filename) try: - centinel.backend.set_vpn_info(config.params, vpn_address, country) + centinel.backend.set_vpn_info(config.params, vp_ip, country) except Exception as exp: logging.exception("Failed to set VPN info: %s" % exp) continue @@ -294,6 +377,16 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, json.dump(sched_info, f, indent=2) f.truncate() + # before starting the vpn do the sanity check + # create a directory to store the RIPE anchor list and landmarks_list in it so other vpns could use it as well + # sanity_path = os.path.join(directory,'../sanitycheck') + # if not os.path.exists(sanity_path): + # os.makedirs(sanity_path) + + # fetch the list of RIPE anchors + # anchors = probe.get_anchor_list(sanity_path) + + # logging.info("Anchors list fetched") logging.info("%s: Starting VPN." % filename) vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, @@ -306,6 +399,16 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, time.sleep(5) continue + + # sending ping to the anchors + # ping_result = probe.perform_probe(sanity_path, vpn_provider,vpn_provider,country,anchors) + + # have to do this sanity check if timestamp is a certain value, needs changing + # timestamp = time.time() + # ping_result['timestamp'] = timestamp + + + logging.info("%s: Running Centinel." % filename) try: client = centinel.client.Client(config.params, vpn_provider) @@ -329,7 +432,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # try setting the VPN info (IP and country) to the correct address # after sync is over. try: - centinel.backend.set_vpn_info(config.params, vpn_address, country) + centinel.backend.set_vpn_info(config.params, vp_ip, country) except Exception as exp: logging.exception("Failed to set VPN info: %s" % exp) @@ -378,31 +481,49 @@ def signal_handler(signal, frame): sys.exit(0) -def create_config_files(directory): +def update_config_files(directory, vp_list): """ - For each VPN file in directory/vpns, create a new configuration - file and all the associated directories - - Note: the expected directory structure is - args.directory - -----vpns (contains the OpenVPN config files - -----configs (contains the Centinel config files) - -----exps (contains the experiments directories) - -----results (contains the results) - + For each VPN file in directory/vpns update its configuration if needed :param directory: + :param vp_list: the list of vp updates/deletes/additions + :return: """ - logging.info("Starting to create config files from openvpn files") + logging.info("Starting to update config files") + server_country = {} vpn_dir = return_abs_path(directory, "vpns") + print(vpn_dir) + new_vpn_dir = return_abs_path(directory, "updated_vpns") + + # read servers.txt to find the country associated with the ip + with open(vpn_dir + '/servers.txt') as server_file: + servers = server_file.readlines() + + for server_line in servers: + server_line = (server_line.split('|')) + server_country[server_line[0]] = server_line[1].replace('\n', '') + conf_dir = return_abs_path(directory, "configs") - os.mkdir(conf_dir) home_dirs = return_abs_path(directory, "home") - os.mkdir(home_dirs) - for filename in os.listdir(vpn_dir): + + # remove vps + for vp in vp_list[0]: + os.remove(os.path.join(directory, "vpns/" + vp)) + shutil.rmtree(os.path.join(directory, "home/" + vp)) + os.remove(os.path.join(directory, "configs/" + vp)) + + # update vps + for vp in vp_list[1]: + print('in update') + os.remove(os.path.join(directory, 
"vpns/" + vp)) + shutil.copyfile(os.path.join(directory, "updated_vpns/" + vp), os.path.join(directory, "vpns/" + vp)) + # add vp + for vp in vp_list[2]: + print(os.path.join(directory, "vpns/" + vp)) + shutil.copyfile(os.path.join(directory, "updated_vpns/" + vp), os.path.join(directory, "vpns/" + vp)) configuration = centinel.config.Configuration() # setup the directories - home_dir = os.path.join(home_dirs, filename) + home_dir = os.path.join(home_dirs, vp) os.mkdir(home_dir) configuration.params['user']['centinel_home'] = home_dir exp_dir = os.path.join(home_dir, "experiments") @@ -423,9 +544,79 @@ def create_config_files(directory): configuration.params['server']['verify'] = True configuration.params['experiments']['tcpdump_params'] = ["-i", "tun0"] - - conf_file = os.path.join(conf_dir, filename) + configuration.params['country'] = server_country[vp.replace('.ovpn', '')] + conf_file = os.path.join(conf_dir, vp) configuration.write_out_config(conf_file) + shutil.rmtree(new_vpn_dir) + + +def create_config_files(directory, provider): + """ + For each VPN file in directory/vpns, create a new configuration + file and all the associated directories + + Note: the expected directory structure is + args.directory + -----vpns (contains the OpenVPN config files + -----configs (contains the Centinel config files) + -----exps (contains the experiments directories) + -----results (contains the results) + + :param directory: + """ + logging.info("Starting to create config files from openvpn files") + server_country = {} + vpn_dir = return_abs_path(directory, "vpns") + + # read servers.txt to find the country associated with the ip + with open(vpn_dir + '/servers.txt') as server_file: + servers = server_file.readlines() + + for server_line in servers: + server_line = (server_line.split('|')) + server_country[server_line[0]] = server_line[1].replace('\n', '') + + conf_dir = return_abs_path(directory, "configs") + os.mkdir(conf_dir) + home_dirs = return_abs_path(directory, "home") + os.mkdir(home_dirs) + for filename in os.listdir(vpn_dir): + if ('servers' not in filename): + configuration = centinel.config.Configuration() + # setup the directories + home_dir = os.path.join(home_dirs, filename) + os.mkdir(home_dir) + configuration.params['user']['centinel_home'] = home_dir + exp_dir = os.path.join(home_dir, "experiments") + os.mkdir(exp_dir) + configuration.params['dirs']['experiments_dir'] = exp_dir + data_dir = os.path.join(home_dir, "data") + os.mkdir(data_dir) + configuration.params['dirs']['data_dir'] = data_dir + res_dir = os.path.join(home_dir, "results") + os.mkdir(res_dir) + configuration.params['dirs']['results_dir'] = res_dir + + log_file = os.path.join(home_dir, "centinel.log") + configuration.params['log']['log_file'] = log_file + login_file = os.path.join(home_dir, "login") + configuration.params['server']['login_file'] = login_file + configuration.params['user']['is_vpn'] = True + + configuration.params['server']['verify'] = True + configuration.params['experiments']['tcpdump_params'] = ["-i", "tun0"] + configuration.params['country'] = server_country[filename.replace('.ovpn', '')] + hostname = os.path.splitext(filename)[0] + vp_ip = "unknown" + try: + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" % (hostname, str(exp))) + configuration.params['custom_meta']['provider'] = provider + configuration.params['custom_meta']['hostname'] = hostname + configuration.params['custom_meta']['ip_address'] = vp_ip + conf_file = 
os.path.join(conf_dir, filename) + configuration.write_out_config(conf_file) def experiments_available(config): @@ -503,22 +694,44 @@ def _run(): if args.vm_index < 1 or args.vm_index > args.vm_num: print "vm_index value cannot be negative or greater than vm_num!" return - + provider = "None" if args.create_conf_dir: if args.create_HMA: hma_dir = return_abs_path(args.create_conf_dir, 'vpns') + provider = 'hma' hma.create_config_files(hma_dir) elif args.create_IPVANISH: ipvanish_dir = return_abs_path(args.create_conf_dir, 'vpns') + provider = 'ipvanish' ipvanish.create_config_files(ipvanish_dir) elif args.create_PUREVPN: purevpn_dir = return_abs_path(args.create_conf_dir, 'vpns') + provider = 'purevpn' purevpn.create_config_files(purevpn_dir) elif args.create_VPNGATE: vpngate_dir = return_abs_path(args.create_conf_dir, 'vpns') + provider = 'vpngate' vpngate.create_config_files(vpngate_dir) # create the config files for the openvpn config files - create_config_files(args.create_conf_dir) + create_config_files(args.create_conf_dir, provider) + + elif args.update_conf_dir: + if args.update_HMA: + hma_dir = return_abs_path(args.update_conf_dir, 'vpns') + provider = 'hma' + vp_list = hma.update_config_files(hma_dir) + if args.update_IPVANISH: + ipvanish_dir = return_abs_path(args.update_conf_dir, 'vpns') + provdier = 'ipvanish' + vp_list = ipvanish.update_config_files(ipvanish_dir) + if args.update_PUREVPN: + purevpn_dir = return_abs_path(args.update_conf_dir, 'vpns') + provider = 'purevpn' + vp_list = purevpn.update_config_files(purevpn_dir) + update_config_files(args.update_conf_dir, vp_list) + + # add new ones + else: # sanity check tls_auth and key_direction if (args.tls_auth is not None and args.key_direction is None) or \ @@ -531,7 +744,9 @@ def _run(): crt_file=args.crt_file, tls_auth=args.tls_auth, key_direction=args.key_direction, exclude_list=args.exclude_list, shuffle_lists=args.shuffle_lists, vm_num=args.vm_num, - vm_index=args.vm_index, reduce_vp=args.reduce_vp) + vm_index=args.vm_index, reduce_vp=args.reduce_vp, + sanity_check=args.sanity_check) + if __name__ == "__main__": run() diff --git a/centinel/vpn/country_module.py b/centinel/vpn/country_module.py new file mode 100644 index 0000000..6d6cfa4 --- /dev/null +++ b/centinel/vpn/country_module.py @@ -0,0 +1,102 @@ +import geonamescache +from difflib import SequenceMatcher +from geopy.geocoders import Nominatim +from string import digits +import logging + + +def find_in_states(us_states, country): + """ + Given a country check if it is actually a US State + :param us_states: a list of us states + :param country: the country that we want to get its alpha code + :return: + """ + for state in us_states: + if(country in state): + return 'US' + return None + +def manual_check(country): + """ + Some of the country names have spelling errors, + This function manually fixes those + + :param country: the country that we want to get its alpha code + :return the alpha2 country codes: + """ + if(country == "Angula"): + return 'AO' + if(country == "Bosnia"): + return 'BA' + if(country == "UAE"): + return 'AE' + if(country == "LosAngeles"): + return 'US' + if(country == "Virgin Islands (British)"): + return 'VI' + if(country == "Korea"): + return 'KR' + if(country == "PitcairnIslands"): + return 'PN' + if(country == "RepublicofSingapore"): + return 'SG' + if(country == "USA"): + return 'US' + if(country == "Coted`Ivoire"): + return 'CI' + if(country == "Congo"): + return 'CD' + if(country == "Palestine"): + return 'PS' + if(country == 
"RepublicofDjibouti"): + return 'DJ' + return None + +def country_to_a2(country): + """ + This function converts country names to their alpha2 codes + :param country: the country that we want to get its alpha code + :return the alpha2 country codes: + """ + gc = geonamescache.GeonamesCache() + countries = gc.get_countries() + us_states = gc.get_us_states_by_names() + + # creating a dict between country name and alpha2 codes + countries_dict = {} + for item in countries: + countries_dict[countries[item]['name']] = item + countries_dict['United States of America'] = 'US' + countries_dict['Deutschland'] = 'DE' + countries_dict['UK'] = 'GB' + + if ',' in country: + country = country.split(',')[0] + iso2 = countries_dict.get(country) + if (iso2 != None): + return iso2 + else: + iso2 = find_in_states(us_states,country) + if(iso2 == None): + iso2 = manual_check(country) + if(iso2 == None): + for known_country in countries_dict: + if(SequenceMatcher(None, country, known_country).ratio()>0.70): + iso2 = countries_dict.get(known_country) + return iso2 + else: + iso2 = None + if (iso2 == None): + try: + # for removing numbers from country/city names + country = country.translate(None, digits) + geolocator = Nominatim() + location = geolocator.geocode(country) + location = (location.address).split(',') + iso2 = (countries_dict.get(location[len(location)-1].strip())) + except: + # no mapping found + return None + + return iso2 diff --git a/centinel/vpn/expressvpn.py b/centinel/vpn/expressvpn.py new file mode 100644 index 0000000..e69de29 diff --git a/centinel/vpn/geosanity.py b/centinel/vpn/geosanity.py new file mode 100644 index 0000000..d32f3a1 --- /dev/null +++ b/centinel/vpn/geosanity.py @@ -0,0 +1,463 @@ +""" Class for sanity check for vpn location""" +import logging +import os +import time +import csv +import pickle +import matplotlib +matplotlib.use('Agg') +from geopandas import * +from geopy.distance import great_circle +from geopy.geocoders import Nominatim +from geopy.exc import GeocoderTimedOut +import pyproj +import functools +import pycountry +from shapely.ops import transform as sh_transform +from shapely.geometry import Point, Polygon, box as Box +import zipfile +import requests +import StringIO +import multiprocessing as mp + +def start_sanity_check(sanity_path, vpn_provider, anchors): + ping_path = os.path.join(sanity_path, 'pings') + # get the recent ping results of the vpn provider + file_lists = {i.split('_')[2].split('.csv')[0]: i for i in os.listdir(ping_path) if vpn_provider in i} + fname = file_lists[max(file_lists.keys())] + vpn_pings = Checker.read_ping_results_from_file(fname, ping_path, anchors) + map = Checker.load_map_from_shapefile(sanity_path) + try: + num = mp.cpu_count() + except (ImportError, NotImplementedError): + num = 1 + pass + pool = mp.Pool(processes=num) + results = [] + results.append(pool.map(sanity_check, + [(this, vpn_pings[this], anchors, map, sanity_path) for this in vpn_pings])) + pool.close() + pool.join() + new_conf_list = [] + result_path = os.path.join(sanity_path, 'results') + if not os.path.exists(result_path): + os.makedirs(result_path) + current_time = fname.split('_')[2].split('.csv')[0] + csv_name = 'results_' + vpn_provider + '_' + current_time + '.csv' + with open(os.path.join(result_path, csv_name), 'w') as f: + writer = csv.writer(f) + writer.writerow(('vpn_provider', 'proxy_name', 'proxy_cnt', 'truth', 'proxy_ip')) + for output in results: + if type(output) != list: + output = list(output) + for provider, proxy_name, iso_cnt, tag, ip in 
output: + if tag == True: + new_conf_list.append(proxy_name + '.ovpn') + writer.writerow((provider, proxy_name, iso_cnt, tag, ip)) + return new_conf_list + + +def sanity_check(args): + """ + :param proxy_id:(str) + :param iso_cnt:(str) + :param ping_results:(dict) {anchors: [pings]) + :param anchors_gps:(dict) {anchors: (lat, long)} + :param map:(dataframe) + :return: + """ + proxy_name, vp_info, anchors, map, sanity_path = args + iso_cnt = vp_info['cnt'] + pings = vp_info['pings'] + provider = vp_info['vpn_provider'] + proxy_ip = vp_info['ip_v4'] + ping_to_vp = vp_info['ping_to_vp'] + if iso_cnt == '': + logging.info("Country code is missed for %s/%s" %(provider, proxy_name)) + return provider, proxy_name, iso_cnt, -1, proxy_ip + try: + start_time = time.time() + checker = Checker(proxy_name, iso_cnt, sanity_path, provider, proxy_ip) + points = checker.check_ping_results(pings, anchors, ping_to_vp) + if len(points) == 0: + logging.info("No valid ping results for %s" % proxy_name) + return provider, proxy_name, iso_cnt, -2, proxy_ip + logging.info("[%s] has %s valid pings from %s anchors" + % (proxy_name, len(points), len(pings))) + proxy_region = checker.get_vpn_region(map) + if (not hasattr(proxy_region, 'empty')) or (proxy_region.empty): + logging.info("[%s] Failed to get proxy region: %s" % (proxy_name, iso_cnt)) + return provider, proxy_name, iso_cnt, -3, proxy_ip + # tag = checker._sanity_check_with_distance(points, proxy_region, anchors) + tag = checker._sanity_check_with_speed(points, proxy_region) + end_time = time.time() - start_time + logging.info("[%s] sanity check takes for %.2fms" % (proxy_name, end_time)) + except Exception, e: + logging.warning("[%s/%s] Failed to sanity check: %s" % (provider, proxy_name, str(e))) + return provider, proxy_name, iso_cnt, -4, proxy_ip + return provider, proxy_name, iso_cnt, tag, proxy_ip + + +class Checker: + def __init__(self, proxy_id, iso, path, vpn_provider, ip): + self.vpn_provider = vpn_provider + self.proxy_id = proxy_id + self.iso = iso + self.gps = self._get_gps_of_proxy() + self.path = path + self.ip = ip + self.first_trial = True + + @staticmethod + def read_ping_results_from_file(fname, ping_path, anchors): + vp_info = dict() + keys = sorted(anchors.keys()) + with open(os.path.join(ping_path, fname), 'r') as f: + reader = csv.reader(f) + for row in reader: + if row[0] == 'vpn_provider': + keys = row[6:] + continue + name = row[1] + vp_info[name] = dict() + vp_info[name]['vpn_provider'] = row[0] + vp_info[name]['ip_v4'] = row[2] + vp_info[name]['cnt'] = row[3] + vp_info[name]['time_taken'] = row[4] + vp_info[name]['ping_to_vp'] = float(row[5]) + vp_info[name]['pings'] = dict() + count = 6 + for key in keys: + if row[count] == '': rtt = None + else: rtt = float(row[count]) + vp_info[name]['pings'][key] = rtt + count += 1 + return vp_info + + @staticmethod + def load_map_from_shapefile(sanity_path): + """ + Load all countries from shapefile + (e.g., shapefile = 'map/ne_10m_admin_0_countries.shp') + """ + logging.info("Loading a shapefile for the world map") + shapefile = os.path.join(sanity_path, "ne_10m_admin_0_countries.shp") + if not os.path.exists(shapefile): + logging.info("Shape file does not exist, Downloading from server") + shapefile_url = 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip' + logging.info("Starting to download map shape file zip") + try: + r = requests.get(shapefile_url, stream=True) + z = zipfile.ZipFile(StringIO.StringIO(r.content)) + 
z.extractall(sanity_path) + logging.info("Map shape file downloaded") + except Exception as exp: + logging.error("Could not fetch map file : %s" % str(exp)) + temp = GeoDataFrame.from_file(shapefile) + # print temp.dtypes.index + map = temp[['ISO_A2', 'NAME', 'SUBREGION', 'geometry']] + return map + + def _sanity_check_with_distance(self, points, proxy_region, anchors): + """ Given the minimum rtt, + check the distance how far ping reply can go with sol from anchors. + If the distance is not overlapped with the claimed country, + then we consider it as a lied vp. + """ + circles = self.get_anchors_region(points) + tag = self.check_overlap(proxy_region, circles, anchors) + return tag + + def _sanity_check_with_speed(self, points, proxy_region): + """ Given the minimum rtt + and the shortest distance from anchor to the claimed country. + we calculated a speed of them. If the speed violates sol, + then we consider it as a lied vp. + """ + tag = self.check_sol_violation(points, proxy_region) + return tag + + def get_vpn_region(self, map): + """ + Get a region of given iso country + """ + # logging.info("Getting vpn region from a map") + region = map[map.ISO_A2 == self.iso].geometry + cnt = '' + if region.empty: + cnt = pycountry.countries.get(alpha2=self.iso) + region = map[map.NAME == cnt.name].geometry + if region.empty: + logging.info("Fail to read country region: %s (%s)" % (self.iso, cnt)) + return None + df = geopandas.GeoDataFrame({'geometry': region}) + df.crs = {'init': 'epsg:4326'} + return df + + def _handle_geo_name_error(self): + # TODO: ET, UK, LA, IL is not recognized by geolocator. + cnt = self.iso + if self.iso == 'ET': + cnt = 'Ethiopia' + if self.iso == 'UK': + self.iso = 'GB' + cnt = 'GB' + if self.iso == 'LA': + self.iso = 'US' + cnt = 'Los angeles' + if self.iso == 'IL': + self.iso = 'US' + cnt = 'Illinois' + return cnt + + def _get_gps_of_proxy(self): + """ Return vp's gps + """ + vpn_gps = tuple() + cnt = self._handle_geo_name_error() + try: + geolocator = Nominatim() + location = geolocator.geocode(cnt, timeout=5) + if location == None: + logging.info("Fail to get gps of location %s" %cnt) + return None + vpn_gps = (location.latitude, location.longitude) + except GeocoderTimedOut as e: + logging.info("Error geocode failed: %s" %(e)) + return vpn_gps + + def _disk(self, x, y, radius): + return Point(x, y).buffer(radius) + + def _calculate_radius(self, ping): + """ + (the number got from zack's paper & code) + Network cable's propagation speed: around 2/3c = 199,862 km/s + + processing & queueing delay --> maximum speed: 153,000 km/s (0.5104 c) + """ + owtt_time = ping/float(2) + C = 299792 # km/s + speed = np.multiply(0.5104, C) + second = owtt_time/float(1000) + dist_km = np.multiply(speed, second) + return dist_km + + def check_ping_results(self, results, anchors_gps, ping_to_vp): + """ + Because the equator circumference is 40,074.275km. + the range cannot be farther than 20,037.135km. + If there are anomalies pings (<3.0ms or >130.0ms), remove. + Otherwise, return latitude and longitude of vps, radius derived from ping delay. 
+ Return points(list): (lat, lon, radius) + """ + points = list() + for anchor, ping in results.iteritems(): + # remove anomalies + if ping == None: continue + # get ping from vp to anchor + ping_vp_to_anchor = ping - ping_to_vp + if (ping_vp_to_anchor < 6.0) or (ping_vp_to_anchor >= 500.0): + logging.debug("ping anomalies of %s: %s" %(anchor, ping_vp_to_anchor)) + continue + radi = self._calculate_radius(ping_vp_to_anchor) + if anchor not in anchors_gps: + logging.debug("no gps for anchor %s" %anchor) + continue + # calculate the distance(km) between proxy and anchor + distance = 0 + anchor_gps = (anchors_gps[anchor]['latitude'], anchors_gps[anchor]['longitude']) + if len(self.gps) != 0: + distance = great_circle(anchor_gps, self.gps).km + points.append((distance, ping_vp_to_anchor, anchor_gps[0], anchor_gps[1], radi, anchor)) + return points + + def _get_sol(self): + """ Return speed of lights + """ + C = 299792 # km/s + speed = np.multiply(0.5104, C) + return speed + + def check_sol_violation(self, points, proxy_region): + """ + method 2: instead of checking overlap between proxy region and anchor regions, we check + the sol violation from the further anchors. Once we seen sol violation, we stop and + return truth=False. + """ + ## Sort based on distance. + points.sort(key=lambda tup: tup[0], reverse=True) # further to the proxy + claimed_cnt = True + for dist, min_delay, lat, lon, radi, anchor_name in points: + aeqd = pyproj.Proj(proj='aeqd', ellps='WGS84', datum='WGS84', + lat_0=lat, lon_0=lon) + wgs_proj = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") ##4326 -- 2d + + ## country + azimu_cnt = sh_transform( + functools.partial(pyproj.transform, wgs_proj, aeqd), + proxy_region.geometry.item()) + + ## min_distance + azimu_anchor = self._disk(0, 0, 1) # km ---> m + min_dist = azimu_anchor.distance(azimu_cnt) / float(1000) # km + if min_dist == 0: + min_dist = 0.0001 + + # check the violation of speed of light + # speed = dist / min_owtt + min_owtt = float(min_delay)/float(2) + min_owtt_sec = min_owtt/float(1000) # second + min_speed = min_dist/min_owtt_sec # km/s + sol = self._get_sol() + + logging.info("[%s] min_dist: %s, min_owtt: %s, min_speed: %s, sol: %s" + %(anchor_name, min_dist, min_delay, min_speed, sol)) + if min_speed > sol: + claimed_cnt = False + break + return claimed_cnt + + def get_anchors_region(self, points): + """ Get anchors region + (referred from zack's paper & code Todo: add LICENSE?) + https://github.com/zackw/active-geolocator + Note that pyproj takes distances in meters & lon/lat order. + """ + # logging.info("Starting to draw anchors region") + wgs_proj = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") + ## Sort based on distance. 
+ points.sort(key=lambda tup: tup[0], reverse=True) # further to the proxy + circles = list() + count = 0 + for dist, min_delay, lat, lon, radi, anchor_name in points: + count += 1 + # create azimuthal equidistant projector for each anchors + aeqd = pyproj.Proj(proj='aeqd', ellps='WGS84', datum='WGS84', + lat_0=lat, lon_0=lon) + try: + # draw a disk (center = long/lat, radius) + disk = sh_transform( + functools.partial(pyproj.transform, aeqd, wgs_proj), + self._disk(0, 0, radi * 1000)) # km ---> m + north, south, west, east = 90., -90., -180, 180 + boundary = np.array(disk.boundary) + i = 0 + while i < boundary.shape[0] - 1: + if abs(boundary[i + 1, 0] - boundary[i, 0]) > 180: + pole = south if boundary[i, 1] < 0 else north + west = west if boundary[i, 0] < 0 else east + east = east if boundary[i, 0] < 0 else west + boundary = np.insert(boundary, i + 1, [ + [west, boundary[i, 1]], + [west, pole], + [east, pole], + [east, boundary[i + 1, 1]] + ], axis=0) + i += 5 + else: + i += 1 + disk = Polygon(boundary).buffer(0) + # In the case of the generated disk is too large + origin = Point(lon, lat) + if not disk.contains(origin): + df1 = geopandas.GeoDataFrame({'geometry': [Box(-180., -90., 180., 90.)]}) + df2 = geopandas.GeoDataFrame({'geometry': [disk]}) + df3 = geopandas.overlay(df1, df2, how='difference') + disk = df3.geometry[0] + assert disk.is_valid + assert disk.contains(origin) + circles.append((lat, lon, radi, disk, anchor_name, dist, min_delay)) + except Exception as e: + logging.debug("Fail to get a circle %s" %self.proxy_id) + return circles + + def check_overlap(self, proxy_region, circles, anchors): + """ Check overlap between proxy region and anchors' region. + If there is an overlap check how much they are overlapped, + otherwise, check how far the distance is from a proxy. 
+ :return results(list): if True: the percentage of overlapped area to a country + False: the distance (km) between a country and expected range + """ + # logging.info("Starting to check overlap") + results = list() + simple = True + claimed_cnt = True + for lat, lon, radi, this_circle, anchor_name, distance, min_delay in circles: + df_anchor = geopandas.GeoDataFrame({'geometry': [this_circle]}) + overlap = geopandas.overlay(proxy_region, df_anchor, how="intersection") + if simple: + if overlap.empty: + claimed_cnt = False + break + else: + # When we wanna do further investigation + if overlap.empty: + aeqd = pyproj.Proj(proj='aeqd', ellps='WGS84', datum='WGS84', + lat_0=lat, lon_0=lon) + wgs_proj = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") ##4326 -- 2d + ## country + azimu_cnt = sh_transform( + functools.partial(pyproj.transform, wgs_proj, aeqd), + proxy_region.geometry.item()) + ## min_distance + azimu_anchor = self._disk(0, 0, radi * 1000) #km ---> m + gap = azimu_anchor.distance(azimu_cnt) / float(1000) #km + results.append({'anchor_name': anchor_name, 'distance': distance, 'proxy_name': self.proxy_id, + 'min_delay': min_delay, 'truth': False, 'extra': gap, 'anchor_gps': (lat, lon), + 'anchor_ip': anchors[anchor_name]['ip_v4'], 'radius': radi, 'proxy_ip': self.ip, + 'anchor_cnt': (anchors[anchor_name]['city'], anchors[anchor_name]['country']), + 'proxy_country': self.iso}) + else: + ## area + area_cnt = proxy_region['geometry'].area#/10**6 #km/sqr + area_cnt = sum(area_cnt.tolist()) + area_overlap = overlap['geometry'].area#/10**6 #km/sqr + area_overlap = sum(area_overlap.tolist()) + overlapped = area_overlap/area_cnt + results.append({'anchor_name': anchor_name, 'distance': distance, 'proxy_name': self.proxy_id, + 'min_delay': min_delay, 'truth': True, 'extra': overlapped, 'anchor_gps': (lat, lon), + 'anchor_ip': anchors[anchor_name]['ip_v4'], 'radius': radi, 'proxy_ip': self.ip, + 'anchor_cnt': (anchors[anchor_name]['city'], anchors[anchor_name]['country']), + 'proxy_country': self.iso}) + if not simple: + ping_filename = self.vpn_provider + '_' + self.proxy_id + '_' + str(time.time) + pickle_path = os.path.join(self.path, 'sanity/'+self.vpn_provider) + if not os.path.exists(pickle_path): + os.makedirs(pickle_path) + with open(os.path.join(pickle_path, ping_filename+'.csv'), 'w') as f: + writer = csv.writer(f) + writer.writerow(('proxy_name','proxy_ip','proxy_country','truth','extra', + 'anchor_name','anchor_ip','anchor_cnt','anchor_gps','distance','min_delay','radius')) + for this in results: + writer.writerow((this['proxy_name'],this['proxy_ip'],this['proxy_country'], + this['truth'],this['extra'], + this['anchor_name'],this['anchor_ip'],this['anchor_cnt'], + this['anchor_gps'],this['distance'],this['min_delay'],this['radius'])) + with open(os.path.join(pickle_path, ping_filename), 'w') as f: + pickle.dump(results, f) + # logging.info("Pickle file successfully created.") + claimed_cnt = self.is_valid(results) + return claimed_cnt + + def is_valid(self, results): + """ + Need reasonable threshold to answer the validation of location + For now, we say it is valid if 90% of 30 nearest anchors are True + """ + # logging.info("checking validation") + total = 0 + count_valid = 0 + limit = 30 + for this in results: + valid = this['truth'] + aux = this['extra'] + total += 1 + if valid: + count_valid += 1 + if total == limit: + break + frac = count_valid/float(limit) + if frac >= 0.9: + return True + else: + return False diff --git a/centinel/vpn/hma.py 
b/centinel/vpn/hma.py index ed19267..d6be42e 100644 --- a/centinel/vpn/hma.py +++ b/centinel/vpn/hma.py @@ -5,56 +5,245 @@ import os import requests import sys +import shutil +import logging +import socket +import zipfile +import urllib2 +import pickle +import hashlib +def hash_file(filename): + """ + This function returns the SHA-1 hash + of the file passed into it + """ -def create_config_files(directory): - """Create all available VPN configuration files in the given directory + # make a hash object + h = hashlib.sha1() + + # open file for reading in binary mode + with open(filename,'rb') as file: + + # loop till the end of the file + chunk = 0 + while chunk != b'': + # read only 1024 bytes at a time + chunk = file.read(1024) + h.update(chunk) + + # return the hex representation of digest + return h.hexdigest() - Note: I am basically just following along with what their script - client does +def unzip(source_filename, dest_dir): + with zipfile.ZipFile(source_filename) as zf: + zf.extractall(dest_dir) + + +def update_config_files(directory): """ - # get the config file template - template_url = ("https://securenetconnection.com/vpnconfig/" - "openvpn-template.ovpn") - resp = requests.get(template_url) - resp.raise_for_status() - template = resp.content - - # get the available servers and create a config file for each server - server_url = ("https://securenetconnection.com/vpnconfig/" - "servers-cli.php") - resp = requests.get(server_url) - resp.raise_for_status() - servers = resp.content.split("\n") + Update directory for vpn walker + :param directory: + :return a list of delete, update and added vps: + """ + updated_vpn_path = os.path.join(directory, '../updated_vpns') + print(updated_vpn_path) + if not os.path.exists(updated_vpn_path): + os.makedirs(updated_vpn_path) + + logging.info("Update HMA Configs") + + # read python dict back from the file + pkl_file = open(os.path.join(directory,'../config_hash.pkl'), 'rb') + old_config_dict = pickle.load(pkl_file) + pkl_file.close() + + config_zip_url = "https://hidemyass.com/vpn-config/vpn-configs.zip" + + logging.info("Starting to download hma config file zip") + + zip_response = urllib2.urlopen(config_zip_url) + zip_content = zip_response.read() + zip_path = os.path.join(directory, '../vpn-configs.zip') + + with open(zip_path,'w') as f: + f.write(zip_content) + logging.info("Extracting zip file") + unzip(zip_path, os.path.join(directory, '../')) + + # remove zip file + os.remove(zip_path) + + server_country = {} + new_config_dict = {} + + orig_path = os.path.join(directory, '../TCP') + config_dict = {} + server_country = {} + for filename in os.listdir(orig_path): + if filename.endswith('.ovpn'): + country = filename.split('.')[0] + file_path = os.path.join(orig_path, filename) + lines = [line.rstrip('\n') for line in open(file_path)] + + hostname = "" + for line in lines: + if line.startswith('remote'): + hostname = line.split(' ')[1] + if len(hostname) > 0: + new_path = os.path.join(updated_vpn_path, hostname + '.ovpn') + shutil.copyfile(file_path, new_path) + server_country[hostname] = country + + # remove extracted folder + shutil.rmtree(os.path.join(directory, '../TCP')) + shutil.rmtree(os.path.join(directory, '../UDP')) + + # add dns update options to each file + logging.info("Appending DNS update options") + for filename in os.listdir(updated_vpn_path): + file_path = os.path.join(updated_vpn_path, filename) + with open(file_path, 'a') as f: + f.write('\n') + f.write('up /etc/openvpn/update-resolv-conf\n') + f.write('down 
/etc/openvpn/update-resolv-conf\n') + message = hash_file(file_path) + new_config_dict[filename] = message + + delete_list = [] + update_list = [] + # delete and update + for vp in old_config_dict: + found_vpn_flag = 0 + for newvp in new_config_dict: + if(vp == newvp): + found_vpn_flag = 1 + if(old_config_dict[vp] != new_config_dict[newvp]): +# print('vpn update'+ str(vp)) + update_list.append(vp) + else: +# print('no update needed') + continue + if found_vpn_flag == 0: + delete_list.append(vp) + # new additions + add_list = [] + add_list.extend((set(new_config_dict.keys()) - set(old_config_dict.keys()))) + print('vp\'s to be added: ' , add_list) + print('vp\'s to be deleted: ' , delete_list) + print('vp\'s to be updated: ', update_list) + +# print(new_config_dict) + output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') + pickle.dump(new_config_dict, output) + output.close() + + + print os.path.join(directory, 'servers.txt'), len(server_country) + with open(os.path.join(directory, 'servers.txt'), 'w') as f: + for hostname in server_country: + f.write('|'.join([hostname, server_country[hostname]]) + '\n') + + + return [delete_list, update_list, add_list] + + +def create_config_files(directory): + """ + Initialize directory ready for vpn walker + :param directory: the path where you want this to happen + :return: + """ + config_zip_url = "https://hidemyass.com/vpn-config/vpn-configs.zip" + if not os.path.exists(directory): - os.makedirs(directory) - with open(os.path.join(directory, "servers.txt"), 'w') as f: - f.write(resp.content) - - for server_line in servers: - if server_line.strip() == "": - continue - server_line = server_line.split("|") - try: - ip, desc, country, udp_sup, tcp_sup = server_line - except ValueError: - ip, desc, country, udp_sup, tcp_sup, no_rand = server_line - with open(os.path.join(directory, ip + ".ovpn"), 'w') as file_o: - file_o.write(template) - # create tcp if available, else udp - tcp_sup = tcp_sup.strip() - if tcp_sup: - port, proto = 443, "tcp" - else: - port, proto = 53, "udp" - file_o.write("remote {0} {1}\n".format(ip, port)) - file_o.write("proto {0}\n".format(proto)) - # add automatic dns server update - file_o.write("up /etc/openvpn/update-resolv-conf\n") - file_o.write("down /etc/openvpn/update-resolv-conf\n") + os.makedirs(directory) + + logging.info("Starting to download hma config file zip") + + zip_response = urllib2.urlopen(config_zip_url) + zip_content = zip_response.read() + zip_path = os.path.join(directory, '../vpn-configs.zip') + + with open(zip_path,'w') as f: + f.write(zip_content) + logging.info("Extracting zip file") + unzip(zip_path, os.path.join(directory, '../')) + + ca_url = "https://vpn.hidemyass.com/vpn-config/keys/ca.crt" + hmauserauth_url = "https://vpn.hidemyass.com/vpn-config/keys/hmauser.crt" + hmauserkey_url = "https://vpn.hidemyass.com/vpn-config/keys/hmauser.key" + + ca_response = urllib2.urlopen(ca_url) + ca_content = ca_response.read() + with open(os.path.join(directory, '../ca.crt'), 'w') as f: + f.write(ca_content) + + response_userauth = urllib2.urlopen(hmauserauth_url) + userauth_content = response_userauth.read() + with open(os.path.join(directory, '../hmauser.key'), 'w') as f: + f.write(userauth_content) + + response_userkey = urllib2.urlopen(hmauserkey_url) + userkey_content = response_userkey.read() + with open(os.path.join(directory, '../hmauser.key'), 'w') as f: + f.write(userkey_content) + + # remove zip file + os.remove(zip_path) + + + # move all config files to /vpns + orig_path = 
os.path.join(directory, '../TCP') + config_dict = {} + server_country = {} + for filename in os.listdir(orig_path): + if filename.endswith('.ovpn'): + country = filename.split('.')[0] + file_path = os.path.join(orig_path, filename) + lines = [line.rstrip('\n') for line in open(file_path)] + + hostname = "" + for line in lines: + if line.startswith('remote'): + hostname = line.split(' ')[1] + # try: + # ip = socket.gethostbyname(hostname) + # break + # except socket.gaierror: + # logging.exception("Failed to resolve %s" %hostname) + # continue + if len(hostname) > 0: + new_path = os.path.join(directory, hostname + '.ovpn') + shutil.copyfile(file_path, new_path) + server_country[hostname] = country + + # remove extracted folder + shutil.rmtree(os.path.join(directory, '../TCP')) + shutil.rmtree(os.path.join(directory, '../UDP')) + + # add dns update options to each file + logging.info("Appending DNS update options") + for filename in os.listdir(directory): + file_path = os.path.join(directory, filename) + with open(file_path, 'a') as f: + f.write('\n') + f.write('up /etc/openvpn/update-resolv-conf\n') + f.write('down /etc/openvpn/update-resolv-conf\n') + # print(config_dict) + message = hash_file(file_path) + config_dict[filename] = message + output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') + pickle.dump(config_dict, output) + output.close() + + print os.path.join(directory, 'servers.txt'), len(server_country) + with open(os.path.join(directory, 'servers.txt'), 'w') as f: + for hostname in server_country: + f.write('|'.join([hostname, server_country[hostname]]) + '\n') if __name__ == "__main__": if len(sys.argv) != 2: diff --git a/centinel/vpn/ipvanish.py b/centinel/vpn/ipvanish.py index 632360d..d2f021a 100644 --- a/centinel/vpn/ipvanish.py +++ b/centinel/vpn/ipvanish.py @@ -6,12 +6,141 @@ import sys import urllib import zipfile +import urllib2 +import pickle +import hashlib + +def hash_file(filename): + """ + This function returns the SHA-1 hash + of the file passed into it + """ + + # make a hash object + h = hashlib.sha1() + + # open file for reading in binary mode + with open(filename,'rb') as file: + + # loop till the end of the file + chunk = 0 + while chunk != b'': + # read only 1024 bytes at a time + chunk = file.read(1024) + h.update(chunk) + + # return the hex representation of digest + return h.hexdigest() def unzip(source_filename, dest_dir): with zipfile.ZipFile(source_filename) as zf: zf.extractall(dest_dir) +def update_config_files(directory): + """ + Update directory for vpn walker + :param directory: + :return a list of delete, update and added vps: + """ + updated_vpn_path = os.path.join(directory, '../updated_vpns') + if not os.path.exists(updated_vpn_path): + os.makedirs(updated_vpn_path) + + logging.info("Update Ipvanish Configs") + + # read python dict back from file + pkl_file = open(os.path.join(directory, '../config_hash.pkl'), 'rb') + old_config_dict = pickle.load(pkl_file) + pkl_file.close() + + + config_zip_url = "http://www.ipvanish.com/software/configs/configs.zip" + + logging.info("Starting to download IPVanish config file zip") + + zip_response = urllib2.urlopen(config_zip_url) + zip_content = zip_response.read() + zip_path = os.path.join(directory, '../configs.zip') + unzip_path = os.path.join(directory, '../unzipped') + + if not os.path.exists(unzip_path): + os.makedirs(unzip_path) + with open(zip_path, 'w') as f: + f.write(zip_content) + + logging.info("Extracting zip file") + unzip(zip_path, unzip_path) + + + # remove zip file + 
os.remove(zip_path) + + server_country = {} + new_config_dict = {} + + for filename in os.listdir(unzip_path): + if filename.endswith('.ovpn'): + country = filename.split('-')[1] + + file_path = os.path.join(unzip_path, filename) + lines = [line.rstrip('\n') for line in open(file_path)] + + # get ip address for this vpn + hostname = "" + for line in lines: + if line.startswith('remote'): + hostname = line.split(' ')[1] + if len(hostname) > 0: + new_path = os.path.join(updated_vpn_path, hostname + '.ovpn') + shutil.copyfile(file_path, new_path) + server_country[hostname] = country + else: + logging.warn("Unable to resolve hostname and remove %s" % filename) + os.remove(file_path) + + for filename in os.listdir(updated_vpn_path): + file_path = os.path.join(updated_vpn_path, filename) + message = hash_file(file_path) + # print(filename, message) + new_config_dict[filename] = message + + delete_list = [] + update_list = [] + # delete and update + for vp in old_config_dict: + found_vpn_flag = 0 + for newvp in new_config_dict: + if(vp == newvp): + found_vpn_flag = 1 + if(old_config_dict[vp] != new_config_dict[newvp]): + update_list.append(vp) + else: + continue + if found_vpn_flag == 0: + delete_list.append(vp) + # new additions + add_list = [] + add_list.extend((set(new_config_dict.keys()) - set(old_config_dict.keys()))) + print('vp\'s to be added: ', add_list) + print('vp\'s tp be deleted: ', delete_list) + print('vp\'s to be updated: ', update_list) + + output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') + pickle.dump(new_config_dict, output) + output.close() + + print os.path.join(directory, 'servers.txt'), len(server_country) + with open(os.path.join(directory, 'servers.txt'), 'w') as f: + for hostname in server_country: + f.write('|'.join([hostname, server_country[hostname]]) + '\n') + + # remove extracted folder + shutil.rmtree(unzip_path) + + + return [delete_list, update_list, add_list] + def create_config_files(directory): """ @@ -26,24 +155,28 @@ def create_config_files(directory): os.makedirs(directory) logging.info("Starting to download IPVanish config file zip") - url_opener = urllib.URLopener() + zip_response = urllib2.urlopen(config_zip_url) + zip_content = zip_response.read() zip_path = os.path.join(directory, '../configs.zip') unzip_path = os.path.join(directory, '../unzipped') + if not os.path.exists(unzip_path): os.makedirs(unzip_path) + with open(zip_path, 'w') as f: + f.write(zip_content) - url_opener.retrieve(config_zip_url, zip_path) logging.info("Extracting zip file") unzip(zip_path, unzip_path) # remove zip file os.remove(zip_path) + # copy ca and key to root path shutil.copyfile(os.path.join(unzip_path, 'ca.ipvanish.com.crt'), os.path.join(directory, '../ca.ipvanish.com.crt')) # move all config files to /vpns - + config_dict = {} server_country = {} for filename in os.listdir(unzip_path): if filename.endswith('.ovpn'): @@ -53,24 +186,39 @@ def create_config_files(directory): lines = [line.rstrip('\n') for line in open(file_path)] # get ip address for this vpn - ip = "" + hostname = "" for line in lines: if line.startswith('remote'): hostname = line.split(' ')[1] - ip = socket.gethostbyname(hostname) - break - - if len(ip) > 0: - new_path = os.path.join(directory, ip + '.ovpn') + # added because gethostbyname will fail on some hostnames + # try: + # ip = socket.gethostbyname(hostname) + # break + # except socket.gaierror: + # logging.exception("Failed to resolve %s" %hostname) + # continue + + if len(hostname) > 0: + new_path = os.path.join(directory, 
hostname + '.ovpn') shutil.copyfile(file_path, new_path) - server_country[ip] = country + server_country[hostname] = country else: logging.warn("Unable to resolve hostname and remove %s" % filename) os.remove(file_path) + # writing pickle file of ovpn configs + for filename in os.listdir(directory): + file_path = os.path.join(directory, filename) + message = hash_file(file_path) + # print filename, message + config_dict[filename] = message + + output = open(os.path.join(directory, '../config_hash.pkl'), 'wb') + pickle.dump(config_dict, output) + output.close() with open(os.path.join(directory, 'servers.txt'), 'w') as f: - for ip in server_country: - f.write('|'.join([ip, server_country[ip]]) + '\n') + for hostname in server_country: + f.write('|'.join([hostname, server_country[hostname]]) + '\n') # remove extracted folder shutil.rmtree(unzip_path) diff --git a/centinel/vpn/probe.py b/centinel/vpn/probe.py new file mode 100644 index 0000000..e8eb278 --- /dev/null +++ b/centinel/vpn/probe.py @@ -0,0 +1,170 @@ +import os +import time +import json +import csv +import pickle +import socket +import logging +import requests +import subprocess +import multiprocessing as mp +from urlparse import urljoin +import datetime +import country_module as convertor +import centinel.backend +import centinel.vpn.openvpn as openvpn + +def retrieve_anchor_list(directory): + """ Retrieve anchor lists with RIPE API + """ + logging.info("Starting to fetch RIPE anchors") + s_time = time.time() + BASE_URL = 'https://atlas.ripe.net/api/v2' + query_url = BASE_URL + '/anchors/' + anchors = dict() + while True: + resp = requests.get(query_url) + resp = resp.json() + for this in resp['results']: + assert this['geometry']['type'] == "Point" + anchor_name = this['fqdn'].split('.')[0].strip() + anchors[anchor_name] = {'aid': this["id"], + 'pid': this["probe"], + 'ip_v4': this["ip_v4"], + 'asn_v4': this["as_v4"], + 'longitude': this["geometry"]["coordinates"][0], + 'latitude': this["geometry"]["coordinates"][1], + 'country': this["country"], + 'city': this["city"]} + next_url = resp.get("next") + if next_url is None: + break + query_url = urljoin(query_url, next_url) + e_time = time.time() + logging.info("Finishing to fetch RIPE anchors (%s sec)" %(e_time-s_time)) + current_time = datetime.date.today().strftime("%Y-%m-%d") + landmark_path = os.path.join(directory, "landmarks_list_" + str(current_time) + ".pickle") + with open(landmark_path, "w") as f: + pickle.dump(anchors, f) + return anchors + +def send_ping(param): + this_host, ip = param + logging.info("Pinging (%s, %s)" % (this_host, ip)) + times = dict() + ping = subprocess.Popen(["ping", "-c", "10", "-i", "0.3", ip], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, error = ping.communicate() + output = out.split('\n') + this_delays = list() + for i in output: + try: + this_delays.append(float(i.split('time=')[1].split(' ms')[0])) + except: + continue + times[this_host] = this_delays + return times + +def perform_probe(fname, vpn_provider, proxy_ip, hostname, target_cnt, anchors): + """Send ping 10 times to landmarks and choose the minimum + :return: times [host] = list() + """ + logging.info("Start Probing [%s(%s)]" %(hostname, proxy_ip)) + # ping from local to vpn + vp_ping = send_ping((hostname, proxy_ip)) + vp_min = min(vp_ping[hostname]) + # get to others + times = dict() + s_time = time.time() + results = [] + process_num = 25 + pool = mp.Pool(processes=process_num) + results.append(pool.map(send_ping, [(this_host, Param['ip_v4']) for this_host, Param in 
anchors.iteritems()])) + _sum = 0 + _total = 0 + for output in results[0]: + _total += 1 + for key, value in output.iteritems(): + _sum += len(value) + if key not in times: + times[key] = list() + for this in value: + times[key].append(this) + e_time = time.time() + logging.info("Finish Probing [%s(%s)]: average succeeded pings=%.2f/10 (%.2fsec)" + %(hostname, proxy_ip, _sum/float(_total), e_time - s_time)) + pool.close() + pool.join() + # store results + # store as csv file: "vpn_provider, vp_name, vp_ip, vpn_cnt, all_keys()" + keys = sorted(anchors.keys()) + with open(fname, "a") as csv_file: + writer = csv.writer(csv_file) + line = [vpn_provider, hostname, proxy_ip, target_cnt, e_time-s_time, vp_min] + for this_anchor in keys: + if len(times[this_anchor]) > 0: + the_ping = min(times[this_anchor]) + else: + the_ping = None + line.append(the_ping) + writer.writerow(line) + +def start_probe(conf_list, conf_dir, vpn_dir, auth_file, crt_file, tls_auth, + key_direction, sanity_path, vpn_provider, anchors): + """ Run vpn_walk to get pings from proxy to anchors + """ + start_time = time.time() + ping_path = os.path.join(sanity_path, 'pings') + if not os.path.exists(ping_path): + os.makedirs(ping_path) + current_time = datetime.date.today().strftime("%Y-%m-%d") + fname = os.path.join(ping_path, 'pings_' + vpn_provider + '_' + str(current_time) + '.csv') + keys = sorted(anchors.keys()) + with open(fname, "w") as f: + writer = csv.writer(f) + line = ['vpn_provider', 'vp_name', 'vp_ip', 'vpn_cnt', 'time_taken', 'ping_to_vp'] + for k2 in keys: + line.append(k2) + writer.writerow(line) + + for filename in conf_list: + centinel_config = os.path.join(conf_dir, filename) + config = centinel.config.Configuration() + config.parse_config(centinel_config) + # get ip address of hostnames + hostname = os.path.splitext(filename)[0] + try: + vp_ip = socket.gethostbyname(hostname) + except Exception as exp: + logging.exception("Failed to resolve %s : %s" % (hostname, str(exp))) + continue + # get country for this vpn + with open(centinel_config) as fc: + json_data = json.load(fc) + country = "" + if 'country' in json_data: + country = json_data['country'] + if (len(country) > 2): + country = convertor.country_to_a2(country) + # start openvpn + vpn_config = os.path.join(vpn_dir, filename) + logging.info("%s: Starting VPN. (%s)" %(filename, country)) + vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, + crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) + vpn.start() + if not vpn.started: + logging.error("%s: Failed to start VPN!" % filename) + vpn.stop() + time.sleep(5) + continue + # sending ping to the anchors + try: + perform_probe(fname, vpn_provider, vp_ip, hostname, country, anchors) + except: + logging.warning("Failed to send pings from %s" % vp_ip) + logging.info("%s: Stopping VPN." 
+        vpn.stop()
+        time.sleep(5)
+    end_time = time.time() - start_time
+    logging.info("Finished all probing: %.2f sec" % end_time)
\ No newline at end of file
diff --git a/centinel/vpn/purevpn.py b/centinel/vpn/purevpn.py
index 756a82f..fef0f55 100644
--- a/centinel/vpn/purevpn.py
+++ b/centinel/vpn/purevpn.py
@@ -5,12 +5,137 @@
 import sys
 import urllib
 import zipfile
+import pickle
+import hashlib
+
+
+def hash_file(filename):
+    """
+    Return the SHA-1 hash of the file passed in.
+    """
+    # make a hash object
+    h = hashlib.sha1()
+
+    # open the file for reading in binary mode
+    with open(filename, 'rb') as f:
+        # read 1024 bytes at a time until the end of the file
+        chunk = f.read(1024)
+        while chunk != b'':
+            h.update(chunk)
+            chunk = f.read(1024)
+
+    # return the hex representation of the digest
+    return h.hexdigest()
 
 
 def unzip(source_filename, dest_dir):
     with zipfile.ZipFile(source_filename) as zf:
         zf.extractall(dest_dir)
 
 
+def update_config_files(directory):
+    """
+    Update the config directory for the vpn walker.
+    :param directory: path to the existing vpn config directory
+    :return: lists of deleted, updated, and added vantage points
+    """
+    updated_vpn_path = os.path.join(directory, '../updated_vpns')
+    if not os.path.exists(updated_vpn_path):
+        os.makedirs(updated_vpn_path)
+
+    logging.info("Updating PureVPN configs")
+
+    # read the previous hash snapshot back from disk
+    pkl_file = open(os.path.join(directory, '../config_hash.pkl'), 'rb')
+    old_config_dict = pickle.load(pkl_file)
+    pkl_file.close()
+
+    config_zip_url = "https://s3-us-west-1.amazonaws.com/heartbleed/linux/linux-files.zip"
+    logging.info("Starting to download PureVPN config file zip")
+
+    url_opener = urllib.URLopener()
+    zip_path = os.path.join(directory, '../linux_files.zip')
+    url_opener.retrieve(config_zip_url, zip_path)
+    logging.info("Extracting zip file")
+    unzip(zip_path, os.path.join(directory, '../'))
+
+    # remove zip file
+    os.remove(zip_path)
+
+    server_country = {}
+    new_config_dict = {}
+
+    orig_path = os.path.join(directory, '../Linux OpenVPN Updated files/TCP')
+    for filename in os.listdir(orig_path):
+        if filename.endswith('.ovpn'):
+            country = filename.split('-')[0]
+            if '(V)' in country:
+                country = country[:country.find('(V)')]
+
+            file_path = os.path.join(orig_path, filename)
+            lines = [line.rstrip('\n') for line in open(file_path)]
+
+            # get the hostname for this vpn
+            hostname = ""
+            for line in lines:
+                if line.startswith('remote'):
+                    hostname = line.split(' ')[1]
+
+            if len(hostname) > 0:
+                new_path = os.path.join(updated_vpn_path, hostname + '.ovpn')
+                shutil.copyfile(file_path, new_path)
+                server_country[hostname] = country
+
+    # remove extracted folder
+    shutil.rmtree(os.path.join(directory, '../Linux OpenVPN Updated files'))
+
+    # add dns update options to each file, then hash it so hashes are
+    # comparable with the ones produced by create_config_files()
+    logging.info("Appending DNS update options")
+    for filename in os.listdir(updated_vpn_path):
+        file_path = os.path.join(updated_vpn_path, filename)
+        with open(file_path, 'a') as f:
+            f.write("\n")
+            f.write("up /etc/openvpn/update-resolv-conf\n")
+            f.write("down /etc/openvpn/update-resolv-conf\n")
+        message = hash_file(file_path)
+        new_config_dict[filename] = message
+
+    delete_list = []
+    update_list = []
+    # vantage points that disappeared, or whose config content changed
+    for vp in old_config_dict:
+        if vp in new_config_dict:
+            if old_config_dict[vp] != new_config_dict[vp]:
+                update_list.append(vp)
+        else:
+            delete_list.append(vp)
+
+    # new additions
+    add_list = []
+    add_list.extend(set(new_config_dict.keys()) - set(old_config_dict.keys()))
+    print 'VPs to be added:', add_list
+    print 'VPs to be deleted:', delete_list
+    print 'VPs to be updated:', update_list
+
+    # write the new hash snapshot
+    output = open(os.path.join(directory, '../config_hash.pkl'), 'wb')
+    pickle.dump(new_config_dict, output)
+    output.close()
+
+    print os.path.join(directory, 'servers.txt'), len(server_country)
+    with open(os.path.join(directory, 'servers.txt'), 'w') as f:
+        for hostname in server_country:
+            f.write('|'.join([hostname, server_country[hostname]]) + '\n')
+
+    return [delete_list, update_list, add_list]
 
 def create_config_files(directory):
     """
@@ -33,6 +158,7 @@ def create_config_files(directory):
     # remove zip file
     os.remove(zip_path)
 
+    # copy ca and key to the root path
     shutil.copyfile(os.path.join(directory, '../Linux OpenVPN Updated files', 'ca.crt'),
                     os.path.join(directory, '../ca.crt'))
@@ -40,7 +166,7 @@
                     os.path.join(directory, '../Wdc.key'))
     # move all config files to /vpns
     orig_path = os.path.join(directory, '../Linux OpenVPN Updated files/TCP')
-
+    config_dict = {}
     server_country = {}
     for filename in os.listdir(orig_path):
         if filename.endswith('.ovpn'):
@@ -52,17 +178,22 @@
             lines = [line.rstrip('\n') for line in open(file_path)]
 
             # get ip address for this vpn
-            ip = ""
+            hostname = ""
             for line in lines:
                 if line.startswith('remote'):
                     hostname = line.split(' ')[1]
-                    ip = socket.gethostbyname(hostname)
-                    break
-
-            if len(ip) > 0:
-                new_path = os.path.join(directory, ip + '.ovpn')
+                    # resolution dropped here because gethostbyname fails on some hostnames:
+                    # try:
+                    #     ip = socket.gethostbyname(hostname)
+                    #     break
+                    # except socket.gaierror:
+                    #     logging.exception("Failed to resolve %s" % hostname)
+                    #     continue
+
+            if len(hostname) > 0:
+                new_path = os.path.join(directory, hostname + '.ovpn')
                 shutil.copyfile(file_path, new_path)
-                server_country[ip] = country
+                server_country[hostname] = country
 
     # remove extracted folder
     shutil.rmtree(os.path.join(directory, '../Linux OpenVPN Updated files'))
@@ -75,11 +206,17 @@
             f.write("\n")
             f.write("up /etc/openvpn/update-resolv-conf\n")
            f.write("down /etc/openvpn/update-resolv-conf\n")
+        message = hash_file(file_path)
+        config_dict[filename] = message
+
+    output = open(os.path.join(directory, '../config_hash.pkl'), 'wb')
+    pickle.dump(config_dict, output)
+    output.close()
 
     print os.path.join(directory, 'servers.txt'), len(server_country)
     with open(os.path.join(directory, 'servers.txt'), 'w') as f:
-        for ip in server_country:
-            f.write('|'.join([ip, server_country[ip]]) + '\n')
+        for hostname in server_country:
+            f.write('|'.join([hostname, server_country[hostname]]) + '\n')
 
 
 if __name__ == "__main__":
diff --git a/centinel/vpn/totalvpn.py b/centinel/vpn/totalvpn.py
new file mode 100644
index 0000000..e69de29
diff --git a/centinel/vpn/tunnelbear.py b/centinel/vpn/tunnelbear.py
new file mode 100644
index 0000000..393bd5d
--- /dev/null
+++ b/centinel/vpn/tunnelbear.py
@@ -0,0 +1,74 @@
+import os
+import shutil
+import logging
+import hashlib
+import pickle
+
+
+def hash_file(filename):
+    """
+    Return the SHA-1 hash of the file passed in.
+    """
+    # make a hash object
+    h = hashlib.sha1()
+
+    # open the file for reading in binary mode
+    with open(filename, 'rb') as f:
+        # read 1024 bytes at a time until the end of the file
+        chunk = f.read(1024)
+        while chunk != b'':
+            h.update(chunk)
+            chunk = f.read(1024)
+
+    # return the hex representation of the digest
+    return h.hexdigest()
+
+
+def create_config_files(directory):
+    """
+    Initialize a directory ready for the vpn walker.
+    :param directory: the path where the config files should be created
+    :return:
+    """
+    if not os.path.exists(directory):
+        os.makedirs(directory)
+
+    # TODO: add code to download the credentials and config files from the provider's site
+    orig_path = '/nfs/london/data2/shicho/proxy-configs-2018/ovpn.tbear-split'
+    server_country = {}
+    config_dict = {}
+    for filename in os.listdir(orig_path):
+        if filename.endswith('.ovpn'):
+            country = filename.split('-')[1]
+            file_path = os.path.join(orig_path, filename)
+            lines = [line.rstrip('\n') for line in open(file_path)]
+
+            # get the hostname for this vpn
+            hostname = ""
+            for line in lines:
+                if line.startswith('remote'):
+                    hostname = line.split(' ')[1]
+            if len(hostname) > 0:
+                new_path = os.path.join(directory, hostname + '.ovpn')
+                shutil.copyfile(file_path, new_path)
+                server_country[hostname] = country
+
+    # add dns update options to each file, then hash it
+    logging.info("Appending DNS update options")
+    for filename in os.listdir(directory):
+        file_path = os.path.join(directory, filename)
+        with open(file_path, 'a') as f:
+            f.write('\n')
+            f.write('up /etc/openvpn/update-resolv-conf\n')
+            f.write('down /etc/openvpn/update-resolv-conf\n')
+        message = hash_file(file_path)
+        config_dict[filename] = message
+
+    output = open(os.path.join(directory, '../config_hash.pkl'), 'wb')
+    pickle.dump(config_dict, output)
+    output.close()
+
+    print os.path.join(directory, 'servers.txt'), len(server_country)
+    with open(os.path.join(directory, 'servers.txt'), 'w') as f:
+        for hostname in server_country:
+            f.write('|'.join([hostname, server_country[hostname]]) + '\n')
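
Usage note: a minimal sketch, not part of the patch above, of how the delete/update/add lists returned by purevpn.update_config_files() might be consumed by the --update-purevpn-configs path. The helper name apply_vpn_updates and the choice to delete stale .ovpn files from the live config directory are assumptions for illustration, not behavior this patch implements.

# Illustrative sketch only. Assumes `vpn_dir` is the directory populated by
# create_config_files() and that ../config_hash.pkl already exists next to it.
import os
import logging

import centinel.vpn.purevpn as purevpn


def apply_vpn_updates(vpn_dir):
    # update_config_files() downloads the latest PureVPN bundle, rebuilds the
    # hash snapshot, and reports which vantage points were removed, changed, or added.
    delete_list, update_list, add_list = purevpn.update_config_files(vpn_dir)

    # Hypothetical policy: drop .ovpn files for vantage points that no longer
    # exist upstream so the walker stops scheduling them.
    for filename in delete_list:
        path = os.path.join(vpn_dir, filename)
        if os.path.exists(path):
            os.remove(path)
            logging.info("Removed stale config %s" % filename)

    logging.info("%d configs updated, %d configs added" % (len(update_list), len(add_list)))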