In [1]:
import geohash
import pandas as pd
import configparser
import googlemaps
import time
import json
import os
import networkx as nx
from networkx.readwrite import json_graph
from bs4 import BeautifulSoup
import requests
import csv
from math import radians, cos, sin, asin, sqrt
from dateutil.parser import parse

In [2]:
# Load the Google Maps API key from the [Keys] section of config.ini so that the
# key itself is not stored in the notebook.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open, so a missing config.ini
# shows up as a KeyError on the next line rather than as a file error here.
config.read("config.ini")
API_key = config['Keys']['google_API']
# Shared client used by get_geohash_directions and google_city_location.
gmaps = googlemaps.Client(key=API_key)

In [3]:
def get_geohash_directions(gh_A,gh_B):
    """Ask Google Directions for the driving route between two geohashes.

    Parameters
    ----------
    gh_A, gh_B : str
        Geohashes of the origin and destination; each is decoded with
        ``geohash.decode`` and passed to ``gmaps.directions`` as-is.

    Returns
    -------
    dict
        ``{'distance': <distance value of the first leg of the first route>,
        'steps': <number of steps in that leg>}``.
        When the API returns no route at all, ``distance`` is ``float('inf')``
        and ``steps`` is ``0`` so that range checks such as
        ``distance/1000 <= MAX_RANGE`` reject the pair instead of the lookup
        crashing with ``IndexError``.
    """
    GPS_A = geohash.decode(gh_A)
    GPS_B = geohash.decode(gh_B)
    directions_result = gmaps.directions(GPS_A,
                                         GPS_B,
                                         mode="driving")
    # Pause after every request (also on the no-route path) to throttle API usage.
    time.sleep(1)
    if not directions_result:
        # No drivable route between the two points.
        return {'distance': float('inf'), 'steps': 0}
    first_leg = directions_result[0]['legs'][0]
    return {'distance': first_leg['distance']['value'],
            'steps': len(first_leg['steps'])}

In [4]:
def load_pop_dict(path='populations.json'):
    """Load the population lookup from a JSON file.

    Parameters
    ----------
    path : str, optional
        File to read. Defaults to ``'populations.json'`` in the current
        working directory, which is what the notebook has always used.

    Returns
    -------
    The decoded JSON content (whatever ``json.load`` yields for the file).

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the file is not valid JSON.
    """
    with open(path, 'r') as pop_file:
        return json.load(pop_file)

In [43]:
# Number of leading geohash characters compared when build_connections
# pre-filters candidate nodes by geohash neighbourhood.
GEOHASH_PRECISION = 2
MAX_RANGE = 346 #Base Tesla Model 3 range (346 km/215 miles)
# Read once from populations.json when this cell runs; used by get_edge_weight,
# build_pop_dict and get_close_population.
POP_DICT = load_pop_dict()
def build_connections(G,src_hash):
    """Add edges from ``src_hash`` to every node of ``G`` that is within range.

    A node is a candidate when the first ``GEOHASH_PRECISION`` characters of its
    geohash are among ``geohash.expand`` of the source prefix. Candidates whose
    great-circle distance (``haversine``) is at most ``MAX_RANGE`` get a driving
    directions lookup, and an edge is added when the returned distance divided
    by 1000 is also at most ``MAX_RANGE``.

    Parameters
    ----------
    G : networkx graph
        Mutated in place. ``src_hash`` and every candidate node must already
        carry an ``"SC_index"`` attribute convertible with ``int``.
    src_hash : str
        Geohash of the node to connect.

    Returns
    -------
    The same graph object ``G``.
    """
    print (nx.number_of_nodes(G))
    # Older networkx exposes the attribute mapping as ``G.node``; releases that
    # dropped it only offer ``G.nodes``. Support both.
    node_attrs = G.node if hasattr(G, 'node') else G.nodes

    # Loop-invariant values, computed once instead of once per candidate.
    nearby_prefixes = geohash.expand(src_hash[0:GEOHASH_PRECISION])
    src_lon_lat = reverse_GPS(geohash.decode(src_hash))

    # Snapshot the candidates before any edge is added.
    candidates = [node for node in G
                  if node[0:GEOHASH_PRECISION] in nearby_prefixes
                  and node != src_hash]

    for node_gh in candidates:
        node_lon_lat = reverse_GPS(geohash.decode(node_gh))
        # Cheap straight-line filter first, so that only plausible pairs
        # trigger a (slow, rate-limited) directions request.
        if haversine(*src_lon_lat, *node_lon_lat) <= MAX_RANGE:
            directions = get_geohash_directions(src_hash, node_gh)
            if directions['distance']/1000 <= MAX_RANGE:
                src_index = int(node_attrs[src_hash]["SC_index"])
                node_index = int(node_attrs[node_gh]["SC_index"])
                # Attributes are passed as keywords, which both old and new
                # networkx accept (a positional dict is rejected by 2.x).
                G.add_edge(src_hash, node_gh,
                           # NOTE(review): weight is always 0; get_edge_weight
                           # exists in this notebook but is not called here.
                           weight=0,
                           distance=directions['distance'],
                           steps=directions['steps'],
                           # Lower / higher SC_index of the two endpoints,
                           # used to determine the order of connection.
                           first_node=str(min(src_index, node_index)),
                           second_node=str(max(src_index, node_index)),
                           # Copy so edges do not share one list object.
                           lon_lat_1=list(src_lon_lat),
                           lon_lat_2=node_lon_lat)
    return G

In [44]:
def get_edge_weight(G,src_hash,connection_hash):
    """Weight an edge by the share of total population near its two endpoints.

    The weight is the sum of ``get_close_population`` for both geohashes divided
    by ``POP_DICT['total']``. ``G`` is accepted but not used. A ``KeyError``
    raised while computing the weight is printed and ``0`` is returned instead.
    """
    try:
        nearby_population = get_close_population(src_hash) + get_close_population(connection_hash)
        weight = nearby_population / POP_DICT['total']
    except KeyError as missing_key:
        print(missing_key)
        weight = 0
    return weight

In [7]:
def reverse_GPS(GPS):
    """Return the first two items of ``GPS`` as a new list in swapped order.

    Used in this notebook to turn a decoded (lat, lon) pair into [lon, lat].
    """
    first, second = GPS[0], GPS[1]
    return [second, first]

In [45]:
def build_network(csv_path="Nov16_SC_data.csv", out_path="Nov16_network.json"):
    """Build the charger graph from a CSV export and save it as node-link JSON.

    Rows are sorted by their parsed ``"Open Date"`` and numbered 1..N in that
    order (``"SC_index"``, stored as a string). Each row becomes a node keyed by
    the geohash of its ``"GPS"`` coordinates, carrying every dataframe column as
    a node attribute. A geohash seen for the first time is wired up with
    ``build_connections``; a repeated geohash only has its attributes
    overwritten by the later row.

    Parameters
    ----------
    csv_path : str, optional
        Input CSV; must contain at least ``"GPS"`` ("lat,lon" text) and
        ``"Open Date"`` columns. Defaults to the file the notebook always used.
    out_path : str, optional
        Destination of the node-link JSON dump.

    Returns
    -------
    The populated ``networkx.Graph``.
    """
    G = nx.Graph()
    df = pd.read_csv(csv_path,encoding='latin1',
                     dtype={'Stalls': float,'Zip':str,'Tesla':str,'Elev':str})
    df["lat"], df["lon"] = zip(*df["GPS"].str.split(',').tolist())
    df["lat"], df["lon"] = df["lat"].astype(float), df["lon"].astype(float)
    df['GPS_lon_lat'] = df.apply(lambda x: [x["lon"],x["lat"]], axis=1)
    df['geohash'] = df.apply(lambda x: geohash.encode(x['lat'],x['lon']), axis=1)
    df['SC_data'] = ''
    df['population'] = df['geohash'].apply(get_close_population).astype(float)

    # Parse the dates only to get a chronological sort, then store them back as
    # plain strings for the JSON dump.
    df["Open Date"] = df["Open Date"].apply(parse)
    df = df.sort_values(["Open Date"])
    df["SC_index"] = [str(position) for position in range(1, len(df) + 1)]
    df["Open Date"] = df["Open Date"].apply(str)

    for row_label in df.index:
        node_hash = df.at[row_label, 'geohash']
        print (str(df.at[row_label, "Open Date"]) + " " + df.at[row_label, 'SC_index'])
        node_data = {column: df.at[row_label, column] for column in df.columns}
        is_new_node = node_hash not in G
        # Keyword attributes work on both old and new networkx (a positional
        # attribute dict and assignment into G.node are 1.x-only). For an
        # existing node this updates its attributes; since every row supplies
        # the same set of columns, that is a full overwrite.
        G.add_node(node_hash, **node_data)
        if is_new_node:
            build_connections(G, node_hash)

    network = json_graph.node_link_data(G)
    with open(out_path,"w") as f:
        json.dump(network,f)
    return G

In [55]:
# Builds the graph from Nov16_SC_data.csv and writes Nov16_network.json. Every
# candidate pair costs one Google Directions request plus a one-second sleep,
# so this cell is slow and needs network access and a valid API key.
build_network()

2012-11-19 00:00:00 1
1
2012-11-19 00:00:00 2
2
2012-11-19 00:00:00 3
3
2012-11-19 00:00:00 4
4
2012-11-19 00:00:00 5
5
2012-12-16 00:00:00 6
6
2012-12-16 00:00:00 7
7
2012-12-21 00:00:00 8
8
2013-06-19 00:00:00 9
9
2013-06-19 00:00:00 10
10
2013-06-26 00:00:00 11
11
2013-07-03 00:00:00 12
12
2013-07-13 00:00:00 13
13
2013-07-13 00:00:00 14
14
2013-07-17 00:00:00 15
15
2013-07-24 00:00:00 16
16
2013-07-25 00:00:00 17
17
2013-08-16 00:00:00 18
18
2013-08-20 00:00:00 19
19
2013-08-28 00:00:00 20
20
2013-09-07 00:00:00 21
21
2013-09-12 00:00:00 22
22
2013-09-13 00:00:00 23
23
2013-09-17 00:00:00 24
24
2013-10-03 00:00:00 25
25
2013-10-07 00:00:00 26
26
2013-10-16 00:00:00 27
27
2013-10-18 00:00:00 28
28
2013-10-22 00:00:00 29
29
2013-10-23 00:00:00 30
30
2013-10-24 00:00:00 31
31
2013-11-02 00:00:00 32
32
2013-11-08 00:00:00 33
33
2013-11-14 00:00:00 34
34
2013-11-15 00:00:00 35
35
2013-11-19 00:00:00 36
36
2013-11-20 00:00:00 37
37
2013-11-25 00:00:00 38
38
2013-11-26 00:00:00 39
39
2013

<networkx.classes.graph.Graph at 0x257413ca2e8>

In [None]:
def load_network(net):
    """Load a graph from a node-link JSON file.

    ``net`` is the path of the file. A zero-byte file yields a fresh empty
    ``nx.Graph()``; otherwise the JSON content is decoded and handed to
    ``json_graph.node_link_graph``. A missing file raises from
    ``os.path.getsize``.
    """
    if not os.path.getsize(net) > 0:
        return nx.Graph()
    with open(net,"r") as network_file:
        payload = json.load(network_file)
        return json_graph.node_link_graph(payload)

In [None]:
def _leading_charger_count(charging_lines):
    """Return the leading run of digits of the first entry, or 0 if there is none."""
    if not charging_lines:
        return 0
    digits = ''
    for character in charging_lines[0].strip():
        if not character.isdigit():
            break
        digits += character
    return digits if digits else 0


def parse_Tesla_SC_data(URL):
    """Scrape the labelled ``<p>`` sections of a page into a dict.

    For every ``<p>`` that contains a ``<strong>`` element, the text of that
    element becomes a key and the value is the list of the paragraph's direct
    text children (child tags and single-space strings are dropped).

    When at least one such paragraph was found, a ``'Chargers'`` entry is added:
    a one-element list holding the leading digits of the first ``'Charging'``
    line as a string (so "12 ..." yields ``'12'``, not just its first
    character), or ``0`` when there is no ``'Charging'`` section or it does not
    start with a number.

    Parameters
    ----------
    URL : str
        Page to fetch with ``requests.get``.

    Returns
    -------
    dict
        Empty when the page has no ``<p>`` containing ``<strong>``.
    """
    SC_data = {}
    r = requests.get(URL)
    soup = BeautifulSoup(r.text,"html.parser")
    for attr in soup.find_all('p'):
        label = attr.find('strong')
        if label is None:
            continue
        # Iterating the direct children and keeping the name-less ones (text
        # nodes) is how lines separated by <br> are recovered.
        SC_data[label.text] = [value for value in attr.children
                               if value.name is None and value != ' ']
    if SC_data:
        SC_data['Chargers'] = [_leading_charger_count(SC_data.get('Charging', []))]
    # Pause after each page fetch to avoid hammering the site.
    time.sleep(1)
    return SC_data

In [None]:
def google_city_location(city):
    """Geocode ``city`` with the shared ``gmaps`` client.

    Returns whatever ``gmaps.geocode`` returns, after a 0.1 second pause that
    spaces out consecutive requests.
    """
    geocode_result = gmaps.geocode(city)
    time.sleep(.1)
    return geocode_result

In [29]:
def build_pop_dict(city_populations=None, out_path="populations.json"):
    """Geocode a city -> population mapping and save it keyed by geohash.

    Each city is geocoded with ``google_city_location``; the first result's
    location is encoded as a geohash, which becomes the key of an entry
    ``{'city', 'population', 'lat', 'lon'}``. Cities for which geocoding returns
    nothing are reported and skipped. A ``'total'`` key holds the summed
    population of the cities that were found.

    Parameters
    ----------
    city_populations : mapping of str -> number, optional
        Cities to geocode. Defaults to the module-level ``POP_DICT``.
        NOTE(review): ``POP_DICT`` is loaded from ``populations.json``, the same
        file this function overwrites by default with the geohash-keyed layout,
        so a second run with the defaults would feed that rewritten layout back
        in. Pass the raw city mapping explicitly to stay reproducible.
    out_path : str, optional
        JSON file to write. Defaults to ``"populations.json"``.

    Returns
    -------
    dict
        The geohash-keyed dictionary that was written to ``out_path``.
    """
    if city_populations is None:
        city_populations = POP_DICT
    total_pop = 0
    pop_gps_dict = {}
    for city,pop in city_populations.items():
        # The geocoding helper defined in this notebook is google_city_location;
        # the previously referenced google_city_GPS is not defined anywhere here.
        geocode_data = google_city_location(city)
        if not geocode_data:
            print (geocode_data)
            print (city + " not found")
            continue
        city_location = geocode_data[0]['geometry']['location']
        gh = geohash.encode(city_location['lat'],city_location['lng'])
        pop_gps_dict[gh] = {'city':city,
                            'population':pop,
                            'lat':city_location['lat'],
                            'lon':city_location['lng']}
        total_pop = total_pop + pop
    pop_gps_dict['total'] = total_pop
    with open(out_path,"w") as f:
        json.dump(pop_gps_dict,f)
    return pop_gps_dict

In [28]:
def get_close_population(src_hash):
    """Sum the population of every ``POP_DICT`` entry close to ``src_hash``.

    An entry counts as close when the first three characters of its geohash key
    are among ``geohash.expand(src_hash[0:3])`` (the original author's note
    describes geohash precision 3 as a radius of about 78 km).

    Parameters
    ----------
    src_hash : str
        Geohash of the location of interest.

    Returns
    -------
    The summed ``'population'`` values; ``0`` when nothing is close.
    """
    # Computed once: the neighbourhood does not depend on the entry inspected.
    nearby_prefixes = set(geohash.expand(src_hash[0:3]))
    # 'total' is the aggregate written by build_pop_dict, not a location entry.
    return sum(data['population'] for gh, data in POP_DICT.items()
               if gh != 'total' and gh[0:3] in nearby_prefixes)

In [31]:
def haversine(lon1, lat1, lon2, lat2):
    """
    Great-circle distance in kilometres between two points given in decimal
    degrees. Note the argument order: longitude first, then latitude, for each
    point. Uses an Earth radius of 6367 km.
    """
    earth_radius_km = 6367
    # Work in radians from here on.
    lam1, phi1, lam2, phi2 = (radians(degrees) for degrees in (lon1, lat1, lon2, lat2))
    half_dlon = (lam2 - lam1)/2
    half_dlat = (phi2 - phi1)/2
    # Haversine of the central angle.
    hav = sin(half_dlat)**2 + cos(phi1) * cos(phi2) * sin(half_dlon)**2
    central_angle = 2 * asin(sqrt(hav))
    return earth_radius_km * central_angle

In [56]:
# NOTE(review): network_simulating is imported here rather than in the import
# cell at the top of the notebook.
import network_simulating as ns
# Score the network stored in network.json against the reference network that
# build_network wrote to Nov16_network.json.
# NOTE(review): nothing in this notebook writes network.json — presumably it
# comes from network_simulating; confirm.
ref_network = load_network("Nov16_network.json")
model_network = load_network("network.json")
ns.network_similarity_score(ref_network,model_network)

(0.8621700879765389,
 ['drtjjxb51j1m',
  'drb64sf3c0zg',
  'dnrmnwmxw6xg',
  '9r155k3u7n6p',
  '9xj6r2fgejem',
  '9q9j94z124dt',
  'dr7342hfkf4s',
  'dr5j2upqczh5',
  'c27u43qb61c3',
  'dq0rjfx76y5r',
  'dj3qdb38y5pu',
  'dq8ym3d6djes',
  '9vmey1fu04hj',
  'dpehjss14crf',
  '9ygyhgv2gw1x',
  'dp4mbt9n5c9f',
  '9wfy2sk8pv3y',
  '9q5bf6r884m5',
  'djjtc8unw1kf',
  'dp6tqdrm3xp2',
  'c21431kn9xeu',
  'dqc5c46e0fyv',
  'dr7342hfkf4s',
  '9qk4z15rwvbw',
  '9qeqhugxq5w5',
  'dj6yb2e9pczt',
  'drguywrcdhgu',
  'dqbmckshk5ec',
  '9my6pv8yv83w',
  'dr7342hfkf4s',
  'dr5j2upqczh5',
  '9y28quc6gbuy',
  'drguywrcdhgu',
  'dq2sss9thmwx',
  '9q5bf6r884m5',
  'dp0wrn6ejg4h',
  'c84u0tsd4n8f',
  '9wf57n42sz1e',
  'drk4dss6r30b',
  'dhvqywmxpuqv',
  '9qfupsk0en7s',
  '9ydvcnkpj2wx',
  'dr7342hfkf4s',
  'dphfvg7jc6sp',
  '9vdgwjr7bu1x',
  '9rbdb0yzg1hj',
  '9qx5nymnbb0r',
  '9y3f4vtgk966',
  '9whptzqutbfu',
  '9wyr2h9tw6ey',
  'dns5mfy1t1re',
  '9yuyhe4mhxpy',
  '9qqbhq7ur59f',
  '9q5bf6r884m5',
  '9qbq

In [57]:
# NOTE(review): scratch arithmetic. 0.8621700879765389 is the similarity score
# printed by the previous cell; the meaning of 342 and 280 is not stated
# anywhere in the notebook — TODO name these constants.
(0.8621700879765389*342)-280

14.862170087976324

In [58]:
# NOTE(review): scratch arithmetic reusing the unexplained 342 and 280; 14 looks
# like the truncated result of the previous cell — confirm and document.
14/(342-280)

0.22580645161290322

In [53]:
# NOTE(review): scratch arithmetic; neither 26 nor 91 is explained in the
# notebook — TODO document or remove.
26/91

0.2857142857142857