In [4]:
import geohash
import pandas as pd
import configparser
import googlemaps
import time
import json
import os
import networkx as nx
from networkx.readwrite import json_graph
from bs4 import BeautifulSoup
import requests
import csv
from math import radians, cos, sin, asin, sqrt

In [5]:
# Read the Google Maps API key from the [Keys] section of config.ini so the
# key itself never appears in the notebook.
config = configparser.ConfigParser()
# NOTE: ConfigParser.read() silently skips files it cannot open, so a missing
# config.ini shows up as a KeyError on the lookup below, not as a file error.
config.read("config.ini")
API_key = config['Keys']['google_API']
# Shared Google Maps client used by the directions and geocoding helpers below.
gmaps = googlemaps.Client(key=API_key)

In [6]:
def get_geohash_distance(gh_A,gh_B):
    """Return the driving distance between two geohashes via Google Directions.

    Parameters
    ----------
    gh_A, gh_B : str
        Geohashes of the origin and destination.

    Returns
    -------
    int or float
        The ``distance.value`` of the first leg of the first route (the caller
        divides it by 1000 before comparing with a range in km), or
        ``float('inf')`` when the API returns no route at all, so that any
        "within range" comparison made by the caller is False.
    """
    GPS_A = geohash.decode(gh_A)
    GPS_B = geohash.decode(gh_B)
    directions_result = gmaps.directions(GPS_A,
                                         GPS_B,
                                         mode="driving")
    # Crude rate limiting: at most one Directions request per second.
    time.sleep(1)
    # An empty result list means no drivable route was found; indexing [0]
    # would raise IndexError and abort the whole network build.
    if not directions_result:
        return float('inf')
    return directions_result[0]['legs'][0]['distance']['value']

In [8]:
def load_pop_dict():
    """Load and return the parsed contents of ``populations.json``.

    The file is looked up relative to the current working directory.
    """
    with open('populations.json', 'r') as pop_file:
        return json.load(pop_file)

In [9]:
# Number of leading geohash characters compared when build_connections
# pre-selects candidate neighbour nodes.
GEOHASH_PRECISION = 2
MAX_RANGE = 346 # km; base Tesla Model 3 range (346 km/215 miles)
# Population lookup table, loaded once from populations.json at cell execution.
POP_DICT = load_pop_dict()
def build_connections(G,src_hash):
    """Add an edge from ``src_hash`` to every node of ``G`` within driving range.

    Candidates are narrowed in three steps, cheapest first:

    1. nodes whose geohash prefix (``GEOHASH_PRECISION`` characters) lies in
       the source cell or one of the cells returned by ``geohash.expand``;
    2. of those, nodes whose great-circle distance is at most ``MAX_RANGE`` km;
    3. of those, nodes whose driving distance (one Directions request each)
       divided by 1000 is at most ``MAX_RANGE``.

    Each edge stores ``weight``, ``distance`` and the ``[lon, lat]`` of both
    endpoints (``lon_lat_1`` for the source, ``lon_lat_2`` for the neighbour).

    Parameters
    ----------
    G : networkx graph
        Graph that already contains ``src_hash``; modified in place.
    src_hash : str
        Geohash of the node to connect.

    Returns
    -------
    The same graph ``G``.
    """
    # Progress indicator for the long-running build.
    print (nx.number_of_nodes(G))
    src_GPS = reverse_GPS(geohash.decode(src_hash))
    # Loop-invariant: the neighbourhood of the source cell is computed once.
    nearby_cells = geohash.expand(src_hash[0:GEOHASH_PRECISION])
    node_hashes = [node for node in G
                   if node != src_hash
                   and node[0:GEOHASH_PRECISION] in nearby_cells]
    for node_gh in node_hashes:
        node_GPS = reverse_GPS(geohash.decode(node_gh))
        # Cheap straight-line filter before spending a Directions request.
        if not haversine(*src_GPS, *node_GPS) <= MAX_RANGE:
            continue
        distance = get_geohash_distance(src_hash, node_gh)
        if distance/1000 <= MAX_RANGE:
            # Attributes are passed as keywords: networkx >= 2.0 no longer
            # accepts a positional attribute dict, and keywords also work on 1.x.
            G.add_edge(src_hash, node_gh,
                       weight=get_edge_weight(G, src_hash, node_gh),
                       distance=distance,
                       lon_lat_1=list(src_GPS),
                       lon_lat_2=node_GPS)
    return G

In [10]:
def get_edge_weight(G,src_hash,connection_hash):
    """Weight an edge by the share of total population near its endpoints.

    The weight is the sum of the populations closest to each endpoint
    (looked up starting at geohash precision 8) divided by
    ``POP_DICT['total']``. ``G`` is accepted but not used.

    A ``KeyError`` during the lookup is printed and yields a weight of 0.
    """
    try:
        src_population = get_closest_population(src_hash,8)
        dst_population = get_closest_population(connection_hash,8)
        weight = (src_population+dst_population)/POP_DICT['total']
    except KeyError as missing:
        print(missing)
        weight = 0
    return weight

In [11]:
def reverse_GPS(GPS):
    """Return the first two items of ``GPS`` swapped, as a new two-element list.

    Used to turn the pair returned by ``geohash.decode`` into the
    (lon, lat) argument order that ``haversine`` takes.
    """
    swapped = [GPS[1]]
    swapped.append(GPS[0])
    return swapped

In [12]:
def _json_default(value):
    """json fallback: convert numpy scalars (e.g. int64 from a pandas column) to Python values."""
    if hasattr(value, 'item'):
        return value.item()
    raise TypeError("Object of type %s is not JSON serializable" % type(value).__name__)


def build_network():
    """Build the Supercharger graph and persist it to ``network.json``.

    Starts from the graph returned by ``load_network()``, reads
    ``Teslarati_SC_data.csv``, derives ``lat``/``lon``/``GPS_lon_lat``/
    ``geohash``/``population`` columns, and for every station whose geohash is
    not yet a node: scrapes its Tesla page (``Tesla`` column holds the URL),
    adds the node with all CSV columns plus ``SC_data`` as attributes, and
    connects it with ``build_connections``.

    Returns
    -------
    The resulting networkx graph.
    """
    G = load_network()
    df = pd.read_csv("Teslarati_SC_data.csv",dtype={'Stalls': float,'Zip':str,'Tesla':str,'Elev':str})
    df["lat"], df["lon"] = zip(*df["GPS"].str.split(',').tolist())
    df["lat"], df["lon"] = df["lat"].astype(float), df["lon"].astype(float)
    df['GPS_lon_lat'] = df.apply(lambda x: [x["lon"],x["lat"]], axis=1)
    df['geohash'] = df.apply(lambda x: geohash.encode(x['lat'],x['lon']), axis=1)
    df['population'] = df.apply(lambda x: get_closest_population(x['geohash'],8), axis=1)

    for i in df.index:
        node_hash = df['geohash'][i]
        if node_hash in G:
            continue
        node_attrs = {key: df[key][i] for key in df.keys()}
        # Put the scraped data straight into the node attributes. Writing it
        # back with df['SC_data'][i] = ... is chained assignment: it raises
        # KeyError when the CSV has no SC_data column and is not guaranteed to
        # write through when it does.
        node_attrs['SC_data'] = parse_Tesla_SC_data(df['Tesla'][i])
        # Keyword attributes work on networkx 1.x and 2.x; a positional
        # attribute dict is rejected from 2.0 on.
        # NOTE(review): assumes no CSV column name clashes with add_node's own
        # parameter names.
        G.add_node(node_hash, **node_attrs)
        build_connections(G,node_hash)

    network = json_graph.node_link_data(G)
    # Serialize before opening the file so a serialization error cannot leave
    # network.json truncated; `default` handles numpy scalars pulled from df.
    payload = json.dumps(network, default=_json_default)
    with open("network.json","w") as f:
        f.write(payload)
    return G

In [23]:
# Build the Supercharger graph (resuming from network.json when it has content).
# Stations not already in the graph trigger Tesla page requests and Google
# Directions requests, each followed by a 1 s sleep, so this cell can be slow.
# NOTE(review): load_network and parse_Tesla_SC_data are defined in cells below
# this one; on a fresh kernel those cells must be run first.
test_g = build_network()

In [14]:
def load_network():
    """Load the cached graph from ``network.json``, or start an empty one.

    Returns
    -------
    The graph rebuilt from the node-link JSON in ``network.json`` when that
    file exists and is non-empty; otherwise a new empty ``nx.Graph``.
    """
    # os.path.getsize raises OSError for a missing file, which made the very
    # first run (before any network.json exists) crash; check existence first.
    if os.path.isfile("network.json") and os.path.getsize("network.json") > 0:
        with open("network.json","r") as f:
            data = json.load(f)
        return json_graph.node_link_graph(data)
    return nx.Graph()

In [24]:
def parse_Tesla_SC_data(URL):
    """Scrape the labelled ``<p>`` sections of a Tesla Supercharger page.

    Every ``<p>`` that contains a ``<strong>`` element becomes one entry:
    the key is the ``<strong>`` text and the value is the list of bare text
    children of the paragraph (tags such as ``<br>`` and single-space strings
    are dropped).

    Parameters
    ----------
    URL : str
        Address of the Supercharger page.

    Returns
    -------
    dict
        Section name -> list of str. When a non-empty 'Charging' section was
        found, 'Chargers' additionally holds ``SC_data['Charging'][0][0]``.
    """
    SC_data = {}
    # Without a timeout a stalled connection would block the build forever.
    r = requests.get(URL, timeout=30)
    soup = BeautifulSoup(r.text,"html.parser")
    for attr in soup.find_all('p'):
        heading = attr.find('strong')
        if not heading:
            continue
        # Text children have no tag name; keeping only those is what splits
        # the paragraph at its <br> tags. str() detaches each string from the
        # parse tree so the whole document is not kept alive by the result.
        SC_data[heading.text] = [str(value) for value in attr.children
                                 if value.name is None and value != ' ']
    # Derived once, after all sections are known. Doing this inside the loop
    # raised KeyError whenever a <strong> paragraph preceded 'Charging'.
    charging = SC_data.get('Charging')
    if charging and charging[0]:
        # NOTE(review): [0][0] keeps only the FIRST CHARACTER of the first
        # Charging line; if that line starts with a stall count, counts of 10
        # or more are truncated to one digit - confirm the intent.
        SC_data['Chargers'] = charging[0][0]
    # Be polite to the server: at most one page request per second.
    time.sleep(1)
    return SC_data

In [17]:
def google_city_location(city):
    """Geocode ``city`` with the shared Google Maps client.

    Returns whatever ``gmaps.geocode`` returns for the query, after a 0.1 s
    pause that throttles successive calls.
    """
    geocode_results = gmaps.geocode(city)
    time.sleep(.1)
    return geocode_results

In [18]:
def build_pop_dict():
    """Geocode every city in ``POP_DICT`` and rewrite ``populations.json``.

    Iterates ``POP_DICT`` as ``city -> population``, geocodes each city and
    writes a new mapping ``geohash -> {'city', 'population', 'lat', 'lon'}``
    plus a ``'total'`` entry holding the summed population of the cities that
    were found. Cities with no geocoding result are reported and skipped.

    Note that the output overwrites the same ``populations.json`` that
    ``load_pop_dict`` reads, with a different schema, and that the in-memory
    ``POP_DICT`` is not refreshed here.
    """
    total_pop = 0
    pop_gps_dict = {}
    for city,pop in POP_DICT.items():
        # The original called google_city_GPS, which is not defined in this
        # notebook (NameError); google_city_location is the geocoding helper.
        geocode_data = google_city_location(city)
        try:
            city_location = geocode_data[0]['geometry']['location']
        except IndexError:
            # Empty result list: the geocoder did not recognise the city.
            print (geocode_data)
            print (city + " not found")
            continue
        gh = geohash.encode(city_location['lat'],city_location['lng'])
        pop_gps_dict[gh] = {'city':city,
                            'population':pop,
                            'lat':city_location['lat'],
                            'lon':city_location['lng']}
        total_pop = total_pop + pop
    pop_gps_dict['total'] = total_pop
    with open("populations.json","w") as f:
        json.dump(pop_gps_dict,f)

In [19]:
def get_closest_population(src_hash,precision):
    """Return the population of the ``POP_DICT`` entry nearest to ``src_hash``.

    Candidates are the entries whose geohash prefix of length ``precision``
    lies in the source cell or one of the cells from ``geohash.expand``. If
    there are none, the prefix is shortened one character at a time; once no
    prefix is left, every entry is a candidate. Among several candidates the
    one with the smallest great-circle distance wins.

    Parameters
    ----------
    src_hash : str
        Geohash of the point of interest.
    precision : int
        Prefix length to start the search with.

    Returns
    -------
    The ``'population'`` value of the chosen entry.

    Raises
    ------
    KeyError
        If ``POP_DICT`` holds no city entries at all.
    """
    # 'total' is the summed-population bookkeeping entry, not a geohash; left
    # in, it could match a one-character prefix ('t') and then fail on lookup.
    candidates = [gh for gh in POP_DICT if gh != 'total']
    if not candidates:
        raise KeyError('POP_DICT contains no city entries')

    close_gh = candidates
    # Iterative widening with a hard floor; the recursive form kept calling
    # itself with zero and negative precisions when nothing matched.
    while precision > 0:
        nearby_cells = geohash.expand(src_hash[0:precision])
        matches = [gh for gh in candidates if gh[0:precision] in nearby_cells]
        if matches:
            close_gh = matches
            break
        precision -= 1

    if len(close_gh) == 1:
        return POP_DICT[close_gh[0]]['population']
    src_GPS = reverse_GPS(geohash.decode(src_hash))
    closest_gh = min(close_gh,
                     key=lambda gh: haversine(*src_GPS, *reverse_GPS(geohash.decode(gh))))
    return POP_DICT[closest_gh]['population']

In [21]:
def haversine(lon1, lat1, lon2, lat2, radius_km=6367):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    radius_km : float, optional
        Sphere radius in kilometres used for the conversion (default 6367,
        the value this notebook has always used).

    Returns
    -------
    float
        Distance along the sphere, in the same unit as ``radius_km``.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt/asin raise ValueError; clamp first.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    return radius_km * c

In [22]:
# Spot check: population of the POP_DICT entry closest to geohash 'dnq2u3y4b',
# starting the search at prefix length 8.
get_closest_population('dnq2u3y4b',8)

10081