In [1]:
#
#
#   This file gives a demonstration of how Points of Interest were determined for locations of electric vehicle chargers.
#   
#   Before writing this program, each charging location's latitude, longitude, and name was passed into Google Places.
#   The best place from the places returned from Google Places was determined via name similarity. The "Types" (i.e. POI)
#   for the place was then attached to the charging location. These locations with this additional data were then saved
#   into the file 'data_with_types_and_access.pickle' and are loaded in at the beginning of this program.
#
#   This program has two main components: the locations with Types/POI data from Google Places, 
#   and filter commands (saved in filterCommands.txt). filterCommands.txt contains a python dictionary with various POIs
#   and has different keywords that can be given. Multiple keywords can be used together to increase/reduce the number of results
#   that get filtered. These can also be used in conjunction with the Types/POI data from Google Places.
#   
#
#   After running this program, all location POIs that were not found but had an original POI from PlugShare were
#   defaulted to their PlugShare POI. Then, all non-US locations were filtered out, and the remaining location POIs
#   were identified by hand by looking up the latitude and longitude in google maps, as well as looking up the name
#   of the location. All POIs were then analyzed a second time by a human in excel to account for small errors that may have
#   been made.
#
#   After filling in remaining POIs, POIs were then grouped into larger categories for analysis
#


#Import required libraries
import pandas as pd
import numpy as np
from googleplaces import GooglePlaces
import itertools
import pickle
import requests
import json
import re

In [2]:
#Load in the location data array with data from google places API
with open('data_with_types_and_access.pickle', 'rb') as handle:
    locationArray = pickle.load(handle)

print("Number of locations in array: " + str(len(locationArray)))

Number of locations in array: 20333


In [3]:
############################################
#         Helper Functions                 #
############################################

#A more advanced string checker to account for words that may be at the beginning or end of a string
#that also avoids picking up substrings of words
def containsWord(s, w):
    return (' ' + w + ' ') in (' ' + s + ' ')


#Determine restriction level (public/priate) and Google Places categories
def parseOptions(options):
    printTrue = False
    restrictionLevel = ""
    categoryList = list()
    if "PRINT" in options:
        printTrue = True
    for option in options:
        if "Gtype" in option:
            category = option[6:] #indexing could be off
            categoryList.append(category)
        elif "pubRstr" in option:
            restrictionLevel = option[8:]
    return [printTrue, restrictionLevel, categoryList]


#properOptions() - Checks the options for the given query and see if the current item qualifies.

#locationTypes - List of the types the location is, 
#GtypeList - master list of all Gtypes allowed
#Question, if a location has no types, (returned null earlier)
#Should we not include it at all if there are  "-" attributes
def properOptions(locationData, restrictionLevel, GtypeList):
    #Check the restriction level (i.e. public or private)
    if restrictionLevel == "P":
        if locationData['real']['accessType'] != "Public":
            return False
    elif restrictionLevel == "R":
        if locationData['real']['accessType'] != "Restricted":
            return False
    
    #Set up locationTypes variable. It holds all of the types that the item identifies as.
    locationTypes = None
    if locationData['placeSpecifics'] == -1:
        locationTypes = list()
    else:
        locationTypes = locationData['placeSpecifics']['types']

    badType = list()
    goodType = list()
    #Collect the options data
    for Gtype in GtypeList:
        if Gtype[0] == "-":
            badType.append(Gtype[1:])
        else:
            goodType.append(Gtype)

    #Figure out if any types for the item are in the good or bad lists
    for locType in locationTypes:
        if locType in badType:
            return False
        if locType in goodType:
            return True
    
    #If there are no included categories, then it
    #is implied that all categories (besides the "-" ones)
    #are included
    if len(goodType) == 0:
        return True

    return False


In [4]:
#Takes filter instructions (POIs) and applies them to all places found in locationArray
def fillInPOIs(filterCommands, locationArray):
    #For every POI listed
    for POI in filterCommands.keys():
        #Loop through each filter for the specific POI
        for POIindex, optionKeyWords in enumerate(filterCommands[POI]):
            POIcount = 0
            printTrue, restrictionLevel, GtypeList = parseOptions(optionKeyWords['options'])
            keywords = optionKeyWords['keyWords']
            for locationData in locationArray:
                #If the location was already assigned to a POI, skip it
                if locationData['POI'] != -1:
                    continue
                
                #Check options from filter. If current item doesn't fit options, skip it
                if not properOptions(locationData, restrictionLevel, GtypeList):
                    continue
                    
                #easier reading and remove tricky characters (',', '/', turn all to lower)
                realName = locationData['real']['name'].lower().replace(",", "").replace("/", " ")

                #Check the keywords for this POI
                for keyword in keywords:
                    
                    #If we want to exclude this keyword
                    if keyword[0] == "-":
                        keyword = keyword[1:]
                        if containsWord(realName, keyword):
                            locationData['POI'] = -1
                            break
                            
                    #If the keyword is "ALL", then we accept it no matter what its name is
                    elif keyword == "ALL":
                        POIcount += 1
                        if printTrue == True:
                            print(realName)                
                        locationData['POI'] = POI
                        
                    #If we want to include the key word
                    elif containsWord(realName, keyword):
                        POIcount += 1
                        if printTrue == True:
                            print(realName)
                        locationData['POI'] = POI
                        
            #If printing option is on, print out the number of locations caught by the filter
            if printTrue == True and POIcount != 0:
                print("Number of locations: " + str(POIcount))
    return locationArray

In [17]:
#Read in the filter commands for each POI
filterCommands = eval(open('filterCommands.txt', 'r').read())

#Reset all of the location POIs to -1.
#By doing this, you can do iterative testing with the PRINT command inside of filterCommands.txt
count = 0
for locationData in locationArray:
    locationData['POI'] = -1
    
#Reassign locationArray with the new POIs
locationArray = fillInPOIs(filterCommands, locationArray)


lane motor museum
bmw zentrum
old sturbridge village
johnson county central library
sooke harbour house
discovery center of springfield
parking lot b
the nelson-atkins museum of art
laura secord homestead
carnegie science center
big green island - cortes island bc
steinbeck - city of salinas
kaatza station museum
discovery place
dawson creek art gallery
britannia mine museum
west coast railway heritage park
oakland museum of california garage
east hampton village center
corning glass museum parking lot
perot museum
green venture (ecohouse)
village green
natural science & history museum
liberty science center parking
rockhorse park
kennedy space center visitors complex
ontario science centre
ocean beaches glassblowing & gallery
las vegas springs preserve
bc forest discovery centre
indianapolis children's museum garage
pacific science center garage
castle dome mine museum
museum of innovation and science
soldiers and sailors memorial hall & museum
mercedes-benz usi visitor center
museum


In [16]:
#Computer number of locations that did and did not get POIs assigned
printRemainder = False
countMissing = 0
countFound = 0
for locationData in locationArray:
    if locationData['POI'] == -1:
        countMissing += 1
        if printRemainder == True:
            print(locationData['real']['name'].lower())
    else:
        countFound += 1
            
print("Assigned POIs to " + str(countFound) + " locations")
print("Could not assign POIs to " + str(countMissing) + " locations")

Assigned POIs to 16040 locations
Could not assign POIs to 4293 locations


In [8]:
#Put results into a dataframe

df = pd.DataFrame(np.nan, index=range(len(locationArray)), columns=['locationId'])
df['bestPOI'] = np.nan

rowNum = 0
for locationData in locationArray:
    df.loc[rowNum, 'locationId'] = int(locationData['real']['id'])
    if locationData['POI'] != -1:
        df.loc[rowNum, 'bestPOI'] = locationData['POI']
        
    rowNum += 1
    if rowNum % 1000 == 0:
        print(rowNum)

print("Made Dataframe with location ID and best determined POI")


#Change to True if you want to save results for inspection
if False:
    fileName = "newPOIData.csv"
    df.to_csv(fileName, encoding='utf-8', index=False)
    print("Written to csv: " + fileName)
else:
    print("Did not save file")

1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
Made Dataframe with location ID and best determined POI
Did not save file
