In [1]:
# Show which python executable is found first on the shell's PATH.
!which python

//anaconda/bin/python


In [68]:
###################################################################################################
# This script walks through a latitude-longitude "square" and pings at regular intervals, grabbing 
# the nearest address and checking if it's a building (i.e. Google reports its location_type as 
# 'ROOFTOP').  If so, it adds both the address and (lat,long) coordinates to a pandas dataframe, 
# which is saved to CSV (the SQL database setup further down is currently commented out).
###################################################################################################

import os # to read an optional project-directory override from the environment
import re # to read in file with api key info and search for the right key
import time # to calculate how long it takes to run the main loop
from math import cos # to calculate lat,lng

import googlemaps # from https://github.com/googlemaps/google-maps-services-python, installed using "pip install -U googlemaps"
import pandas as pd
import psycopg2  # to work with PostgreSQL database
from sqlalchemy import create_engine # to work with PostgreSQL database
from sqlalchemy_utils import database_exists, create_database # to work with PostgreSQL database

In [69]:
# Project root directory. Later cells build file names by plain string concatenation
# (path+'...'), so the value must end with a path separator; os.path.join(..., '') adds one
# only when it is missing.
# Set the SOLAR_PROJECT_DIR environment variable to run the notebook from another location;
# when it is unset the original hard-coded directory is used.
path = os.path.join(os.environ.get('SOLAR_PROJECT_DIR', '/Users/brianna/Dropbox/Insight/solar/'), '')

In [4]:
# Get API keys from file so we can use the different APIs.
# The `with` block closes the file handle even if the read fails.
with open(path+'insight_project/api_keys/api_keys.txt','r') as api_file:
    api_text = api_file.read()

# The Google Maps key is expected as an entry of the form  googlemaps_api='<key>'.
# A raw string keeps \S as a regex escape rather than a (invalid) string escape.
googlemaps_api = re.findall(r"googlemaps_api='(\S+)'", api_text)
if not googlemaps_api:
    # Fail with a clear message instead of an IndexError on googlemaps_api[0] below.
    raise ValueError("No googlemaps_api='...' entry found in api_keys.txt")

# Open API for google maps (the first matching entry is used)
gmaps = googlemaps.Client(key=googlemaps_api[0])


In [5]:
#In Python: Define a database name (this notebook's database is called 'locations'), 
# and your username for your computer (CHANGE IT BELOW). 

#dbname = 'locations'
#username = 'brianna'

In [6]:
# 'engine' is a connection to a database
# Here, we're using postgres, but sqlalchemy can connect to other things too.

# To start postgres, open the Elephant application in Applications!

#engine = create_engine('postgres://%s@localhost/%s'%(username,dbname))
#print engine.url

## create a database (if it doesn't exist)
#if not database_exists(engine.url):
#    create_database(engine.url)
#print(database_exists(engine.url))


In [79]:
# For now the results are collected in a pandas dataframe, which can be passed on to the
# postgresql database afterwards.

# Pre-allocate nrows empty rows up front, so that the main loop fills in existing rows
# instead of creating a new row in each iteration.
nrows = 120000
df = pd.DataFrame(
    index=range(nrows),
    columns=[
        'GridLat', 'GridLng',      # grid point that was pinged
        'AddressString',           # formatted address returned for that point
        'AddLat', 'AddLng',        # coordinates of the address itself
        'LocationType',
        'PlaceID',
    ],
)

In [80]:
# Decide what latitude and longitude you want to walk over.

# Palo Alto: Start in southwest corner of Palo Alto (551 Junipero Serra)
# SF: Start off the west coast, travel 0.1723 degrees east (lng) and 0.117 degrees north (lat)
x0=-122.5145  # longitude is x, move west to east
y0=37.6930    # latitude is y, move south to north
#y0=37.6955    # latitude is y, move south to north
xstop=-122.368 # a longitude East of SF at which a walk could stop (alternative to counting steps)


# more positive longitude is going East (note that means less negative, since numbers are negative)
# 
#xn=37.465530  # lat difference is 0.052861)
#yn=-122.107286  # lng difference is -0.117031

# Latitude is basically constant across the globe, 69.172 miles for each degree 
# Longitude is cos(latitude)* 69.172 mi
# Latitude is ~0.65 radians in NorCal, so one degree longitude is about cos(.65)*69.172=55.0667 miles in this region
MilesPerDegLat = 69.172
MilesPerDegLng = cos(.65)*MilesPerDegLat
FeetPerMile = 5280

LatStep=0.001 # Stepping North by (69.172*.001) mile increments
LngStep=0.001 # Stepping East by (55.0667*.001) mile increments

# Round both step sizes the same way: round(x, 0) returns a float on Python 2 and Python 3,
# so the two printed lines always have the same format.
LatStepFeet = round(LatStep*MilesPerDegLat*FeetPerMile, 0)
LngStepFeet = round(LngStep*MilesPerDegLng*FeetPerMile, 0)
print('Latitude step is '+str(LatStepFeet)+' feet')
print('Longitude step is '+str(LngStepFeet)+' feet')

xsteps=100 # walks xsteps*LngStep = 100*0.001 = 0.1 degrees longitude
ysteps=100 # walks ysteps*LatStep = 100*0.001 = 0.1 degrees latitude

Latitude step is 365.0 feet
Longitude step is 291.0 feet


In [81]:
# Walk the grid one latitude line at a time (south to north) and, along each line, one
# longitude step at a time (west to east). Every grid point is reverse-geocoded to its
# nearest address; a new address is then geocoded to get its own coordinates and is stored
# in df only if its location_type is 'ROOFTOP'.

wholestart = time.time()
i = 0  # index of the next dataframe row to populate

# Addresses already stored in df. A set makes the "have we seen this address?" check a
# constant-time lookup instead of a comparison against every row of df on each ping.
StoredAddresses = set(df['AddressString'].dropna())

for y in range(ysteps):
    #Step through the latitude lines in a grid with width=LatStep.
    GridLat = y0 + y*LatStep
    print('Starting at new latitude line '+str(GridLat))

    # Longitude of the address geocoded most recently on THIS latitude line. It is reset for
    # every line: otherwise the first comparison below raises NameError on a fresh kernel,
    # and each later line would inherit the previous line's last address and skip over most
    # of its own grid points.
    AddLng = float('-inf')
    LastPingLng = None  # last longitude actually sent to the API on this line

    xloopstart = time.time()
    for x in range(xsteps):
        # Step through each longitude line in this latitude. If the longitude of the
        # nearest house is further east than the point on the grid where you were going to
        # start, skip over to the most eastern longitude.
        # (This will miss some houses, but I'm trying to employ the 20/80 "close enough" rule.)
        GridLng = x0 + x*LngStep
        if AddLng > GridLng:
            GridLng = AddLng

        # While the grid catches up with a skipped-ahead AddLng, GridLng stays the same
        # point; pinging it again would only repeat the previous lookup, so save the call.
        if GridLng == LastPingLng:
            continue
        LastPingLng = GridLng

        # First, find the address that's most closely associated with the lat,lon coordinates
        # on your grid.
        ReverseGeocodeResult = gmaps.reverse_geocode((GridLat, GridLng))
        if not ReverseGeocodeResult:
            # No result for this point: nothing to index into, move to the next point.
            continue
        AddressString = ReverseGeocodeResult[0]['formatted_address']

        # Only look up addresses that are not already in the dataframe.
        if AddressString in StoredAddresses:
            continue

        # Now that you have the address, go back and find the exact lat,lng coordinates
        GeocodeResult = gmaps.geocode(AddressString)
        if not GeocodeResult:
            continue
        AddLat = GeocodeResult[0]['geometry']['location']['lat']
        AddLng = GeocodeResult[0]['geometry']['location']['lng']
        PlaceID = GeocodeResult[0]['place_id']  # fetched but not stored; the PlaceID column stays empty

        # We only want rooftops (no parks or other structures), so only add to the
        # dataframe if the 'location_type' == 'ROOFTOP'
        LocationType = GeocodeResult[0]['geometry']['location_type']
        if LocationType != 'ROOFTOP':
            continue

        try:
            # df.at writes straight into df; the chained form df.col[i] = value may assign
            # to a temporary copy instead.
            df.at[i, 'GridLat'] = GridLat
            df.at[i, 'GridLng'] = GridLng
            df.at[i, 'AddLat'] = AddLat
            df.at[i, 'AddLng'] = AddLng
            df.at[i, 'AddressString'] = AddressString
            df.at[i, 'LocationType'] = LocationType
        except TypeError as err:
            # Keep walking the grid as before, but say which row was skipped and why
            # instead of silently dropping it.
            print('Could not store row '+str(i)+' for '+repr(AddressString)+': '+str(err))
            continue

        StoredAddresses.add(AddressString)
        i += 1
        if i%50 == 0:
            print(i)

    xloopend = time.time()
    print('Time to traverse lat '+str(GridLat)+' is '+str(xloopend - xloopstart))

wholeend = time.time()
print(wholeend - wholestart)

Starting at new latitude line 37.693
Time to traverse lat -122.4152 is 20.6214540005
Starting at new latitude line 37.694
Time to traverse lat -122.4152 is 19.6714789867
Starting at new latitude line 37.695


KeyboardInterrupt: 

In [None]:
# Preview the first rows of the results dataframe.
df.head()

In [None]:
# Save the results dataframe as CSV under path+'latlng/'.
# NOTE(review): this writes all nrows rows of df, including rows the loop never populated —
# confirm that whatever reads this file drops the empty rows.
df.to_csv(path+'latlng/SF10000b.csv')

In [42]:
# Inspect the latitude of the grid line the loop was on most recently (e.g. after an interrupt).
GridLat

37.695699999999995

In [43]:
# Inspect the longitude of the grid point the loop pinged most recently (e.g. after an interrupt).
GridLng

-122.465