In [10]:
import pandas as pd
import numpy as np
import geocoder
import warnings
# NOTE(review): this suppresses every warning for the rest of the session, not just a
# specific known-noisy one; consider narrowing the filter so real problems stay visible.
warnings.filterwarnings("ignore")

### Geocode training data
#### If Street differs from both From and To, the row represents a portion of the street between the two cross streets; we take the midpoint of the lat & lng of the two intersections.
#### If Street equals From or To, the row represents a perpendicular portion of the street; we take the coordinates of the intersection of the two streets.

In [5]:
# Collect the distinct (Street, From, To) combinations found in the training data.
train = pd.read_csv('train-parking.csv')
corrds = train[["Street","From","To"]]
# Take an explicit copy: new columns are assigned to `uniquecords` further down, and
# assigning into a frame obtained by filtering another frame is what makes pandas
# emit SettingWithCopyWarning. The copy makes `uniquecords` an independent table.
uniquecords = corrds.drop_duplicates().copy()

In [6]:
# Preview the first rows of the de-duplicated (Street, From, To) table.
uniquecords.head()

Unnamed: 0,Street,From,To
0,Mission Street,25th Street,26th Street
1,Polk Street,Ellis Street,Olive Street
2,Van Ness Avenue,Geary Boulevard,Myrtle Street
3,Van Ness Avenue,Bush Street,Fern Street
4,Van Ness Avenue,Daniel Burnham Court,Post Street


In [7]:
# Find the coordinates of the intersection between the target street and its
# From / To cross street.
def _intersection_latlng(street, cross_street):
    """Geocode the intersection of ``street`` and ``cross_street``.

    Builds the query ``"<street> & <cross_street>, San Francisco, CA"``, sends it
    to ``geocoder.google`` and returns the result's ``latlng`` attribute.

    Returns:
        The ``latlng`` value of the geocoding result, or ``None`` when that value
        is ``None`` or empty.
    """
    add = "%s & %s, San Francisco, CA" % (street, cross_street)
    cord = geocoder.google(add).latlng
    # NOTE(review): an unsuccessful lookup appears to surface as either None or an
    # empty list depending on the geocoder version — confirm. Both are mapped to
    # None so that later "is it missing?" checks only have one case to handle.
    return cord if cord else None


def findlatlng_from(row):
    """Return the coordinates of the intersection of ``row["Street"]`` and ``row["From"]``.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with "Street" and "From" keys.

    Returns:
        The geocoded ``latlng`` value, or ``None`` when "Street" equals "From"
        (no lookup is made in that case) or when the lookup returns nothing.
    """
    if row["Street"] == row["From"]:
        return None
    return _intersection_latlng(row["Street"], row["From"])


def findlatlng_to(row):
    """Return the coordinates of the intersection of ``row["Street"]`` and ``row["To"]``.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with "Street" and "To" keys.

    Returns:
        The geocoded ``latlng`` value, or ``None`` when "Street" equals "To"
        (no lookup is made in that case) or when the lookup returns nothing.
    """
    if row["Street"] == row["To"]:
        return None
    return _intersection_latlng(row["Street"], row["To"])

In [8]:
# Retry pass for the Google geocoding API: the original note says Google only allows a
# limited number of lookups at a time, so rows left without coordinates by an earlier
# pass are geocoded again here while rows that already have coordinates are untouched.
def _refill_latlng(row, column, cross_key):
    """Return ``row[column]``, geocoding it again only when it is missing.

    Args:
        row: mapping-like object with "Street", ``cross_key`` and ``column`` keys.
        column: name of the coordinate column to inspect ("latlngfrom" / "latlngto").
        cross_key: name of the cross-street column ("From" / "To").

    Returns:
        The existing value when it is present, or when "Street" equals the cross
        street (no lookup is made then). Otherwise the ``latlng`` of a fresh
        ``geocoder.google`` lookup, or ``None`` if that lookup returns nothing.
    """
    current = row[column]
    # An empty list/tuple counts as missing as well as None, so a failed lookup
    # that was stored as [] gets retried instead of being kept.
    is_missing = current is None or (
        isinstance(current, (list, tuple)) and len(current) == 0
    )
    if not is_missing or row["Street"] == row[cross_key]:
        return current
    add = "%s & %s, San Francisco, CA" % (row["Street"], row[cross_key])
    cord = geocoder.google(add).latlng
    return cord if cord else None


def fillnull(row):
    """Fill a missing ``row['latlngfrom']`` with the Street/From intersection coordinates."""
    return _refill_latlng(row, "latlngfrom", "From")


def fillnull_to(row):
    """Fill a missing ``row['latlngto']`` with the Street/To intersection coordinates."""
    return _refill_latlng(row, "latlngto", "To")

In [30]:
# Build the coordinate table: for every row, geocode the From-side intersection
# first and then the To-side intersection, storing each result in its own column.
for column, finder in (("latlngfrom", findlatlng_from), ("latlngto", findlatlng_to)):
    uniquecords[column] = uniquecords.apply(finder, axis=1)

In [35]:
# Second pass: re-run the lookup for rows whose coordinates are still missing,
# From side first and then To side; existing values are passed through as-is.
for column, filler in (("latlngfrom", fillnull), ("latlngto", fillnull_to)):
    uniquecords[column] = uniquecords.apply(filler, axis=1)

In [38]:
# Preview the table now that the latlngfrom / latlngto columns have been added.
uniquecords.head()

Unnamed: 0,Street,From,To,latlngfrom,latlngto
0,Mission Street,25th Street,26th Street,"[37.7506482, -122.4183159]","[37.749044, -122.4181659]"
1,Polk Street,Ellis Street,Olive Street,"[37.7840397, -122.4193807]","[37.784507, -122.4194781]"
2,Van Ness Avenue,Geary Boulevard,Myrtle Street,"[37.7856601, -122.4214764]","[37.785221, -122.4213855]"
3,Van Ness Avenue,Bush Street,Fern Street,"[37.7884986, -122.4218922]","[37.7880441, -122.4218018]"
4,Van Ness Avenue,Daniel Burnham Court,Post Street,"[37.7870844, -122.4217571]","[37.7866423, -122.4215171]"


In [45]:
# Get the final coordinates through averaging
def getfinalgeo(row):
    """Combine the From-side and To-side coordinates of a row into one point.

    Args:
        row: mapping-like object with 'latlngfrom' and 'latlngto' entries, each
            either a coordinate pair or a missing value (``None`` or empty).

    Returns:
        * ``None`` when both sides are missing;
        * the other side's value when exactly one side is missing;
        * otherwise a list holding the element-wise mean of the two pairs.
    """
    def _missing(value):
        # None and an empty list/tuple are both treated as "no coordinates".
        # An empty entry must not reach np.array below, because a ragged pair
        # of lists cannot be averaged.
        return value is None or (
            isinstance(value, (list, tuple)) and len(value) == 0
        )

    start, end = row['latlngfrom'], row['latlngto']
    if _missing(start) and _missing(end):
        return None
    if _missing(start):
        return end
    if _missing(end):
        return start
    # Stack the two pairs as rows and average down the columns.
    return list(np.average(np.array([end, start]), axis=0))

In [46]:
def _coord_component(pair, position):
    """Return ``pair[position]``, or NaN when ``pair`` is not a long-enough list/tuple."""
    if isinstance(pair, (list, tuple)) and len(pair) > position:
        return pair[position]
    return np.nan


# Combine the two endpoint coordinates into a single point per row.
uniquecords['latlng'] = uniquecords.apply(getfinalgeo,axis =1)
# getfinalgeo can return None when neither endpoint has coordinates; indexing None
# would raise a TypeError, so such rows get NaN for lat/lng instead.
uniquecords['lat'] = uniquecords['latlng'].apply(lambda x: _coord_component(x, 0))
uniquecords['lng'] = uniquecords['latlng'].apply(lambda x: _coord_component(x, 1))

In [48]:
# Preview the table with the combined latlng column and its lat / lng components.
uniquecords.head()

Unnamed: 0,Street,From,To,latlngfrom,latlngto,latlng,lat,lng
0,Mission Street,25th Street,26th Street,"[37.7506482, -122.4183159]","[37.749044, -122.4181659]","[37.7498461, -122.4182409]",37.749846,-122.418241
1,Polk Street,Ellis Street,Olive Street,"[37.7840397, -122.4193807]","[37.784507, -122.4194781]","[37.78427335, -122.4194294]",37.784273,-122.419429
2,Van Ness Avenue,Geary Boulevard,Myrtle Street,"[37.7856601, -122.4214764]","[37.785221, -122.4213855]","[37.78544055, -122.42143095]",37.785441,-122.421431
3,Van Ness Avenue,Bush Street,Fern Street,"[37.7884986, -122.4218922]","[37.7880441, -122.4218018]","[37.78827135, -122.421847]",37.788271,-122.421847
4,Van Ness Avenue,Daniel Burnham Court,Post Street,"[37.7870844, -122.4217571]","[37.7866423, -122.4215171]","[37.78686335, -122.4216371]",37.786863,-122.421637


In [54]:
# Write the coordinate table to CSV, omitting the DataFrame index.
# NOTE(review): the destination is an absolute path inside one user's home directory,
# so this cell will most likely fail on any other machine; consider a path relative to
# the project / data directory.
uniquecords.to_csv(path_or_buf='/Users/yandixin_929/Desktop/MSAN/Advanced_Machine_Learning/project/long_lat_table_217.csv',index=False)