In [1]:
import os
import re
import time

import googlemaps as gmaps
import numpy as np
import pandas as pd

# Read the Google Maps API key from the environment instead of committing it
# to the notebook (raises KeyError if GOOGLE_MAPS_API_KEY is not set).
# NOTE(review): the key that used to be hardcoded on this line is exposed in
# the file history and should be revoked/rotated.
# The module alias `gmaps` is rebound to the client instance here; later
# cells call `gmaps.geocode(...)` on that client.
gmaps = gmaps.Client(key=os.environ['GOOGLE_MAPS_API_KEY'])

In [2]:
%%capture
# `tqdm.auto` selects the notebook widget when one is available; the older
# `tqdm_notebook` alias is deprecated.
from tqdm.auto import tqdm
# Register `progress_apply` / `progress_map` on pandas objects via the
# class-level hook, without instantiating a throwaway progress bar.
tqdm.pandas()

In [3]:
# Load data.csv, report its (rows, columns) shape and preview the first rows.
df = pd.read_csv('data.csv')
n_rows, n_cols = df.shape
print((n_rows, n_cols))
df.head()

(2406, 14)


Unnamed: 0,price,title,longitude,latitude,floor,furnishing,tenants preferred,bathroom,facing,availability,car parking,balcony,owner resides,overlooking
0,"₹ 19,000",3 BHK Apartment for rent in Vijay Shanthi Apar...,12.760201,80.207,,,,,,,,,,
1,"₹ 8,500",1 BHK House for rent in Tambaram Sanatorium,13.044208,80.200531,Ground out of 1 Floor,Unfurnished,Bachelors,1.0,South,Immediately,1 Open,,,
2,"₹ 35,000",3 BHK Builder Floor for rent in Pushpa Nagar-N...,,,2 out of 2 Floors,Semi-Furnished,Family,3.0,East,Immediately,5 Covered,1.0,Same Premise,
3,"₹ 15,000",2 BHK House for rent in Velachery 950 sqft,12.979078,80.219185,1 out of 2 Floors,Unfurnished,Bachelors/Family,2.0,,Immediately,,1.0,,
4,"₹ 15,000",2 BHK Apartment for rent in Arun Excello Sanka...,,,,,,,,,,,,


In [4]:
def parse_address(x, city='Hyderabad'):
    """Turn a listing title into a geocodable "<locality>, <city>" string.

    The title is cut at the first space that is followed by two or more
    digits (e.g. the " 950 sqft" suffix), then everything up to and including
    the last standalone word "in" is dropped, and the remaining text is
    stripped of surrounding whitespace.

    Parameters
    ----------
    x : str
        Listing title, e.g. "2 BHK House for rent in Velachery 950 sqft".
    city : str, optional
        City name appended after the locality. Defaults to 'Hyderabad',
        the value that was previously hardcoded.
        NOTE(review): the sample rows displayed in this notebook carry
        coordinates around (13, 80) and two different localities geocode to
        the identical point, so the data may not be Hyderabad listings --
        confirm the intended city.

    Returns
    -------
    str
        "<locality>, <city>". If the title contains no standalone "in", the
        whole (digit-trimmed) title is used as the locality.
    """
    # Raw strings: '\d' in a normal string literal is an invalid escape.
    head = re.split(r' \d\d+', x, maxsplit=1)[0]
    # `\b` keeps words that merely start with "in" (e.g. "inn") intact.
    locality = re.split(r'\sin\b', head)[-1].strip()
    return f'{locality}, {city}'

In [5]:
# Derive one geocodable address string per listing from its title.
df['address'] = df['title'].map(parse_address)

In [6]:
print(len(df), 'total rows')
# Series.unique() keeps first-seen order, so the geocoding order (and its
# progress log) is the same on every run; list(set(...)) order is arbitrary
# for strings and changes between kernel sessions.
unique_locs = df['address'].unique().tolist()
print(len(unique_locs), 'unique locations')

2406 total rows
778 unique locations


# Geocoding

In [7]:
def get_gmap_geos(locations, max_errors=10, client=None, throttle=.05,
                  retry_delay=2, max_retries=2):
    """Geocode each location string, retrying failed requests.

    Parameters
    ----------
    locations : iterable of str
        Address strings to geocode; each is used as a key of the result.
    max_errors : int, optional
        Geocoding stops early once more than this many locations have
        failed every attempt.
    client : object, optional
        Object exposing ``geocode(location)``. Defaults to the module-level
        ``gmaps`` client.
    throttle : float, optional
        Seconds to sleep after a request that succeeds on the first attempt.
    retry_delay : float, optional
        Seconds to sleep before each retry of a failed request.
    max_retries : int, optional
        Number of retries after the first failed attempt.

    Returns
    -------
    dict
        Maps each processed location to whatever ``client.geocode`` returned,
        or to the string 'error' when every attempt raised. If the run stops
        early because of ``max_errors``, later locations are absent.
    """
    if client is None:
        client = gmaps  # resolved at call time from the notebook namespace
    locations = list(locations)  # allow any iterable; len() is needed below
    total = len(locations)
    geocoded = dict()
    start_time = time.perf_counter()
    errors = 0
    for iteration, location in enumerate(locations):
        for attempt in range(max_retries + 1):
            try:
                geocoded[location] = client.geocode(location)
            except Exception:
                # `Exception` (not a bare except) so Ctrl-C / kernel
                # interrupt still stops the loop.
                if attempt < max_retries:
                    time.sleep(retry_delay)
            else:
                if attempt == 0:
                    time.sleep(throttle)  # <-- throttle
                break
        else:
            # Every attempt raised: record the sentinel and count it.
            geocoded[location] = 'error'
            errors += 1
        if errors > max_errors:
            print('stopped at', iteration, '(max errors reached)')
            # Return right away so an aborted run does not also report
            # "job completed".
            return geocoded
        if iteration > 0 and iteration % 200 == 0:
            now = time.strftime('%H:%M:%S')
            # Percentage is relative to this call's input, not a global list.
            print(f'{now} | {iteration} locations geocoded ({round(iteration/total*100, 2)}%) | {errors} total errors')
    print(f'job completed in {time.perf_counter() - start_time}s\n{errors} requests timed out')
    # A location counts as not found if it errored or came back empty.
    not_found = sum(1 for v in geocoded.values() if v == 'error' or not v)
    print(not_found, 'locations not found')
    return geocoded

In [8]:
# Geocode every distinct address once; each call hits the Google Maps API.
unique_geocodes = get_gmap_geos(locations=unique_locs)

22:11:35 | 200 locations geocoded (25.71%) | 0 total errors
22:13:42 | 400 locations geocoded (51.41%) | 0 total errors
22:15:40 | 600 locations geocoded (77.12%) | 0 total errors
job completed in 475.464760686s
0 requests timed out
0 locations not found


### Map geocoding results back onto `df`

In [9]:
# Attach the raw geocoding response to every row via its address.
# Addresses missing from `unique_geocodes` (e.g. when geocoding stopped early
# after too many errors) get the same 'error' sentinel that failed requests
# use, instead of raising KeyError.
df['gmaps_info'] = df['address'].map(lambda addr: unique_geocodes.get(addr, 'error'))

In [10]:
def extract_coords(geo):
    """Pull a (lat, lng) pair out of a geocoding response.

    Parameters
    ----------
    geo : sequence of dict
        Geocoding results; the last entry is used. Each entry is expected to
        hold ``['geometry']['location']['lat']`` and ``['lng']``.

    Returns
    -------
    tuple
        ``(lat, lng)`` from the last result, or ``(np.nan, np.nan)`` when
        `geo` is empty, is not a result list (e.g. the 'error' sentinel,
        None, NaN) or lacks the expected keys.
    """
    try:
        location = geo[-1]['geometry']['location']  # drop generalized extra results
        return location['lat'], location['lng']
    except (TypeError, KeyError, IndexError):
        # Only the failures expected from malformed or empty responses are
        # handled; anything else (e.g. KeyboardInterrupt) propagates.
        return np.nan, np.nan

In [11]:
# (lat, lng) tuple per row, taken from the geocoding response.
df['coord'] = df['gmaps_info'].apply(extract_coords)
# Fill only the missing coordinates. Assign the result back instead of calling
# fillna(inplace=True) on a column selection: that is chained in-place
# assignment, which warns in recent pandas and does not update `df` under
# Copy-on-Write.
df['latitude'] = df['latitude'].fillna(df['coord'].map(lambda pair: pair[0]))
df['longitude'] = df['longitude'].fillna(df['coord'].map(lambda pair: pair[1]))
# NOTE(review): in the rows displayed by df.head(), the pre-existing
# 'longitude' values (~12-13) look like latitudes and the pre-existing
# 'latitude' values (~80) look like longitudes, while the values filled here
# follow true (lat, lng) order -- confirm the source column labels before
# relying on the merged columns.
df.head()

Unnamed: 0,price,title,longitude,latitude,floor,furnishing,tenants preferred,bathroom,facing,availability,car parking,balcony,owner resides,overlooking,address,gmaps_info,coord
0,"₹ 19,000",3 BHK Apartment for rent in Vijay Shanthi Apar...,12.760201,80.207,,,,,,,,,,,"Vijay Shanthi Apartment, Kelambakkam, Old Mah...",[{'address_components': [{'long_name': 'Chenna...,"(12.7606528, 80.2079447)"
1,"₹ 8,500",1 BHK House for rent in Tambaram Sanatorium,13.044208,80.200531,Ground out of 1 Floor,Unfurnished,Bachelors,1.0,South,Immediately,1 Open,,,,"Tambaram Sanatorium, Hyderabad",[{'address_components': [{'long_name': 'Tambar...,"(17.385044, 78.486671)"
2,"₹ 35,000",3 BHK Builder Floor for rent in Pushpa Nagar-N...,78.486671,17.385044,2 out of 2 Floors,Semi-Furnished,Family,3.0,East,Immediately,5 Covered,1.0,Same Premise,,"Pushpa Nagar-Nungambakkam, Hyderabad",[{'address_components': [{'long_name': 'Pushpa...,"(17.385044, 78.486671)"
3,"₹ 15,000",2 BHK House for rent in Velachery 950 sqft,12.979078,80.219185,1 out of 2 Floors,Unfurnished,Bachelors/Family,2.0,,Immediately,,1.0,,,"Velachery, Hyderabad",[{'address_components': [{'long_name': 'Madipa...,"(12.9771281, 80.22274589999999)"
4,"₹ 15,000",2 BHK Apartment for rent in Arun Excello Sanka...,80.162565,12.817676,,,,,,,,,,,"Arun Excello Sankara, Mambakkam, Hyderabad",[{'address_components': [{'long_name': 'Kelamb...,"(12.8176758, 80.16256489999999)"


### Export

In [12]:
# Write the enriched frame to geocoded.csv without the row index.
df.to_csv(path_or_buf='geocoded.csv', index=False)