In [10]:
import re
import time
import numpy as np
import pandas as pd
import googlemaps as gmaps

gmaps = gmaps.Client(key='AIzaSyAiiG2SnCVLWlg0ZA6GKXS-beHPIidYVeM')

In [11]:
%%capture
from tqdm import tqdm_notebook as tqdm
tqdm().pandas()

In [12]:
df = pd.read_csv('pre_cleaned.csv')
print(df.shape)
df.head(5)

(2346, 14)


Unnamed: 0,price,title,longitude,latitude,floor,furnishing,tenants preferred,bathroom,balcony,availability,facing,overlooking,car parking,owner resides
0,"₹ 10,000",1 BHK Builder Floor for rent in Saket,28.523548,77.208893,2 out of 4 Floors,Furnished,Bachelors/Family,2,1.0,Immediately,,,,
1,₹ 1.6 Lac,4 BHK Builder Floor for rent in Greater Kailas...,,,2 out of 3 Floors,Semi-Furnished,Bachelors/Family,4,2.0,Immediately,South,"Garden/Park, Main Road","1 Covered, 1 Open",
2,"₹ 7,000",1 BHK Builder Floor for rent in Chhattarpur,,,2 out of 3 Floors,Semi-Furnished,Bachelors/Family,1,,Immediately,East,Main Road,1 Open,
3,"₹ 20,500","1 BHK House for rent in Dayanand Colony, Lajpa...",28.562599,77.249603,Ground out of 3 Floors,Furnished,Bachelors/Family,1,1.0,Immediately,,,,
4,"₹ 13,000",1 BHK Apartment for rent in Rohini Sector 18,28.741163,77.134613,2 out of 4 Floors,Semi-Furnished,Bachelors/Family,1,1.0,Immediately,,,,


In [13]:
def parse_address(x):
    x = re.split(' \d\d+', x)[0]
    x = re.split(' in', x)[-1]
    return x+', New Delhi'

In [14]:
df['address'] = df['title'].apply(parse_address)

In [15]:
print(len(df), 'total rows')
unique_locs = list(set(df['address']))
print(len(unique_locs), 'unique locations')

2346 total rows
628 unique locations


# Geocoding

In [17]:
def get_gmap_geos(locations, max_errors=10):
    geocoded = dict()
    start_time = time.perf_counter()
    errors = 0
    for iteration, location in enumerate(locations):
        try:
            geocoded[location] = gmaps.geocode(location)
            time.sleep(.05) # <-- throttle
        except:
            time.sleep(2)
            try:
                geocoded[location] = gmaps.geocode(location)
            except:
                time.sleep(2)
                try:
                    geocoded[location] = gmaps.geocode(location)
                except:
                    geocoded[location] = 'error'
                    errors+=1
        if errors>max_errors:
            print('stopped at', iteration, '(max errors reached)')
            break
            return geocoded
        if iteration>0 and iteration%200==0:
            now = time.strftime('%H:%M:%S', time.localtime(time.time()))
            print(f'{now} | {iteration} locations geocoded ({round(iteration/len(unique_locs)*100, 2)}%) | {errors} total errors')
    print(f'job completed in {time.perf_counter() - start_time}s\n{errors} requests timed out')
    print(len([v for v in geocoded.values() if v=='error']), 'locations not found')
    return geocoded

In [18]:
unique_geocodes = get_gmap_geos(unique_locs)

15:26:47 | 200 locations geocoded (31.85%) | 0 total errors
15:28:46 | 400 locations geocoded (63.69%) | 0 total errors
15:30:43 | 600 locations geocoded (95.54%) | 0 total errors
job completed in 380.901121669s
0 requests timed out
0 locations not found


### Map back onto df

In [19]:
df['gmaps_info'] = df['address'].apply(lambda x: unique_geocodes[x])

In [20]:
def extract_coords(geo):
    try:
        geo = geo[-1] # drop generalized extra results
        return geo['geometry']['location']['lat'], geo['geometry']['location']['lng']
    except:
        return np.nan, np.nan

In [21]:
df['coord'] = df['gmaps_info'].apply(extract_coords)
df['latitude'].fillna(df['coord'].apply(lambda x: x[0]), inplace=True)
df['longitude'].fillna(df['coord'].apply(lambda x: x[1]), inplace=True)
df.head()

Unnamed: 0,price,title,longitude,latitude,floor,furnishing,tenants preferred,bathroom,balcony,availability,facing,overlooking,car parking,owner resides,address,gmaps_info,coord
0,"₹ 10,000",1 BHK Builder Floor for rent in Saket,28.523548,77.208893,2 out of 4 Floors,Furnished,Bachelors/Family,2,1.0,Immediately,,,,,"Saket, New Delhi",[{'address_components': [{'long_name': 'Saket'...,"(28.5220971, 77.2101534)"
1,₹ 1.6 Lac,4 BHK Builder Floor for rent in Greater Kailas...,77.239797,28.550216,2 out of 3 Floors,Semi-Furnished,Bachelors/Family,4,2.0,Immediately,South,"Garden/Park, Main Road","1 Covered, 1 Open",,"Greater Kailash 1, New Delhi",[{'address_components': [{'long_name': 'Greate...,"(28.5502164, 77.2397968)"
2,"₹ 7,000",1 BHK Builder Floor for rent in Chhattarpur,77.184825,28.495856,2 out of 3 Floors,Semi-Furnished,Bachelors/Family,1,,Immediately,East,Main Road,1 Open,,"Chhattarpur, New Delhi",[{'address_components': [{'long_name': 'Chhata...,"(28.4958562, 77.1848254)"
3,"₹ 20,500","1 BHK House for rent in Dayanand Colony, Lajpa...",28.562599,77.249603,Ground out of 3 Floors,Furnished,Bachelors/Family,1,1.0,Immediately,,,,,"Dayanand Colony, Lajpat Nagar, New Delhi",[{'address_components': [{'long_name': 'Dayana...,"(28.5625103, 77.2484181)"
4,"₹ 13,000",1 BHK Apartment for rent in Rohini Sector 18,28.741163,77.134613,2 out of 4 Floors,Semi-Furnished,Bachelors/Family,1,1.0,Immediately,,,,,"Rohini Sector, New Delhi",[{'address_components': [{'long_name': 'Halar ...,"(28.7018804, 77.1227164)"


### Export

In [22]:
df.to_csv('geocoded.csv', index=False)