# Introduction

### Imports

In [2]:
import pandas as pd
import numpy as np
import os
from google.cloud import bigquery
import geopy.distance
import requests
import json
import time

In [1]:
# !pip install pandas-gbq

In [6]:
# Load the raw location extract. The path is relative, so it is resolved
# against the notebook's working directory; pyarrow is forced as the engine.
df = pd.read_parquet('bcxlocation_20190701_00.parquet', engine='pyarrow')

# Input

# Format

In [7]:
def preprocessing(df):
    """Return a trimmed copy of ``df`` with ``'(blank)'`` placeholders nulled.

    Only the coordinate columns (``Lat``, ``Lon``) and the four ``G ...``
    address columns are kept. Every cell equal to the literal string
    ``'(blank)'`` becomes ``NaN``. The frame passed in is left untouched.
    """
    wanted = ['Lat', 'Lon', 'G City', 'G Suburb', 'G Street Name', 'G Str Num']
    # Select first, then copy, so only the six needed columns are duplicated.
    trimmed = df[wanted].copy()
    return trimmed.replace('(blank)', np.nan)

In [8]:
# Keep only the location columns and turn '(blank)' placeholders into NaN.
df = preprocessing(df)

In [9]:
def format_table(df):
    """Reshape the preprocessed frame into the canonical address layout.

    The ``G ...`` address columns and ``Lat``/``Lon`` are renamed, and three
    all-NaN placeholder columns (``Country``, ``Province``, ``Postal_Code``)
    are added for values that are filled in later.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``Lat``, ``Lon``, ``G City``, ``G Suburb``,
        ``G Street Name`` and ``G Str Num``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and the columns ``Country``,
        ``Province``, ``City``, ``Suburb``, ``Street``, ``Number``,
        ``Postal_Code``, ``Latitude``, ``Longitude`` (in that order).

    Raises
    ------
    KeyError
        If any of the required source columns is missing.
    """
    renames = {
        'G City': 'City',
        'G Suburb': 'Suburb',
        'G Street Name': 'Street',
        'G Str Num': 'Number',
        'Lat': 'Latitude',
        'Lon': 'Longitude',
    }
    layout = ['Country', 'Province', 'City', 'Suburb', 'Street', 'Number',
              'Postal_Code', 'Latitude', 'Longitude']

    # Every step returns a new frame, so the caller's frame never gains
    # the extra columns as a side effect.
    formatted = (
        df[list(renames)]
        .rename(columns=renames)
        .assign(Country=np.nan, Province=np.nan, Postal_Code=np.nan)
    )
    return formatted[layout]

In [10]:
# Switch to the canonical address columns. This rebinds df to a frame that no
# longer has the 'G ...' columns format_table reads, so the cell cannot simply
# be re-run without first re-running the cells above it.
df = format_table(df)

In [11]:
# Work on a five-row sample (positions 10020-10024) -- presumably to keep the
# number of geocoding requests small; TODO confirm and move to a named constant.
# NOTE: this rebinds df, so re-running the cell slices the already-sliced frame.
df = df.iloc[10020:10025].copy()

In [11]:
# !pip install geolocation-python

In [12]:
# !pip install responses

# Get Place Details

In [17]:
class GooglePlaces(object):
    """Small client for the Google Geocoding web service.

    Both lookup methods return the first entry of the response's ``results``
    list (a dict) and raise ``IndexError`` when the service returns none.

    Parameters
    ----------
    apiKey : str
        Google Maps Platform API key sent with every request.
    region : str or None, optional
        ccTLD-style region code used to bias address lookups (default
        ``'za'``, South Africa). A country name is not a valid value for the
        API's ``bounds`` parameter, which expects a lat/lng box, so country
        biasing is expressed through ``region``. Pass ``None`` to disable it.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.
    session : object, optional
        Anything exposing ``get(url, params=..., timeout=...)`` that returns a
        response with ``raise_for_status()`` and ``json()`` -- for example a
        ``requests.Session``. When omitted, the ``requests`` module is used.
    """

    GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"

    def __init__(self, apiKey, region='za', timeout=10, session=None):
        super(GooglePlaces, self).__init__()
        self.apiKey = apiKey
        self.region = region
        self.timeout = timeout
        self.session = session

    def _first_result(self, params):
        """Send one geocoding request and return its first result dict."""
        query = dict(params, key=self.apiKey)
        http = self.session if self.session is not None else requests
        # Without a timeout a stalled connection would hang the notebook.
        response = http.get(self.GEOCODE_URL, params=query, timeout=self.timeout)
        response.raise_for_status()
        payload = response.json()

        matches = payload.get('results') or []
        if not matches:
            # Still an IndexError (what indexing the empty list raised), but
            # with the API's own explanation attached.
            raise IndexError(
                "Geocoding returned no results (status={!r}, error_message={!r})".format(
                    payload.get('status'), payload.get('error_message')
                )
            )
        return matches[0]

    def get_place_details_using_address(self, address):
        """Geocode a free-text ``address`` and return the first match."""
        params = {'address': address}
        if self.region:
            params['region'] = self.region
        return self._first_result(params)

    def get_place_details_using_coordinates(self, coordinates):
        """Reverse geocode a ``"lat,lng"`` string and return the first match."""
        return self._first_result({'latlng': coordinates})

In [18]:
# The API key is read from the GOOGLE_MAPS_API_KEY environment variable so it
# never ends up in the notebook or its version history; a missing variable
# fails loudly with KeyError.
# NOTE(review): a literal key was previously committed on this line -- treat it
# as leaked and revoke/rotate it.
gp = GooglePlaces(os.environ['GOOGLE_MAPS_API_KEY'])


# Extract Values From Results

In [19]:
def get_values(d):
    """Flatten one geocoding result into a fixed-order list of nine values.

    Each entry of ``d['address_components']`` is keyed by the first element
    of its ``types`` list and stores the upper-cased ``long_name``. When two
    components share the same first type, the one that appears earlier in
    the list wins.

    Returns ``[country, administrative_area_level_1,
    administrative_area_level_2, political, route, street_number,
    postal_code, lat, lng]``; address fields that are absent are ``np.nan``,
    and ``lat``/``lng`` come from ``d['geometry']['location']``.
    """
    components = d['address_components']

    # Walk back-to-front so that earlier components overwrite later ones.
    by_first_type = {}
    for component in reversed(components):
        first_type = component['types'][0]
        by_first_type[first_type] = component['long_name'].upper()

    # Output order: country, province, city, suburb, street, number, code.
    wanted = (
        'country',
        'administrative_area_level_1',
        'administrative_area_level_2',
        'political',
        'route',
        'street_number',
        'postal_code',
    )
    extracted = [by_first_type.get(key, np.nan) for key in wanted]

    # Coordinates are appended last: latitude, then longitude.
    extracted.append(d['geometry']['location']['lat'])
    extracted.append(d['geometry']['location']['lng'])
    return extracted

# Fill Missing Values

In [20]:
def fill_missing(dataframe, instance):
    """Complete each address row of ``dataframe`` from geocoding results.

    Rows are visited in positional order:

    * if ``Number``, ``Street`` and ``Suburb`` are all present, the row is
      looked up by address via ``instance.get_place_details_using_address``;
    * otherwise, if ``Latitude`` and ``Longitude`` are present, it is looked
      up via ``instance.get_place_details_using_coordinates``;
    * otherwise the row cannot be resolved and is left out of the result.

    The lookup result is flattened with ``get_values`` and merged column by
    column: a non-null API value replaces the existing cell, a null API value
    leaves the existing cell as it is.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns are, in order, the nine fields returned by
        ``get_values``. Resolved rows are written back into this object in
        place.
    instance : object
        Provides ``get_place_details_using_address(str)`` and
        ``get_place_details_using_coordinates(str)``.

    Returns
    -------
    pandas.DataFrame
        ``dataframe`` itself when every row was resolved; otherwise a new
        frame without the unresolved rows.

    Notes
    -----
    Exceptions raised by ``instance`` or ``get_values`` are not caught here.
    """
    if dataframe.empty:
        return dataframe

    cols = dataframe.columns.tolist()

    # Placeholder columns that hold only NaN are float64; make them object
    # up front so upper-cased text from the API can be stored without a
    # dtype clash. Coordinates are numeric and are left alone.
    for col in cols:
        if col in ('Latitude', 'Longitude'):
            continue
        if dataframe[col].dtype != object and dataframe[col].isnull().all():
            dataframe[col] = dataframe[col].astype(object)

    unresolved = []  # positions with neither a full address nor coordinates

    for position in range(len(dataframe)):
        row = dataframe.iloc[position]

        # Prefer the address when all three parts are present.
        if pd.notnull(row['Number']) and pd.notnull(row['Street']) and pd.notnull(row['Suburb']):
            address = "{} {},{}".format(row['Number'], row['Street'], row['Suburb'])
            results = instance.get_place_details_using_address(address)

        # Otherwise fall back to the coordinates (formatted, since they may
        # be floats rather than strings).
        elif pd.notnull(row['Latitude']) and pd.notnull(row['Longitude']):
            coordinates = "{},{}".format(row['Latitude'], row['Longitude'])
            results = instance.get_place_details_using_coordinates(coordinates)

        # Nothing to look up: remember the row and carry on with the rest.
        else:
            unresolved.append(position)
            continue

        # Raises ValueError if get_values and the frame disagree on width.
        fetched = pd.Series(get_values(results), index=cols, dtype=object)

        # The API value wins whenever it is present; otherwise keep the cell.
        merged = [
            fetched[col] if pd.notnull(fetched[col]) else row[col]
            for col in cols
        ]

        # Write through the frame itself, not through the row copy above.
        dataframe.iloc[position] = merged

    if unresolved:
        skipped = set(unresolved)
        kept = [pos for pos in range(len(dataframe)) if pos not in skipped]
        dataframe = dataframe.iloc[kept].copy()

    return dataframe
# Run the fill on the sample rows. As the last expression in the cell, the
# returned frame is what gets displayed as the cell output.
fill_missing(df,gp)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Country,Province,City,Suburb,Street,Number,Postal_Code,Latitude,Longitude
10020,SOUTH AFRICA,WESTERN CAPE,CAPE TOWN,STIKLAND INDUSTRIAL,CILMOR STREET,1,7530,-33.8997,18.6684
10021,SOUTH AFRICA,WESTERN CAPE,CAPE TOWN,STIKLAND INDUSTRIAL,GAMKA STREET,14,7530,-33.9016,18.6649
10022,SOUTH AFRICA,WESTERN CAPE,CAPE TOWN,STIKLAND INDUSTRIAL,GAMKA STREET,2,7530,-33.9002,18.6637
10023,SOUTH AFRICA,WESTERN CAPE,CAPE TOWN,STIKLAND INDUSTRIAL,LA BELLE ROAD,13560,7530,-33.9001,18.67
10024,SOUTH AFRICA,WESTERN CAPE,CAPE TOWN,STIKLAND INDUSTRIAL,PALMIET ROAD,12,7530,-33.9018,18.6683
