In [10]:
import numpy as np
import pandas as pd
import os
import googlemaps
from google.cloud import bigquery
import requests
import json
import string

In [15]:
def df_read_files(**kwargs):
    '''Load a table from a parquet file, a CSV file, or a BigQuery table.

    The source is chosen from the file name:

    * contains ``.parquet`` -> read with ``pd.read_parquet`` (pyarrow engine)
    * contains ``.csv``     -> read with ``pd.read_csv``
    * contains ``.json``    -> the file is used as the value of
      ``GOOGLE_APPLICATION_CREDENTIALS`` and ``SELECT *`` is run against a
      BigQuery table.

    Optional keyword arguments (each falls back to an interactive prompt
    when it is not supplied, so ``df_read_files()`` behaves as before):

    file       -- path of the data file or of the credentials JSON file
    project_id -- BigQuery project ID (``.json`` branch only)
    table_id   -- BigQuery table ID (``.json`` branch only)

    Returns the loaded ``pandas.DataFrame``.

    Raises ``ValueError`` when the file type is not recognised or when the
    table ID contains a backtick.
    '''
    file = kwargs.get('file')
    if file is None:
        file = input("Enter file name: ")

    # Look only at the file's own name so that a directory such as
    # "exports.csv/" cannot decide how the file is parsed.
    name = os.path.basename(file).lower()

    if '.parquet' in name:
        return pd.read_parquet(file, engine='pyarrow')

    if '.csv' in name:
        return pd.read_csv(file)

    if '.json' in name:
        # Only a credentials file may be exported as the credentials path;
        # doing this for every input would clobber a valid setting whenever
        # a CSV or parquet file is loaded.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = file

        project_id = kwargs.get('project_id')
        if project_id is None:
            project_id = input("enter project ID: ")
        bigquery_client = bigquery.Client(project=project_id)

        table_id = kwargs.get('table_id')
        if table_id is None:
            table_id = input("enter table ID from Big Query: ")
        # The table ID is spliced into the SQL text between backticks, so a
        # backtick inside it would end the quoted identifier early.
        if '`' in table_id:
            raise ValueError("table ID must not contain a backtick: %r" % table_id)

        QUERY = "SELECT * FROM " + "`" + table_id + "`"
        job = bigquery_client.query(QUERY)
        return job.to_dataframe()

    # Previously this case fell through to `return data` and failed with an
    # UnboundLocalError that said nothing about the real problem.
    raise ValueError(
        "Unsupported file type for %r: expected a .parquet, .csv or .json file" % file
    )

# Load the raw table. The call prompts for the file name and, when a .json
# credentials file is given, for the BigQuery project ID and table ID.
df = df_read_files()

Enter file name: bcx-insights-6dfb9fabfb5b.json
enter project ID: bcx-insights
enter table ID from Big Query: bcx_networkhealth.addresses_20191029


In [16]:
# Preview the first rows of the loaded table.
df.head()

Unnamed: 0,ENTITYID,STREETBOX,SUBURBCITY,POSTALCODE,CAREOF,BUILDING,FLOOR,ROOM,LATITUDE,LONGITUDE,PROVINCE
0,183627,100 Brakfontein Road,"Loulardia, Centurion",,,Shopite corporate park,,,,,Gauteng
1,461123,5 Simmonds Street,"Marshalltown, Johannesburg",,,Standard Bank Centre,,,,,Gauteng
2,181822,C/o Shepstone Street and Collis Road,Ivongo,,,1861 Rear Building,,,,,KwaZulu-Natal
3,283018,5 Bauhinia Str,"Highveld Technopark, Centurion",,,22 Cambridge Office Park,1st Floor,,,,
4,297531,8 Grix Road,Pietermaritzburg,,,"Subdivision 9 (of 4) of Lot 122,",,,,,


In [17]:
# Preview the last rows of the loaded table.
df.tail()

Unnamed: 0,ENTITYID,STREETBOX,SUBURBCITY,POSTALCODE,CAREOF,BUILDING,FLOOR,ROOM,LATITUDE,LONGITUDE,PROVINCE
26529,33959,Marabastad Informal Trading Market,11th Street in Junction Street mogul Street,Marabastad :Pretoria,,,,,,,
26530,336483,Erf 3080,Cnr Heidelberg & Airport Roads,"Dalpark Ext 5, Brakpan",,,,Shop 111,,,
26531,59858,27 Murrayfield Boulevard,Pretoria - Silverlakes - Homeowners Association,Management Centre 0081,,,,,,,
26532,191114,Cnr R51 and Brizial,"Daveyton, Jhb",Boyas View North UJ Campus Daveyton,,,,no 90,,,
26533,437784,PO BOX 7655,Pretoria,speedpot@icon.co.za(Send invoices via email),,,,,,,Gauteng


In [18]:
def load_config(file):
    '''Read a JSON configuration file and return its parsed content.

    file -- path of the JSON file to read.

    Returns whatever ``json.load`` produces for the file.
    '''
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file, encoding='utf-8') as conf:
        return json.load(conf)

In [19]:
def configure_table(df, config=None):
    '''Reshape an input DataFrame into the layout described by a configuration.

    The configuration maps each output column name (key) to one of:

    * the name of an input column -> that column is renamed to the key
    * ``''``                      -> a new column filled with empty strings
    * under the key ``'Address'``, a list of input column names -> a
      combined address string is built from the mapped output columns
    * under the key ``'null'``, the value(s) that should be treated as
      missing and replaced by ``''``

    df     -- input DataFrame; it is not modified.
    config -- optional. A configuration dict, or the path of a JSON
              configuration file. When omitted, the path stored in the
              module-level variable ``file`` is used, which is how
              ``configure_table(df)`` has always behaved.

    Returns a new DataFrame whose columns are the configuration keys, in
    configuration order, without the ``'null'`` entry.

    NOTE(review): the address is built from the second and third listed
    components only, as ``' ' + second + ', ' + third``. The original code
    used ``str(first).join([...])`` on a one-element list, which returns that
    element unchanged and so never included the first component. The emitted
    format is kept as it was; confirm whether the first component was meant
    to be part of the address.
    '''
    if config is None:
        config = load_config(file)  # fall back to the notebook-level path
    elif not isinstance(config, dict):
        config = load_config(config)

    source_names = list(config.values())
    original_cols = df.columns.to_list()

    # Keep only the columns the configuration refers to. drop() returns a new
    # frame, so the caller's DataFrame is left untouched.
    table = df.drop(columns=[col for col in original_cols if col not in source_names])

    # Blank out missing values before the address is assembled; otherwise the
    # text 'nan' / 'None' ends up inside the address strings.
    table = table.replace(config['null'], '').replace(np.nan, '')

    renames = {}
    blank_cols = []
    address_sources = None
    for key, source in config.items():
        if key == 'null':
            continue  # a setting, not an output column
        if source in original_cols:
            # First key wins if two keys name the same input column.
            renames.setdefault(source, key)
        elif source == '':
            blank_cols.append(key)
        elif key == 'Address':
            address_sources = source

    table = table.rename(columns=renames)
    for key in blank_cols:
        table[key] = ''

    # The address is built after every rename so that it does not depend on
    # where the 'Address' entry sits in the configuration.
    if address_sources is not None:
        # Input column name -> first output key that maps to it.
        key_for_source = {}
        for key, source in config.items():
            if isinstance(source, str):
                key_for_source.setdefault(source, key)
        parts = [key_for_source[src] for src in address_sources if src in key_for_source]

        # parts[0] is intentionally unused here; see NOTE(review) above.
        table['Address'] = [
            ' ' + str(second) + ', ' + str(third)
            for second, third in zip(table[parts[1]], table[parts[2]])
        ]

    # Select the output columns by name instead of assuming that 'null' is
    # the last key of the configuration.
    output_cols = [key for key in config if key != 'null']
    return table[output_cols]

In [20]:
# Path of the JSON configuration file. configure_table(df) reads this
# module-level `file` variable when it loads its configuration, so this cell
# has to run before configure_table is called.
file = input("Input json file to configure Dataframe ")

Input json file to configure Dataframe config2.json


In [21]:
#cf = load_config(file)

In [22]:
# Reshape the raw table according to the configuration file chosen above,
# then preview the first ten rows of the result.
dfc = configure_table(df)
dfc.head(10)

Unnamed: 0,ID,Country,Province,Postal Code,City,Suburb,Street,Number,Building,Floor,Room,Latitude,Longitude,Address
0,183627,,Gauteng,,,"Loulardia, Centurion",100 Brakfontein Road,,Shopite corporate park,,,,,"100 Brakfontein Road, Loulardia, Centurion"
1,461123,,Gauteng,,,"Marshalltown, Johannesburg",5 Simmonds Street,,Standard Bank Centre,,,,,"5 Simmonds Street, Marshalltown, Johannesburg"
2,181822,,KwaZulu-Natal,,,Ivongo,C/o Shepstone Street and Collis Road,,1861 Rear Building,,,,,"C/o Shepstone Street and Collis Road, Ivongo"
3,283018,,,,,"Highveld Technopark, Centurion",5 Bauhinia Str,,22 Cambridge Office Park,1st Floor,,,,"5 Bauhinia Str, Highveld Technopark, Centurion"
4,297531,,,,,Pietermaritzburg,8 Grix Road,,"Subdivision 9 (of 4) of Lot 122,",,,,,"8 Grix Road, Pietermaritzburg"
5,67977,,,,,"Laser Park, Honeydew",Zeiss Road,,Kimbuilt Industrial Park,,Block B Unit 10,,,"Zeiss Road, Laser Park, Honeydew"
6,372671,,,,,"Rosebank, Gauteng",51 Bath Avenue,,Rosebank Mall,,Shop 327/328,,,"51 Bath Avenue, Rosebank, Gauteng"
7,445339,,,,,"Centurion, Pretoria","5 BauhiniaRoad, Highveld Technopark",,Cambridge Office Park - Building 17,1st floor,,,,"5 BauhiniaRoad, Highveld Technopark, Centurio..."
8,458160,,Gauteng,,,Highveld Technopark Centurion,"5 Bauhinia Road,",,Building 22 Cambridge Office Park,,,,,"5 Bauhinia Road,, Highveld Technopark Centurion"
9,234524,,,,,"Foreshore, Cpt",14 Christiaan Barnard Street,,Atlantic Centre,,,,,"14 Christiaan Barnard Street, Foreshore, Cpt"


In [23]:
# Preview the last ten rows of the configured table.
dfc.tail(10)

Unnamed: 0,ID,Country,Province,Postal Code,City,Suburb,Street,Number,Building,Floor,Room,Latitude,Longitude,Address
26524,446397,,Eastern Cape,Port Elizabeth,,Lorraine,193 Circular Drive,,,,,,,"193 Circular Drive, Lorraine"
26525,75876,,,port elizabeth,,walmer,20 sixth avenue,,,,,,,"20 sixth avenue, walmer"
26526,278684,,,Pan Africa Mall,,Alexandra,Cnr 2nd avenue and Watt Street,,,,Shop L/7,,,"Cnr 2nd avenue and Watt Street, Alexandra"
26527,52642,,,Cape Town - 8001,,Table Mountain,Table Mountain On top,,,,,,,"Table Mountain On top, Table Mountain"
26528,426401,,,Postnetsuit 4529,,Rustenburg,P.O.BOX 82323,,,,,,,"P.O.BOX 82323, Rustenburg"
26529,33959,,,Marabastad :Pretoria,,11th Street in Junction Street mogul Street,Marabastad Informal Trading Market,,,,,,,"Marabastad Informal Trading Market, 11th Stre..."
26530,336483,,,"Dalpark Ext 5, Brakpan",,Cnr Heidelberg & Airport Roads,Erf 3080,,,,Shop 111,,,"Erf 3080, Cnr Heidelberg & Airport Roads"
26531,59858,,,Management Centre 0081,,Pretoria - Silverlakes - Homeowners Association,27 Murrayfield Boulevard,,,,,,,"27 Murrayfield Boulevard, Pretoria - Silverla..."
26532,191114,,,Boyas View North UJ Campus Daveyton,,"Daveyton, Jhb",Cnr R51 and Brizial,,,,no 90,,,"Cnr R51 and Brizial, Daveyton, Jhb"
26533,437784,,Gauteng,speedpot@icon.co.za(Send invoices via email),,Pretoria,PO BOX 7655,,,,,,,"PO BOX 7655, Pretoria"
