# Append LGA Codes

- Takes an input CSV or Excel file containing dirty local government area (LGA) names
- Appends a cleaned LGA name, the matching LGA code and the official ABS LGA name to the data as new columns
- The result can be exported as CSV or Excel

## Import modules

In [1]:
import datetime as dt
import pandas as pd
import numpy as np

## Define Functions

### Import data from a CSV or Excel file

In [2]:
def import_csv_or_excel(filepath="",
                        sheet_name="Sheet1",
                        data_delimeter=",",
                        lga_col_name=""):
    """Load a table from a ``.csv`` or ``.xlsx`` file into a DataFrame.

    Parameters
    ----------
    filepath : str or path-like
        Location of the file. The format is chosen from the extension(s) of
        the file name only (case-insensitive); the last ``.csv`` / ``.xlsx``
        extension wins, so ``data.csv.gz`` is still read as CSV while a
        directory called ``my.csv_exports`` does not affect the decision.
    sheet_name : str
        Worksheet to read; only used for ``.xlsx`` files.
    data_delimeter : str
        Field separator; only used for ``.csv`` files.
    lga_col_name : str
        Not used by this function; kept so existing calls that pass it
        keep working.

    Returns
    -------
    pandas.DataFrame
        The file contents with every entirely-empty column removed.

    Raises
    ------
    ValueError
        If the file name carries neither a ``.csv`` nor a ``.xlsx`` extension.
    """
    from pathlib import Path

    suffixes = [suffix.lower() for suffix in Path(str(filepath)).suffixes]
    # Walk the extensions right-to-left so the outermost recognised one decides.
    file_kind = next(
        (suffix for suffix in reversed(suffixes) if suffix in (".csv", ".xlsx")),
        None,
    )

    if file_kind == ".csv":
        try:
            # Malformed rows are skipped rather than aborting the whole import.
            data = pd.read_csv(filepath, low_memory=False, on_bad_lines="skip", delimiter=data_delimeter)
        except TypeError:
            # pandas < 1.3 does not know `on_bad_lines`; use the older spelling.
            data = pd.read_csv(filepath, low_memory=False, error_bad_lines=False, delimiter=data_delimeter)

    elif file_kind == ".xlsx":
        data = pd.read_excel(filepath, sheet_name=sheet_name)

    else:
        # Fail loudly here instead of crashing later on an unbound variable.
        raise ValueError(
            "File is neither .csv nor .xlsx: {!r}".format(filepath)
        )

    # Drop columns that contain no data at all.
    return data.dropna(how="all", axis=1)


### Clean data

**Operations**
- Convert to uppercase.
- Replace every character that is not A-Z with a space.
- Collapse runs of spaces into a single space.
- Strip leading and trailing whitespace.

In [3]:
def _clean_lga_name(value):
    """Return ``value`` as an upper-case string of A-Z words separated by single spaces."""
    upper = str(value).upper()
    # Anything outside A-Z (digits, punctuation, accented letters, whitespace)
    # becomes a space.
    letters_only = "".join(ch if "A" <= ch <= "Z" else " " for ch in upper)
    # split()/join collapses runs of spaces and trims both ends in one step.
    return " ".join(letters_only.split())


def clean_data(data, dirty_lga_column_name):
    """Add a ``cleaned_lga`` column holding a normalised copy of the dirty LGA names.

    Each value is converted with ``str()``, upper-cased, has every character
    other than A-Z replaced by a space, has runs of spaces collapsed to one
    and is stripped of leading/trailing spaces.  Because of the ``str()``
    conversion a missing value (NaN) ends up as the text ``"NAN"``.

    Rows are processed by position, so the frame may carry any index
    (filtered, re-ordered, non-integer or duplicated labels).

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing the dirty names.  It is modified in place.
    dirty_lga_column_name : str
        Name of the column holding the dirty LGA names.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, now with a ``cleaned_lga`` column.

    Raises
    ------
    KeyError
        If ``dirty_lga_column_name`` is not a column of ``data``.
    """
    # A plain list is assigned positionally, so no index alignment is involved.
    data["cleaned_lga"] = [_clean_lga_name(value) for value in data[dirty_lga_column_name]]

    return data

### Append codes and names

In [4]:
def append_codes_and_names(data):
    """Add the LGA code and official name matching each ``cleaned_lga`` value.

    For every row the reference table below is scanned in order and the first
    entry whose upper-case reference name occurs as a substring of the row's
    ``cleaned_lga`` text wins.  Rows with no match (or whose ``cleaned_lga``
    is not a string) get NaN in both output columns.

    Rows are processed by position, so the frame may carry any index.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a ``cleaned_lga`` column.  It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with two columns added (or overwritten):
        ``script_lga_code`` (float64, so that unmatched rows can hold NaN)
        and ``script_lga_name`` (object).

    Raises
    ------
    KeyError
        If ``data`` has no ``cleaned_lga`` column.
    """
    # code -> (upper-case name used for matching, official name written out).
    # Order matters because matching is by substring and the first hit wins:
    # 'YARRA RANGES' must stay ahead of 'YARRA'.
    lga_dict = {
        20110: ('ALPINE', 'Alpine (S)'),
        20260: ('ARARAT', 'Ararat (RC)'),
        20570: ('BALLARAT', 'Ballarat (C)'),
        20660: ('BANYULE', 'Banyule (C)'),
        20740: ('BASS COAST', 'Bass Coast (S)'),
        20830: ('BAW BAW', 'Baw Baw (S)'),
        20910: ('BAYSIDE', 'Bayside (C)'),
        21010: ('BENALLA', 'Benalla (RC)'),
        21110: ('BOROONDARA', 'Boroondara (C)'),
        21180: ('BRIMBANK', 'Brimbank (C)'),
        21270: ('BULOKE', 'Buloke (S)'),
        21370: ('CAMPASPE', 'Campaspe (S)'),
        21450: ('CARDINIA', 'Cardinia (S)'),
        21610: ('CASEY', 'Casey (C)'),
        21670: ('CENTRAL GOLDFIELDS', 'Central Goldfields (S)'),
        21750: ('COLAC OTWAY', 'Colac-Otway (S)'),
        21830: ('CORANGAMITE', 'Corangamite (S)'),
        21890: ('DAREBIN', 'Darebin (C)'),
        22110: ('EAST GIPPSLAND', 'East Gippsland (S)'),
        22170: ('FRANKSTON', 'Frankston (C)'),
        22250: ('GANNAWARRA', 'Gannawarra (S)'),
        22310: ('GLEN EIRA', 'Glen Eira (C)'),
        22410: ('GLENELG', 'Glenelg (S)'),
        22490: ('GOLDEN PLAINS', 'Golden Plains (S)'),
        22620: ('GREATER BENDIGO', 'Greater Bendigo (C)'),
        22670: ('GREATER DANDENONG', 'Greater Dandenong (C)'),
        22750: ('GREATER GEELONG', 'Greater Geelong (C)'),
        22830: ('GREATER SHEPPARTON', 'Greater Shepparton (C)'),
        22910: ('HEPBURN', 'Hepburn (S)'),
        22980: ('HINDMARSH', 'Hindmarsh (S)'),
        23110: ('HOBSONS BAY', 'Hobsons Bay (C)'),
        23190: ('HORSHAM', 'Horsham (RC)'),
        23270: ('HUME', 'Hume (C)'),
        23350: ('INDIGO', 'Indigo (S)'),
        23430: ('KINGSTON', 'Kingston (C)(Vic.)'),
        23670: ('KNOX', 'Knox (C)'),
        23810: ('LATROBE', 'Latrobe (C)(Vic.)'),
        23940: ('LODDON', 'Loddon (S)'),
        24130: ('MACEDON RANGES', 'Macedon Ranges (S)'),
        24210: ('MANNINGHAM', 'Manningham (C)'),
        24250: ('MANSFIELD', 'Mansfield (S)'),
        24330: ('MARIBYRNONG', 'Maribyrnong (C)'),
        24410: ('MAROONDAH', 'Maroondah (C)'),
        24600: ('MELBOURNE', 'Melbourne (C)'),
        24650: ('MELTON', 'Melton (C)'),
        24780: ('MILDURA', 'Mildura (RC)'),
        24850: ('MITCHELL', 'Mitchell (S)'),
        24900: ('MOIRA', 'Moira (S)'),
        24970: ('MONASH', 'Monash (C)'),
        25060: ('MOONEE VALLEY', 'Moonee Valley (C)'),
        25150: ('MOORABOOL', 'Moorabool (S)'),
        25250: ('MORELAND', 'Moreland (C)'),
        25340: ('MORNINGTON PENINSULA', 'Mornington Peninsula (S)'),
        25430: ('MOUNT ALEXANDER', 'Mount Alexander (S)'),
        25490: ('MOYNE', 'Moyne (S)'),
        25620: ('MURRINDINDI', 'Murrindindi (S)'),
        25710: ('NILLUMBIK', 'Nillumbik (S)'),
        25810: ('NORTHERN GRAMPIANS', 'Northern Grampians (S)'),
        25900: ('PORT PHILLIP', 'Port Phillip (C)'),
        25990: ('PYRENEES', 'Pyrenees (S)'),
        26080: ('QUEENSCLIFFE', 'Queenscliffe (B)'),
        26170: ('SOUTH GIPPSLAND', 'South Gippsland (S)'),
        26260: ('SOUTHERN GRAMPIANS', 'Southern Grampians (S)'),
        26350: ('STONNINGTON', 'Stonnington (C)'),
        26430: ('STRATHBOGIE', 'Strathbogie (S)'),
        26490: ('SURF COAST', 'Surf Coast (S)'),
        26610: ('SWAN HILL', 'Swan Hill (RC)'),
        26670: ('TOWONG', 'Towong (S)'),
        29399: ('UNINCORPORATED VIC', 'Unincorporated Vic'),
        26700: ('WANGARATTA', 'Wangaratta (RC)'),
        26730: ('WARRNAMBOOL', 'Warrnambool (C)'),
        26810: ('WELLINGTON', 'Wellington (S)'),
        26890: ('WEST WIMMERA', 'West Wimmera (S)'),
        26980: ('WHITEHORSE', 'Whitehorse (C)'),
        27070: ('WHITTLESEA', 'Whittlesea (C)'),
        27170: ('WODONGA', 'Wodonga (C)'),
        27260: ('WYNDHAM', 'Wyndham (C)'),
        27450: ('YARRA RANGES', 'Yarra Ranges (S)'),
        27350: ('YARRA', 'Yarra (C)'),
        27630: ('YARRIAMBIACK', 'Yarriambiack (S)'),
        29499: ('NO USUAL ADDRESS', 'No usual address (Vic.)'),
        29799: ('MIGRATORY OFFSHORE SHIPPING', 'Migratory - Offshore - Shipping (Vic.)'),
    }

    matched_codes = []
    matched_names = []

    for cleaned_lga_name in data["cleaned_lga"]:
        code, abs_name = np.nan, np.nan

        # Only text can be substring-matched; anything else stays unmatched.
        if isinstance(cleaned_lga_name, str):
            for reference_code, (reference_name, lga_abs_name) in lga_dict.items():
                if reference_name in cleaned_lga_name:
                    code, abs_name = reference_code, lga_abs_name
                    break

        matched_codes.append(code)
        matched_names.append(abs_name)

    # Arrays are assigned positionally, so both columns always exist and no
    # index alignment takes place.
    data["script_lga_code"] = np.array(matched_codes, dtype="float64")
    data["script_lga_name"] = np.array(matched_names, dtype=object)

    return data

## Process and export

In [6]:
# Pipeline: load the workbook, normalise the "Lga Name16" column, then look up
# the code and official name for each cleaned value.
# NOTE: clean_data and append_codes_and_names add columns to the frame they are
# given and return that same object, so data, data2 and data3 all refer to one
# DataFrame.
# NOTE(review): the input path is absolute and machine-specific; adjust it
# before running elsewhere.
data = import_csv_or_excel(filepath = "/users/danielcorcoran/desktop/clean.xlsx",sheet_name="Sheet1")
data2 = clean_data(data, "Lga Name16")
data3 = append_codes_and_names(data2)
# Display the result as the cell output.
data3

Unnamed: 0,Lga Code16,Lga Name16,Ste Code16,Ste Name16,Areasqkm16,Geometry,cleaned_lga,script_lga_code,script_lga_name
0,20110,Alpine (S),2,Victoria,4788.16,POLYGON,ALPINE S,20110.0,Alpine (S)
1,20260,Ararat (RC),2,Victoria,4211.12,POLYGON,ARARAT RC,20260.0,Ararat (RC)
2,20570,Ballarat (C),2,Victoria,739.03,POLYGON,BALLARAT C,20570.0,Ballarat (C)
3,20660,Banyule (C),2,Victoria,62.54,POLYGON,BANYULE C,20660.0,Banyule (C)
4,20740,Bass Coast (S),2,Victoria,865.81,MULTIPOLYGON,BASS COAST S,20740.0,Bass Coast (S)
5,20830,Baw Baw (S),2,Victoria,4027.63,POLYGON,BAW BAW S,20830.0,Baw Baw (S)
6,20910,Bayside (C),2,Victoria,37.21,POLYGON,BAYSIDE C,20910.0,Bayside (C)
7,21010,Benalla (RC),2,Victoria,2352.64,POLYGON,BENALLA RC,21010.0,Benalla (RC)
8,21110,Boroondara (C),2,Victoria,60.18,POLYGON,BOROONDARA C,21110.0,Boroondara (C)
9,21180,Brimbank (C),2,Victoria,123.40,POLYGON,BRIMBANK C,21180.0,Brimbank (C)


In [None]:
# Write the result to Excel, storing the DataFrame index in a "row_index" column.
# NOTE(review): this is the same path the input was read from, so the source
# workbook is overwritten and gains an extra index column on every run -
# confirm that is intended.
data3.to_excel("/users/danielcorcoran/desktop/clean.xlsx", index_label = "row_index", index = True)