# Append LGA Codes

- Takes an input CSV or Excel file containing dirty local government area (LGA) names
- Appends the cleaned LGA name, the matched LGA code and the matched LGA name to a copy of the data
- The data can be exported as CSV or Excel

## Import modules

In [None]:
import datetime as dt
import pandas as pd
import numpy as np

## Define Functions

### Dynamically imports data as csv or excel

In [None]:
def import_csv_or_excel(filepath="",
                        sheet_name="Sheet1",
                        data_delimeter=",",
                        lga_col_name=""):
    """Load a CSV or Excel (.xlsx) file into a DataFrame.

    Columns that are entirely empty are dropped from the result.

    Parameters
    ----------
    filepath : str or path-like
        Location of the file. The format is chosen from the name: a trailing
        ``.xlsx`` means Excel; otherwise a name containing ``.csv`` means CSV
        (so ``data.csv.gz`` still loads); otherwise a name containing
        ``.xlsx`` means Excel. The check is case-insensitive.
    sheet_name : str
        Sheet to read when the file is an Excel workbook.
    data_delimeter : str
        Field delimiter used when the file is a CSV.
    lga_col_name : str
        Accepted for backward compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        The loaded data without all-empty columns.

    Raises
    ------
    ValueError
        If the file name is neither a ``.csv`` nor a ``.xlsx`` file.
    """
    lowered_path = str(filepath).lower()

    # Prefer the real trailing extension so "export.csv.xlsx" is read as Excel.
    if lowered_path.endswith(".xlsx"):
        is_excel = True
    elif ".csv" in lowered_path:
        is_excel = False
    elif ".xlsx" in lowered_path:
        is_excel = True
    else:
        # Previously this only printed and then failed on an unbound variable.
        raise ValueError("File neither .csv or .xlsx")

    if is_excel:
        data = pd.read_excel(filepath, sheet_name=sheet_name)
    else:
        try:
            # on_bad_lines="warn" skips malformed rows with a warning, which is
            # what error_bad_lines=False did before it was removed in pandas 2.0.
            data = pd.read_csv(filepath,
                               low_memory=False,
                               on_bad_lines="warn",
                               delimiter=data_delimeter)
        except TypeError:
            # pandas < 1.3 does not know on_bad_lines; use the legacy keyword.
            data = pd.read_csv(filepath,
                               low_memory=False,
                               error_bad_lines=False,
                               delimiter=data_delimeter)

    return data.dropna(how="all", axis=1)


### Clean data

**Operations**
- Uppercase.
- Strip leading and trailing whitespace.
- Replace every character that is not A-Z with a space.
- Collapse repeated spaces into a single space.

In [None]:
def clean_data(data, dirty_lga_column_name):
    """Add a ``cleaned_lga`` column with a normalised copy of the dirty LGA names.

    Each value is converted to ``str``, uppercased, has every character
    outside A-Z replaced by a space, has runs of spaces collapsed to one
    space, and is stripped of leading/trailing spaces. Missing values go
    through ``str()`` as well, so a NaN becomes the text ``"NAN"``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the dirty names. It is modified in place.
    dirty_lga_column_name : str
        Name of the column containing the dirty LGA names.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, now with the ``cleaned_lga`` column.
    """
    acceptable_chars = frozenset("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

    def normalise(raw_value):
        upper = str(raw_value).upper()
        spaced = "".join(ch if ch in acceptable_chars else " " for ch in upper)
        # Only A-Z and spaces remain, so split()/join collapses repeated
        # spaces and trims both ends in one step.
        return " ".join(spaced.split())

    # Assign the whole column positionally rather than per row via
    # .loc[0..n-1]: integer labels only exist on a default RangeIndex, so the
    # row-by-row form broke on filtered or re-indexed frames.
    data["cleaned_lga"] = [normalise(value) for value in data[dirty_lga_column_name]]

    return data

### Append codes and names

In [None]:
# LGA code -> reference name lookup.
# Order matters: names are tested in this order and the first one contained in
# the cleaned text wins, so 'YARRA RANGES' must stay ahead of 'YARRA'.
_LGA_NAMES_BY_CODE = {
    20110: 'ALPINE',
    20260: 'ARARAT',
    20570: 'BALLARAT',
    20660: 'BANYULE',
    20740: 'BASS COAST',
    20830: 'BAW BAW',
    20910: 'BAYSIDE',
    21010: 'BENALLA',
    21110: 'BOROONDARA',
    21180: 'BRIMBANK',
    21270: 'BULOKE',
    21370: 'CAMPASPE',
    21450: 'CARDINIA',
    21610: 'CASEY',
    21670: 'CENTRAL GOLDFIELDS',
    21750: 'COLAC OTWAY',
    21830: 'CORANGAMITE',
    21890: 'DAREBIN',
    22110: 'EAST GIPPSLAND',
    22170: 'FRANKSTON',
    22250: 'GANNAWARRA',
    22310: 'GLEN EIRA',
    22410: 'GLENELG',
    22490: 'GOLDEN PLAINS',
    22620: 'GREATER BENDIGO',
    22670: 'GREATER DANDENONG',
    22750: 'GREATER GEELONG',
    22830: 'GREATER SHEPPARTON',
    22910: 'HEPBURN',
    22980: 'HINDMARSH',
    23110: 'HOBSONS BAY',
    23190: 'HORSHAM',
    23270: 'HUME',
    23350: 'INDIGO',
    23430: 'KINGSTON',
    23670: 'KNOX',
    23810: 'LATROBE',
    23940: 'LODDON',
    24130: 'MACEDON RANGES',
    24210: 'MANNINGHAM',
    24250: 'MANSFIELD',
    24330: 'MARIBYRNONG',
    24410: 'MAROONDAH',
    24600: 'MELBOURNE',
    24650: 'MELTON',
    24780: 'MILDURA',
    24850: 'MITCHELL',
    24900: 'MOIRA',
    24970: 'MONASH',
    25060: 'MOONEE VALLEY',
    25150: 'MOORABOOL',
    25250: 'MORELAND',
    25340: 'MORNINGTON PENINSULA',
    25430: 'MOUNT ALEXANDER',
    25490: 'MOYNE',
    25620: 'MURRINDINDI',
    25710: 'NILLUMBIK',
    25810: 'NORTHERN GRAMPIANS',
    25900: 'PORT PHILLIP',
    25990: 'PYRENEES',
    26080: 'QUEENSCLIFFE',
    26170: 'SOUTH GIPPSLAND',
    26260: 'SOUTHERN GRAMPIANS',
    26350: 'STONNINGTON',
    26430: 'STRATHBOGIE',
    26490: 'SURF COAST',
    26610: 'SWAN HILL',
    26670: 'TOWONG',
    29399: 'UNINCORPORATED VIC',
    26700: 'WANGARATTA',
    26730: 'WARRNAMBOOL',
    26810: 'WELLINGTON',
    26890: 'WEST WIMMERA',
    26980: 'WHITEHORSE',
    27070: 'WHITTLESEA',
    27170: 'WODONGA',
    27260: 'WYNDHAM',
    27450: 'YARRA RANGES',
    27350: 'YARRA',
    27630: 'YARRIAMBIACK',
    29499: 'NO USUAL ADDRESS',
    29799: 'MIGRATORY OFFSHORE SHIPPING',
}


def append_codes_and_names(data):
    """Add ``script_lga_code`` and ``script_lga_name`` columns to ``data``.

    For every row, the reference names are tried in table order and the first
    one found as a substring of the row's ``cleaned_lga`` text is used. Rows
    with no match (or whose ``cleaned_lga`` is not a string) get NaN in both
    new columns. Both columns are always created, even when nothing matches.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``cleaned_lga`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with the two new columns.

    Raises
    ------
    KeyError
        If ``data`` has no ``cleaned_lga`` column.
    """
    no_match = float("nan")
    matched_codes = []
    matched_names = []

    for cleaned_lga_name in data["cleaned_lga"]:
        found_code, found_name = no_match, no_match

        # A substring test only makes sense on text; anything else is unmatched.
        if isinstance(cleaned_lga_name, str):
            for reference_code, reference_name in _LGA_NAMES_BY_CODE.items():
                if reference_name in cleaned_lga_name:
                    found_code, found_name = reference_code, reference_name
                    break

        matched_codes.append(found_code)
        matched_names.append(found_name)

    # Whole-column positional assignment works for any index; writing row by
    # row through .loc[0..n-1] assumed a default RangeIndex.
    data["script_lga_code"] = matched_codes
    data["script_lga_name"] = matched_names

    return data

## Process and export

In [None]:
# Run the pipeline: load the "Data" sheet of test.xlsx, add the cleaned_lga
# column built from "Local Government Area", then add the matched code/name
# columns.
data = import_csv_or_excel(filepath = "test.xlsx",sheet_name="Data")
data2 = clean_data(data, "Local Government Area")
data3 = append_codes_and_names(data2)
# Bare expression: in a notebook cell this displays the resulting frame.
data3

In [None]:
# Write the processed frame to an Excel workbook.
# NOTE(review): the destination is a hard-coded absolute path on one user's
# desktop; change it before running this on another machine.
data3.to_excel("/users/danielcorcoran/desktop/new.xlsx")