## Import CSV

In [23]:
import pandas as pd
import base64

data = pd.read_csv("sacom-data-encoded.csv")

In [24]:
data.head(2)

Unnamed: 0,Org ID,Org Name,AKA,Acronym,Former Name,S Street Addr 1,S Street Addr 2,S Suburb,S State,S Postcode,...,Toilets Access,Disabled Parking,Services,ABN,Local Community dir,Adelaide Hills dir,Onkaparinga dir,Subjects,Primary Category,Council
0,193932,RSL Ardrossan Sub Branch,Ardrossan RSL----Returned & Services League Ar...,,,"RSL Hall, West Tce",,Ardrossan,South Australia,5571,...,,,V2VsZmFyZSBhbmQgcGVuc2lvbnMgc3VwcG9ydCBmb3IgZX...,25166174779,Service Clubs,,,Ex-Defence Service Groups----Halls For Hire---...,Recreation,Yorke Peninsula Council
1,193933,RSL Balaklava Sub Branch,Balaklava RSL----Returned & Services League Ba...,,,21 Scotland St,,Balaklava,South Australia,5461,...,,,V2VsZmFyZSBhbmQgcGVuc2lvbnMgc3VwcG9ydCBmb3IgZX...,98 260 727 531,Support Groups,,,Ex-Defence Service Groups----Social & Activity...,Personal & Family Support,Wakefield Regional Council


## Set the index to "Org ID"

In [25]:
data.set_index('Org ID', inplace=True)

In [26]:
data.head(2)

Unnamed: 0_level_0,Org Name,AKA,Acronym,Former Name,S Street Addr 1,S Street Addr 2,S Suburb,S State,S Postcode,Phone,...,Toilets Access,Disabled Parking,Services,ABN,Local Community dir,Adelaide Hills dir,Onkaparinga dir,Subjects,Primary Category,Council
Org ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
193932,RSL Ardrossan Sub Branch,Ardrossan RSL----Returned & Services League Ar...,,,"RSL Hall, West Tce",,Ardrossan,South Australia,5571,08 8837 3596++,...,,,V2VsZmFyZSBhbmQgcGVuc2lvbnMgc3VwcG9ydCBmb3IgZX...,25166174779,Service Clubs,,,Ex-Defence Service Groups----Halls For Hire---...,Recreation,Yorke Peninsula Council
193933,RSL Balaklava Sub Branch,Balaklava RSL----Returned & Services League Ba...,,,21 Scotland St,,Balaklava,South Australia,5461,08 8100 7300++Main Office,...,,,V2VsZmFyZSBhbmQgcGVuc2lvbnMgc3VwcG9ydCBmb3IgZX...,98 260 727 531,Support Groups,,,Ex-Defence Service Groups----Social & Activity...,Personal & Family Support,Wakefield Regional Council


In [27]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 14385 entries, 193932 to 238742
Data columns (total 27 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Org Name             14385 non-null  object 
 1   AKA                  4849 non-null   object 
 2   Acronym              1365 non-null   object 
 3   Former Name          1476 non-null   object 
 4   S Street Addr 1      12933 non-null  object 
 5   S Street Addr 2      3184 non-null   object 
 6   S Suburb             14312 non-null  object 
 7   S State              14353 non-null  object 
 8   S Postcode           14295 non-null  object 
 9   Phone                10438 non-null  object 
 10  Mobile               5774 non-null   object 
 11  Email                12595 non-null  object 
 12  Website              12123 non-null  object 
 13  Parent Body          6304 non-null   object 
 14  Parent Body URL      3059 non-null   object 
 15  Open Hours           6293 non-null 

## Decode base64

In [28]:
decode_column=["Open Hours","Services"]

In [29]:
data[decode_column].head()

Unnamed: 0_level_0,Open Hours,Services
Org ID,Unnamed: 1_level_1,Unnamed: 2_level_1
193932,,V2VsZmFyZSBhbmQgcGVuc2lvbnMgc3VwcG9ydCBmb3IgZX...
193933,,V2VsZmFyZSBhbmQgcGVuc2lvbnMgc3VwcG9ydCBmb3IgZX...
193934,TW9uIDEyOjAwIFBNIC0gNjowMCBQTQ1cClR1ZXMgNTozMC...,V2VsZmFyZSBhbmQgcGVuc2lvbnMgc3VwcG9ydCBmb3IgZX...
193935,,V2VsZmFyZSBhbmQgcGVuc2lvbnMgc3VwcG9ydCBmb3IgZX...
193936,U2F0IDRwbSAtIDhwbSwgU3VuIGFuZCBwdWJsaWMgaG9saW...,V2VsZmFyZSBhbmQgcGVuc2lvbnMgc3VwcG9ydCBmb3IgZX...


In [30]:
def decode_base64(x):
    if isinstance(x, str):
        try:
            # Check if the base64 string is correctly padded
            missing_padding = len(x) % 4
            if missing_padding != 0:
                x += '='* (4 - missing_padding)
            return base64.b64decode(x).decode('utf-8')
        except UnicodeDecodeError:
            try:
                return base64.b64decode(x).decode('ISO-8859-1')  # replace with the correct encoding
            except Exception:
                return x  # or handle the error in another way
    else:
        return x

In [31]:
for column in decode_column:
    data[column] = data[column].apply(decode_base64)

In [32]:
data[decode_column].head()

Unnamed: 0_level_0,Open Hours,Services
Org ID,Unnamed: 1_level_1,Unnamed: 2_level_1
193932,,Welfare and pensions support for ex-servicemen...
193933,,Welfare and pensions support for ex-servicemen...
193934,Mon 12:00 PM - 6:00 PM\r\\nTues 5:30 PM - 9:00...,Welfare and pensions support for ex-servicemen...
193935,,Welfare and pensions support for ex-servicemen...
193936,"Sat 4pm - 8pm, Sun and public holidays 2pm - 8pm",Welfare and pensions support for ex-servicemen...


## Export to new csv file

In [13]:
data.to_csv("decoded_sacom.csv")

## Data Cleaning

In [14]:
data["S State"].value_counts()

S State
South Australia                 14300
Victoria                           24
New South Wales                    15
Queensland                          7
Australian Capital Territory        4
Northern Territory                  3
Name: count, dtype: int64

In [15]:
data["Services"].value_counts()

Services
Church services and pastoral care                                                                                                                                                                                                                                                                                                                                                              170
Police station                                                                                                                                                                                                                                                                                                                                                                                 118
Postal services\r\\nBanking services\r\\nPO boxes                                                                                                                                                                        

In [44]:
data["Council"].value_counts().tail(10)

Council
ACT                             11
QLD                              9
nil                              6
APY Lands                        4
Yalata Community                 3
NT                               1
0                                1
Maralinga Tjarutja Community     1
Nepabunna Community Council      1
Gerard Community Council         1
Name: count, dtype: int64

In [42]:
data[data['Council'] == 'nil']

Unnamed: 0_level_0,Org Name,AKA,Acronym,Former Name,S Street Addr 1,S Street Addr 2,S Suburb,S State,S Postcode,Phone,...,Toilets Access,Disabled Parking,Services,ABN,Local Community dir,Adelaide Hills dir,Onkaparinga dir,Subjects,Primary Category,Council
Org ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
207751,Adelaide Yoga Meditation Centre,,,,"Lokananda, 9 Main St",,Point Pass,South Australia,5374,08 8581 1758++,...,,,Yoga classes\r\\nMeditation sessions and cours...,,Yoga,,,Meditation----Yoga,Health & Disability,nil
212718,Parawa Agricultural Bureau Landcare Group,Landcare - Parawa,,,,,Parawa,South Australia,5203,08 8598 0223++,...,,,Landcare group,57647094857.0,Environmental Groups,,,Bushland Regeneration----Conservation----Envir...,Environment & Heritage,nil
216524,Meals on Wheels SA - Peterborough,,MoWSA,,23 Hurlstone St,,Peterborough,South Australia,5422,08 8271 8700++Head Office----08 8651 0421++---...,...,,,Home delivered meals - hot 3 course meal of so...,76069457196.0,Home Delivered Meals,,,Food Services----Home Care & Support----Home D...,Personal & Family Support,nil
218688,Hamley Bridge Hospital Women's Auxiliary (in r...,,,,,,Hamley Bridge,South Australia,5401,08 8528 2194++President,...,,,Fundraising for purchase of equipment,,Fund Raising,,,Auxiliaries----Fund Raising,Community Organisation & Development,nil
229315,Eldercare Home Care Yorke Peninsula,,,,Centenary Ave,,Maitland,South Australia,5573,08 8832 2822++General enquiries,...,1.0,1.0,</li>General services include:\r\\n24 hour on-...,63758127271.0,Home Care & Support,,,Cleaning Services----Community Aged Care Packa...,Health & Disability,nil
232972,Waikerie District Ratepayers and Residents Ass...,,WDRRA,,,,Waikerie,South Australia,5330,,...,,,Resident Action Group,,Resident Action Groups,,,Community Projects----Resident Action Groups,Community Organisation & Development,nil


In [17]:
data["Local Community dir"].value_counts()

Local Community dir
Primary Schools         459
Child Care              353
Service Clubs           304
Post Offices            295
Pre-School Education    275
                       ... 
Leadlight                 1
Papercrafts               1
Unitarian                 1
Slot Car Racing           1
LETS                      1
Name: count, Length: 399, dtype: int64

In [34]:
data["Disabled Parking"].value_counts()

Disabled Parking
1.0    3527
0.0     538
Name: count, dtype: int64

In [18]:
data["Subjects"].value_counts()

Subjects
Post Offices                                                                                                                                          302
Police Stations                                                                                                                                       115
Halls For Hire                                                                                                                                        105
Cricket                                                                                                                                               102
Tennis                                                                                                                                                 98
                                                                                                                                                     ... 
Employment Assistance Programs----Employment Services----Job Search

## New CSV data is well cleaned