Environment: Python 3.6.4 and Jupyter notebook

Libraries Used :
* pandas (for dataframe, included in Anaconda Python 3.6.4) 
* re (for regular expression, included in Anaconda Python 3.6.4) 
* numpy (for numpy array, included in Anaconda Python 3.6.4) 

## 1.  Import libraries 

In [43]:
# Importing necessary packages
import pandas as pd
import numpy as np

## 2. Parse CSV File

In [44]:
# Loading the hospital_locations csv file
csv_file = pd.read_csv('Hospital_Locations.csv')


# dropping the unrequired columns
csv_file = csv_file.drop(['FID', 'OpsName','Type', 'RoadSuffix', 'CampusCode','VicgovRegi','ServiceNam'], axis=1)


# Renaming columns
csv_file.columns = ['long', 'lat', 'Hospital', 'Street Number', 'Road Name', 'Road Type','LGAName','Suburb', 'postcode', 'State']
#lat, long, postcode, name, address
# Deleting the record which does not belongs to VIC state
csv_file = csv_file[csv_file.State != 'NSW']

# printing first 2o rows
csv_file.head(20)



Unnamed: 0,long,lat,Hospital,Street Number,Road Name,Road Type,LGAName,Suburb,postcode,State
0,144.983399,-37.809681,East Melbourne Specialist Day Hospital,23,CLARENDON,STREET,MELBOURNE,,3002,VIC
1,145.1493,-37.9514,Windsor Avenue Day Surgery,17,WINDSOR,AVENUE,GREATER DANDENONG,,3171,VIC
2,145.344653,-38.03458,Hyperbaric Health Wound Centre Berwick,3,GIBB,STREET,CASEY,,3806,VIC
3,143.832286,-37.540553,Ballarat Day Procedure Centre,1117-1123,HOWITT,STREET,BALLARAT,,3355,VIC
4,145.143379,-38.148189,Bayside Day Procedure and Specialist Centre,141,CRANBOURNE,ROAD,FRANKSTON,,3199,VIC
5,144.2615,-36.7619,Bendigo Day Surgery,1-7,CHUM,STREET,GREATER BENDIGO,,3555,VIC
6,145.041233,-37.920125,Bentleigh Surgicentre,157,JASPER,ROAD,GLEN EIRA,,3204,VIC
7,145.346519,-38.033496,Berwick Surgicentre,22,LANGMORE,LANE,CASEY,,3806,VIC
8,144.991817,-37.91256,Brighton Plastic Surgery Centre,206,NEW,STREET,BAYSIDE,,3186,VIC
9,145.061879,-37.692573,Victorian Gut Centre,119,PLENTY,ROAD,WHITTLESEA,,3083,VIC


In [45]:
#Eliminating strings like within braces and comma in the hospital column and storing the required information in the hospital name column

csv_file['name'] = csv_file['Hospital'].str.split(',').str[0]
csv_file['name'] = csv_file['name'].str.split('(').str[0]
csv_file = csv_file.drop('Hospital', axis=1)


In [46]:
# checking the unique values in suburb column
csv_file['Suburb'].unique()


# replacing the '' to 'A' first and then 'A' to nan values
csv_file['Suburb'] = csv_file['Suburb'].replace(' ', 'A')
csv_file['Suburb'] = csv_file['Suburb'].replace('A', np.nan)


In [47]:
# checking the number of null values in the csv file in all the columns
print(csv_file.isnull().sum())

long               0
lat                0
Street Number      0
Road Name          0
Road Type          0
LGAName            0
Suburb           175
postcode           0
State              0
name               0
dtype: int64


In [48]:
# filling the null values of suburb column
# groupby postcode, and transform('first') to get rid of nan values:
csv_file['Suburb'] = csv_file.groupby("postcode")["Suburb"].transform("first")

In [49]:
# filling the remaining missing value of suburb colum  with the matching value of LGAName column
csv_file['Suburb'] = csv_file['Suburb'].fillna(csv_file['LGAName'])

In [50]:
# checking the number of null values in the csv file in all the columns
print(csv_file.isnull().sum())

long             0
lat              0
Street Number    0
Road Name        0
Road Type        0
LGAName          0
Suburb           0
postcode         0
State            0
name             0
dtype: int64


In [51]:
# creating address column by merging street number,roadname,roadtype and suburb
csv_file['address'] = csv_file['Street Number'].fillna('').astype(str) + ',' + csv_file['Road Name'].fillna('').astype(str) + csv_file['Road Type'].fillna('').astype(str)+ ',' + csv_file['Suburb'].fillna('').astype(str)
csv_file['address'] = csv_file['address'].str.strip(',')

# dropping columns
csv_file = csv_file.drop(['Street Number', 'Road Name','Road Type', 'LGAName', 'Suburb','State'], axis=1)



In [52]:
csv_file['postcode'].dtype
csv_file['lat'].dtype
csv_file['long'].dtype

dtype('float64')

In [53]:
#Exporting to csv
csv_file.to_csv('hospital_locations_final.csv',index = False)