In [8]:
# Importing necessary packages

import pandas as pd
import numpy as np
import re
import geocoder
from geopy.geocoders import Nominatim
import warnings
warnings.filterwarnings("ignore")

In [9]:
#Loading the parks_updated csv file
df = pd.read_csv('parks_updated.csv')

# creating a dataframe df1 with only place name,latitude and longitude columns
df1 = df[['Place Name','Latitude','Longitude']]

# showing only first 5 rows
df1.head(5)

Unnamed: 0,Place Name,Latitude,Longitude
0,13TH BEACH SURF LIFE SAVING CLUB,-38.28273,144.452556
1,A B SHAW RESERVE,-37.86375,144.784306
2,A C CAMPBELL RESERVE,-38.239858,145.031091
3,A E BROWN RESERVE,-38.027361,145.354861
4,A E SHEPHERD GARDENS,-37.789583,144.868472


In [10]:
# checking for null values
print(df1.isnull().sum())

Place Name     0
Latitude      64
Longitude     64
dtype: int64


In [11]:
# Deleting the records where both latitude and longitude are null
df1 = df1.dropna(axis=0, subset=['Latitude','Longitude'])

In [12]:
# checking for null values
print(df1.isnull().sum())

Place Name    0
Latitude      0
Longitude     0
dtype: int64


In [13]:
# creating a dataframe df2 with only place name column
df2=df1[['Place Name']]
df2.head(5)

Unnamed: 0,Place Name
0,13TH BEACH SURF LIFE SAVING CLUB
1,A B SHAW RESERVE
2,A C CAMPBELL RESERVE
3,A E BROWN RESERVE
4,A E SHEPHERD GARDENS


In [14]:
# creating a dataframe df3 with only Latitude and Longitude columns
df3 = df1[['Latitude','Longitude']]
df3.head(5)

Unnamed: 0,Latitude,Longitude
0,-38.28273,144.452556
1,-37.86375,144.784306
2,-38.239858,145.031091
3,-38.027361,145.354861
4,-37.789583,144.868472


In [15]:
# reverse geocoding using geolocator.reverse to get the address from latitude and longitude
geolocator = Nominatim(user_agent="monash university research project_moms care")

df3['address'] = df3.apply(lambda row: geolocator.reverse((row['Latitude'], row['Longitude']),timeout=30), axis=1)
df3

Unnamed: 0,Latitude,Longitude,address
0,-38.282730,144.452556,"(Thirteenth Beach Road, Connewarre, Barwon Hea..."
1,-37.863750,144.784306,"(Hall Avenue, Altona Meadows, City of Hobsons ..."
2,-38.239858,145.031091,"(Prince Street, Mornington, Shire of Morningto..."
3,-38.027361,145.354861,"(Arch Brown Reserve, Buchanan Road, Berwick, H..."
4,-37.789583,144.868472,"(Suffolk Street, Maidstone, City of Maribyrnon..."
5,-38.326025,143.078906,"(Peter Street, Cobden, Shire of Corangamite, B..."
6,-37.860856,144.813493,"(Altona West Kindergarten, Lindwood Avenue, Al..."
7,-37.728472,144.873472,"(Kingsley Road, Airport West, City of Moonee V..."
8,-37.913684,147.716689,"(King Street, Paynesville, Shire of East Gipps..."
9,-36.572917,143.873472,"(Hospital Street, Inglewood, Shire of Loddon, ..."


In [16]:
#Exporting to csv
df3.to_csv('parks_updated_final.csv',index = False)

In [35]:
#Loading the parks_updated_final csv file
df4 = pd.read_csv('parks_updated_final.csv')
df4

Unnamed: 0,Latitude,Longitude,address
0,-38.282730,144.452556,"Thirteenth Beach Road, Connewarre, Barwon Head..."
1,-37.863750,144.784306,"Hall Avenue, Altona Meadows, City of Hobsons B..."
2,-38.239858,145.031091,"Prince Street, Mornington, Shire of Mornington..."
3,-38.027361,145.354861,"Arch Brown Reserve, Buchanan Road, Berwick, Ha..."
4,-37.789583,144.868472,"Suffolk Street, Maidstone, City of Maribyrnong..."
5,-38.326025,143.078906,"Peter Street, Cobden, Shire of Corangamite, Ba..."
6,-37.860856,144.813493,"Altona West Kindergarten, Lindwood Avenue, Alt..."
7,-37.728472,144.873472,"Kingsley Road, Airport West, City of Moonee Va..."
8,-37.913684,147.716689,"King Street, Paynesville, Shire of East Gippsl..."
9,-36.572917,143.873472,"Hospital Street, Inglewood, Shire of Loddon, L..."


In [36]:
# splitting the address column into address,country,postcode columns
df4[['address','country']] = df4['address'].str.rsplit(pat=',',expand=True, n=1)
df4[['address','postcode']] = df4['address'].str.rsplit(pat=',',expand=True, n=1)
df4

Unnamed: 0,Latitude,Longitude,address,country,postcode
0,-38.282730,144.452556,"Thirteenth Beach Road, Connewarre, Barwon Head...",Australia,3227
1,-37.863750,144.784306,"Hall Avenue, Altona Meadows, City of Hobsons B...",Australia,3028
2,-38.239858,145.031091,"Prince Street, Mornington, Shire of Mornington...",Australia,3931
3,-38.027361,145.354861,"Arch Brown Reserve, Buchanan Road, Berwick, Ha...",Australia,3806
4,-37.789583,144.868472,"Suffolk Street, Maidstone, City of Maribyrnong...",Australia,3019
5,-38.326025,143.078906,"Peter Street, Cobden, Shire of Corangamite, Ba...",Australia,Victoria
6,-37.860856,144.813493,"Altona West Kindergarten, Lindwood Avenue, Alt...",Australia,3018
7,-37.728472,144.873472,"Kingsley Road, Airport West, City of Moonee Va...",Australia,3042
8,-37.913684,147.716689,"King Street, Paynesville, Shire of East Gippsl...",Australia,3880
9,-36.572917,143.873472,"Hospital Street, Inglewood, Shire of Loddon, L...",Australia,3517


In [37]:
# merging two dataframes df4 and df2
df5=df4.join(df2)
df5

Unnamed: 0,Latitude,Longitude,address,country,postcode,Place Name
0,-38.282730,144.452556,"Thirteenth Beach Road, Connewarre, Barwon Head...",Australia,3227,13TH BEACH SURF LIFE SAVING CLUB
1,-37.863750,144.784306,"Hall Avenue, Altona Meadows, City of Hobsons B...",Australia,3028,A B SHAW RESERVE
2,-38.239858,145.031091,"Prince Street, Mornington, Shire of Mornington...",Australia,3931,A C CAMPBELL RESERVE
3,-38.027361,145.354861,"Arch Brown Reserve, Buchanan Road, Berwick, Ha...",Australia,3806,A E BROWN RESERVE
4,-37.789583,144.868472,"Suffolk Street, Maidstone, City of Maribyrnong...",Australia,3019,A E SHEPHERD GARDENS
5,-38.326025,143.078906,"Peter Street, Cobden, Shire of Corangamite, Ba...",Australia,Victoria,A H BARRETT RESERVE
6,-37.860856,144.813493,"Altona West Kindergarten, Lindwood Avenue, Alt...",Australia,3018,A H FORD RESERVE
7,-37.728472,144.873472,"Kingsley Road, Airport West, City of Moonee Va...",Australia,3042,A J DAVIS RESERVE
8,-37.913684,147.716689,"King Street, Paynesville, Shire of East Gippsl...",Australia,3880,A J FREEMAN RESERVE
9,-36.572917,143.873472,"Hospital Street, Inglewood, Shire of Loddon, L...",Australia,3517,A J MITCHELL PARK


In [38]:
# dropping the duplicates
df5 = df5.drop_duplicates(subset=['Place Name'], keep='first')
df5

Unnamed: 0,Latitude,Longitude,address,country,postcode,Place Name
0,-38.282730,144.452556,"Thirteenth Beach Road, Connewarre, Barwon Head...",Australia,3227,13TH BEACH SURF LIFE SAVING CLUB
1,-37.863750,144.784306,"Hall Avenue, Altona Meadows, City of Hobsons B...",Australia,3028,A B SHAW RESERVE
2,-38.239858,145.031091,"Prince Street, Mornington, Shire of Mornington...",Australia,3931,A C CAMPBELL RESERVE
3,-38.027361,145.354861,"Arch Brown Reserve, Buchanan Road, Berwick, Ha...",Australia,3806,A E BROWN RESERVE
4,-37.789583,144.868472,"Suffolk Street, Maidstone, City of Maribyrnong...",Australia,3019,A E SHEPHERD GARDENS
5,-38.326025,143.078906,"Peter Street, Cobden, Shire of Corangamite, Ba...",Australia,Victoria,A H BARRETT RESERVE
6,-37.860856,144.813493,"Altona West Kindergarten, Lindwood Avenue, Alt...",Australia,3018,A H FORD RESERVE
7,-37.728472,144.873472,"Kingsley Road, Airport West, City of Moonee Va...",Australia,3042,A J DAVIS RESERVE
8,-37.913684,147.716689,"King Street, Paynesville, Shire of East Gippsl...",Australia,3880,A J FREEMAN RESERVE
9,-36.572917,143.873472,"Hospital Street, Inglewood, Shire of Loddon, L...",Australia,3517,A J MITCHELL PARK


In [39]:
# converting postcode to int datatype and filling null values with '0'
df5.postcode = pd.to_numeric(df5.postcode, errors='coerce').fillna(0).astype(np.int64)
df5['postcode'].dtype

dtype('int64')

In [40]:
print(df5.isnull().sum())

Latitude      0
Longitude     0
address       0
country       0
postcode      0
Place Name    1
dtype: int64


In [41]:
# dropping the country column
df5 = df5.drop(['country'], axis=1)

In [42]:
#Renaming columns
df5.columns = ['lat', 'long', 'address', 'postcode','name']

In [43]:
#Exporting to csv
df5.to_csv('parks_updated_final_version.csv',index = False)