In [1]:
# Import Path (for building file paths) and the Pandas library
from pathlib import Path
import pandas as pd

In [2]:
# Create a reference to the desired CSV file
csv_path = Path('Resources/Road_Stopping_Places.csv')

# Load the CSV contents into a Pandas DataFrame
df = pd.read_csv(filepath_or_buffer=csv_path)

# Display the first five rows of data as the cell output
df.head(n=5)

Unnamed: 0,X,Y,OBJECTID,ROAD,ROAD_NAME,COMMON_USAGE_NAME,START_SLK,END_SLK,CWY,START_TRUE_DIST,...,CONSTRUCTED_SHELTER,NATURAL_SHADE,LIGHTING_PRESENT,NUMBER_OF_BINS,NUMBER_OF_TOILETS,ACCESSIBLE_TOILETS,EFFLUENT_DUMP_SITE,NUMBER_OF_TABLES,ACCESSIBLE_TABLES,ROUTE_NE_ID
0,119.797373,-23.45471,125858,H006,Great Northern Hwy,Great Northern Hwy,1151.37,1151.37,Single,1145.18,...,Yes,Yes,Powered Site,1.0,1.0,Yes,No,'1',Yes,247653
1,114.720162,-27.503224,125859,H007,North West Coastal Hwy,North West Coastal Hwy,151.04,151.04,Single,151.01,...,No,No,,3.0,0.0,No,No,0,No,247803
2,115.257054,-29.848984,125860,H004,Brand Hwy,Brand Hwy,220.91,220.91,Single,218.2,...,No,Yes,,2.0,0.0,No,No,0,No,247578
3,120.627016,-33.665446,125861,H008,South Coast Hwy,South Coast Hwy,347.61,347.61,Single,348.24,...,No,No,,2.0,0.0,No,No,1,No,247842
4,116.217742,-22.660976,125862,M029,Nanutarra Munjina,Nanutarra Munjina Rd,89.11,89.11,Single,89.11,...,No,No,,1.0,1.0,Yes,No,0,No,247800


In [3]:
# Check the data types: dtypes lists the dtype Pandas assigned to each column of df
df.dtypes

X                      float64
Y                      float64
OBJECTID                 int64
ROAD                    object
ROAD_NAME               object
COMMON_USAGE_NAME       object
START_SLK              float64
END_SLK                float64
CWY                     object
START_TRUE_DIST        float64
END_TRUE_DIST          float64
NETWORK_TYPE            object
XSP                     object
RA_NO                    int64
RA_NAME                 object
LG_NO                    int64
LG_NAME                 object
REST_AREA_TYPE          object
STAY_24_HOUR            object
REST_AREA_NAME          object
OWNERSHIP               object
INFORMATION_BOARD       object
SCENIC_LOOKOUT          object
COMMEMORATION_WAY       object
SURFACE                 object
SURFACE_TYPE            object
SURFACE_AREA           float64
CONSTRUCTED_SHELTER     object
NATURAL_SHADE           object
LIGHTING_PRESENT        object
NUMBER_OF_BINS         float64
NUMBER_OF_TOILETS      float64
ACCESSIB

In [4]:
# Check the names of all the columns and see if there are any rows with missing data:
# count() reports the number of non-null values per column, so any column whose count
# is lower than the others contains missing data
df.count()

X                      1679
Y                      1679
OBJECTID               1679
ROAD                   1679
ROAD_NAME              1679
COMMON_USAGE_NAME      1679
START_SLK              1679
END_SLK                1679
CWY                    1679
START_TRUE_DIST        1679
END_TRUE_DIST          1679
NETWORK_TYPE           1679
XSP                    1679
RA_NO                  1679
RA_NAME                1679
LG_NO                  1679
LG_NAME                1679
REST_AREA_TYPE         1679
STAY_24_HOUR           1679
REST_AREA_NAME          266
OWNERSHIP              1679
INFORMATION_BOARD      1679
SCENIC_LOOKOUT         1679
COMMEMORATION_WAY       133
SURFACE                1574
SURFACE_TYPE            641
SURFACE_AREA            637
CONSTRUCTED_SHELTER    1679
NATURAL_SHADE          1679
LIGHTING_PRESENT         82
NUMBER_OF_BINS         1558
NUMBER_OF_TOILETS      1549
ACCESSIBLE_TOILETS     1679
EFFLUENT_DUMP_SITE     1679
NUMBER_OF_TABLES       1553
ACCESSIBLE_TABLES   

In [5]:
# Replace the terse source column names with descriptive ones:
# "X" -> "LONGITUDE", "Y" -> "LATITUDE", "RA_NAME" -> "RESPONSIBILITY_AREA",
# and "LG_NAME" -> "LOCAL_GOVERNMENT_NAME"
column_renames = {
    'X': 'LONGITUDE',
    'Y': 'LATITUDE',
    'RA_NAME': 'RESPONSIBILITY_AREA',
    'LG_NAME': 'LOCAL_GOVERNMENT_NAME',
}
df_updated = df.rename(columns=column_renames)

# Preview the first two rows to confirm the new column names
df_updated.head(2)

Unnamed: 0,LONGITUDE,LATITUDE,OBJECTID,ROAD,ROAD_NAME,COMMON_USAGE_NAME,START_SLK,END_SLK,CWY,START_TRUE_DIST,...,CONSTRUCTED_SHELTER,NATURAL_SHADE,LIGHTING_PRESENT,NUMBER_OF_BINS,NUMBER_OF_TOILETS,ACCESSIBLE_TOILETS,EFFLUENT_DUMP_SITE,NUMBER_OF_TABLES,ACCESSIBLE_TABLES,ROUTE_NE_ID
0,119.797373,-23.45471,125858,H006,Great Northern Hwy,Great Northern Hwy,1151.37,1151.37,Single,1145.18,...,Yes,Yes,Powered Site,1.0,1.0,Yes,No,'1',Yes,247653
1,114.720162,-27.503224,125859,H007,North West Coastal Hwy,North West Coastal Hwy,151.04,151.04,Single,151.01,...,No,No,,3.0,0.0,No,No,0,No,247803


In [7]:
# Reorder and reduce to the columns that will help us identify accessibility concerns and make improvements:
# COMMON_USAGE_NAME, RESPONSIBILITY_AREA, LOCAL_GOVERNMENT_NAME,
# REST_AREA_TYPE, STAY_24_HOUR, OWNERSHIP, SURFACE, SURFACE_TYPE,
# CONSTRUCTED_SHELTER, NATURAL_SHADE, NUMBER_OF_BINS, NUMBER_OF_TOILETS,
# ACCESSIBLE_TOILETS, NUMBER_OF_TABLES, ACCESSIBLE_TABLES, LATITUDE, LONGITUDE
#
# .copy() makes df_reduced an independent DataFrame instead of a selection derived
# from df_updated, so the cleaning steps applied to df_reduced later in the notebook
# (fillna / dropna) do not trigger SettingWithCopyWarning on pandas versions
# without Copy-on-Write.
df_reduced = df_updated[['COMMON_USAGE_NAME', 'RESPONSIBILITY_AREA', 'LOCAL_GOVERNMENT_NAME',
                         'REST_AREA_TYPE', 'STAY_24_HOUR', 'OWNERSHIP',
                         'SURFACE', 'SURFACE_TYPE', 'CONSTRUCTED_SHELTER',
                         'NATURAL_SHADE', 'NUMBER_OF_BINS', 'NUMBER_OF_TOILETS',
                         'ACCESSIBLE_TOILETS', 'NUMBER_OF_TABLES', 'ACCESSIBLE_TABLES',
                         'LATITUDE', 'LONGITUDE']].copy()

# Preview the first two rows of the reduced DataFrame
df_reduced.head(2)

Unnamed: 0,COMMON_USAGE_NAME,RESPONSIBILITY_AREA,LOCAL_GOVERNMENT_NAME,REST_AREA_TYPE,STAY_24_HOUR,OWNERSHIP,SURFACE,SURFACE_TYPE,CONSTRUCTED_SHELTER,NATURAL_SHADE,NUMBER_OF_BINS,NUMBER_OF_TOILETS,ACCESSIBLE_TOILETS,NUMBER_OF_TABLES,ACCESSIBLE_TABLES,LATITUDE,LONGITUDE
0,Great Northern Hwy,Pilbara,Meekatharra,Roadhouse,Yes,Other,Surfaced,,Yes,Yes,1.0,1.0,Yes,'1',Yes,-23.45471,119.797373
1,North West Coastal Hwy,Mid West-Gascoyne,Northampton,Minor Rest Area,No,Main Roads,Unsurfaced,,No,No,3.0,0.0,No,0,No,-27.503224,114.720162


In [9]:
# Count the non-null values in each column of the reduced DataFrame
df_reduced.count()

COMMON_USAGE_NAME        1679
RESPONSIBILITY_AREA      1679
LOCAL_GOVERNMENT_NAME    1679
REST_AREA_TYPE           1679
STAY_24_HOUR             1679
OWNERSHIP                1679
SURFACE                  1574
SURFACE_TYPE              641
CONSTRUCTED_SHELTER      1679
NATURAL_SHADE            1679
NUMBER_OF_BINS           1558
NUMBER_OF_TOILETS        1549
ACCESSIBLE_TOILETS       1679
NUMBER_OF_TABLES         1553
ACCESSIBLE_TABLES        1679
LATITUDE                 1679
LONGITUDE                1679
dtype: int64

In [None]:
# Fill NA values for the column "SURFACE_TYPE" with "Unknown".
# The result is reassigned rather than applied with inplace=True, so this cell
# does not mutate in place a DataFrame that was derived from df_updated.
df_reduced = df_reduced.fillna({'SURFACE_TYPE': 'Unknown'})

In [13]:
# Re-count the non-null values per column to check the effect of filling "SURFACE_TYPE"
df_reduced.count()

COMMON_USAGE_NAME        1679
RESPONSIBILITY_AREA      1679
LOCAL_GOVERNMENT_NAME    1679
REST_AREA_TYPE           1679
STAY_24_HOUR             1679
OWNERSHIP                1679
SURFACE                  1574
SURFACE_TYPE             1679
CONSTRUCTED_SHELTER      1679
NATURAL_SHADE            1679
NUMBER_OF_BINS           1558
NUMBER_OF_TOILETS        1549
ACCESSIBLE_TOILETS       1679
NUMBER_OF_TABLES         1553
ACCESSIBLE_TABLES        1679
LATITUDE                 1679
LONGITUDE                1679
dtype: int64

In [None]:
# Remove any additional rows with missing data.
# The result is reassigned rather than applied with inplace=True, so this cell
# does not mutate in place a DataFrame that was derived from df_updated.
df_reduced = df_reduced.dropna()

In [16]:
# Re-count the non-null values per column to check the effect of dropping rows with missing data
df_reduced.count()

COMMON_USAGE_NAME        1450
RESPONSIBILITY_AREA      1450
LOCAL_GOVERNMENT_NAME    1450
REST_AREA_TYPE           1450
STAY_24_HOUR             1450
OWNERSHIP                1450
SURFACE                  1450
SURFACE_TYPE             1450
CONSTRUCTED_SHELTER      1450
NATURAL_SHADE            1450
NUMBER_OF_BINS           1450
NUMBER_OF_TOILETS        1450
ACCESSIBLE_TOILETS       1450
NUMBER_OF_TABLES         1450
ACCESSIBLE_TABLES        1450
LATITUDE                 1450
LONGITUDE                1450
dtype: int64

In [None]:
# Filter the data to road stops that are owned by "Main Roads" and
# that are unsurfaced, do not have accessible toilets, or do not have accessible tables,
# so that Main Roads knows which road stops need accessibility improvements

In [None]:
# Count how many inaccessible road stops are in each local government area


In [None]:
# Convert the lg_counts Series into a DataFrame


In [None]:
# Convert the column name into "Number of Road Stops to Improve Accessibility"


In [None]:
# We want to calculate the total number of tables, but there is a problem.
# The problem can be seen by examining the data types within the DataFrame of inaccessible road stops


In [None]:
# Use astype() to convert a column's data into an integer


In [None]:
# Calculate the number of tables currently available at inaccessible Main Roads road stops


In [None]:
# Save the clean DataFrame to a CSV file without the header. 
