In [1]:
import pandas as pd
import os
import numpy as np
import plotly.express as px

In [2]:
directory = os.getcwd()

os.chdir(directory)

schooling_transport_share_df = pd.read_csv(r"..\data\onemap_demographic\getModeOfTransportSchool_data.csv")
working_transport_share_df = pd.read_csv(r"..\data\onemap_demographic\getModeOfTransportWork_data.csv")

In [3]:
schooling_transport_share_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 21 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   planning_area               55 non-null     object
 1   bus                         55 non-null     int64 
 2   mrt                         55 non-null     int64 
 3   mrt_bus                     55 non-null     int64 
 4   mrt_car                     55 non-null     int64 
 5   mrt_other                   55 non-null     int64 
 6   taxi                        55 non-null     int64 
 7   car                         55 non-null     int64 
 8   pvt_chartered_bus           55 non-null     int64 
 9   lorry_pickup                55 non-null     int64 
 10  motorcycle_scooter          55 non-null     int64 
 11  others                      55 non-null     int64 
 12  no_transport_required       55 non-null     int64 
 13  other_combi_mrt_or_bus      55 non-null     int64 
 

In [4]:
cond = schooling_transport_share_df.select_dtypes(include = int).sum(axis=0) == 0
columns_to_drop = list(cond[cond].index)
schooling_transport_share_df = schooling_transport_share_df.drop(columns=(columns_to_drop + ["planning_area","year","planningArea"]))
schooling_transport_share_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype
---  ------                      --------------  -----
 0   bus                         55 non-null     int64
 1   car                         55 non-null     int64
 2   lorry_pickup                55 non-null     int64
 3   motorcycle_scooter          55 non-null     int64
 4   others                      55 non-null     int64
 5   no_transport_required       55 non-null     int64
 6   mrt_lrt_only                55 non-null     int64
 7   mrt_lrt_and_bus             55 non-null     int64
 8   other_combi_mrt_lrt_or_bus  55 non-null     int64
 9   taxi_pvt_hire_car_only      55 non-null     int64
 10  pvt_chartered_bus_van       55 non-null     int64
dtypes: int64(11)
memory usage: 4.9 KB


In [5]:
school_transport_breakdown_by_type = schooling_transport_share_df.sum(axis = 0)

In [6]:
print(working_transport_share_df)

              planning_area    bus  mrt  mrt_bus  mrt_car  mrt_other  taxi  \
0                     Bedok  25560    0        0        0          0     0   
1               Bukit Timah   2639    0        0        0          0     0   
2               Bukit Batok  13706    0        0        0          0     0   
3               Bukit Merah  18461    0        0        0          0     0   
4   Central Water Catchment      0    0        0        0          0     0   
5             Downtown Core    206    0        0        0          0     0   
6                    Changi      0    0        0        0          0     0   
7                Changi Bay      0    0        0        0          0     0   
8              Lim Chu Kang      0    0        0        0          0     0   
9                  Boon Lay      0    0        0        0          0     0   
10  Western Water Catchment      0    0        0        0          0     0   
11                Woodlands  15314    0        0        0       

In [7]:
cond = working_transport_share_df.select_dtypes(include = int).sum(axis=0) == 0
columns_to_drop = list(cond[cond].index)
working_transport_share_df = working_transport_share_df.drop(columns=(columns_to_drop + ["planning_area","year","planningArea"]))
working_transport_share_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype
---  ------                      --------------  -----
 0   bus                         55 non-null     int64
 1   car                         55 non-null     int64
 2   lorry_pickup                55 non-null     int64
 3   motorcycle_scooter          55 non-null     int64
 4   others                      55 non-null     int64
 5   no_transport_required       55 non-null     int64
 6   mrt_lrt_only                55 non-null     int64
 7   mrt_lrt_and_bus             55 non-null     int64
 8   other_combi_mrt_lrt_or_bus  55 non-null     int64
 9   taxi_pvt_hire_car_only      55 non-null     int64
 10  pvt_chartered_bus_van       55 non-null     int64
dtypes: int64(11)
memory usage: 4.9 KB


In [8]:
work_transport_breakdown_by_type = working_transport_share_df.sum(axis = 0)

In [9]:
work_transport_breakdown_by_type

bus                           324083
car                           454872
lorry_pickup                   28739
motorcycle_scooter             73294
others                         34629
no_transport_required         210342
mrt_lrt_only                  284955
mrt_lrt_and_bus               556758
other_combi_mrt_lrt_or_bus     84679
taxi_pvt_hire_car_only         63988
pvt_chartered_bus_van          44726
dtype: int64

In [10]:
transport_types = list(work_transport_breakdown_by_type.index)
transport_breakdown_total = [work_transport_breakdown_by_type[a] + school_transport_breakdown_by_type[a] for a in transport_types]
transport_breakdown_total = pd.concat([pd.Series(transport_types), pd.Series(transport_breakdown_total)], axis = 1,keys = ["Transport_Type", "Total"])
transport_breakdown_total["proportion"] = (transport_breakdown_total["Total"]/transport_breakdown_total["Total"].sum()) *100
transport_breakdown_total = transport_breakdown_total.sort_values(by = "proportion", ascending = False)
transport_breakdown_total["Transport_Type"] = [
    "MRT,LRT & Public Bus Only",
    "Car Only",
    "Public Bus Only",
    "No Transport Required",
    "MRT & LRT Only",
    "Combination of MRT,LRT,Bus & Others",
    "Private Chartered Bus/Van Only",
    "Motorcycle/Scooters Only",
    "Taxi/Private Hired Car Only",
    "Others(Bicycle,Walking,RideSharing etc)",
    "Lorry/Pickup Truck Only"
]

In [31]:
colors = transport_breakdown_total['Transport_Type'].apply(
    lambda x: "LightGreen" if x in ["Combination of MRT,LRT,Bus & Others", "Others(Bicycle,Walking,RideSharing etc)"] else "LightBlue"
)

colors = list(colors)
colors

['LightBlue',
 'LightBlue',
 'LightBlue',
 'LightBlue',
 'LightBlue',
 'LightGreen',
 'LightBlue',
 'LightBlue',
 'LightBlue',
 'LightGreen',
 'LightBlue']

In [32]:
fig = px.bar(transport_breakdown_total,
             y = "Transport_Type", 
             x = "proportion", 
             title = "Proportion of Transport Type in Singapore", 
             labels = {"proportion":"Proportion of Transport Type (%)", "Transport_Type":"Type of Transport"}
             )

fig.update_layout(
    title = "Transportation Types in Singapore(By Proportion)",
    title_x = 0.5,
    title_font=dict(family= "Calibri"),
    showlegend = False,
    xaxis = dict(           # attributes for x axis 
        showline = True,
        showgrid = True,
        linecolor = 'black',
        tickfont = dict(
            family = 'Calibri'
        )
    ),
    yaxis = dict(           # attributes for y axis 
        showline = False,
        showgrid = True,
        linecolor = 'black',
        tickfont = dict(
            family = 'Calibri'
        ),
        categoryorder='total ascending',
        tickmode = "linear",
        dtick = 1.4
    ),
    plot_bgcolor = 'white'
)

fig.update_traces(marker_color = colors,
                  hovertemplate=
                  'Type of Transport: %{y}<br>Proportion of Transport Type (%): %{x:.3f}<extra></extra>')
# fig.update_layout(title_x = 0.5)
# fig.update_yaxes(categoryorder='total ascending',showgrid = False)
# fig.update_layout(showlegend=False)
 
fig.show()