In [1]:
import pandas as pd
import numpy as np
import re
from datetime import datetime

# Reading the form responses
data = pd.read_csv('form_responses.csv')

In [2]:
# Cleaning column names

# General Data
data.rename(columns={'Timestamp' : 'timestamp',
                     'What is your role at the school?': 'role',
                     'How frequently do you use the public transport system (ISB) on campus? ': 'frequency_of_travel',
                     'What is your primary purpose for using the ISB on campus?' : 'primary_purpose',
                     'Which days of the week do you use the ISB?' : 'travel_days',
                     'At what times of the day do you travel using the ISB? (Please only choose the hours you would use the ISB)': 'travel_hours'
                     }, inplace=True)

# Trip 1
data.rename(columns={'ISB Service used': 'ISB_Service',
                     'Where do you board the bus?': 'bus_stop_board',
                     'Where do you alight?' : 'bus_stop_alight',
                     'What day of the week was this trip made?' : 'day_of_the_week',
                     'What time do you typically start your journey?': 'time_start',
                     'What is your typical travel duration using the ISB?': 'travel_duration',
                     'Choose the column that best describes your satisfaction for each of the following.  [Frequency of buses]': 'frequency',
                     'Choose the column that best describes your satisfaction for each of the following.  [Punctuality of buses]': 'punctuality',
                     'Choose the column that best describes your satisfaction for each of the following.  [Cleanliness of buses]': 'cleanliness',
                     'Choose the column that best describes your satisfaction for each of the following.  [Safety on the buses]': 'safety',
                     'Choose the column that best describes your satisfaction for each of the following.  [Bus route coverage]': 'coverage',
                     'How crowded are the buses usually at this timing?': 'crowdedness'
                     }, inplace=True)

columns_to_suffix = range(6,18)
data = data.rename(columns={data.columns[i]: data.columns[i] + '_trip_1' for i in columns_to_suffix})

# Trip 2
data.rename(columns={'ISB Service used.1': 'ISB_Service',
                     'Where do you board the bus?.1': 'bus_stop_board',
                     'Where do you alight?.1' : 'bus_stop_alight',
                     'What day of the week was this trip made?.1' : 'day_of_the_week',
                     'What time do you typically start your journey?.1': 'time_start',
                     'What is your typical travel duration using the ISB?.1': 'travel_duration',
                     'Choose the column that best describes your satisfaction for each of the following.  [Frequency of buses].1': 'frequency',
                     'Choose the column that best describes your satisfaction for each of the following.  [Punctuality of buses].1': 'punctuality',
                     'Choose the column that best describes your satisfaction for each of the following.  [Cleanliness of buses].1': 'cleanliness',
                     'Choose the column that best describes your satisfaction for each of the following.  [Safety on the buses].1': 'safety',
                     'Choose the column that best describes your satisfaction for each of the following.  [Bus route coverage].1': 'coverage',
                     'How crowded are the buses usually at this timing?.1': 'crowdedness'
                     }, inplace=True)

columns_to_suffix = range(18,30)
data = data.rename(columns={data.columns[i]: data.columns[i] + '_trip_2' for i in columns_to_suffix})

# Trip 3
data.rename(columns={'ISB Service used.2': 'ISB_Service',
                     'Where do you board the bus?.2': 'bus_stop_board',
                     'Where do you alight?.2' : 'bus_stop_alight',
                     'What day of the week was this trip made?.2' : 'day_of_the_week',
                     'What time do you typically start your journey?.2': 'time_start',
                     'What is your typical travel duration using the ISB?.2': 'travel_duration',
                     'Choose the column that best describes your satisfaction for each of the following.  [Frequency of buses].2': 'frequency',
                     'Choose the column that best describes your satisfaction for each of the following.  [Punctuality of buses].2': 'punctuality',
                     'Choose the column that best describes your satisfaction for each of the following.  [Cleanliness of buses].2': 'cleanliness',
                     'Choose the column that best describes your satisfaction for each of the following.  [Safety on the buses].2': 'safety',
                     'Choose the column that best describes your satisfaction for each of the following.  [Bus route coverage].2': 'coverage',
                     'How crowded are the buses usually at this timing?.2': 'crowdedness'
                     }, inplace=True)

columns_to_suffix = range(30,42)
data = data.rename(columns={data.columns[i]: data.columns[i] + '_trip_3' for i in columns_to_suffix})

# Preferences & Pain Points
data.rename(columns={'What influences your usage of the ISB over other forms of transportation? Rank each factor from 1st to 5th, 1st being the most important and 5th being the least important. (Please only choose one option for each column) [Convenience]': 'usage_influence_convenience',
                     'What influences your usage of the ISB over other forms of transportation? Rank each factor from 1st to 5th, 1st being the most important and 5th being the least important. (Please only choose one option for each column) [Cost]': 'usage_influence_cost',
                     'What influences your usage of the ISB over other forms of transportation? Rank each factor from 1st to 5th, 1st being the most important and 5th being the least important. (Please only choose one option for each column) [Lack of other transportation options]' : 'usage_influence_lack_of_options',
                     'What influences your usage of the ISB over other forms of transportation? Rank each factor from 1st to 5th, 1st being the most important and 5th being the least important. (Please only choose one option for each column) [Availability of parking]' : 'usage_influence_availability_of_parking',
                     'What influences your usage of the ISB over other forms of transportation? Rank each factor from 1st to 5th, 1st being the most important and 5th being the least important. (Please only choose one option for each column) [Environmental Concerns]' : 'usage_influence_environmental',
                     'Rank the factors you prioritize the most when choosing a bus route from 1st to 6th, 1st being the most important and 6th being the least important. (Please only choose one option for each column) [Frequency of buses]' : 'prioritize_frequency',
                     'Rank the factors you prioritize the most when choosing a bus route from 1st to 6th, 1st being the most important and 6th being the least important. (Please only choose one option for each column) [Punctuality of buses]' : 'prioritize_punctuality',
                     'Rank the factors you prioritize the most when choosing a bus route from 1st to 6th, 1st being the most important and 6th being the least important. (Please only choose one option for each column) [Cleanliness of the buses]' : 'prioritize_cleanliness',
                     'Rank the factors you prioritize the most when choosing a bus route from 1st to 6th, 1st being the most important and 6th being the least important. (Please only choose one option for each column) [Safety of the buses]' : 'prioritize_safety',
                     'Rank the factors you prioritize the most when choosing a bus route from 1st to 6th, 1st being the most important and 6th being the least important. (Please only choose one option for each column) [Bus route coverage]' : 'prioritize_bus_route_coverage',
                     'Rank the factors you prioritize the most when choosing a bus route from 1st to 6th, 1st being the most important and 6th being the least important. (Please only choose one option for each column) [Crowdedness of the bus]': 'prioritize_crowdedness',
                     'What are your top 3 frustrations with the ISB service?' : 'top_3_frustrations',
                     'How often are you not able to get on the bus due to overcrowding?' : 'not_able_to_get_on',
                     'What additional features would make the ISB more appealing to you?  Rank each factor from 1st to 6th, 1st being the most appealing and 6th being the least appealing. (Please only choose one option for each column) [More frequent bus services]': 'additional_features_frequency',
                     'What additional features would make the ISB more appealing to you?  Rank each factor from 1st to 6th, 1st being the most appealing and 6th being the least appealing. (Please only choose one option for each column) [More Seats]' : 'additional_features_seats',
                     'What additional features would make the ISB more appealing to you?  Rank each factor from 1st to 6th, 1st being the most appealing and 6th being the least appealing. (Please only choose one option for each column) [Improved cleanliness]' : 'additional_features_cleanliness',
                     'What additional features would make the ISB more appealing to you?  Rank each factor from 1st to 6th, 1st being the most appealing and 6th being the least appealing. (Please only choose one option for each column) [More comfortable seating]' : 'additional_features_comfortable',
                     'What additional features would make the ISB more appealing to you?  Rank each factor from 1st to 6th, 1st being the most appealing and 6th being the least appealing. (Please only choose one option for each column) [Better route coverage]' : 'additional_features_route_coverage',
                     'What additional features would make the ISB more appealing to you?  Rank each factor from 1st to 6th, 1st being the most appealing and 6th being the least appealing. (Please only choose one option for each column) [Real-time tracking and updates]' : 'additional_features_updates',
                     'Have you faced issues with the quality of information provided about bus services (eg. timing accuracy, route changes)?' : 'issues_with_quality_of_info',
                     'How well does the ISB accommodate special events (eg. Open House, exam season)?': 'special_events',
                     'Do you notice any seasonal changes in ISB quality and capacity?' : 'seasonal_changes',
                     'Specify the seasonal changes in service identified from the previous question. ' : 'seasonal_changes_specific',
                     'What changes would you like to see regarding the ISB system? (Enter NA if you do not wish to see any changes)': 'further_comments'
                    }, inplace=True)

pd.set_option('display.max_columns', None)
data

Unnamed: 0,timestamp,role,frequency_of_travel,primary_purpose,travel_days,travel_hours,ISB_Service_trip_1,bus_stop_board_trip_1,bus_stop_alight_trip_1,day_of_the_week_trip_1,time_start_trip_1,travel_duration_trip_1,frequency_trip_1,punctuality_trip_1,cleanliness_trip_1,safety_trip_1,coverage_trip_1,crowdedness_trip_1,ISB_Service_trip_2,bus_stop_board_trip_2,bus_stop_alight_trip_2,day_of_the_week_trip_2,time_start_trip_2,travel_duration_trip_2,frequency_trip_2,punctuality_trip_2,cleanliness_trip_2,safety_trip_2,coverage_trip_2,crowdedness_trip_2,ISB_Service_trip_3,bus_stop_board_trip_3,bus_stop_alight_trip_3,day_of_the_week_trip_3,time_start_trip_3,travel_duration_trip_3,frequency_trip_3,punctuality_trip_3,cleanliness_trip_3,safety_trip_3,coverage_trip_3,crowdedness_trip_3,usage_influence_convenience,usage_influence_cost,usage_influence_lack_of_options,usage_influence_availability_of_parking,usage_influence_environmental,prioritize_frequency,prioritize_punctuality,prioritize_cleanliness,prioritize_safety,prioritize_bus_route_coverage,prioritize_crowdedness,top_3_frustrations,not_able_to_get_on,additional_features_frequency,additional_features_seats,additional_features_cleanliness,additional_features_comfortable,additional_features_route_coverage,additional_features_updates,issues_with_quality_of_info,special_events,seasonal_changes,seasonal_changes_specific,further_comments
0,10/1/2024 22:17:11,Undergraduate student,Daily,Commute to classes,"Monday, Tuesday, Wednesday, Thursday, Friday","0900 - 1000, 1000 - 1100, 1100 - 1200, 1200 - ...",A2,KR MRT,Ventus,Monday,9:30:00 AM,15 - 20 minutes,Neutral,Neutral,Very Satisfied,Satisfied,Satisfied,3,D2,KR MRT,COM3,Friday,10:30:00 AM,5 - 10 minutes,Neutral,Neutral,Very Satisfied,Satisfied,Satisfied,4.0,D2,COM3,LT27,Tuesday,2:30:00 PM,10 - 15 minutes,Dissatisfied,Dissatisfied,Very Satisfied,Very Satisfied,Satisfied,3.0,1st,2nd,3rd,4th,5th,3rd,2nd,6th,5th,4th,1st,"Frequency of bus services, Capacity of the bus...",Occasionally,1st,4th,6th,5th,2nd,3rd,Yes,3,"No, service is consistent",,More frequent buses during exam periods
1,10/2/2024 1:03:02,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Wednesday, Thursday","0900 - 1000, 1100 - 1200, 1300 - 1400, 1500 - ...",D2,KR MRT,UTown,Monday,9:30:00 AM,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,4,A2,IT,Opp KR MRT,Thursday,5:30:00 PM,15 - 20 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,5.0,D2,UTown,Opp KR MRT,Wednesday,11:30:00 AM,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,5.0,2nd,1st,3rd,5th,4th,2nd,6th,4th,3rd,5th,1st,"Travel time too long, Frequency of bus service...",Frequently,3rd,1st,4th,5th,6th,2nd,No,4,"No, service is consistent",,Bigger bus size
2,10/2/2024 9:18:23,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Wednesday, Thursday","1700 - 1800, 1800 - 1900, 2100 - 2200, 2200 - ...",A2,KR MRT,S17,Thursday,5:50:00 AM,< 5 minutes,Very Satisfied,Satisfied,Satisfied,Satisfied,Very Satisfied,4,A1,S17,Opp KR MRT,Wednesday,8:55:00 AM,< 5 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,3.0,A1,KR MRT,S17,Wednesday,6:55:00 PM,< 5 minutes,Very Satisfied,Very Satisfied,Very Satisfied,Satisfied,Satisfied,4.0,2nd,1st,3rd,4th,5th,1st,6th,5th,4th,3rd,2nd,"Capacity of the bus (Overcrowding), Proximity ...",Occasionally,1st,3rd,6th,4th,3rd,2nd,No,3,"Yes, service improves/worsens (please specify ...",more in exams,na
3,10/2/2024 13:27:16,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Thursday, Friday, Saturday / Sunday","1100 - 1200, 1200 - 1300, 1800 - 1900, 2100 - ...",A1,PGP Terminal,S17,Friday,11:30:00 AM,10 - 15 minutes,Very Satisfied,Very Satisfied,Very Satisfied,Neutral,Very Satisfied,4,D2,PGP Foyer,COM3,Tuesday,2:00:00 PM,15 - 20 minutes,Neutral,Very Satisfied,Very Satisfied,Neutral,Satisfied,3.0,A2,PGP Foyer,Ventus,Monday,11:30:00 AM,15 - 20 minutes,Very dissatisfied,Very Satisfied,Very Satisfied,Neutral,Neutral,5.0,1st,3rd,2nd,5th,4th,1st,4th,5th,6th,3rd,2nd,"Travel time too long, Frequency of bus service...",Occasionally,1st,2nd,6th,5th,3rd,4th,No,3,"No, service is consistent",,More frequent buses to avoid overcrowding
4,10/2/2024 13:58:50,Undergraduate student,1 - 2 days a week,Commute to classes,"Tuesday, Thursday, Friday","0700 - 0800, 1000 - 1100, 1100 - 1200, 1200 - ...",A1,KR MRT,LT27,Tuesday,7:50:00 AM,< 5 minutes,Neutral,Satisfied,Satisfied,Satisfied,Satisfied,5,A2,S17,Opp KR MRT,Thursday,1:05:00 PM,< 5 minutes,Neutral,Satisfied,Satisfied,Satisfied,Satisfied,3.0,D2,KR MRT,LT27,Friday,7:50:00 AM,< 5 minutes,Neutral,Satisfied,Satisfied,Satisfied,Satisfied,5.0,2nd,3rd,1st,5th,4th,2nd,3rd,6th,5th,1st,4th,Capacity of the bus (Overcrowding),Frequently,1st,2nd,6th,5th,3rd,4th,No,1,"Yes, service improves/worsens (please specify ...",worsens during semester break,better capacity management
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,10/18/2024 19:53:24,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Wednesday, Friday","0900 - 1000, 1100 - 1200, 1200 - 1300, 1400 - ...",D1,UTown,BIZ2,Friday,11:40:00 AM,5 - 10 minutes,Satisfied,Very Satisfied,Very Satisfied,Very Satisfied,Very Satisfied,5,A2,Opp KR MRT,Opp HSSML,Monday,9:40:00 AM,10 - 15 minutes,Satisfied,Very Satisfied,Very Satisfied,Very Satisfied,Very Satisfied,3.0,A1,BIZ2,LT27,Monday,11:40:00 AM,10 - 15 minutes,Neutral,Satisfied,Very Satisfied,Very Satisfied,Very Satisfied,5.0,1st,3rd,2nd,5th,4th,1st,2nd,4th,3rd,5th,6th,"Frequency of bus services, Capacity of the bus...",Occasionally,1st,4th,5th,6th,2nd,3rd,No,4,"No, service is consistent",,
73,10/18/2024 22:12:59,Undergraduate student,1 - 2 days a week,Commute to classes,"Monday, Tuesday","0700 - 0800, 1100 - 1200, 1300 - 1400, 1700 - ...",A2,Opp KR MRT,Opp NUSS,Tuesday,11:45:00 AM,10 - 15 minutes,Dissatisfied,Satisfied,Satisfied,Dissatisfied,Neutral,4,,,,,,,,,,,,,,,,,,,,,,,,,1st,2nd,3rd,4th,5th,1st,3rd,6th,5th,2nd,4th,"Availability of seats, Frequency of bus servic...",Occasionally,1st,4th,5th,6th,2nd,3rd,No,3,"Yes, service improves/worsens (please specify ...",lesser buses during break,increase the frequency of buses
74,10/18/2024 22:20:22,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Tuesday, Thursday, Friday","0800 - 0900, 0900 - 1000, 1100 - 1200, 1500 - ...",A1,KR MRT,CLB,Monday,8:45:00 AM,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Neutral,Dissatisfied,2,K,KR MRT,CLB,Tuesday,8:45:00 AM,10 - 15 minutes,Dissatisfied,Satisfied,Satisfied,Neutral,Dissatisfied,2.0,,,,,,,,,,,,,2nd,3rd,1st,5th,4th,1st,4th,6th,5th,3rd,2nd,"Frequency of bus services, Capacity of the bus...",Rarely,1st,4th,6th,5th,2nd,3rd,No,4,"No, service is consistent",,
75,10/18/2024 22:30:15,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Tuesday, Thursday, Friday","0800 - 0900, 1100 - 1200, 1300 - 1400, 1500 - ...",A2,Opp KR MRT,Ventus,Thursday,11:30:00 AM,10 - 15 minutes,Dissatisfied,Satisfied,Satisfied,Satisfied,Dissatisfied,5,,,,,,,,,,,,,,,,,,,,,,,,,2nd,3rd,1st,5th,4th,2nd,4th,6th,5th,3rd,1st,"Availability of seats, Capacity of the bus (Ov...",Rarely,2nd,4th,6th,5th,1st,3rd,No,4,"No, service is consistent",,Have a bus that goes from kr to closer to sde


In [3]:
# Cleaning timings out of range

start_time = pd.to_datetime('07:00:00 AM', format='%I:%M:%S %p').time()
end_time = pd.to_datetime('11:00:00 PM', format='%I:%M:%S %p').time()

def adjust_time_in_range(time):
    if pd.isnull(time):
        return None
    elif not (start_time <= time.time() <= end_time):
        # Swap AM/PM to bring time in the desired range
        adjusted_time = (time + pd.Timedelta(hours=12)) if time.time() < start_time else (time - pd.Timedelta(hours=12))
        return adjusted_time.time()
    else:
        return time.time()

# Convert strings into datetime objects
for i in ['time_start_trip_1', 'time_start_trip_2', 'time_start_trip_3']:
    data[i] = pd.to_datetime(data[i], format='%I:%M:%S %p')
    data[i] = data[i].apply(adjust_time_in_range)


In [4]:
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
data

Unnamed: 0,timestamp,role,frequency_of_travel,primary_purpose,travel_days,travel_hours,ISB_Service_trip_1,bus_stop_board_trip_1,bus_stop_alight_trip_1,day_of_the_week_trip_1,time_start_trip_1,travel_duration_trip_1,frequency_trip_1,punctuality_trip_1,cleanliness_trip_1,safety_trip_1,coverage_trip_1,crowdedness_trip_1,ISB_Service_trip_2,bus_stop_board_trip_2,bus_stop_alight_trip_2,day_of_the_week_trip_2,time_start_trip_2,travel_duration_trip_2,frequency_trip_2,punctuality_trip_2,cleanliness_trip_2,safety_trip_2,coverage_trip_2,crowdedness_trip_2,ISB_Service_trip_3,bus_stop_board_trip_3,bus_stop_alight_trip_3,day_of_the_week_trip_3,time_start_trip_3,travel_duration_trip_3,frequency_trip_3,punctuality_trip_3,cleanliness_trip_3,safety_trip_3,coverage_trip_3,crowdedness_trip_3,usage_influence_convenience,usage_influence_cost,usage_influence_lack_of_options,usage_influence_availability_of_parking,usage_influence_environmental,prioritize_frequency,prioritize_punctuality,prioritize_cleanliness,prioritize_safety,prioritize_bus_route_coverage,prioritize_crowdedness,top_3_frustrations,not_able_to_get_on,additional_features_frequency,additional_features_seats,additional_features_cleanliness,additional_features_comfortable,additional_features_route_coverage,additional_features_updates,issues_with_quality_of_info,special_events,seasonal_changes,seasonal_changes_specific,further_comments
0,10/1/2024 22:17:11,Undergraduate student,Daily,Commute to classes,"Monday, Tuesday, Wednesday, Thursday, Friday","0900 - 1000, 1000 - 1100, 1100 - 1200, 1200 - ...",A2,KR MRT,Ventus,Monday,09:30:00,15 - 20 minutes,Neutral,Neutral,Very Satisfied,Satisfied,Satisfied,3,D2,KR MRT,COM3,Friday,10:30:00,5 - 10 minutes,Neutral,Neutral,Very Satisfied,Satisfied,Satisfied,4.0,D2,COM3,LT27,Tuesday,14:30:00,10 - 15 minutes,Dissatisfied,Dissatisfied,Very Satisfied,Very Satisfied,Satisfied,3.0,1st,2nd,3rd,4th,5th,3rd,2nd,6th,5th,4th,1st,"Frequency of bus services, Capacity of the bus...",Occasionally,1st,4th,6th,5th,2nd,3rd,Yes,3,"No, service is consistent",,More frequent buses during exam periods
1,10/2/2024 1:03:02,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Wednesday, Thursday","0900 - 1000, 1100 - 1200, 1300 - 1400, 1500 - ...",D2,KR MRT,UTown,Monday,09:30:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,4,A2,IT,Opp KR MRT,Thursday,17:30:00,15 - 20 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,5.0,D2,UTown,Opp KR MRT,Wednesday,11:30:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,5.0,2nd,1st,3rd,5th,4th,2nd,6th,4th,3rd,5th,1st,"Travel time too long, Frequency of bus service...",Frequently,3rd,1st,4th,5th,6th,2nd,No,4,"No, service is consistent",,Bigger bus size
2,10/2/2024 9:18:23,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Wednesday, Thursday","1700 - 1800, 1800 - 1900, 2100 - 2200, 2200 - ...",A2,KR MRT,S17,Thursday,17:50:00,< 5 minutes,Very Satisfied,Satisfied,Satisfied,Satisfied,Very Satisfied,4,A1,S17,Opp KR MRT,Wednesday,08:55:00,< 5 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,3.0,A1,KR MRT,S17,Wednesday,18:55:00,< 5 minutes,Very Satisfied,Very Satisfied,Very Satisfied,Satisfied,Satisfied,4.0,2nd,1st,3rd,4th,5th,1st,6th,5th,4th,3rd,2nd,"Capacity of the bus (Overcrowding), Proximity ...",Occasionally,1st,3rd,6th,4th,3rd,2nd,No,3,"Yes, service improves/worsens (please specify ...",more in exams,na
3,10/2/2024 13:27:16,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Thursday, Friday, Saturday / Sunday","1100 - 1200, 1200 - 1300, 1800 - 1900, 2100 - ...",A1,PGP Terminal,S17,Friday,11:30:00,10 - 15 minutes,Very Satisfied,Very Satisfied,Very Satisfied,Neutral,Very Satisfied,4,D2,PGP Foyer,COM3,Tuesday,14:00:00,15 - 20 minutes,Neutral,Very Satisfied,Very Satisfied,Neutral,Satisfied,3.0,A2,PGP Foyer,Ventus,Monday,11:30:00,15 - 20 minutes,Very dissatisfied,Very Satisfied,Very Satisfied,Neutral,Neutral,5.0,1st,3rd,2nd,5th,4th,1st,4th,5th,6th,3rd,2nd,"Travel time too long, Frequency of bus service...",Occasionally,1st,2nd,6th,5th,3rd,4th,No,3,"No, service is consistent",,More frequent buses to avoid overcrowding
4,10/2/2024 13:58:50,Undergraduate student,1 - 2 days a week,Commute to classes,"Tuesday, Thursday, Friday","0700 - 0800, 1000 - 1100, 1100 - 1200, 1200 - ...",A1,KR MRT,LT27,Tuesday,07:50:00,< 5 minutes,Neutral,Satisfied,Satisfied,Satisfied,Satisfied,5,A2,S17,Opp KR MRT,Thursday,13:05:00,< 5 minutes,Neutral,Satisfied,Satisfied,Satisfied,Satisfied,3.0,D2,KR MRT,LT27,Friday,07:50:00,< 5 minutes,Neutral,Satisfied,Satisfied,Satisfied,Satisfied,5.0,2nd,3rd,1st,5th,4th,2nd,3rd,6th,5th,1st,4th,Capacity of the bus (Overcrowding),Frequently,1st,2nd,6th,5th,3rd,4th,No,1,"Yes, service improves/worsens (please specify ...",worsens during semester break,better capacity management
5,10/2/2024 14:07:58,Undergraduate student,1 - 2 days a week,Commute to classes,"Monday, Wednesday, Friday","1400 - 1500, 1600 - 1700, 1700 - 1800, 2000 - ...",D2,UTown,Opp KR MRT,Friday,17:00:00,10 - 15 minutes,Neutral,Satisfied,Very Satisfied,Satisfied,Satisfied,4,A1,CLB,S17,Monday,14:30:00,10 - 15 minutes,Satisfied,Satisfied,Very Satisfied,Very Satisfied,Satisfied,1.0,K,S17,Opp KR MRT,Monday,20:15:00,< 5 minutes,Neutral,Satisfied,Very Satisfied,Satisfied,Satisfied,5.0,1st,2nd,3rd,5th,4th,1st,3rd,6th,5th,4th,2nd,"Availability of seats, Capacity of the bus (Ov...",Occasionally,2nd,1st,6th,5th,3rd,4th,No,3,"No, service is consistent",,More frequent buses
6,10/2/2024 14:18:47,Undergraduate student,1 - 2 days a week,Commute to classes,"Monday, Friday","1100 - 1200, 1400 - 1500, 1700 - 1800",A2,Opp KR MRT,Opp HSSML,Friday,11:40:00,10 - 15 minutes,Neutral,Satisfied,Satisfied,Neutral,Satisfied,4,A2,Opp KR MRT,Opp HSSML,Friday,14:37:00,10 - 15 minutes,Neutral,Satisfied,Satisfied,Neutral,Satisfied,4.0,A2,BIZ2,KR MRT,Monday,11:30:00,15 - 20 minutes,Neutral,Neutral,Satisfied,Neutral,Satisfied,5.0,2nd,1st,3rd,5th,5th,1st,2nd,6th,3rd,5th,4th,"Safety, Capacity of the bus (Overcrowding), Lo...",Occasionally,1st,2nd,6th,5th,4th,3rd,No,3,"No, service is consistent",-,
7,10/2/2024 20:03:11,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Wednesday, Thursday, Friday","1400 - 1500, 2000 - 2100, 2100 - 2200, 2200 - ...",A1,BIZ2,KR MRT,"Wednesday, Thursday, Friday",20:00:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Neutral,Satisfied,2,D1,Opp HSSML,UTown,Wednesday,14:00:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Neutral,Satisfied,3.0,D1,UTown,BIZ2,Wednesday,18:00:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Neutral,Satisfied,3.0,1st,2nd,3rd,4th,5th,3rd,4th,6th,5th,2nd,1st,"Frequency of bus services, Capacity of the bus...",Frequently,1st,4th,6th,5th,2nd,3rd,Yes,3,"No, service is consistent",,"MORE BUSES, it is always so crowded and squeez..."
8,10/2/2024 23:02:50,Undergraduate student,1 - 2 days a week,Commute to classes,"Monday, Tuesday","1100 - 1200, 1200 - 1300, 1700 - 1800, 1800 - ...",A2,Opp KR MRT,Opp HSSML,Monday,11:50:00,5 - 10 minutes,Dissatisfied,Satisfied,Satisfied,Dissatisfied,Satisfied,5,A1,BIZ2,KR MRT,Tuesday,17:45:00,10 - 15 minutes,Dissatisfied,Satisfied,Satisfied,Dissatisfied,Satisfied,5.0,,,,,,,,,,,,,2nd,1st,3rd,5th,4th,3rd,4th,5th,6th,1st,2nd,"Frequency of bus services, Capacity of the bus...",Frequently,1st,2nd,6th,5th,3rd,4th,No,2,"No, service is consistent",,more buses
9,10/2/2024 23:15:36,Undergraduate student,Daily,Commute to classes,"Monday, Tuesday, Wednesday, Thursday, Friday","0800 - 0900, 0900 - 1000, 1000 - 1100, 1100 - ...",A1,KR MRT,CLB,Thursday,09:40:00,20 - 30 minutes,Dissatisfied,Satisfied,Satisfied,Neutral,Dissatisfied,5,D1,IT,UTown,Friday,11:40:00,10 - 15 minutes,Neutral,Satisfied,Satisfied,Satisfied,Neutral,4.0,A1,KR MRT,LT27,Tuesday,09:40:00,20 - 30 minutes,Dissatisfied,Satisfied,Satisfied,Satisfied,Satisfied,5.0,2nd,3rd,1st,5th,4th,3rd,4th,6th,5th,1st,2nd,"Frequency of bus services, Capacity of the bus...",Frequently,1st,4th,6th,5th,2nd,3rd,Yes,3,"Yes, service improves/worsens (please specify ...",increased frequency of buses lately,more buses and higher frequency especially dur...


In [5]:
#for each column, replace any na found with a random non-null value from the column

for column in data.columns:
    non_null_value = data[column].dropna().values
    data[column] = data[column].apply(lambda x: np.random.choice(non_null_value) if pd.isnull(x) else x)

print(data)

              timestamp                   role    frequency_of_travel  \
0    10/1/2024 22:17:11  Undergraduate student                  Daily   
1     10/2/2024 1:03:02  Undergraduate student      3 - 4 days a week   
2     10/2/2024 9:18:23  Undergraduate student      3 - 4 days a week   
3    10/2/2024 13:27:16  Undergraduate student      3 - 4 days a week   
4    10/2/2024 13:58:50  Undergraduate student      1 - 2 days a week   
5    10/2/2024 14:07:58  Undergraduate student      1 - 2 days a week   
6    10/2/2024 14:18:47  Undergraduate student      1 - 2 days a week   
7    10/2/2024 20:03:11  Undergraduate student      3 - 4 days a week   
8    10/2/2024 23:02:50  Undergraduate student      1 - 2 days a week   
9    10/2/2024 23:15:36  Undergraduate student                  Daily   
10   10/2/2024 23:29:41  Undergraduate student      1 - 2 days a week   
11   10/2/2024 23:33:14  Undergraduate student      1 - 2 days a week   
12   10/2/2024 23:49:13  Undergraduate student     

In [6]:
#to confirm no null data
rows_with_nan = data[data.isnull().any(axis=1)]
columns_with_nan = data.loc[:, data.isnull().any(axis=0)]
print(columns_with_nan)
print(rows_with_nan)

Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76]
Empty DataFrame
Columns: [timestamp, role, frequency_of_travel, primary_purpose, travel_days, travel_hours, ISB_Service_trip_1, bus_stop_board_trip_1, bus_stop_alight_trip_1, day_of_the_week_trip_1, time_start_trip_1, travel_duration_trip_1, frequency_trip_1, punctuality_trip_1, cleanliness_trip_1, safety_trip_1, coverage_trip_1, crowdedness_trip_1, ISB_Service_trip_2, bus_stop_board_trip_2, bus_stop_alight_trip_2, day_of_the_week_trip_2, time_start_trip_2, travel_duration_trip_2, frequency_trip_2, punctuality_trip_2, cleanliness_trip_2, safety_trip_2, coverage_trip_2, crowdedness_trip_2, ISB_Service_trip_3, bus_stop_board_trip_3, bus_stop_alight_trip_3, day

In [7]:
A1_bus = ['KR Bus Terminal', 'LT13', 'AS5', 'BIZ2', 'Opp TCOMS', 'PGP Terminal', 'KR MRT', 'LT27', 'University Hall', 'Opp UHC', 'YIH', 'CLB', 'KR Bus Terminal']
A2_bus = ['KR Bus Terminal', 'IT', 'Opp YIH', 'Museum', 'UHC', 'Opp University Hall', 'S17', 'Opp KR MRT', 'PGP Foyer', 'TCOMS', 'Opp HSSML', 'Opp NUSS', 'Ventus', 'KR Bus Terminal']
D1_bus = ['COM3', 'Opp HSSML', 'Opp NUSS', 'Ventus', 'IT', 'Opp YIH', 'Museum', 'UTown', 'YIH', 'CLB', 'LT13', 'AS5', 'BIZ2', 'COM3']
D2_bus = ['COM3', 'Opp TCOMS', 'PGP Terminal', 'KR MRT', 'LT27', 'University Hall', 'Opp UHC', 'Museum', 'UTown', 'UHC', 'Opp University Hall', 'S17', 'Opp KR MRT', 'PGP Foyer', 'TCOMS', 'COM3']
BTC_bus = ['Oei Tiong Ham Building (BTC)', 'Botanic Gardens MRT (BTC)', 'KR MRT', 'LT27', 'University Hall', 'Opp UHC', 'UTown', 'Raffles Hall', 'Kent Vale', 'Museum', 'YIH', 'CLB', 'LT13', 'AS5', 'BIZ2', 'PGP Terminal', 'College Green (BTC)', 'Oei Tiong Ham Building (BTC)']
E_bus = ['UTown', 'Raffles Hall', 'Kent Vale', 'EA', 'SDE3', 'IT', 'Opp YIH', 'UTown']
K_bus = ['PGP Terminal', 'KR MRT', 'LT27', 'University Hall', 'Opp UHC', 'YIH', 'CLB', 'Opp SDE3', 'The Japanese Primary School', 'Kent Vale', 'Museum', 'UHC', 'Opp University Hall', 'S17', 'Opp KR MRT', 'PGP Foyer']
L_bus = ['Oei Tiong Ham Building (BTC)', 'Botanic Gardents MRT (BTC)', 'College Green (BTC)', 'Oei Tiong Ham Building (BTC)']

bus_routes = {'A1':A1_bus, 'A2':A2_bus, 'D1':D1_bus, 'D2':D2_bus, 'BTC (Bukit Timah Campus)':BTC_bus, 'E':E_bus, 'K':K_bus, 'L':L_bus}

# changing bus_stop_board and bus_stop_alight to "error" if there is error in bus route
def validate_bus_stops(data, bus_routes):
    for i in range(1, 3):  # Loop through each trip
        service_col = f'ISB_Service_trip_{i}'
        board_col = f'bus_stop_board_trip_{i}'
        alight_col = f'bus_stop_alight_trip_{i}'

        def check_stops(row):
            bus = row[service_col]
            start = row[board_col]
            end = row[alight_col]

            # Check if bus route exists in the dictionary
            if bus in bus_routes:
                route = bus_routes[bus]

                # Check both stops are in the route and start is before end
                if start in route and end in route:
                    start_index = route.index(start)
                    # Handle looping
                    end_index = route.index(end) if end != route[0] else len(route)

                    if start_index < end_index:
                        return row[board_col], row[alight_col]  # No change if valid
            return "error", "error"
        data[[board_col, alight_col]] = data.apply(lambda row: check_stops(row), axis=1, result_type="expand")

    return data


# if the bus route is invalid, set the rest of the trip's value to NaN as it would be irrelevant
def set_nan_on_error(data):
    for i in range(1, 4):
        service_col = f'ISB_Service_trip_{i}'
        board_col = f'bus_stop_board_trip_{i}'
        alight_col = f'bus_stop_alight_trip_{i}'
        columns_to_nan = [service_col, board_col, alight_col,
                          f'day_of_the_week_trip_{i}',
                          f'time_start_trip_{i}',
                          f'travel_duration_trip_{i}',
                          f'frequency_trip_{i}',
                          f'punctuality_trip_{i}',
                          f'cleanliness_trip_{i}',
                          f'safety_trip_{i}',
                          f'coverage_trip_{i}',
                          f'crowdedness_trip_{i}']
        data.loc[data[board_col] == "error", columns_to_nan] = np.nan

    return data




validate_bus_stops(data, bus_routes)
set_nan_on_error(data)
data

Unnamed: 0,timestamp,role,frequency_of_travel,primary_purpose,travel_days,travel_hours,ISB_Service_trip_1,bus_stop_board_trip_1,bus_stop_alight_trip_1,day_of_the_week_trip_1,time_start_trip_1,travel_duration_trip_1,frequency_trip_1,punctuality_trip_1,cleanliness_trip_1,safety_trip_1,coverage_trip_1,crowdedness_trip_1,ISB_Service_trip_2,bus_stop_board_trip_2,bus_stop_alight_trip_2,day_of_the_week_trip_2,time_start_trip_2,travel_duration_trip_2,frequency_trip_2,punctuality_trip_2,cleanliness_trip_2,safety_trip_2,coverage_trip_2,crowdedness_trip_2,ISB_Service_trip_3,bus_stop_board_trip_3,bus_stop_alight_trip_3,day_of_the_week_trip_3,time_start_trip_3,travel_duration_trip_3,frequency_trip_3,punctuality_trip_3,cleanliness_trip_3,safety_trip_3,coverage_trip_3,crowdedness_trip_3,usage_influence_convenience,usage_influence_cost,usage_influence_lack_of_options,usage_influence_availability_of_parking,usage_influence_environmental,prioritize_frequency,prioritize_punctuality,prioritize_cleanliness,prioritize_safety,prioritize_bus_route_coverage,prioritize_crowdedness,top_3_frustrations,not_able_to_get_on,additional_features_frequency,additional_features_seats,additional_features_cleanliness,additional_features_comfortable,additional_features_route_coverage,additional_features_updates,issues_with_quality_of_info,special_events,seasonal_changes,seasonal_changes_specific,further_comments
0,10/1/2024 22:17:11,Undergraduate student,Daily,Commute to classes,"Monday, Tuesday, Wednesday, Thursday, Friday","0900 - 1000, 1000 - 1100, 1100 - 1200, 1200 - ...",,,,,,,,,,,,,D2,KR MRT,COM3,Friday,10:30:00,5 - 10 minutes,Neutral,Neutral,Very Satisfied,Satisfied,Satisfied,4.0,D2,COM3,LT27,Tuesday,14:30:00,10 - 15 minutes,Dissatisfied,Dissatisfied,Very Satisfied,Very Satisfied,Satisfied,3.0,1st,2nd,3rd,4th,5th,3rd,2nd,6th,5th,4th,1st,"Frequency of bus services, Capacity of the bus...",Occasionally,1st,4th,6th,5th,2nd,3rd,Yes,3,"No, service is consistent","During exam periods, more students tend to boa...",More frequent buses during exam periods
1,10/2/2024 1:03:02,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Wednesday, Thursday","0900 - 1000, 1100 - 1200, 1300 - 1400, 1500 - ...",D2,KR MRT,UTown,Monday,09:30:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,4.0,A2,IT,Opp KR MRT,Thursday,17:30:00,15 - 20 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,5.0,D2,UTown,Opp KR MRT,Wednesday,11:30:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,5.0,2nd,1st,3rd,5th,4th,2nd,6th,4th,3rd,5th,1st,"Travel time too long, Frequency of bus service...",Frequently,3rd,1st,4th,5th,6th,2nd,No,4,"No, service is consistent",more in exams,Bigger bus size
2,10/2/2024 9:18:23,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Wednesday, Thursday","1700 - 1800, 1800 - 1900, 2100 - 2200, 2200 - ...",,,,,,,,,,,,,,,,,,,,,,,,,A1,KR MRT,S17,Wednesday,18:55:00,< 5 minutes,Very Satisfied,Very Satisfied,Very Satisfied,Satisfied,Satisfied,4.0,2nd,1st,3rd,4th,5th,1st,6th,5th,4th,3rd,2nd,"Capacity of the bus (Overcrowding), Proximity ...",Occasionally,1st,3rd,6th,4th,3rd,2nd,No,3,"Yes, service improves/worsens (please specify ...",more in exams,na
3,10/2/2024 13:27:16,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Thursday, Friday, Saturday / Sunday","1100 - 1200, 1200 - 1300, 1800 - 1900, 2100 - ...",,,,,,,,,,,,,D2,PGP Foyer,COM3,Tuesday,14:00:00,15 - 20 minutes,Neutral,Very Satisfied,Very Satisfied,Neutral,Satisfied,3.0,A2,PGP Foyer,Ventus,Monday,11:30:00,15 - 20 minutes,Very dissatisfied,Very Satisfied,Very Satisfied,Neutral,Neutral,5.0,1st,3rd,2nd,5th,4th,1st,4th,5th,6th,3rd,2nd,"Travel time too long, Frequency of bus service...",Occasionally,1st,2nd,6th,5th,3rd,4th,No,3,"No, service is consistent",During peak periods (mid term season in Week 7...,More frequent buses to avoid overcrowding
4,10/2/2024 13:58:50,Undergraduate student,1 - 2 days a week,Commute to classes,"Tuesday, Thursday, Friday","0700 - 0800, 1000 - 1100, 1100 - 1200, 1200 - ...",A1,KR MRT,LT27,Tuesday,07:50:00,< 5 minutes,Neutral,Satisfied,Satisfied,Satisfied,Satisfied,5.0,A2,S17,Opp KR MRT,Thursday,13:05:00,< 5 minutes,Neutral,Satisfied,Satisfied,Satisfied,Satisfied,3.0,D2,KR MRT,LT27,Friday,07:50:00,< 5 minutes,Neutral,Satisfied,Satisfied,Satisfied,Satisfied,5.0,2nd,3rd,1st,5th,4th,2nd,3rd,6th,5th,1st,4th,Capacity of the bus (Overcrowding),Frequently,1st,2nd,6th,5th,3rd,4th,No,1,"Yes, service improves/worsens (please specify ...",worsens during semester break,better capacity management
5,10/2/2024 14:07:58,Undergraduate student,1 - 2 days a week,Commute to classes,"Monday, Wednesday, Friday","1400 - 1500, 1600 - 1700, 1700 - 1800, 2000 - ...",D2,UTown,Opp KR MRT,Friday,17:00:00,10 - 15 minutes,Neutral,Satisfied,Very Satisfied,Satisfied,Satisfied,4.0,,,,,,,,,,,,,K,S17,Opp KR MRT,Monday,20:15:00,< 5 minutes,Neutral,Satisfied,Very Satisfied,Satisfied,Satisfied,5.0,1st,2nd,3rd,5th,4th,1st,3rd,6th,5th,4th,2nd,"Availability of seats, Capacity of the bus (Ov...",Occasionally,2nd,1st,6th,5th,3rd,4th,No,3,"No, service is consistent",Midterm week has more buses,More frequent buses
6,10/2/2024 14:18:47,Undergraduate student,1 - 2 days a week,Commute to classes,"Monday, Friday","1100 - 1200, 1400 - 1500, 1700 - 1800",A2,Opp KR MRT,Opp HSSML,Friday,11:40:00,10 - 15 minutes,Neutral,Satisfied,Satisfied,Neutral,Satisfied,4.0,A2,Opp KR MRT,Opp HSSML,Friday,14:37:00,10 - 15 minutes,Neutral,Satisfied,Satisfied,Neutral,Satisfied,4.0,A2,BIZ2,KR MRT,Monday,11:30:00,15 - 20 minutes,Neutral,Neutral,Satisfied,Neutral,Satisfied,5.0,2nd,1st,3rd,5th,5th,1st,2nd,6th,3rd,5th,4th,"Safety, Capacity of the bus (Overcrowding), Lo...",Occasionally,1st,2nd,6th,5th,4th,3rd,No,3,"No, service is consistent",-,"More buses, better/more consistent bus stagger..."
7,10/2/2024 20:03:11,Undergraduate student,3 - 4 days a week,Commute to classes,"Monday, Wednesday, Thursday, Friday","1400 - 1500, 2000 - 2100, 2100 - 2200, 2200 - ...",A1,BIZ2,KR MRT,"Wednesday, Thursday, Friday",20:00:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Neutral,Satisfied,2.0,D1,Opp HSSML,UTown,Wednesday,14:00:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Neutral,Satisfied,3.0,D1,UTown,BIZ2,Wednesday,18:00:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Neutral,Satisfied,3.0,1st,2nd,3rd,4th,5th,3rd,4th,6th,5th,2nd,1st,"Frequency of bus services, Capacity of the bus...",Frequently,1st,4th,6th,5th,2nd,3rd,Yes,3,"No, service is consistent",worsens during semester break,"MORE BUSES, it is always so crowded and squeez..."
8,10/2/2024 23:02:50,Undergraduate student,1 - 2 days a week,Commute to classes,"Monday, Tuesday","1100 - 1200, 1200 - 1300, 1700 - 1800, 1800 - ...",A2,Opp KR MRT,Opp HSSML,Monday,11:50:00,5 - 10 minutes,Dissatisfied,Satisfied,Satisfied,Dissatisfied,Satisfied,5.0,A1,BIZ2,KR MRT,Tuesday,17:45:00,10 - 15 minutes,Dissatisfied,Satisfied,Satisfied,Dissatisfied,Satisfied,5.0,D1,CLB,UTown,Monday,09:30:00,10 - 15 minutes,Dissatisfied,Neutral,Very Satisfied,Satisfied,Satisfied,5.0,2nd,1st,3rd,5th,4th,3rd,4th,5th,6th,1st,2nd,"Frequency of bus services, Capacity of the bus...",Frequently,1st,2nd,6th,5th,3rd,4th,No,2,"No, service is consistent",During peak periods (mid term season in Week 7...,more buses
9,10/2/2024 23:15:36,Undergraduate student,Daily,Commute to classes,"Monday, Tuesday, Wednesday, Thursday, Friday","0800 - 0900, 0900 - 1000, 1000 - 1100, 1100 - ...",A1,KR MRT,CLB,Thursday,09:40:00,20 - 30 minutes,Dissatisfied,Satisfied,Satisfied,Neutral,Dissatisfied,5.0,D1,IT,UTown,Friday,11:40:00,10 - 15 minutes,Neutral,Satisfied,Satisfied,Satisfied,Neutral,4.0,A1,KR MRT,LT27,Tuesday,09:40:00,20 - 30 minutes,Dissatisfied,Satisfied,Satisfied,Satisfied,Satisfied,5.0,2nd,3rd,1st,5th,4th,3rd,4th,6th,5th,1st,2nd,"Frequency of bus services, Capacity of the bus...",Frequently,1st,4th,6th,5th,2nd,3rd,Yes,3,"Yes, service improves/worsens (please specify ...",increased frequency of buses lately,more buses and higher frequency especially dur...


Creating data for only the routes

In [9]:
trips = ['trip_1', 'trip_2', 'trip_3']
trip_dataframes = []

for trip in trips:
    trip_df = data[[f'ISB_Service_{trip}',
                     f'bus_stop_board_{trip}',
                     f'bus_stop_alight_{trip}',
                     f'time_start_{trip}',
                     f'travel_duration_{trip}',
                     f'frequency_{trip}',
                     f'punctuality_{trip}',
                     f'cleanliness_{trip}',
                     f'safety_{trip}',
                     f'coverage_{trip}',
                     f'crowdedness_{trip}']]

    trip_df.rename(columns={f'ISB_Service_{trip}': 'ISB_Service',
                            f'bus_stop_board_{trip}': 'bus_stop_board',
                            f'bus_stop_alight_{trip}': 'bus_stop_alight',
                            f'time_start_{trip}': 'time_start',
                            f'travel_duration_{trip}': 'travel_duration',
                            f'frequency_{trip}': 'frequency',
                            f'punctuality_{trip}': 'punctuality',
                            f'cleanliness_{trip}': 'cleanliness',
                            f'safety_{trip}': 'safety',
                            f'coverage_{trip}': 'coverage',
                            f'crowdedness_{trip}': 'crowdedness'}, inplace=True)

    trip_dataframes.append(trip_df)

routedata = pd.concat(trip_dataframes, axis=0)
routedata = routedata.dropna() # drops row with any NA value
routedata.reset_index(drop=True, inplace=True)
routedata

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trip_df.rename(columns={f'ISB_Service_{trip}': 'ISB_Service',
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trip_df.rename(columns={f'ISB_Service_{trip}': 'ISB_Service',
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trip_df.rename(columns={f'ISB_Service_{trip}': 'ISB_Service',


Unnamed: 0,ISB_Service,bus_stop_board,bus_stop_alight,time_start,travel_duration,frequency,punctuality,cleanliness,safety,coverage,crowdedness
0,D2,KR MRT,UTown,09:30:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,4.0
1,A1,KR MRT,LT27,07:50:00,< 5 minutes,Neutral,Satisfied,Satisfied,Satisfied,Satisfied,5.0
2,D2,UTown,Opp KR MRT,17:00:00,10 - 15 minutes,Neutral,Satisfied,Very Satisfied,Satisfied,Satisfied,4.0
3,A2,Opp KR MRT,Opp HSSML,11:40:00,10 - 15 minutes,Neutral,Satisfied,Satisfied,Neutral,Satisfied,4.0
4,A1,BIZ2,KR MRT,20:00:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Neutral,Satisfied,2.0
5,A2,Opp KR MRT,Opp HSSML,11:50:00,5 - 10 minutes,Dissatisfied,Satisfied,Satisfied,Dissatisfied,Satisfied,5.0
6,A1,KR MRT,CLB,09:40:00,20 - 30 minutes,Dissatisfied,Satisfied,Satisfied,Neutral,Dissatisfied,5.0
7,A2,Opp KR MRT,Opp HSSML,14:30:00,10 - 15 minutes,Dissatisfied,Dissatisfied,Satisfied,Neutral,Satisfied,4.0
8,A1,BIZ2,KR MRT,14:45:00,5 - 10 minutes,Dissatisfied,Neutral,Neutral,Dissatisfied,Neutral,5.0
9,A1,LT13,LT27,16:00:00,10 - 15 minutes,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,3.0
