In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import warnings
warnings.filterwarnings('ignore')
import json

In [2]:
# purpose: open and read txt file function
def open_read(txt_file):
    # reading the txt data by using JSON
    with open(txt_file, encoding='utf-8') as file:
        json_data = json.load(file)
        # converting json dataset from dictionary to dataframe
        df = pd.DataFrame(json_data)
    return df

In [3]:
traveltime_df = open_read('traveltime.txt')

In [4]:
# create dictionarys - {key=RouteID : value=TravelTimes}
route_traveltimes_dict = {}
for row_index, route_id in enumerate(traveltime_df['RouteID']):
    route_traveltimes_dict[route_id] = pd.json_normalize(traveltime_df['TravelTimes'][row_index])

In [5]:
# purpose: revise linecolor label and fill the whole name
def extract_color(color_ID):
    if color_ID[0:2] == 'BR':
        return 'brown'
    elif color_ID[0:1] == 'R':
        return 'red'
    elif color_ID[0:1] == 'Y':
        return 'yellow'
    elif color_ID[0:1] == 'B' or color_ID[0:2] == 'BL':
        return 'blue'
    elif color_ID[0:1] == 'O':
        return 'orange'
    elif color_ID[0:1] == 'G':
        return 'green'

In [6]:
#  purpose: create timetables with calculating all timevalues
def get_route_timetable(traveltimes_df):
    # add a new column to calculate commute time in each station
    traveltimes_df['TakeTime'] = traveltimes_df['RunTime'] + traveltimes_df['StopTime']
    # add a new column to calculate commute time between two stations
    traveltimes_df['AccTime'] = traveltimes_df['TakeTime'].cumsum()
    # add a new column to classify stations by their linescolors
    traveltimes_df['Line_Color'] = traveltimes_df['FromStationID'].apply(extract_color)
    
    times_df = traveltimes_df.rename(columns={'FromStationName.Zh_tw':'FromStationName', 'ToStationName.Zh_tw':'ToStationName'})
    return times_df

# purpose: create a pivot table - the index and column frame by all station names
def get_route_stationtable(traveltimes_df):
    # if totalstation=n, FromStationName has n-1 station names(coz end station name in ToStationName)
    indexes = np.append(traveltimes_df['FromStationName.Zh_tw'].values, traveltimes_df['ToStationName.Zh_tw'].iloc[-1])
    stationframe_df = pd.DataFrame(index=indexes, columns=indexes)
    return stationframe_df

# purpose: fill all timevalues in the station framtable and search total commute time between two stations
def fill_timevalue(traveltimes_df, stationframe_df):
    
    stop_time = np.array(traveltimes_df['StopTime'])
    # StopTime in the last station is 0
    stop_time = np.append(stop_time, 0)
    take_time = np.array(traveltimes_df['TakeTime'])

    for row_index in range(len(stationframe_df)):
        for column_index in range(len(stationframe_df)):
            if row_index == column_index:
                stationframe_df.iloc[row_index][column_index] = stop_time[row_index]
            elif row_index < column_index:                
                stationframe_df.iloc[row_index][column_index] = take_time[row_index:column_index].sum()
            elif row_index > column_index:
                stationframe_df.iloc[row_index][column_index] = take_time[column_index:row_index].sum()
            else:
                continue
                
    schedule_df = stationframe_df
    return schedule_df

In [7]:
# create dictionarys - {key:routecolor: {key=stationname0: {key=stationnames: value=commutetimes between other stations}}
commutetimes_dict = {}    
for row_index, route_id in enumerate(traveltime_df['RouteID']):
    commutetimes_dict[route_id] = fill_timevalue(get_route_timetable(route_traveltimes_dict[route_id]), get_route_stationtable(route_traveltimes_dict[route_id])).to_dict()

# print(commutetimes_dict)
commutetimes_dict["O-2"]

{'蘆洲': {'蘆洲': 0,
  '三民高中': 111,
  '徐匯中學': 228,
  '三和國中': 335,
  '三重國小': 467,
  '大橋頭': 643,
  '民權西路': 741,
  '中山國小': 846,
  '行天宮': 972,
  '松江南京': 1080,
  '忠孝新生': 1212,
  '東門': 1386,
  '古亭': 1627,
  '頂溪': 1847,
  '永安市場': 1979,
  '景安': 2098,
  '南勢角': 2223},
 '三民高中': {'蘆洲': 111,
  '三民高中': 25,
  '徐匯中學': 117,
  '三和國中': 224,
  '三重國小': 356,
  '大橋頭': 532,
  '民權西路': 630,
  '中山國小': 735,
  '行天宮': 861,
  '松江南京': 969,
  '忠孝新生': 1101,
  '東門': 1275,
  '古亭': 1516,
  '頂溪': 1736,
  '永安市場': 1868,
  '景安': 1987,
  '南勢角': 2112},
 '徐匯中學': {'蘆洲': 228,
  '三民高中': 117,
  '徐匯中學': 25,
  '三和國中': 107,
  '三重國小': 239,
  '大橋頭': 415,
  '民權西路': 513,
  '中山國小': 618,
  '行天宮': 744,
  '松江南京': 852,
  '忠孝新生': 984,
  '東門': 1158,
  '古亭': 1399,
  '頂溪': 1619,
  '永安市場': 1751,
  '景安': 1870,
  '南勢角': 1995},
 '三和國中': {'蘆洲': 335,
  '三民高中': 224,
  '徐匯中學': 107,
  '三和國中': 25,
  '三重國小': 132,
  '大橋頭': 308,
  '民權西路': 406,
  '中山國小': 511,
  '行天宮': 637,
  '松江南京': 745,
  '忠孝新生': 877,
  '東門': 1051,
  '古亭': 1292,
  '頂溪': 1512,
  '永安市場': 1644,
  '景安'

In [8]:
# open and read the transfer stations file
linetransfer_df = open_read('linetransfer.txt')
linetransfer_df

Unnamed: 0,FromLineNo,FromLineID,FromLineName,FromStationID,FromStationName,ToLineNo,ToLineID,ToLineName,ToStationID,ToStationName,IsOnSiteTransfer,TransferTime,TransferDescription,SrcUpdateTime,UpdateTime,VersionID
0,BL,BL,"{'Zh_tw': '板南線', 'En': 'Bannan Line'}",BL23,"{'Zh_tw': '南港展覽館', 'En': 'Taipei Nangang Exhib...",BR,BR,"{'Zh_tw': '文湖線', 'En': 'Wenhu Line'}",BR24,"{'Zh_tw': '南港展覽館', 'En': 'Taipei Nangang Exhib...",1,5,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2
1,R,R,"{'Zh_tw': '淡水信義線', 'En': 'Tamsui-Xinyi Line'}",R07,"{'Zh_tw': '東門', 'En': 'Dongmen'}",O,O,"{'Zh_tw': '中和新蘆線', 'En': 'Zhonghe-Xinlu Line'}",O06,"{'Zh_tw': '東門', 'En': 'Dongmen'}",1,2,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2
2,R,R,"{'Zh_tw': '淡水信義線', 'En': 'Tamsui-Xinyi Line'}",R11,"{'Zh_tw': '中山', 'En': 'Zhongshan'}",G,G,"{'Zh_tw': '松山新店線', 'En': 'Songshan-Xindian Line'}",G14,"{'Zh_tw': '中山', 'En': 'Zhongshan'}",1,3,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2
3,G,G,"{'Zh_tw': '松山新店線', 'En': 'Songshan-Xindian Line'}",G16,"{'Zh_tw': '南京復興', 'En': 'Nanjing Fuxing'}",BR,BR,"{'Zh_tw': '文湖線', 'En': 'Wenhu Line'}",BR11,"{'Zh_tw': '南京復興', 'En': 'Nanjing Fuxing'}",1,5,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2
4,G,G,"{'Zh_tw': '松山新店線', 'En': 'Songshan-Xindian Line'}",G15,"{'Zh_tw': '松江南京', 'En': 'Songjiang Nanjing'}",O,O,"{'Zh_tw': '中和新蘆線', 'En': 'Zhonghe-Xinlu Line'}",O08,"{'Zh_tw': '松江南京', 'En': 'Songjiang Nanjing'}",1,2,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2
5,O,O,"{'Zh_tw': '中和新蘆線', 'En': 'Zhonghe-Xinlu Line'}",O05,"{'Zh_tw': '古亭', 'En': 'Guting'}",G,G,"{'Zh_tw': '松山新店線', 'En': 'Songshan-Xindian Line'}",G09,"{'Zh_tw': '古亭', 'En': 'Guting'}",1,2,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2
6,R,R,"{'Zh_tw': '淡水信義線', 'En': 'Tamsui-Xinyi Line'}",R05,"{'Zh_tw': '大安', 'En': 'Daan'}",BR,BR,"{'Zh_tw': '文湖線', 'En': 'Wenhu Line'}",BR09,"{'Zh_tw': '大安', 'En': 'Daan'}",1,5,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2
7,G,G,"{'Zh_tw': '松山新店線', 'En': 'Songshan-Xindian Line'}",G12,"{'Zh_tw': '西門', 'En': 'Ximen'}",BL,BL,"{'Zh_tw': '板南線', 'En': 'Bannan Line'}",BL11,"{'Zh_tw': '西門', 'En': 'Ximen'}",1,2,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2
8,BR,BR,"{'Zh_tw': '文湖線', 'En': 'Wenhu Line'}",BR10,"{'Zh_tw': '忠孝復興', 'En': 'Zhongxiao Fuxing'}",BL,BL,"{'Zh_tw': '板南線', 'En': 'Bannan Line'}",BL15,"{'Zh_tw': '忠孝復興', 'En': 'Zhongxiao Fuxing'}",1,5,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2
9,BR,BR,"{'Zh_tw': '文湖線', 'En': 'Wenhu Line'}",BR24,"{'Zh_tw': '南港展覽館', 'En': 'Taipei Nangang Exhib...",BL,BL,"{'Zh_tw': '板南線', 'En': 'Bannan Line'}",BL23,"{'Zh_tw': '南港展覽館', 'En': 'Taipei Nangang Exhib...",1,5,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2


In [9]:
def json_normalize(df):
    target_cols = ['FromLineName','FromStationName','ToLineName', 'ToStationName']
    for col in target_cols:
        df[col] = pd.json_normalize(df[col])
    return df

json_normalize(linetransfer_df).head(3)

Unnamed: 0,FromLineNo,FromLineID,FromLineName,FromStationID,FromStationName,ToLineNo,ToLineID,ToLineName,ToStationID,ToStationName,IsOnSiteTransfer,TransferTime,TransferDescription,SrcUpdateTime,UpdateTime,VersionID
0,BL,BL,板南線,BL23,南港展覽館,BR,BR,文湖線,BR24,南港展覽館,1,5,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2
1,R,R,淡水信義線,R07,東門,O,O,中和新蘆線,O06,東門,1,2,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2
2,R,R,淡水信義線,R11,中山,G,G,松山新店線,G14,中山,1,3,,2020-01-31T14:00:00+08:00,2020-05-20T12:00:00+08:00,2


In [10]:
linetransfer_df['From_Line_Color'] = linetransfer_df['FromStationID'].apply(extract_color)
linetransfer_df['To_Line_Color'] = linetransfer_df['ToStationID'].apply(extract_color)

# remove two stations which are out of station
out_station_transfer = linetransfer_df[(linetransfer_df['FromStationName']=='新埔') | (linetransfer_df['FromStationName']=='新埔民生') | 
                                       (linetransfer_df['ToStationName']=='新埔') | (linetransfer_df['ToStationName']=='新埔民生') ]
linetransfer_df.drop(out_station_transfer.index, axis=0, inplace=True)

transfer_stations_df = linetransfer_df[['From_Line_Color', 'FromStationName', 'To_Line_Color', 'ToStationName']].sort_values(['From_Line_Color']).reset_index(drop=True)
transfer_stations_df

Unnamed: 0,From_Line_Color,FromStationName,To_Line_Color,ToStationName
0,blue,南港展覽館,brown,南港展覽館
1,blue,忠孝復興,brown,忠孝復興
2,blue,西門,green,西門
3,blue,忠孝新生,orange,忠孝新生
4,blue,板橋,yellow,板橋
5,blue,台北車站,red,台北車站
6,brown,忠孝復興,blue,忠孝復興
7,brown,南港展覽館,blue,南港展覽館
8,brown,南京復興,green,南京復興
9,brown,大安,red,大安


In [11]:
# purpose: get the line colors of thestation
def get_line_color(station_name):
    # pick up station_df in the MRT station ipynotebook
    station_df = pd.read_pickle("./station.pkl")
    color = station_df[station_df['StationName_Zh_tw']==station_name]['Line_Color']
    return color

# purpose: help to search commute time from one station to another station
def choose_searching_dict(color, isO2):
    if isO2:
        return commutetimes_dict['O-2']
    if color == 'yellow':
        return commutetimes_dict['Y-1']
    elif color == 'brown':
        return commutetimes_dict['BR-1']
    elif color == 'blue':
        return commutetimes_dict['BL-1']
    elif color == 'red':
        return commutetimes_dict['R-1']
    elif color == 'green':
        return commutetimes_dict['G-1']
    else:
        return commutetimes_dict['O-1']
        
def check_O2(station):
    O_2 = ['蘆洲', '三民高中', '徐匯中學', '三和國中', '三重國小']
    return station in O_2
     
# purpose: get the intersections to show all probable ways we can arrive the end_station
def find_intersections(start_station, end_station):
    
    # get the line colors of start_station and end_station
    start_line_color = get_line_color(start_station).values[0]
    end_line_color = get_line_color(end_station).values[0]
    
    with_start_color= transfer_stations_df[transfer_stations_df['From_Line_Color'] == start_line_color]
    with_end_color= transfer_stations_df[transfer_stations_df['To_Line_Color'] == end_line_color]
    start_intersect_end= pd.merge(with_start_color, with_end_color, left_on='To_Line_Color', right_on='From_Line_Color', suffixes=('_x', '_y'))
    
    return start_intersect_end[['From_Line_Color_x', 'To_Line_Color_x', 'ToStationName_x', 'ToStationName_y', 'To_Line_Color_y']], start_line_color, end_line_color

In [12]:
def get_trans_times(start, end):
    if get_line_color(start).values[0] == get_line_color(end).values[0]:
        return 0
    else:
        return 1
    
def get_total_taketime(start_station, end_station):
    # get line colors and intersect table
    transfer_table, start_line_color, end_line_color = find_intersections(start_station, end_station)
    trans_times = get_trans_times(start_station, end_station)
    O2_start_flag = check_O2(start_station)
    O2_end_flag = check_O2(end_station)
    O2_start_push = 0
    O2_end_push = 0
    if O2_start_flag and O2_end_flag:
        return choose_searching_dict(start_line_color, check_O2(start_station))[start_station][end_station]
    if O2_start_flag:
        O2_start_push = choose_searching_dict(start_line_color, check_O2(start_station))[start_station]["大橋頭"]
        start_station = "大橋頭"
    if O2_end_flag:
        O2_end_push = choose_searching_dict(end_line_color, check_O2(end_station))["大橋頭"][end_station]
        end_station = "大橋頭"
    
    # no transfer
    if trans_times == 0:
        return choose_searching_dict(start_line_color, check_O2(start_station))[start_station][end_station] + O2_start_push + O2_end_push
    # transfer > 0
    else:
        time_array = []
        for i in range(len(transfer_table)):
            time_sum = O2_start_push + O2_end_push
            # start_station -> intersect_from_station  
            time_sum = choose_searching_dict(start_line_color, check_O2(start_station))[start_station][transfer_table['ToStationName_x'][i]]
            # intersect_from_station -> intersect_to_station
            time_sum += choose_searching_dict(transfer_table['To_Line_Color_x'][i], False)[transfer_table['ToStationName_x'][i]][transfer_table['ToStationName_y'][i]]
            # intersect_to_station -> end_station
            time_sum += choose_searching_dict(end_line_color, check_O2(end_station))[transfer_table['ToStationName_y'][i]][end_station]

            time_array.append(time_sum)
        transfer_table["total_time"] = np.array(time_array)
        return transfer_table[transfer_table["total_time"] == transfer_table["total_time"].min()]["total_time"].values[0]

In [13]:
# get_total_taketime('三民高中', '南港')
get_total_taketime('淡水', '大橋頭')

2042

In [14]:
best = get_total_taketime('三民高中','新店')
best

1685

In [15]:
station_df = pd.read_pickle("./station.pkl")
station_df = station_df.drop(columns=['StationUID','LocationTownCode','StationName','StationAddress','BikeAllowOnHoliday','SrcUpdateTime','UpdateTime','VersionID','StationPosition', 'LocationCity', 'LocationCityCode', 'StationPosition_GeoHash'])
station_df = station_df[(station_df["StationName_Zh_tw"] != "小碧潭")]
station_df = station_df[(station_df["StationName_Zh_tw"] != "新北投")]
len(station_df["StationName_Zh_tw"].values)

133

In [38]:
stations = station_df["StationName_Zh_tw"].values
from collections import defaultdict
all_time = defaultdict(dict)
for start in stations:
    for end in stations:
        all_time[start][end] = get_total_taketime(start,end)
all_time

defaultdict(dict,
            {'中正紀念堂': {'中正紀念堂': 30,
              '象山': 779,
              '中山國中': 676,
              '辛亥': 1169,
              '圓山': 617,
              '新北產業園區': 1537,
              '竹圍': 2109,
              '民權西路': 699,
              '古亭': 479,
              '士林': 876,
              '東門': 396,
              '橋和': 1265,
              '港墘': 1393,
              '台北橋': 753,
              '頭前庄': 1319,
              '台大醫院': 121,
              '劍南路': 1168,
              '六張犁': 923,
              '行天宮': 568,
              '南港展覽館': 1456,
              '科技大樓': 776,
              '三民高中': 627,
              '大安': 677,
              '台北車站': 412,
              '唭哩岸': 1317,
              '頂溪': 375,
              '忠孝復興': 610,
              '劍潭': 755,
              '大坪林': 978,
              '永寧': 1502,
              '忠孝新生': 419,
              '萬芳社區': 1424,
              '蘆洲': 627,
              '奇岩': 1415,
              '信義安和': 537,
              '迴龍': 1871,
              '葫洲': 1747

In [39]:
all_time["中正紀念堂"]["象山"]

779

In [40]:
import pickle
with open('all_time.pickle', 'wb') as handle:
    pickle.dump(all_time, handle)
    handle.close()