In [63]:
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path

In [77]:
# Raw ridership table from the 2021-12 file; later cells use it to prototype
# the processing pipeline on a single file.
MRT_2021 = pd.read_csv(
    './DATA/MRT_raw_data/202112.csv',
)

為什麼 2021

In [3]:
# Preview the rows recorded for hour slot 2.
MRT_2021.loc[MRT_2021['時段'] == 2]

Unnamed: 0,日期,時段,進站,出站,人次
9218811,2022-01-01,2,松山機場,松山機場,0
9218812,2022-01-01,2,松山機場,中山國中,0
9218813,2022-01-01,2,松山機場,南京復興,0
9218814,2022-01-01,2,松山機場,忠孝復興,0
9218815,2022-01-01,2,松山機場,大安,0
...,...,...,...,...,...
9232967,2022-01-01,2,新北產業園區,板新,5
9232968,2022-01-01,2,新北產業園區,Y板橋,16
9232969,2022-01-01,2,新北產業園區,新埔民生,14
9232970,2022-01-01,2,新北產業園區,幸福,18


In [4]:
# np.unique(..., return_counts=True) returns (unique values, occurrence counts),
# in that order, so name the two arrays accordingly.
hours, row_counts = np.unique(MRT_2021['時段'], return_counts=True)

# Number of records per hour slot; log scale because slots differ by orders of magnitude.
fig = px.bar(
    x=hours,
    y=row_counts,
    log_y=True,
    labels={'x': '時段', 'y': 'row count'},
)
fig

首先，限制日期在 12/31、1/1 以及限制時段在 0 - 5

In [91]:
def filter_new_year(df):
    """Keep the rows dated 12-31 or 01-01 whose hour slot is between 0 and 5.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a '日期' column parseable by ``pd.to_datetime`` and a
        '時段' (hour slot) column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the matching rows and an added 'year' column.
        The input frame is left untouched.
    """
    # Parse the date column once and derive both helper series from it.
    dates = pd.to_datetime(df['日期'])
    year = dates.dt.year
    month_day = dates.dt.strftime('%m-%d')

    mask = month_day.isin(['01-01', '12-31']) & df['時段'].isin(list(range(0, 6)))

    df_ = df[mask].copy()
    # Positional assignment: the rows of df_ are exactly the rows selected by mask.
    df_['year'] = year[mask].to_numpy()

    # Rows in hour slots 0 and 1 are attributed to the following year.
    # NOTE(review): this applies to 01-01 rows as well as 12-31 rows; confirm
    # this matches how the raw data dates the after-midnight slots.
    df_.loc[df_['時段'].isin([0, 1]), 'year'] += 1

    return df_


In [47]:
# def filter_new_year(df):
#     df['year'] = pd.to_datetime(df['日期']).dt.year
#     return df[df['日期'].str.contains('01-01', regex=False)]

# # MRT_2021.query("`日期` == '2022-01-01'")

由於板橋捷運站有環狀線及板南線兩種，在此將兩者名稱皆改成板橋

In [92]:
def remove_alphabets_from_stations(df, columns=('進站',)):
    """Strip ASCII letters from station names so line-prefixed variants collapse.

    For example 'Y板橋' becomes '板橋'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the station-name columns.
    columns : iterable of str, optional
        Columns to clean. Defaults to the entry-station column only.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the listed columns cleaned; ``df`` is not modified.
    """
    df_ = df.copy()
    for column in columns:
        # Vectorized replacement; missing names pass through as missing
        # instead of raising inside a per-row re.sub call.
        df_[column] = df_[column].str.replace(r'[a-zA-Z]', '', regex=True)
    return df_


我們想要得到不同年份、日期、時段以及捷運站點的進站人次總和

In [93]:
def sum_enter_by_station_time(df):
    """Total the '人次' column per (year, 日期, 時段, 進站) combination.

    Returns a flat frame with the four key columns followed by the summed '人次'.
    """
    group_keys = ['year', '日期', '時段', '進站']
    totals = df.groupby(group_keys)['人次'].sum()
    return totals.reset_index()


In [95]:
def concatenate_all_data(files):
    """Run the New Year pipeline on every CSV file and stack the results.

    Each file is read, filtered with ``filter_new_year``, cleaned with
    ``remove_alphabets_from_stations`` and aggregated with
    ``sum_enter_by_station_time``.

    Parameters
    ----------
    files : iterable of str or path-like
        CSV files to process.

    Returns
    -------
    pandas.DataFrame
        The per-file aggregates stacked with a fresh index. An empty frame is
        returned when ``files`` is empty.
    """
    # Collect the per-file results and concatenate once, rather than growing
    # a DataFrame inside the loop (which re-copies all earlier rows each time).
    frames = [
        pd.read_csv(file)
        .pipe(filter_new_year)
        .pipe(remove_alphabets_from_stations)
        .pipe(sum_enter_by_station_time)
        for file in files
    ]
    if not frames:
        # pd.concat raises on an empty list; keep the empty-frame result.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [96]:
directory_path = './DATA/MRT_raw_data/'

# Pattern for the CSV files to pick up from the directory (names ending in '12.csv')
file_pattern = '*12.csv'

# Path.glob yields entries in no guaranteed order; sort them so the row order of
# the concatenated result is the same on every run.
csv_files = sorted(str(file) for file in Path(directory_path).glob(file_pattern))

# Process every matching file and stack the aggregated results
MRT_newyear_data = concatenate_all_data(csv_files)

In [50]:
# Exit-level coordinate table; the file is Big5-encoded.
MRT_coordinate = pd.read_csv(
    './DATA/臺北捷運車站出入口座標.csv',
    encoding='big5',
)

def get_unique_station_geodata(df):
    """Reduce the exit-level coordinate table to one row per station.

    The station name is taken as the part of '出入口名稱' before '站出口'.
    Names containing '台北車' are normalized to '台北車站', because splitting
    on '站出口' cuts the trailing '站' off that name.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain '出入口名稱', '經度' and '緯度'.

    Returns
    -------
    pandas.DataFrame
        Columns 'exit', '經度', '緯度', one row per station. The input frame
        is not modified.
    """
    station = df['出入口名稱'].str.split('站出口').str[0]
    # na=False keeps the mask strictly boolean when an exit name is missing.
    is_taipei_main = station.str.contains('台北車', regex=False, na=False)
    station = station.mask(is_taipei_main, '台北車站')

    # groupby(...).first() takes the first non-null value of each column per station.
    return (
        df.assign(exit=station)
        .groupby('exit')
        .first()
        .reset_index()[['exit', '經度', '緯度']]
    )
    

# One coordinate pair per station, derived from the exit-level table; preview it.
MRT_first_coord = MRT_coordinate.pipe(get_unique_station_geodata)
MRT_first_coord

Unnamed: 0,exit,經度,緯度
0,七張,121.543068,24.975025
1,三和國中,121.486398,25.076586
2,三民高中,121.472714,25.085692
3,三重,121.483303,25.055352
4,三重國小,121.496702,25.070646
...,...,...,...
113,頂埔,121.418336,24.959327
114,頂溪,121.515414,25.012895
115,頭前庄,121.460303,25.039844
116,麟光,121.558606,25.018554


In [97]:
# Attach station coordinates to the aggregated entries (left join keeps stations
# that have no coordinate match), then order rows by hour slot.
MRT_sum_with_coord = (
    MRT_newyear_data
    .merge(MRT_first_coord, left_on='進站', right_on='exit', how='left')
    .sort_values('時段', ascending=True)
)

# Persist the joined table for reuse.
MRT_sum_with_coord.to_csv('./DATA/MRT_sum_with_coord.csv', index=False)

In [33]:
# Animated density map of entries per station, one frame per hour slot.
fig = px.density_mapbox(
    MRT_sum_with_coord,
    lat='緯度',
    lon='經度',
    z='人次',
    radius=50,
    zoom=12.5,
    center={'lat': 25.033671, 'lon': 121.564427},
    mapbox_style="carto-positron",
    animation_frame="時段",
    hover_name="exit",
    color_continuous_scale='Reds',
)
fig

In [80]:
# Same three-step pipeline as the multi-file run, applied to the single 2021-12 frame.
new_year_rows = filter_new_year(MRT_2021)
cleaned_rows = remove_alphabets_from_stations(new_year_rows)
processed_df = sum_enter_by_station_time(cleaned_rows)

In [84]:
# Left-join station coordinates onto the single-file aggregate.
processed_df_with_coord = processed_df.merge(
    MRT_first_coord,
    left_on='進站',
    right_on='exit',
    how='left',
)

In [86]:
# Order rows by hour slot (ascending).
processed_df_with_coord = processed_df_with_coord.sort_values(by='時段', ascending=True)

In [90]:
# Animated density map for the single-file result, one frame per hour slot.
fig = px.density_mapbox(
    processed_df_with_coord,
    lat='緯度',
    lon='經度',
    z='人次',
    radius=50,
    zoom=12.5,
    center={'lat': 25.033671, 'lon': 121.564427},
    mapbox_style="carto-positron",
    animation_frame="時段",
    hover_name="exit",
    color_continuous_scale='Reds',
)
fig

In [104]:
import plotly.graph_objects as go

# Read the Mapbox access token from a local file. The context manager closes
# the handle, and strip() drops any trailing newline left by an editor.
with open("./mapbox_access_token") as token_file:
    token = token_file.read().strip()

# Animated density map, one frame per hour slot.
fig = px.density_mapbox(processed_df_with_coord, lat='緯度', lon='經度', z='人次', radius=50, zoom=12.5,
                        center=dict(lat=25.033671, lon=121.564427),
                        mapbox_style="carto-positron", animation_frame="時段", hover_name="exit",
                        color_continuous_scale='Reds')

# Add a custom Mapbox icon at the map centre.
# NOTE(review): non-circle symbols may need a Mapbox-hosted style to render, and
# marker colour may only apply to the "circle" symbol; confirm in the plotly docs.
fig.add_trace(go.Scattermapbox(
    mode="markers",
    lon=[121.564427], lat=[25.033671],
    marker={'size': 50, 'symbol': "building-alt1", 'color': 'blue'},
))

# Update layout with Mapbox token
fig.update_layout(
    mapbox=dict(
        accesstoken=token,
    ),
    showlegend=False
)

# Show the figure
fig.show()

In [108]:
# Static (non-animated) density layer built directly with graph_objects.
density_layer = go.Densitymapbox(
    lat=processed_df_with_coord['緯度'],
    lon=processed_df_with_coord['經度'],
    z=processed_df_with_coord['人次'],
    radius=50,
)
fig = go.Figure(density_layer)

# Same base style and centre as the plotly-express maps, written as a nested layout dict.
fig.update_layout(
    mapbox=dict(
        style='carto-positron',
        center=dict(lon=121.564427, lat=25.033671),
    ),
)

fig.show()