In [4]:
import os
import random

import numpy as np
import pandas
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

# Use "Malgun Gothic" as the default matplotlib font family
# (presumably so Korean labels render — confirm the font is installed).
plt.rc('font', family='Malgun Gothic')
# Apply the seaborn theme afterwards, with the same font and the darkgrid style.
sns.set(font="Malgun Gothic", # alternative font: "NanumGothicCoding"
        rc={"axes.unicode_minus":False}, # fixes broken rendering of the minus sign
        style='darkgrid')

# Suppress every Python warning from this point on in the session.
import warnings
warnings.filterwarnings(action='ignore')

In [7]:
# Load the Nov-2022 city-bus licence sheet and strip line breaks embedded in the headers.
bus_info = pd.read_excel('../../data/서울교통수단이동/master/시내버스 2022.11월 인가현황.xlsx')
bus_info.columns = [header.replace('\n', '') for header in bus_info.columns]

# Quick look at the table: first rows, dimensions and the distinct values of the type column.
display(bus_info.head())
print('shape:', bus_info.shape)
print('유형:', bus_info['유형'].unique())

# List the route numbers that belong to each of the special bus types.
for bus_type in ('순환', '광역', '맞춤', '심야'):
    print(f'{bus_type}:', bus_info.loc[bus_info['유형'] == bus_type, '노선번호'].unique())

# Route numbers of the "맞춤" (custom) buses, kept as strings for later name matching.
bus_fit = {str(route_no) for route_no in bus_info.loc[bus_info['유형'] == '맞춤', '노선번호']}

Unnamed: 0,업체명,노선번호,유형,기점,종점,인가대수,운행대수,예비대수,인가거리,운행시간,총운행횟수,최소,최대,첫차시간,막차시간
0,북부운수,1,순환,남산예장버스환승주차장,남산예장버스환승주차장,12,10,2,16.0,60,120,7,10,630,2300
1,한성여객,100,간선,하계동,용산구청,30,28,2,57.09,231,112,8,12,400,2230
2,동아운수,101,간선,우이동,서소문,23,22,1,37.81,170,119,5,11,400,2300
3,한성운수,101,간선,우이동,서소문,9,8,1,37.81,165,41,6,12,400,2300
4,삼화상운,102,간선,상계주공7단지,동대문,2,2,0,30.2,126,14,8,12,400,2310


shape: (436, 15)
유형: ['순환' '간선' '지선' '맞춤' '광역' '심야']
순환: [1]
광역: [9401 9403 9404 9408 9409 9701 9703 9707 9711 9714]
맞춤: [8002 8003 8112 8221 8331 8441 8541 8551 8552 8761 8771 8772 8774 8777]
심야: ['N13상계' 'N13송파' 'N15사당' 'N15우이' 'N16도봉' 'N16온수' 'N26강서' 'N26중랑' 'N30'
 'N32' 'N34' 'N37송파' 'N37진관' 'N51시흥' 'N51하계' 'N61상계' 'N61양천' 'N62면목'
 'N62양천' 'N64강서' 'N64염곡' 'N72' 'N75신림' 'N75진관']


In [2]:
def to_trip_velo(route_master, route_node_master, bus_fit, trip_time, trip_turn):
    """Compute the count-weighted 18H-21H average speed of each route section.

    For every section (FROM stop -> TO stop) of the retained routes, the four
    hourly travel times are averaged with the hourly counts as weights, and the
    section distance is divided by that average.

    Parameters
    ----------
    route_master : pandas.DataFrame
        Needs the columns ROUTE_ID, ROUTE_TY and ROUTE_NM.
    route_node_master : pandas.DataFrame
        Needs the columns ROUTE_ID, STTN_ID, STTN_SN and LINK_DSTNC_ACMTL.
    bus_fit : iterable
        Route names to exclude (custom buses); compared as strings.
    trip_time : pandas.DataFrame
        Needs ROUTE_ID, FROM_STTN_SN, TO_STTN_SN, FROM_STTN_ID, TO_STTN_ID and
        TRIP_TIME_18H ... TRIP_TIME_21H.
    trip_turn : pandas.DataFrame
        Needs ROUTE_ID, STTN_SN, STTN_ID and CNT_18H ... CNT_21H. Missing values
        are treated as 0.

    Returns
    -------
    pandas.DataFrame
        Columns ROUTE_ID, FROM_STTN_SN, TO_STTN_SN, FROM_STTN_ID, TO_STTN_ID and
        TRIP_VELO_18_21. Sections whose speed cannot be computed (no counts, zero
        weighted travel time, missing distance) are omitted. None of the input
        frames is modified.

    Notes
    -----
    The ``* 3600 / 1000`` factor presumably converts m/s to km/h, i.e. distances
    are assumed to be metres and times seconds — TODO confirm against the data
    dictionary.
    """
    hours = ('18H', '19H', '20H', '21H')
    time_cols = [f'TRIP_TIME_{hour}' for hour in hours]
    count_cols = [f'CNT_{hour}' for hour in hours]
    section_cols = ['ROUTE_ID', 'FROM_STTN_SN', 'TO_STTN_SN', 'FROM_STTN_ID', 'TO_STTN_ID']
    to_stop_keys = ['ROUTE_ID', 'TO_STTN_ID', 'TO_STTN_SN']
    node_keys = ['ROUTE_ID', 'STTN_ID', 'STTN_SN']
    result_cols = section_cols + ['TRIP_VELO_18_21']

    # Keep only the needed columns and drop sections whose four hourly times are all 0.
    trip_time = trip_time.loc[:, section_cols + time_cols]
    trip_time = trip_time.loc[~(trip_time[time_cols] == 0).all(axis=1)]

    # Missing counts mean "no observation": use 0 (on a copy, never on the caller's frame).
    trip_turn = trip_turn.loc[:, ['ROUTE_ID', 'STTN_SN', 'STTN_ID'] + count_cols].fillna(0.0)

    # Attach only the route attributes used for filtering, so unrelated master
    # columns can neither collide with section columns nor trigger row drops later.
    trip_time = pd.merge(trip_time, route_master.loc[:, ['ROUTE_ID', 'ROUTE_TY', 'ROUTE_NM']], on='ROUTE_ID')

    # Remove unwanted buses. Names are compared as strings so a numerically
    # parsed ROUTE_NM column neither breaks `.str` nor misses `bus_fit` entries.
    route_nm = trip_time['ROUTE_NM'].astype(str)
    excluded_names = set(map(str, bus_fit))
    keep = (
        trip_time['ROUTE_TY'].isin(['간선', '지선'])   # trunk / branch routes only
        & ~route_nm.str.startswith('N')               # drop night buses
        & ~route_nm.isin(excluded_names)              # drop custom buses
    )
    trip_time = trip_time.loc[keep]

    # Hourly counts are keyed by the section's destination stop.
    trip_speed = pd.merge(trip_time, trip_turn, left_on=to_stop_keys, right_on=node_keys)
    trip_speed = trip_speed.drop(columns=['STTN_ID', 'STTN_SN'])

    # Section distance is also looked up by the destination stop.
    node_dist = route_node_master.loc[:, node_keys + ['LINK_DSTNC_ACMTL']]
    trip_speed = pd.merge(trip_speed, node_dist, left_on=to_stop_keys, right_on=node_keys)
    trip_speed = trip_speed.drop(columns=['STTN_ID', 'STTN_SN'])

    # total time = hourly mean * hourly count; weighted mean = total time / total count
    hourly_mean = trip_speed[time_cols].to_numpy(dtype=float)
    hourly_count = trip_speed[count_cols].to_numpy(dtype=float)
    with np.errstate(divide='ignore', invalid='ignore'):
        mean_time = (hourly_mean * hourly_count).sum(axis=1) / hourly_count.sum(axis=1)
        # A zero (or undefined) travel time would give an infinite speed that
        # dropna cannot catch, so mark it as missing explicitly.
        mean_time[~(mean_time > 0)] = np.nan

    # 18H-21H average speed
    trip_speed = trip_speed.assign(TRIP_TIME_18_21=mean_time)
    trip_speed['TRIP_VELO_18_21'] = trip_speed['LINK_DSTNC_ACMTL'] / trip_speed['TRIP_TIME_18_21'] * 3600 / 1000

    # Drop only rows that lack a value in one of the returned columns.
    return trip_speed.loc[:, result_cols].dropna()

In [46]:
# Candidate (year, month) data folders; one is picked at random for this spot check.
date = (('2022', '09'), ('2022', '10'), ('2022', '11'), ('2022', '12'), ('2023', '01'), ('2023', '02'))
year, month = random.choice(date)
path = f'../../data/서울교통수단이동/{year}{month}'

# Only sub-directories can hold the CSV files read below, so stray files are skipped.
# Sorting makes the pick reproducible whenever `random` is seeded.
dirs = sorted(entry for entry in os.listdir(path) if os.path.isdir(os.path.join(path, entry)))
if not dirs:
    raise FileNotFoundError(f'no data directories found under {path}')
dir_ = random.choice(dirs)

print(year, month, dir_)

# Read the four input tables of the sampled folder.
route_master = pd.read_csv(f'{path}/{dir_}/TBIS_MS_ROUTE.csv')
route_node_master = pd.read_csv(f'{path}/{dir_}/TBIS_MS_ROUTE_NODE.csv')
trip_time = pd.read_csv(f'{path}/{dir_}/TPSS_ROUTE_SECTION_SPEED_H.csv')
trip_turn = pd.read_csv(f'{path}/{dir_}/TPSS_STA_ROUTE_INFO_H_TURN.csv')

trip_velo = to_trip_velo(route_master, route_node_master, bus_fit, trip_time, trip_turn)

# Resolve the route name to its ROUTE_ID; names are compared as strings so a
# numerically parsed ROUTE_NM column still matches.
target_nm = '143'
target_ids = route_master.loc[route_master['ROUTE_NM'].astype(str) == target_nm, 'ROUTE_ID'].values
if len(target_ids) == 0:
    raise ValueError(f'route {target_nm} not found in {path}/{dir_}/TBIS_MS_ROUTE.csv')
target_id = target_ids[0]

# Sections of the target route in stop order. sort_values returns a new frame,
# so the filtered slice is never modified in place.
trip_velo_target = trip_velo.loc[trip_velo['ROUTE_ID'] == target_id].sort_values(by='TO_STTN_SN')
trip_velo_target

2022 10 TR202100010001_15


Unnamed: 0,ROUTE_ID,FROM_STTN_SN,TO_STTN_SN,FROM_STTN_ID,TO_STTN_ID,TRIP_VELO_18_21
8865,100100022,1,2,107000071,107000073,21.728363
8905,100100022,2,3,107000073,107000518,9.648131
8862,100100022,3,4,107000518,107000075,8.565295
8874,100100022,4,5,107000075,107000077,18.494955
8860,100100022,5,6,107000077,107000079,12.864671
...,...,...,...,...,...,...
8864,100100022,111,112,107000078,107000076,14.212072
8875,100100022,112,113,107000076,107000519,5.461165
8913,100100022,113,114,107000519,107000074,10.716983
8877,100100022,114,115,107000074,107000072,19.946930
