# Trip Chain

In [1]:
import numpy as np
import pandas as pd

In [9]:
class Data:
    """One day's worth of transit tables, each loaded from CSV as a DataFrame.

    Tables are read from ``data/<YYYYMMDD>/<TABLE_NAME>.csv``:

    * ``transit``      - ``KSCC_DX_TRNSF_PATH_SUM`` (transfer summary)
    * ``trip_chain``   - ``KSCC_DX_TRIP_CHAIN_SUM`` (trip chains)
    * ``tbis_station`` - ``TBIS_MS_STTN`` (bus stop master)
    * ``tbis_route``   - ``TBIS_MS_ROUTE`` (bus route master)
    * ``subway``       - ``KSCC_DV_SUBWAY_STATION_GEOM`` (subway station master)
    * ``district``     - ``DISTRICT_EMD`` (administrative district master)

    Construction only drops incomplete rows from the two bus master tables.
    ``_pre_transit`` and ``_pre_trip_chain`` are opt-in and must be called
    explicitly.
    """

    # Station IDs below this value are classified as subway stations, anything
    # else as a bus stop (threshold taken from the original classification code).
    _SUBWAY_ID_LIMIT = 10000

    # Columns kept by `_pre_trip_chain`: trip summary plus the first two legs.
    _TRIP_CHAIN_COLUMNS = (
        'STDR_DE', 'START_PUB_STA_ID', 'END_PUB_STA_ID',
        'START_EMD_ID', 'END_EMD_ID', 'TOT_CNT',
        'ROUTE_ID_1', 'RIDE_PUB_STA_ID_1', 'ALGH_PUB_STA_ID_1',
        'ROUTE_ID_2', 'RIDE_PUB_STA_ID_2', 'ALGH_PUB_STA_ID_2',
    )

    # Per-leg station columns; the literal 'X' in them marks "no station".
    _STATION_ID_COLUMNS = (
        'RIDE_PUB_STA_ID_1', 'ALGH_PUB_STA_ID_1',
        'RIDE_PUB_STA_ID_2', 'ALGH_PUB_STA_ID_2',
    )

    def __init__(self, year, month, day):
        """Load every table for the given date and run the default preprocessing.

        Args:
            year, month, day: integers identifying the ``data/<YYYYMMDD>/`` folder.
        """
        date = (year, month, day)
        self.transit = self.read_csv(date, 'KSCC_DX_TRNSF_PATH_SUM')      # transfer summary
        self.trip_chain = self.read_csv(date, 'KSCC_DX_TRIP_CHAIN_SUM')   # trip chains
        self.tbis_station = self.read_csv(date, 'TBIS_MS_STTN')           # bus stop master
        self.tbis_route = self.read_csv(date, 'TBIS_MS_ROUTE')            # bus route master
        self.subway = self.read_csv(date, 'KSCC_DV_SUBWAY_STATION_GEOM')  # subway station master
        self.district = self.read_csv(date, 'DISTRICT_EMD')               # administrative district master

        self._preproccessing()

    def _preproccessing(self):
        """Run the default preprocessing (currently only NaN-row removal).

        The misspelled name is kept so existing callers keep working.
        """
        self._drop_na()

    def _drop_na(self):
        """Drop rows containing any missing value from the bus stop and route masters."""
        # Rebind instead of `inplace=True` so a frame is never mutated behind
        # another reference's back.
        self.tbis_station = self.tbis_station.dropna()
        self.tbis_route = self.tbis_route.dropna()

    def _pre_transit(self):
        """Add a ``type`` column to ``self.transit`` classifying each transfer.

        ``type`` counts how many of the two endpoints are subway stations:
        0 = bus-bus, 1 = bus-subway or subway-bus, 2 = subway-subway.
        """
        df = self.transit
        ride_is_subway = df['RIDE_PUB_STA_ID'] < self._SUBWAY_ID_LIMIT   # boarding
        alight_is_subway = df['ALGH_PUB_STA_ID'] < self._SUBWAY_ID_LIMIT  # alighting
        df['type'] = ride_is_subway.astype('int64') + alight_is_subway.astype('int64')

    def _pre_trip_chain(self):
        """Reduce ``self.trip_chain`` to single-transfer bus-to-bus chains.

        Keeps rows where the third leg is absent (``RIDE_PUB_STA_ID_3 == 'X'``),
        both route IDs are present, and all four station IDs of the first two
        legs are real IDs rather than ``'X'``. The result holds only the
        columns in ``_TRIP_CHAIN_COLUMNS`` with the station IDs cast to int64.
        """
        chain = self.trip_chain
        keep = (
            (chain['RIDE_PUB_STA_ID_3'] == 'X')   # single-transfer chains only
            & chain['ROUTE_ID_1'].notna()         # bus leg 1
            & chain['ROUTE_ID_2'].notna()         # bus leg 2
        )
        # The int cast below would fail on the 'X' placeholder, so filter it first.
        for column in self._STATION_ID_COLUMNS:
            keep &= chain[column] != 'X'

        # Select by name rather than position so a changed column order cannot
        # silently pick the wrong fields.
        chain = chain.loc[keep, list(self._TRIP_CHAIN_COLUMNS)]
        self.trip_chain = chain.astype(
            {column: 'int64' for column in self._STATION_ID_COLUMNS}
        )

    @staticmethod
    def read_csv(date, table_name):
        """Read ``data/<YYYYMMDD>/<table_name>.csv`` into a DataFrame.

        Args:
            date: ``(year, month, day)`` tuple of integers.
            table_name: CSV file name without the ``.csv`` extension.

        Returns:
            The parsed DataFrame. Malformed lines are skipped without an error
            (``on_bad_lines='skip'``), so the row count may be lower than the
            file's line count.
        """
        year, month, day = date
        path = f'data/{year}{month:02d}{day:02d}/{table_name}.csv'
        return pd.read_csv(path, on_bad_lines='skip', encoding='utf-8')

In [12]:
# Single-day alternative: data = Data(2023, 2, 1)
# Load one Data bundle per day for days 1..6 of 2023-02 (the range end is exclusive).
# NOTE(review): `data` is a list here, yet later cells access `data.transit` and
# `data.trip_chain` as if it were a single Data object - confirm which form is intended.
data = [Data(2023, 2, day) for day in range(1, 7)]

In [27]:
# Column dtypes and non-null counts of the trip-chain table for the list entry at index 4.
data[4].trip_chain.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 467482 entries, 0 to 467481
Data columns (total 21 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   STDR_DE            467482 non-null  int64  
 1   START_PUB_STA_ID   467482 non-null  int64  
 2   END_PUB_STA_ID     455431 non-null  float64
 3   START_EMD_ID       464509 non-null  float64
 4   END_EMD_ID         452971 non-null  float64
 5   TOT_CNT            467482 non-null  int64  
 6   ROUTE_ID_1         254643 non-null  float64
 7   RIDE_PUB_STA_ID_1  467482 non-null  object 
 8   ALGH_PUB_STA_ID_1  467482 non-null  object 
 9   ROUTE_ID_2         35166 non-null   float64
 10  RIDE_PUB_STA_ID_2  467482 non-null  object 
 11  ALGH_PUB_STA_ID_2  467482 non-null  object 
 12  ROUTE_ID_3         1384 non-null    float64
 13  RIDE_PUB_STA_ID_3  467482 non-null  object 
 14  ALGH_PUB_STA_ID_3  467482 non-null  object 
 15  ROUTE_ID_4         15 non-null      float64
 16  RI

In [26]:
# Display the trip-chain table for the list entry at index 4.
data[4].trip_chain

Unnamed: 0,STDR_DE,START_PUB_STA_ID,END_PUB_STA_ID,START_EMD_ID,END_EMD_ID,TOT_CNT,ROUTE_ID_1,RIDE_PUB_STA_ID_1,ALGH_PUB_STA_ID_1,ROUTE_ID_2,...,ALGH_PUB_STA_ID_2,ROUTE_ID_3,RIDE_PUB_STA_ID_3,ALGH_PUB_STA_ID_3,ROUTE_ID_4,RIDE_PUB_STA_ID_4,ALGH_PUB_STA_ID_4,ROUTE_ID_5,RIDE_PUB_STA_ID_5,ALGH_PUB_STA_ID_5
0,20230131,112000136,111000180.0,1113068.0,1112059.0,8,111000010.0,112000136,111000180,,...,X,,X,X,,X,X,,X,X
1,20230131,218000636,1950.0,3110169.0,3110157.0,3,100100358.0,218000636,218000320,,...,1950,,X,X,,X,X,,X,X
2,20230131,116000583,213000451.0,1117068.0,3106053.0,4,116900011.0,116000583,213000451,,...,X,,X,X,,X,X,,X,X
3,20230131,210000259,210000362.0,,,5,100100091.0,210000259,210000362,,...,X,,X,X,,X,X,,X,X
4,20230131,100000365,100000385.0,1101067.0,1101061.0,8,100100061.0,100000365,100000385,,...,X,,X,X,,X,X,,X,X
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
467477,20230131,110000253,2729.0,1111061.0,1105053.0,3,100100181.0,110000253,110000325,,...,2729,,X,X,,X,X,,X,X
467478,20230131,326,2647.0,1123077.0,1111056.0,74,,0326,2647,,...,X,,X,X,,X,X,,X,X
467479,20230131,223,120900112.0,1122051.0,1121062.0,8,,0223,0227,120900009.0,...,120900112,,X,X,,X,X,,X,X
467480,20230131,120000674,117000098.0,1121072.0,1118053.0,65,100100278.0,120000674,117000098,,...,X,,X,X,,X,X,,X,X


In [106]:
# Load the bus stop master file from the working directory.
bus_station = pd.read_csv('bus-station-master.csv')

In [107]:
# Display the bus stop master table.
bus_station

Unnamed: 0,STTN_ID,ARS_ID,정류소명,행정구,행정동,경도,위도
0,100000379,1008,서울역사박물관.경희궁앞,종로구,사직동,126.970442,37.569336
1,100000380,1007,서울역사박물관.경희궁앞,종로구,사직동,126.970420,37.569098
2,100000384,1009,광화문,종로구,사직동,126.975241,37.570051
3,100000018,1113,사직단.어린이도서관,종로구,사직동,126.968021,37.575113
4,100000019,1114,사직동주민센터,종로구,사직동,126.971318,37.576090
...,...,...,...,...,...,...,...
10360,117000906,18129,독산주공14단지,광명시,하안3동,126.887041,37.461346
10361,117000907,18128,독산주공14단지,광명시,하안3동,126.886996,37.461206
10362,117900165,18993,안양천로입구,광명시,소하1동,126.893035,37.452452
10363,124000369,25003,초이동,하남시,초이동,127.161382,37.541971


### 환승 합 테이블 - `KSCC_DX_TRNSF_PATH_SUM`

In [6]:
# Display the transfer-summary table.
# NOTE(review): this attribute access assumes `data` is a single Data object,
# not the list built in the loading cell - confirm.
data.transit

Unnamed: 0,STDR_DE,ALGH_PUB_STA_ID,RIDE_PUB_STA_ID,TMZON,CNT,CNT_01,CNT_02,CNT_04,CNT_06,CNT_07,TRNSF_TIME,TRNSF_TIME_01,TRNSF_TIME_02,TRNSF_TIME_04,TRNSF_TIME_06,TRNSF_TIME_07
0,20230127,156,105000008,18,1,0,0,0,1,0,234,0,0,0,234,0
1,20230127,124000137,123000153,7,1,1,0,0,0,0,180,180,0,0,0,0
2,20230127,119000009,119000011,12,1,0,1,0,0,0,319,0,319,0,0,0
3,20230127,114000111,114000199,13,2,1,0,0,1,0,584,412,0,0,755,0
4,20230127,110000184,110000356,13,1,1,0,0,0,0,194,194,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
516416,20230127,118000005,1713,17,1,1,0,0,0,0,1817,1817,0,0,0,0
516417,20230127,119000052,119000093,11,1,1,0,0,0,0,958,958,0,0,0,0
516418,20230127,121900011,121000141,13,2,0,0,2,0,0,826,0,0,826,0,0
516419,20230127,4132,123000090,15,1,1,0,0,0,0,350,350,0,0,0,0


In [55]:
# Transfer rows whose alighting station ID is below 10000.
# (Earlier experiments filtered on the boarding ID instead, and on rows where the
# boarding and alighting IDs are equal.)
df = data.transit
df[df['ALGH_PUB_STA_ID'].lt(10000)]

Unnamed: 0,STDR_DE,ALGH_PUB_STA_ID,RIDE_PUB_STA_ID,TMZON,CNT,CNT_01,CNT_02,CNT_04,CNT_06,CNT_07,TRNSF_TIME,TRNSF_TIME_01,TRNSF_TIME_02,TRNSF_TIME_04,TRNSF_TIME_06,TRNSF_TIME_07
0,20230127,156,105000008,18,1,0,0,0,1,0,234,0,0,0,234,0
9,20230127,2519,115900063,21,1,1,0,0,0,0,303,303,0,0,0,0
11,20230127,204,100900009,8,1,0,0,0,1,0,1624,0,0,0,1624,0
17,20230127,4119,119900033,19,8,7,0,1,0,0,609,596,0,695,0,0
19,20230127,2714,110900136,18,1,1,0,0,0,0,229,229,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
516397,20230127,2728,104000153,21,1,1,0,0,0,0,1110,1110,0,0,0,0
516398,20230127,2716,110900025,8,2,2,0,0,0,0,1008,1008,0,0,0,0
516409,20230127,332,122900029,18,103,102,0,0,1,0,499,499,0,0,464,0
516413,20230127,4709,107000074,17,3,2,0,0,1,0,429,389,0,0,511,0


In [50]:
# Column dtypes and non-null counts of the transfer-summary table.
data.transit.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 516421 entries, 0 to 516420
Data columns (total 16 columns):
 #   Column           Non-Null Count   Dtype
---  ------           --------------   -----
 0   STDR_DE          516421 non-null  int64
 1   ALGH_PUB_STA_ID  516421 non-null  int64
 2   RIDE_PUB_STA_ID  516421 non-null  int64
 3   TMZON            516421 non-null  int64
 4   CNT              516421 non-null  int64
 5   CNT_01           516421 non-null  int64
 6   CNT_02           516421 non-null  int64
 7   CNT_04           516421 non-null  int64
 8   CNT_06           516421 non-null  int64
 9   CNT_07           516421 non-null  int64
 10  TRNSF_TIME       516421 non-null  int64
 11  TRNSF_TIME_01    516421 non-null  int64
 12  TRNSF_TIME_02    516421 non-null  int64
 13  TRNSF_TIME_04    516421 non-null  int64
 14  TRNSF_TIME_06    516421 non-null  int64
 15  TRNSF_TIME_07    516421 non-null  int64
dtypes: int64(16)
memory usage: 63.0 MB


In [39]:
# Trip chains whose first leg has no route ID.
df = data.trip_chain
df[df['ROUTE_ID_1'].isnull()]

Unnamed: 0,STDR_DE,START_PUB_STA_ID,END_PUB_STA_ID,START_EMD_ID,END_EMD_ID,TOT_CNT,ROUTE_ID_1,RIDE_PUB_STA_ID_1,ALGH_PUB_STA_ID_1,ROUTE_ID_2,...,ALGH_PUB_STA_ID_2,ROUTE_ID_3,RIDE_PUB_STA_ID_3,ALGH_PUB_STA_ID_3,ROUTE_ID_4,RIDE_PUB_STA_ID_4,ALGH_PUB_STA_ID_4,ROUTE_ID_5,RIDE_PUB_STA_ID_5,ALGH_PUB_STA_ID_5
3,20230127,2553,1804.0,1125054.0,3105252.0,5,,2553,1804,,...,X,,X,X,,X,X,,X,X
5,20230127,417,1869.0,1108066.0,3101458.0,4,,0417,1869,,...,X,,X,X,,X,X,,X,X
12,20230127,313,121900293.0,1112051.0,1122067.0,4,,0313,0332,121900001.0,...,121900293,,X,X,,X,X,,X,X
18,20230127,3134,230.0,2304062.0,1121064.0,5,,3134,0230,,...,X,,X,X,,X,X,,X,X
20,20230127,1026,2720.0,1123080.0,1107065.0,3,,1026,2720,,...,X,,X,X,,X,X,,X,X
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
473270,20230127,1003,105900027.0,1103073.0,1106072.0,7,,1003,1015,105900003.0,...,105900027,,X,X,,X,X,,X,X
473279,20230127,2619,2747.0,1114074.0,1117054.0,5,,2619,2747,,...,X,,X,X,,X,X,,X,X
473283,20230127,4113,2738.0,1119056.0,1120073.0,18,,4113,2738,,...,X,,X,X,,X,X,,X,X
473285,20230127,1809,2612.0,2303069.0,1112051.0,4,,1809,2612,,...,X,,X,X,,X,X,,X,X


### 통행 사슬 - `KSCC_DX_TRIP_CHAIN_SUM`

In [73]:
# Working references to the trip-chain table and the district master table.
chain = data.trip_chain
district = data.district

In [39]:
# Keep chains with no third leg and a route ID on both of the first two legs,
# then trim to the first 12 columns.
chain = (
    chain
    .loc[lambda frame: frame['RIDE_PUB_STA_ID_3'] == 'X']
    .loc[lambda frame: frame['ROUTE_ID_1'].notna() & frame['ROUTE_ID_2'].notna()]
    .iloc[:, :12]
)
chain

Unnamed: 0,STDR_DE,START_PUB_STA_ID,END_PUB_STA_ID,START_EMD_ID,END_EMD_ID,TOT_CNT,ROUTE_ID_1,RIDE_PUB_STA_ID_1,ALGH_PUB_STA_ID_1,ROUTE_ID_2,RIDE_PUB_STA_ID_2,ALGH_PUB_STA_ID_2
2,20230127,113000019,218000636.0,1114074.0,3110169.0,3,100100447.0,113000019,111000224,100100358.0,111000002,218000636
17,20230127,122900066,122900087.0,1123074.0,1123071.0,3,122900002.0,122900066,122000260,122900006.0,122000260,122900087
342,20230127,122000148,123000077.0,1123079.0,1124064.0,3,100100057.0,122000148,123000065,100100220.0,123000065,123000077
408,20230127,104000112,104000006.0,1105066.0,1105061.0,3,100100218.0,104000112,104000055,100100053.0,104000055,104000006
530,20230127,122000642,123000579.0,1123073.0,1124077.0,5,122900002.0,122000642,123900002,100100564.0,123000007,123000579
...,...,...,...,...,...,...,...,...,...,...,...,...
471934,20230127,116000613,116000054.0,1117072.0,1117071.0,3,100100085.0,116000613,116000008,100100286.0,116000008,116000054
471987,20230127,111000232,112000012.0,1112066.0,1113075.0,3,100100462.0,111000232,111000229,100100565.0,111000007,112000012
472110,20230127,116900256,116900116.0,1117052.0,1117054.0,4,116900001.0,116900256,116900210,116900007.0,116900210,116900116
472749,20230127,108000154,109000241.0,1109071.0,1110061.0,3,100100139.0,108000154,108000089,109900011.0,108000089,109000241


In [74]:
# Rows that have a destination district ID.
# (To count missing vs. present instead: chain['END_EMD_ID'].isna().value_counts())
chain.loc[chain['END_EMD_ID'].notna()]

Unnamed: 0,STDR_DE,START_PUB_STA_ID,END_PUB_STA_ID,START_EMD_ID,END_EMD_ID,TOT_CNT,ROUTE_ID_1,RIDE_PUB_STA_ID_1,ALGH_PUB_STA_ID_1,ROUTE_ID_2,...,ALGH_PUB_STA_ID_2,ROUTE_ID_3,RIDE_PUB_STA_ID_3,ALGH_PUB_STA_ID_3,ROUTE_ID_4,RIDE_PUB_STA_ID_4,ALGH_PUB_STA_ID_4,ROUTE_ID_5,RIDE_PUB_STA_ID_5,ALGH_PUB_STA_ID_5
0,20230127,213000492,213000197.0,3106068.0,3106054.0,14,100100312.0,213000492,213000197,,...,X,,X,X,,X,X,,X,X
1,20230127,100000039,112000041.0,1101055.0,1113066.0,6,100100360.0,100000039,112000041,,...,X,,X,X,,X,X,,X,X
2,20230127,113000019,218000636.0,1114074.0,3110169.0,3,100100447.0,113000019,111000224,100100358.0,...,218000636,,X,X,,X,X,,X,X
3,20230127,2553,1804.0,1125054.0,3105252.0,5,,2553,1804,,...,X,,X,X,,X,X,,X,X
4,20230127,118000146,114000208.0,1119073.0,1115058.0,3,100100289.0,118000146,114000208,,...,X,,X,X,,X,X,,X,X
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
473284,20230127,123000622,123000639.0,1124082.0,1124081.0,12,100100496.0,123000622,123000639,,...,X,,X,X,,X,X,,X,X
473285,20230127,1809,2612.0,2303069.0,1112051.0,4,,1809,2612,,...,X,,X,X,,X,X,,X,X
473286,20230127,222,116000134.0,1122052.0,1117061.0,5,,0222,0234,100100085.0,...,116000134,,X,X,,X,X,,X,X
473287,20230127,113000099,112000016.0,1114072.0,1113075.0,6,100100363.0,113000099,112000016,,...,X,,X,X,,X,X,,X,X


In [100]:
chain, district = data.trip_chain, data.district
# One combined mask: no third leg (single-transfer chains only) and a route ID
# on both legs (bus-bus). The result is trimmed to the first 12 columns.
chain = chain.loc[
    (chain['RIDE_PUB_STA_ID_3'] == 'X')
    & chain['ROUTE_ID_1'].notnull()
    & chain['ROUTE_ID_2'].notnull()
].iloc[:, :12]

# Not applied here: dropping rows with missing START/END_EMD_ID and casting those
# IDs to int, and merging with `district` on ALGH_PUB_STA_ID_1 == EMD_ID.
chain

Unnamed: 0,STDR_DE,START_PUB_STA_ID,END_PUB_STA_ID,START_EMD_ID,END_EMD_ID,TOT_CNT,ROUTE_ID_1,RIDE_PUB_STA_ID_1,ALGH_PUB_STA_ID_1,ROUTE_ID_2,RIDE_PUB_STA_ID_2,ALGH_PUB_STA_ID_2
2,20230127,113000019,218000636.0,1114074.0,3110169.0,3,100100447.0,113000019,111000224,100100358.0,111000002,218000636
17,20230127,122900066,122900087.0,1123074.0,1123071.0,3,122900002.0,122900066,122000260,122900006.0,122000260,122900087
342,20230127,122000148,123000077.0,1123079.0,1124064.0,3,100100057.0,122000148,123000065,100100220.0,123000065,123000077
408,20230127,104000112,104000006.0,1105066.0,1105061.0,3,100100218.0,104000112,104000055,100100053.0,104000055,104000006
530,20230127,122000642,123000579.0,1123073.0,1124077.0,5,122900002.0,122000642,123900002,100100564.0,123000007,123000579
...,...,...,...,...,...,...,...,...,...,...,...,...
471934,20230127,116000613,116000054.0,1117072.0,1117071.0,3,100100085.0,116000613,116000008,100100286.0,116000008,116000054
471987,20230127,111000232,112000012.0,1112066.0,1113075.0,3,100100462.0,111000232,111000229,100100565.0,111000007,112000012
472110,20230127,116900256,116900116.0,1117052.0,1117054.0,4,116900001.0,116900256,116900210,116900007.0,116900210,116900116
472749,20230127,108000154,109000241.0,1109071.0,1110061.0,3,100100139.0,108000154,108000089,109900011.0,108000089,109000241


In [104]:
# Display the trip-chain table currently held on `data`.
data.trip_chain

Unnamed: 0,STDR_DE,START_PUB_STA_ID,END_PUB_STA_ID,START_EMD_ID,END_EMD_ID,TOT_CNT,ROUTE_ID_1,RIDE_PUB_STA_ID_1,ALGH_PUB_STA_ID_1,ROUTE_ID_2,RIDE_PUB_STA_ID_2,ALGH_PUB_STA_ID_2
2,20230127,113000019,218000636.0,1114074.0,3110169.0,3,100100447.0,113000019,111000224,100100358.0,111000002,218000636
17,20230127,122900066,122900087.0,1123074.0,1123071.0,3,122900002.0,122900066,122000260,122900006.0,122000260,122900087
342,20230127,122000148,123000077.0,1123079.0,1124064.0,3,100100057.0,122000148,123000065,100100220.0,123000065,123000077
408,20230127,104000112,104000006.0,1105066.0,1105061.0,3,100100218.0,104000112,104000055,100100053.0,104000055,104000006
530,20230127,122000642,123000579.0,1123073.0,1124077.0,5,122900002.0,122000642,123900002,100100564.0,123000007,123000579
...,...,...,...,...,...,...,...,...,...,...,...,...
471934,20230127,116000613,116000054.0,1117072.0,1117071.0,3,100100085.0,116000613,116000008,100100286.0,116000008,116000054
471987,20230127,111000232,112000012.0,1112066.0,1113075.0,3,100100462.0,111000232,111000229,100100565.0,111000007,112000012
472110,20230127,116900256,116900116.0,1117052.0,1117054.0,4,116900001.0,116900256,116900210,116900007.0,116900210,116900116
472749,20230127,108000154,109000241.0,1109071.0,1110061.0,3,100100139.0,108000154,108000089,109900011.0,108000089,109000241


### Merge

In [133]:
# Rebind `chain` to the trip-chain table held on `data`.
chain = data.trip_chain

In [134]:
# Display the bus stop master table.
bus_station

Unnamed: 0,STTN_ID,ARS_ID,정류소명,행정구,행정동,경도,위도
0,100000379,1008,서울역사박물관.경희궁앞,종로구,사직동,126.970442,37.569336
1,100000380,1007,서울역사박물관.경희궁앞,종로구,사직동,126.970420,37.569098
2,100000384,1009,광화문,종로구,사직동,126.975241,37.570051
3,100000018,1113,사직단.어린이도서관,종로구,사직동,126.968021,37.575113
4,100000019,1114,사직동주민센터,종로구,사직동,126.971318,37.576090
...,...,...,...,...,...,...,...
10360,117000906,18129,독산주공14단지,광명시,하안3동,126.887041,37.461346
10361,117000907,18128,독산주공14단지,광명시,하안3동,126.886996,37.461206
10362,117900165,18993,안양천로입구,광명시,소하1동,126.893035,37.452452
10363,124000369,25003,초이동,하남시,초이동,127.161382,37.541971


In [135]:
# Attach bus stop attributes to each chain via the first leg's alighting stop.
# Inner join: chains whose stop ID is absent from the master are dropped.
merge = chain.merge(
    bus_station,
    how='inner',
    left_on='ALGH_PUB_STA_ID_1',
    right_on='STTN_ID',
)

In [136]:
# Display the joined frame. The previous `merge.groupby(by='')` grouped by a
# column named '' and raised KeyError; the per-district aggregation is done in
# the following cell.
merge

Unnamed: 0,STDR_DE,START_PUB_STA_ID,END_PUB_STA_ID,START_EMD_ID,END_EMD_ID,TOT_CNT,ROUTE_ID_1,RIDE_PUB_STA_ID_1,ALGH_PUB_STA_ID_1,ROUTE_ID_2,RIDE_PUB_STA_ID_2,ALGH_PUB_STA_ID_2,STTN_ID,ARS_ID,정류소명,행정구,행정동,경도,위도
0,20230127,113000019,218000636.0,1114074.0,3110169.0,3,100100447.0,113000019,111000224,100100358.0,111000002,218000636,111000224,12314,덕은교.은평차고지앞,은평구,수색동,126.885533,37.588518
1,20230127,122900066,122900087.0,1123074.0,1123071.0,3,122900002.0,122900066,122000260,122900006.0,122000260,122900087,122000260,23364,개포래미안포레스트,강남구,개포4동,127.054420,37.480199
2,20230127,122900065,122900087.0,1123074.0,1123071.0,3,122900002.0,122900065,122000260,122900006.0,122000260,122900087,122000260,23364,개포래미안포레스트,강남구,개포4동,127.054420,37.480199
3,20230127,122900066,121000299.0,1123074.0,1122068.0,3,122900002.0,122900066,122000260,100100020.0,122000260,121000299,122000260,23364,개포래미안포레스트,강남구,개포4동,127.054420,37.480199
4,20230127,122900022,122900087.0,1123073.0,1123071.0,3,122900002.0,122900022,122000260,122900006.0,122000260,122900087,122000260,23364,개포래미안포레스트,강남구,개포4동,127.054420,37.480199
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4025,20230127,113900067,118000008.0,1114061.0,1119054.0,3,113900008.0,113900067,113900221,100100033.0,113000006,118000008,113900221,14760,아현동주민센터,마포구,아현동,126.953117,37.547114
4026,20230127,111900111,111000096.0,1112057.0,1112058.0,5,111900004.0,111900111,111000093,100100084.0,111000093,111000096,111000093,12181,구산역2번출구.예일여고,은평구,역촌동,126.916762,37.610015
4027,20230127,119900175,101000033.0,1120072.0,1102052.0,3,119900011.0,119900175,119900226,100100029.0,119000011,101000033,119900226,20942,노량진한국법학교육원,동작구,노량진1동,126.944304,37.512354
4028,20230127,100000036,113000415.0,1101054.0,1114066.0,5,100100048.0,100000036,112000036,100100363.0,112000017,113000415,112000036,13119,이대부중,서대문구,신촌동,126.944262,37.563564


In [145]:
# Total trip count per administrative district, largest first.
(
    merge
    .groupby('행정동')['TOT_CNT']
    .sum()
    .sort_values(ascending=False)
)

행정동
한남동       367
상도1동      334
연희동       250
구로2동      210
휘경1동      203
         ... 
암사3동        3
송파2동        3
성수1가2동      3
거여2동        3
우이동         3
Name: TOT_CNT, Length: 389, dtype: int64