```
23일 토요일 각자 eda 후 올리기
24일 일요일 슬랙 또는 행아웃


EDA 해볼 것
1. 사람이 어디서 어디로 가장 많이 이동하는 지. 새로운 노선 만들기(인구수가 많은 지역(유동인구), 정류장 이용 많은 곳)
2. 시간대 이용자 파악 후 버스 배차간격 조정 - 주중, 주말 나눠서 분석 (예측)
3. 신도시 버스 인프라 먼저 파악 (지하철x)
4. 서울에서 오는 버스가 어느 정류장에 서는지 확인

최적화 방법
1. 가장 많은 이용자가 있는 정류장 최단거리
2. 비용을 최소화 하는 방법 (최대한 노선 합치기)
3. 있는 노선 확인 후 사람을 많이 태우는 노선 추천(수요)
```

In [40]:
%matplotlib inline

import geopandas as gpd

import pandas as pd
import os
import requests
import folium
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# import plotly.express as px
# import plotly.graph_objects as go

In [41]:
# matplotlib에 “AppleGothc”(Mac) 또는 “Malgun Gothic”(Windows) 글꼴 적용

import matplotlib.font_manager as fm

for font in fm.fontManager.ttflist:
    if font.name in ['AppleGothic', 'Malgun Gothic']:
        plt.rcParams['font.family'] = font.name
        break

plt.rcParams['font.family']

['Malgun Gothic']

In [42]:
#2018년 7월 승차일 기준 1~4일의 버스 카드태깅 정보를 담고있습니다.
#미터!!!!!!!!!!!!!!!!!!!!!!!!!
TripChain = pd.read_csv('../Data/PJT001_TripChain.csv')

#17~18년 기준, 경기도 버스 정류장에 대한 정보를 담고있습니다.
StationTable = pd.read_csv('../Data/PJT001_stations_table.csv')

#2018년 7월 1~4일의 행정동별 이동 인구수 정보를 담고 있습니다.
AreaPeople = pd.read_csv('../Data/PJT001_sk_emd_od.csv')

#버스노선-정류장 매핑 테이블 정보 입니다.
RouteStationInfo = pd.read_csv('../Data/PJT001_routestationinfo.csv')

In [43]:
TripChain.columns=['암호화카드번호', '트랜잭션ID', '환승횟수', '교통카드발행사ID',
       '총이용객수', '사용자구분', '교통수단CD1', '교통수단CD2',
       '교통수단CD3', '교통수단CD4', '교통수단CD5', '버스노선ID1',
       '버스노선ID2', '버스노선ID3', '버스노선ID4', '버스노선ID5',
       '차량ID1', '차량ID2', '차량ID3', '차량ID4',
       '차량ID5', '총통행거리', '총탑승시간', '총소요시간',
       '승차일시1', '승차일시2', '승차일시3', '승차일시4',
       '승차일시5', '하차일시1', '하차일시2', '하차일시3',
       '하차일시4', '하차일시5', '최초승차일시', '최종하차일시',
       '승차역ID1', '승차역ID2', '승차역ID3', '승차역ID4',
       '승차역ID5', '하차역ID1', '하차역ID2', '하차역ID3',
       '하차역ID4', '하차역ID5', '최초승차역ID', '최종하차역ID',
       '총이용금액', '수집건수', '트립체인완료코드']

In [44]:
TripChain.head()

Unnamed: 0,암호화카드번호,트랜잭션ID,환승횟수,교통카드발행사ID,총이용객수,사용자구분,교통수단CD1,교통수단CD2,교통수단CD3,교통수단CD4,...,하차역ID1,하차역ID2,하차역ID3,하차역ID4,하차역ID5,최초승차역ID,최종하차역ID,총이용금액,수집건수,트립체인완료코드
0,900079696430,56,2,9000923,1,1,500.0,500.0,,,...,,,,,,,4116708.0,1350,2,;
1,900079697651,5,1,9000923,1,1,500.0,,,,...,,,,,,,4117269.0,1250,1,;
2,900079698254,32,1,9000923,1,1,500.0,,,,...,,,,,,,4107936.0,1550,1,;
3,900079699257,80,1,9000923,1,1,500.0,,,,...,,,,,,,4116717.0,1250,1,;
4,900079701419,64,1,9000923,1,1,530.0,,,,...,,,,,,,4116848.0,2050,1,;


날짜, 시간 데이터 datetime으로 데이터타입 변경

In [45]:
TripChain['승차일시1'] = pd.to_datetime(TripChain['승차일시1'], format='%Y%m%d%H%S%f')
TripChain['승차일시2'] = pd.to_datetime(TripChain['승차일시2'], format='%Y%m%d%H%S%f')
TripChain['승차일시3'] = pd.to_datetime(TripChain['승차일시3'], format='%Y%m%d%H%S%f')
TripChain['승차일시4'] = pd.to_datetime(TripChain['승차일시4'], format='%Y%m%d%H%S%f')
TripChain['승차일시5'] = pd.to_datetime(TripChain['승차일시5'], format='%Y%m%d%H%S%f')
TripChain['하차일시1'] = pd.to_datetime(TripChain['하차일시1'], format='%Y%m%d%H%S%f')
TripChain['하차일시2'] = pd.to_datetime(TripChain['하차일시2'], format='%Y%m%d%H%S%f')
TripChain['하차일시3'] = pd.to_datetime(TripChain['하차일시3'], format='%Y%m%d%H%S%f')
TripChain['하차일시4'] = pd.to_datetime(TripChain['하차일시4'], format='%Y%m%d%H%S%f')
TripChain['하차일시5'] = pd.to_datetime(TripChain['하차일시5'], format='%Y%m%d%H%S%f')

In [46]:
TripChain.shape

(1048575, 51)

In [47]:
TripChain['암호화카드번호'].nunique()

498764

In [48]:
TripChain['승차일시1'].unique()

array(['2018-07-01T05:00:25.430000000', '2018-07-01T07:00:21.560000000',
       '2018-07-01T12:00:36.530000000', ...,
       '2018-07-04T14:00:02.590000000', '2018-07-04T09:00:49.560000000',
       '2018-07-04T21:00:21.540000000'], dtype='datetime64[ns]')

In [49]:
# RouteStationInfo_latlon = RouteStationInfo_latlon.dropna()

# TripChain[]

---

버스노선-정류장 매핑 정보
```
'seq ' : 순번.
'pr_station_id' : 노선 ID.
'bus_line_no' : 버스 노선 번호.
'bus_line_no_seq' : 버스 라인 정류장 순서.
'station_nm ' : 정류장 명칭.
'station_id ' : 표준정류장ID.
'mobile_no' : 모바일정류장ID.
```

In [50]:
# 버스 개수
RouteStationInfo['bus_line_no'].nunique()

226

In [51]:
RouteStationInfo.head()

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no
0,65286,228000018,10-4,1,용인터미널,228001552,47634.0
1,65287,228000018,10-4,2,용인터미널(경유),277102443,
2,65288,228000018,10-4,3,포브스병원,228000443,29439.0
3,65289,228000018,10-4,4,제일교회,228000665,29881.0
4,65290,228000018,10-4,5,라이프아파트,228000664,29457.0


Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
0,228003422,용인시,손골마을회관.국제학교,"Songol Community Center,",56443.0,노변정류장,경기도 용인시,,37.342517,127.066817,56443.0,
1,228003423,용인시,풀잎사랑,Pulipsarang,56444.0,노변정류장,경기도 용인시,,37.341800,127.068983,56444.0,
2,228003424,용인시,풀잎사랑,Pulipsarang,56445.0,노변정류장,경기도 용인시,,37.341817,127.069083,56445.0,
3,228003425,용인시,대성공정,Daesung Process,56446.0,노변정류장,경기도 용인시,,37.339350,127.073067,56446.0,
4,228003426,용인시,대성공정,Daesung Process,56447.0,노변정류장,경기도 용인시,,37.339183,127.073400,56447.0,
5,228003427,용인시,기응사,Gieungsa,56448.0,노변정류장,경기도 용인시,,37.338267,127.075033,56448.0,
6,228003428,용인시,기응사,Gieungsa,56449.0,노변정류장,경기도 용인시,,37.338333,127.075067,56449.0,
7,228003429,용인시,동천동용인한빛중학교,Dongcheon-dong Yongin Hanbit Middle School,56450.0,노변정류장,경기도 용인시,,37.337900,127.084967,56450.0,
8,228003430,용인시,서봉마을.노블랜드,"Seobong Maeul, Novel Land",56451.0,노변정류장,경기도 용인시,,37.333083,127.052017,,
9,228003431,용인시,서봉마을.노블랜드,"Seobong Maeul, Noble Land",56452.0,노변정류장,경기도 용인시,,37.333150,127.051900,,


In [1]:
StationTable.shape

NameError: name 'StationTable' is not defined

In [54]:
# 정류소 번호 중복 제거
StationTable.drop_duplicates(subset ="정류소번호", 
                     keep = 'first', inplace = True) 

In [55]:
StationTable.shape

(31686, 12)

## 버스별로 노선 확인 (Folium 이용)

In [56]:
StationTable.sample()

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
35337,235000520,양주시,현진에버빌아파트,Hyunjin Evervill Apartment,39768.0,노변정류장,경기도 양주시,경기도 양주시 덕계동,37.8064,127.060717,[None None None None None None None None None ...,4196233.0


In [57]:
StationTable[StationTable['표준정류장ID'] == 228001552]

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
18624,228001552,용인시,용인터미널,Yongin Terminal,47634.0,노변정류장,경기도 용인시,경기도 용인시 처인구 김량장동,37.232783,127.2101,[None None None None None None None None None ...,4176783.0


In [58]:
RouteStationInfo.shape

(37831, 7)

In [59]:
RouteStationInfo_latlon = pd.merge(RouteStationInfo, 
             StationTable[['정류소명', '표준정류장ID', 'WGS84위도', 'WGS84경도', '관할관청','이비카드정류장ID']],
             left_on='station_id', right_on='표준정류장ID', how='left')

In [60]:
RouteStationInfo_latlon.head()

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,정류소명,표준정류장ID,WGS84위도,WGS84경도,관할관청,이비카드정류장ID
0,65286,228000018,10-4,1,용인터미널,228001552,47634.0,용인터미널,228001552.0,37.232783,127.2101,경기도 용인시,4176783.0
1,65287,228000018,10-4,2,용인터미널(경유),277102443,,,,,,,
2,65288,228000018,10-4,3,포브스병원,228000443,29439.0,포브스병원,228000443.0,37.235267,127.210617,경기도 용인시,4150418.0
3,65289,228000018,10-4,4,제일교회,228000665,29881.0,제일교회,228000665.0,37.2343,127.213333,경기도 용인시,4196832.0
4,65290,228000018,10-4,5,라이프아파트,228000664,29457.0,라이프아파트,228000664.0,37.231483,127.213767,경기도 용인시,4150529.0


In [61]:
RouteStationInfo_latlon.isnull().sum()

seq                    0
pr_station_id          0
bus_line_no            0
bus_line_no_seq        0
station_nm             0
station_id             0
mobile_no           2350
정류소명                4627
표준정류장ID             4627
WGS84위도             4627
WGS84경도             4627
관할관청                5538
이비카드정류장ID          10753
dtype: int64

총 4627개의 결측값이 존재하는 정류장을 어떻게 해야하나...흠...

In [62]:
# 일단 결측값 데이터 제외
RouteStationInfo_latlon = RouteStationInfo_latlon.dropna()
RouteStationInfo_latlon.head()

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,정류소명,표준정류장ID,WGS84위도,WGS84경도,관할관청,이비카드정류장ID
0,65286,228000018,10-4,1,용인터미널,228001552,47634.0,용인터미널,228001552.0,37.232783,127.2101,경기도 용인시,4176783.0
2,65288,228000018,10-4,3,포브스병원,228000443,29439.0,포브스병원,228000443.0,37.235267,127.210617,경기도 용인시,4150418.0
3,65289,228000018,10-4,4,제일교회,228000665,29881.0,제일교회,228000665.0,37.2343,127.213333,경기도 용인시,4196832.0
4,65290,228000018,10-4,5,라이프아파트,228000664,29457.0,라이프아파트,228000664.0,37.231483,127.213767,경기도 용인시,4150529.0
5,65291,228000018,10-4,6,송담대,228000663,29464.0,송담대,228000663.0,37.228133,127.215267,경기도 용인시,4111697.0


In [63]:
RouteStationInfo_latlon.reset_index()

Unnamed: 0,index,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,정류소명,표준정류장ID,WGS84위도,WGS84경도,관할관청,이비카드정류장ID
0,0,65286,228000018,10-4,1,용인터미널,228001552,47634.0,용인터미널,228001552.0,37.232783,127.210100,경기도 용인시,4176783.0
1,2,65288,228000018,10-4,3,포브스병원,228000443,29439.0,포브스병원,228000443.0,37.235267,127.210617,경기도 용인시,4150418.0
2,3,65289,228000018,10-4,4,제일교회,228000665,29881.0,제일교회,228000665.0,37.234300,127.213333,경기도 용인시,4196832.0
3,4,65290,228000018,10-4,5,라이프아파트,228000664,29457.0,라이프아파트,228000664.0,37.231483,127.213767,경기도 용인시,4150529.0
4,5,65291,228000018,10-4,6,송담대,228000663,29464.0,송담대,228000663.0,37.228133,127.215267,경기도 용인시,4111697.0
5,6,65292,228000018,10-4,7,기후변화체험교육센터.삼삼부락,228001508,29883.0,기후변화체험교육센터.삼삼부락,228001508.0,37.223167,127.224067,경기도 용인시,4196834.0
6,7,65293,228000018,10-4,8,삼삼부락마을회관,228000662,29488.0,삼삼부락마을회관,228000662.0,37.221133,127.226167,경기도 용인시,4150530.0
7,8,65294,228000018,10-4,9,목동야목농원,228001509,29885.0,목동야목농원,228001509.0,37.218167,127.229717,경기도 용인시,4196836.0
8,9,65295,228000018,10-4,10,예비군훈련장,228000661,29497.0,예비군훈련장,228000661.0,37.216117,127.230517,경기도 용인시,4150533.0
9,10,65296,228000018,10-4,11,별학,228000660,29508.0,별학,228000660.0,37.209367,127.234033,경기도 용인시,4150534.0


In [64]:
RouteStationInfo_latlon.iloc[0][['WGS84위도', 'WGS84경도']]

WGS84위도    37.2328
WGS84경도     127.21
Name: 0, dtype: object

In [65]:
busline = RouteStationInfo_latlon['bus_line_no'].unique()
busline

array(['10-4', '7', '8', '22', '12', '9-1', '73', '2', '3', '9', '11',
       '6', '2-1', '73-1', '81', '24', '20', '10-1', '10-2', '14', '6-1',
       '16', '77', '100', '10-6', '10-7', '11-1', '13', '19', '17',
       '12-1', '11-2', '11-3', '66', '80', '31', '200', '333', '150',
       '38', '37', '19-1', '10', '15', '23', '29', '1', '2-2', '2-3',
       '7-1', '1-1', '50-2', '50-1', '50-5', '50-7', '35', '3-2', '3-1',
       '5', '5-2', '38-1', '35-2', '15-4', '10-8', '1002', '201', '202',
       '1004', '21', '4', '4-1', '330', '33-1', '5-4', '2-4', '18', '6-2',
       '6-3', '8-3', '8-2', '8-1', '11-5', '11-4', '1004-1', '340',
       '1000', '330-1', '25', '26', '27', '340-1', '1008', '33-2', '712',
       '6001', '116-3', '1001', '8501', '6002', '8155', '6002-1', '710',
       '203', '8156', '2000A', '2000B', '4403', '6003', '28', '8156(급행)',
       '4108', '9802', '8472', '8471', '39', 'H6005', '116-5', 'H6007',
       'H6006', '205', '10-5', '31-3', '720-3', '13-2', '6004', '

```
단순히 버스 번호로는 구분지을 수 없음 ㅜㅜ
관할관청에 따라서 또 따로 구분해야함!!!

용인시가 껴있는데
화성시만 뽑아내서 쓰면 되는건가??
```

## 관할 관청 화성시데이터만 뽑아내기

In [66]:
RouteStationInfo_latlon[RouteStationInfo_latlon['관할관청'] == '경기도 화성시']['bus_line_no'].nunique()

218

In [83]:
RouteStationInfo_latlon['bus_line_no'].nunique()

218

In [84]:
RouteStationInfo_latlon = RouteStationInfo_latlon[RouteStationInfo_latlon['관할관청'] == '경기도 화성시']

## 화성시 버스 노선 전부 나타내기

In [85]:
RouteStationInfo_latlon.head()

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,정류소명,표준정류장ID,WGS84위도,WGS84경도,관할관청,이비카드정류장ID
927,68834,228000204,24,53,한원IC,233000169,37403.0,한원IC,233000169.0,37.152733,127.122883,경기도 화성시,4170725.0
928,68835,228000204,24,54,장지동,233000168,36808.0,장지리,233000168.0,37.155117,127.1179,경기도 화성시,4170397.0
929,68836,228000204,24,55,장지동,233000167,36809.0,장지리,233000167.0,37.155867,127.115017,경기도 화성시,4170396.0
932,68839,228000204,24,58,꽃가마약수터,233000166,36810.0,꽃가마약수터,233000166.0,37.1591,127.09935,경기도 화성시,4170395.0
954,68861,228000204,24,80,꽃가마약수터,233001323,36943.0,꽃가마약수터,233001323.0,37.1589,127.09875,경기도 화성시,4170651.0


In [163]:
All_busline = []
bus_num_ = 0

for j in range(len(busline)):
    tmp_busline = RouteStationInfo_latlon[RouteStationInfo_latlon['bus_line_no'] == busline[j]]
    tmp_buslineArr = []
    for i in range(len(tmp_busline)):
        tmp = tmp_busline.iloc[i]
        tmp_buslineArr.append((tmp['WGS84위도'], tmp['WGS84경도']))
    All_busline.append(tmp_buslineArr)
    bus_num_ += 1

In [87]:
type(All_busline)

list

In [172]:
Busline = folium.Map(location = [37.234300,127.213333])

# 왜 앙대닝 흐엉
for i in range(226):
    if(len(All_busline[i]) == 0):
        continue
    folium.PolyLine(All_busline[i], color="red", weight=2.5, opacity=1).add_to(Busline)



folium.PolyLine(All_busline[0], color="red", weight=2.5, opacity=1).add_to(Busline)
# folium.PolyLine(All_busline[1], color="red", weight=2.5, opacity=1).add_to(Busline)
# folium.PolyLine(All_busline[2], color="red", weight=2.5, opacity=1).add_to(Busline)
# folium.PolyLine(All_busline[3], color="red", weight=2.5, opacity=1).add_to(Busline)
Busline

## 환승량이 많은 이용자

In [102]:
TripChain_order= TripChain.sort_values(['환승횟수'], ascending= [False])
# data = data.sort_values(["time"], ascending=[False])
TripChain_over3=TripChain_order[TripChain_order['환승횟수']>=3]

In [103]:
TripChain_over3.shape

(98393, 51)

In [104]:
TripChain_over3['최초승차역ID'] = TripChain_over3['승차역ID1']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [105]:
TripChain_over3 = TripChain_over3[TripChain_over3['최초승차역ID']!=TripChain_over3['최종하차역ID']]

In [106]:
TripChain_over3.head()

Unnamed: 0,암호화카드번호,트랜잭션ID,환승횟수,교통카드발행사ID,총이용객수,사용자구분,교통수단CD1,교통수단CD2,교통수단CD3,교통수단CD4,...,하차역ID1,하차역ID2,하차역ID3,하차역ID4,하차역ID5,최초승차역ID,최종하차역ID,총이용금액,수집건수,트립체인완료코드
338653,100475967013,11,5,3104012,1,1,500.0,500.0,582.0,582.0,...,,,,4115139.0,4115131.0,4179371.0,4115131.0,1450,5,;
1036825,900490539913,41,5,9000901,1,1,500.0,500.0,500.0,530.0,...,,,,4170271.0,4170352.0,4170347.0,4170352.0,2050,5,;
146918,900484810216,3,5,3101000,1,4,500.0,500.0,500.0,500.0,...,,,,4170101.0,4107952.0,4108069.0,4107952.0,870,5,;
727713,900467123199,44,5,9000901,1,1,202.0,500.0,500.0,202.0,...,,,,4311.0,4179149.0,1863.0,4179149.0,1350,5,;
395258,900523539981,59,5,9000901,1,1,115.0,115.0,582.0,511.0,...,,,,9179237.0,4100625.0,9000258.0,4100625.0,1250,5,;


In [107]:
RouteStationInfo_latlon['이비카드정류장ID'].nunique()

1444

In [108]:
TripChain_over3['최초승차역ID'].nunique()

10455

In [109]:
TripChain_over3['최종하차역ID'].nunique()

10963

In [110]:
RouteStationInfo_latlon.head()

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,정류소명,표준정류장ID,WGS84위도,WGS84경도,관할관청,이비카드정류장ID
927,68834,228000204,24,53,한원IC,233000169,37403.0,한원IC,233000169.0,37.152733,127.122883,경기도 화성시,4170725.0
928,68835,228000204,24,54,장지동,233000168,36808.0,장지리,233000168.0,37.155117,127.1179,경기도 화성시,4170397.0
929,68836,228000204,24,55,장지동,233000167,36809.0,장지리,233000167.0,37.155867,127.115017,경기도 화성시,4170396.0
932,68839,228000204,24,58,꽃가마약수터,233000166,36810.0,꽃가마약수터,233000166.0,37.1591,127.09935,경기도 화성시,4170395.0
954,68861,228000204,24,80,꽃가마약수터,233001323,36943.0,꽃가마약수터,233001323.0,37.1589,127.09875,경기도 화성시,4170651.0


In [99]:
# TripChain_over3.sample()



# pd.merge(TripChain_over3[['암호화카드번호', '환승횟수','사용자구분', '승차역ID1', '승차역ID2', '승차역ID3', '승차역ID4', '승차역ID5', 
# '하차역ID1', '하차역ID2', '하차역ID3', '하차역ID4', '하차역ID5', ,'최초승차역ID', '최종하차역ID', '총이용금액']], 
#          RouteStationInfo_latlon[['정류소명', 'station_id', 'WGS84위도', 'WGS84경도','이비카드정류장ID']], 

In [111]:
TripChain_over3.head()

Unnamed: 0,암호화카드번호,트랜잭션ID,환승횟수,교통카드발행사ID,총이용객수,사용자구분,교통수단CD1,교통수단CD2,교통수단CD3,교통수단CD4,...,하차역ID1,하차역ID2,하차역ID3,하차역ID4,하차역ID5,최초승차역ID,최종하차역ID,총이용금액,수집건수,트립체인완료코드
338653,100475967013,11,5,3104012,1,1,500.0,500.0,582.0,582.0,...,,,,4115139.0,4115131.0,4179371.0,4115131.0,1450,5,;
1036825,900490539913,41,5,9000901,1,1,500.0,500.0,500.0,530.0,...,,,,4170271.0,4170352.0,4170347.0,4170352.0,2050,5,;
146918,900484810216,3,5,3101000,1,4,500.0,500.0,500.0,500.0,...,,,,4170101.0,4107952.0,4108069.0,4107952.0,870,5,;
727713,900467123199,44,5,9000901,1,1,202.0,500.0,500.0,202.0,...,,,,4311.0,4179149.0,1863.0,4179149.0,1350,5,;
395258,900523539981,59,5,9000901,1,1,115.0,115.0,582.0,511.0,...,,,,9179237.0,4100625.0,9000258.0,4100625.0,1250,5,;


In [112]:
TripChain_over3[['최초승차역ID', '최종하차역ID']].reset_index()

Unnamed: 0,index,최초승차역ID,최종하차역ID
0,338653,4179371.0,4115131.0
1,1036825,4170347.0,4170352.0
2,146918,4108069.0,4107952.0
3,727713,1863.0,4179149.0
4,395258,9000258.0,4100625.0
5,666707,4102932.0,4120750.0
6,929996,4116789.0,4116787.0
7,73077,9117184.0,4197682.0
8,328849,4170093.0,4116659.0
9,227157,4122896.0,4110198.0


### ID type을 모두 int64로 변경

In [113]:
tmp = TripChain_over3[['최초승차역ID', '최종하차역ID']].reset_index()
TripChain_over3['최초승차역ID'] = tmp['최초승차역ID'].values.astype(np.int64)

In [114]:
TripChain_over3.head()

Unnamed: 0,암호화카드번호,트랜잭션ID,환승횟수,교통카드발행사ID,총이용객수,사용자구분,교통수단CD1,교통수단CD2,교통수단CD3,교통수단CD4,...,하차역ID1,하차역ID2,하차역ID3,하차역ID4,하차역ID5,최초승차역ID,최종하차역ID,총이용금액,수집건수,트립체인완료코드
338653,100475967013,11,5,3104012,1,1,500.0,500.0,582.0,582.0,...,,,,4115139.0,4115131.0,4179371,4115131.0,1450,5,;
1036825,900490539913,41,5,9000901,1,1,500.0,500.0,500.0,530.0,...,,,,4170271.0,4170352.0,4170347,4170352.0,2050,5,;
146918,900484810216,3,5,3101000,1,4,500.0,500.0,500.0,500.0,...,,,,4170101.0,4107952.0,4108069,4107952.0,870,5,;
727713,900467123199,44,5,9000901,1,1,202.0,500.0,500.0,202.0,...,,,,4311.0,4179149.0,1863,4179149.0,1350,5,;
395258,900523539981,59,5,9000901,1,1,115.0,115.0,582.0,511.0,...,,,,9179237.0,4100625.0,9000258,4100625.0,1250,5,;


---

---

# 참고

### list형태가 아니라 scalar 값 하나가 필요할 때

In [115]:
tmp = [1]

In [149]:
tmp3 = ['1', '2','dd']
tmp3[np.random.randint(2, size=1)[0]]

'1'