# 1단계. 데이터 전처리

### 1) 데이터 불러오기

In [1]:
import pandas as pd

# Read the 'Basic_preprocessed_…' Excel workbook into `data`; the bare
# expression on the last line makes the notebook render the frame.
workbook_path = 'Basic_preprocessed_두리발_2020년1월_2022년7월_콜상세내역.xlsx'
data = pd.read_excel(workbook_path)
data

Unnamed: 0,접수일시,월,출발지역,목적지역,호출지경도,호출지위도,목적지경도,목적지위도,총대기시간(분),주말,공휴일,접수시간,요일,거리
0,2020-01-01 05:06:44.297,1,부산 사상구,부산 사상구,489507,285817,491872,284227,41.345050,False,True,5,2,2849.793852
1,2020-01-01 05:32:39.097,1,부산 연제구,부산 부산진구,498495,288086,495760,284851,18.931717,False,True,5,2,4236.207030
2,2020-01-01 05:57:06.427,1,부산 연제구,부산 사하구,498923,286584,487815,279035,82.792883,False,True,5,2,13430.378438
3,2020-01-01 06:21:30.313,1,부산 부산진구,부산 금정구,493723,285383,499594,297423,51.678117,False,True,6,2,13395.157371
4,2020-01-01 06:33:43.930,1,부산 북구,부산 부산진구,494108,290599,498091,287066,33.817833,False,True,6,2,5324.131666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610320,2022-07-31 21:13:08.880,7,부산 동래구,부산 해운대구,501300,288679,507568,286400,41.235333,True,False,21,6,6669.457624
610321,2022-07-31 21:35:45.633,7,부산 해운대구,부산 해운대구,506206,292715,509093,287693,49.372783,True,False,21,6,5792.689617
610322,2022-07-31 21:46:47.023,7,부산 해운대구,부산 기장군,506180,285160,506010,304852,48.949617,True,False,21,6,19692.733787
610323,2022-07-31 21:53:21.040,7,부산 해운대구,부산 수영구,502745,286538,501997,285978,84.516000,True,False,21,6,934.400342


### 2) SHAP를 위한 간단 인코딩

원핫 인코딩

In [2]:
import pandas as pd
import numpy as np
import shap
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Build the encoded frame from `data` without modifying `data` itself:
# `assign` returns a new frame in which the request timestamp is replaced by
# its 'MM-DD HH:MM' text form, i.e. the year and the seconds are dropped.
data_encoded = data.assign(
    **{'접수일시': data['접수일시'].dt.strftime('%m-%d %H:%M')}
)

# Disabled alternative: cast the weekend / public-holiday flags to 0/1
# integers instead of one-hot encoding them below.
#data_encoded['주말'] = data_encoded['주말'].astype(int)
#data_encoded['공휴일'] = data_encoded['공휴일'].astype(int)

# Columns that are expanded into one indicator column per distinct value.
categorical_columns = ['출발지역', '목적지역','주말','공휴일','요일']
data_encoded = pd.get_dummies(data=data_encoded, columns=categorical_columns)

# NOTE(review): get_dummies replaces every listed column with its indicator
# columns, so the raw '주말', '공휴일' and '요일' columns no longer exist after
# this line, while later cells of this notebook still read
# data_encoded['요일'] — confirm which encoding those cells are meant to run on.

# Preview the first rows of the encoded frame.
data_encoded.head()

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,접수일시,월,호출지경도,호출지위도,목적지경도,목적지위도,총대기시간(분),접수시간,거리,출발지역_부산,...,주말_True,공휴일_False,공휴일_True,요일_0,요일_1,요일_2,요일_3,요일_4,요일_5,요일_6
0,01-01 05:06,1,489507,285817,491872,284227,41.34505,5,2849.793852,False,...,False,False,True,False,False,True,False,False,False,False
1,01-01 05:32,1,498495,288086,495760,284851,18.931717,5,4236.20703,False,...,False,False,True,False,False,True,False,False,False,False
2,01-01 05:57,1,498923,286584,487815,279035,82.792883,5,13430.378438,False,...,False,False,True,False,False,True,False,False,False,False
3,01-01 06:21,1,493723,285383,499594,297423,51.678117,6,13395.157371,False,...,False,False,True,False,False,True,False,False,False,False
4,01-01 06:33,1,494108,290599,498091,287066,33.817833,6,5324.131666,False,...,False,False,True,False,False,True,False,False,False,False


In [3]:
import numpy as np

# Split the total waiting time into three classes at its 33rd and 66th
# percentiles. pd.cut bins are right-closed by default, so using the observed
# maximum as the last edge keeps the largest value inside the last bin;
# values at or below 0 would fall outside the first bin and become NaN.
wait_minutes = data_encoded['총대기시간(분)']
q1, q2 = np.percentile(wait_minutes, [33, 66])
bin_edges = [0, q1, q2, wait_minutes.max()]

# Labels are, in bin order: short, medium, long.
data_encoded['대기시간_구간'] = pd.cut(
    wait_minutes,
    bins=bin_edges,
    labels=['짧음', '보통', '김'],
)

In [3]:
import numpy as np
import pandas as pd

# Fixed-threshold variant of the waiting-time classes, kept in its own frame:
# (0, 15] -> '짧음' (short), (15, 40] -> '보통' (medium), (40, inf] -> '김' (long).
custom_bins = [0, 15, 40, np.inf]  # hand-picked edges instead of percentiles
custom_classes = pd.cut(
    data_encoded['총대기시간(분)'],
    bins=custom_bins,
    labels=['짧음', '보통', '김'],
)

# `assign` returns a new frame, so `data_encoded` keeps its own class column.
data_encoded_customize = data_encoded.assign(**{'대기시간_구간': custom_classes})

In [6]:
from sklearn.preprocessing import LabelEncoder

# Replace the waiting-time class labels in `data_encoded` with integer codes,
# keeping every fitted encoder so the codes can be mapped back to labels.
categorical_columns = ['대기시간_구간']  # columns to label-encode
label_encoders = {col: LabelEncoder() for col in categorical_columns}

for col, le in label_encoders.items():
    # LabelEncoder numbers classes in sorted label order, not in the
    # short < medium < long order of the bins: for these three labels that
    # gives '김' -> 0, '보통' -> 1, '짧음' -> 2.
    data_encoded[col] = le.fit_transform(data_encoded[col])

# Show the result.
data_encoded

Unnamed: 0,접수일시,월,호출지경도,호출지위도,목적지경도,목적지위도,총대기시간(분),접수시간,거리,출발지역_부산,...,공휴일_False,공휴일_True,요일_0,요일_1,요일_2,요일_3,요일_4,요일_5,요일_6,대기시간_구간
0,01-01 05:06,1,489507,285817,491872,284227,41.345050,5,2849.793852,False,...,False,True,False,False,True,False,False,False,False,0
1,01-01 05:32,1,498495,288086,495760,284851,18.931717,5,4236.207030,False,...,False,True,False,False,True,False,False,False,False,1
2,01-01 05:57,1,498923,286584,487815,279035,82.792883,5,13430.378438,False,...,False,True,False,False,True,False,False,False,False,0
3,01-01 06:21,1,493723,285383,499594,297423,51.678117,6,13395.157371,False,...,False,True,False,False,True,False,False,False,False,0
4,01-01 06:33,1,494108,290599,498091,287066,33.817833,6,5324.131666,False,...,False,True,False,False,True,False,False,False,False,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610320,07-31 21:13,7,501300,288679,507568,286400,41.235333,21,6669.457624,False,...,True,False,False,False,False,False,False,False,True,0
610321,07-31 21:35,7,506206,292715,509093,287693,49.372783,21,5792.689617,False,...,True,False,False,False,False,False,False,False,True,0
610322,07-31 21:46,7,506180,285160,506010,304852,48.949617,21,19692.733787,False,...,True,False,False,False,False,False,False,False,True,0
610323,07-31 21:53,7,502745,286538,501997,285978,84.516000,21,934.400342,False,...,True,False,False,False,False,False,False,False,True,0


In [7]:
from sklearn.preprocessing import LabelEncoder

# Same label encoding as for `data_encoded`, applied to the fixed-threshold
# frame. `label_encoders` is rebuilt here, so afterwards it only holds the
# encoder fitted on `data_encoded_customize`.
categorical_columns = ['대기시간_구간']  # columns to label-encode
label_encoders = {col: LabelEncoder() for col in categorical_columns}

for col, le in label_encoders.items():
    # Codes follow sorted label order: '김' -> 0, '보통' -> 1, '짧음' -> 2.
    data_encoded_customize[col] = le.fit_transform(data_encoded_customize[col])

# Show the result.
data_encoded_customize

Unnamed: 0,접수일시,월,호출지경도,호출지위도,목적지경도,목적지위도,총대기시간(분),주말,공휴일,접수시간,...,목적지역_부산 북구,목적지역_부산 사상구,목적지역_부산 사하구,목적지역_부산 서구,목적지역_부산 수영구,목적지역_부산 연제구,목적지역_부산 영도구,목적지역_부산 중구,목적지역_부산 해운대구,대기시간_구간
0,01-01 05:06,1,489507,285817,491872,284227,41.345050,0,1,5,...,False,True,False,False,False,False,False,False,False,0
1,01-01 05:32,1,498495,288086,495760,284851,18.931717,0,1,5,...,False,False,False,False,False,False,False,False,False,1
2,01-01 05:57,1,498923,286584,487815,279035,82.792883,0,1,5,...,False,False,True,False,False,False,False,False,False,0
3,01-01 06:21,1,493723,285383,499594,297423,51.678117,0,1,6,...,False,False,False,False,False,False,False,False,False,0
4,01-01 06:33,1,494108,290599,498091,287066,33.817833,0,1,6,...,False,False,False,False,False,False,False,False,False,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610320,07-31 21:13,7,501300,288679,507568,286400,41.235333,1,0,21,...,False,False,False,False,False,False,False,False,True,0
610321,07-31 21:35,7,506206,292715,509093,287693,49.372783,1,0,21,...,False,False,False,False,False,False,False,False,True,0
610322,07-31 21:46,7,506180,285160,506010,304852,48.949617,1,0,21,...,False,False,False,False,False,False,False,False,False,0
610323,07-31 21:53,7,502745,286538,501997,285978,84.516000,1,0,21,...,False,False,False,False,True,False,False,False,False,0


In [None]:
# Display the total waiting times from largest to smallest to inspect the
# range covered by the column.
data_encoded['총대기시간(분)'].sort_values(ascending=False)

543951    89.771500
579755    89.771450
72636     89.770617
547190    89.768450
532751    89.766833
            ...    
282819     1.018950
373200     1.014667
393453     1.012117
496803     1.011500
395125     1.008500
Name: 총대기시간(분), Length: 610325, dtype: float64

사인 코사인 변환(월, 접수시간, 요일)

In [5]:
import numpy as np

# Cyclical (sine/cosine) encoding of month, weekday and request hour.
#
# Work on a copy: a plain `data_encoded_sin = data_encoded` only binds a second
# name to the same DataFrame, so every column added below would also appear in
# `data_encoded` and in everything later derived from or exported out of it.
data_encoded_sin = data_encoded.copy()

# NOTE(review): this reads the raw '요일' column, which is missing when the
# one-hot cell above lists '요일' in its categorical_columns — confirm the
# intended encoding before running.
# prefix -> (source column, length of the cycle):
#   month 1-12, weekday 0-6 (Monday-Sunday), request hour 0-23.
cyclical_features = {
    'month': ('월', 12),
    'weekday': ('요일', 7),
    'hour': ('접수시간', 24),
}

for prefix, (column, period) in cyclical_features.items():
    # Map the value onto the unit circle so the last and first value of a
    # cycle (e.g. hour 23 and hour 0) end up next to each other.
    angle = 2 * np.pi * data_encoded[column] / period
    data_encoded_sin[f'{prefix}_sin'] = np.sin(angle)
    data_encoded_sin[f'{prefix}_cos'] = np.cos(angle)

data_encoded_sin

Unnamed: 0,접수일시,월,호출지경도,호출지위도,목적지경도,목적지위도,총대기시간(분),주말,공휴일,접수시간,...,목적지역_부산 영도구,목적지역_부산 중구,목적지역_부산 해운대구,대기시간_구간,month_sin,month_cos,weekday_sin,weekday_cos,hour_sin,hour_cos
0,01-01 05:06,1,489507,285817,491872,284227,41.345050,0,1,5,...,False,False,False,0,0.5,0.866025,0.974928,-0.222521,0.965926,2.588190e-01
1,01-01 05:32,1,498495,288086,495760,284851,18.931717,0,1,5,...,False,False,False,2,0.5,0.866025,0.974928,-0.222521,0.965926,2.588190e-01
2,01-01 05:57,1,498923,286584,487815,279035,82.792883,0,1,5,...,False,False,False,0,0.5,0.866025,0.974928,-0.222521,0.965926,2.588190e-01
3,01-01 06:21,1,493723,285383,499594,297423,51.678117,0,1,6,...,False,False,False,0,0.5,0.866025,0.974928,-0.222521,1.000000,6.123234e-17
4,01-01 06:33,1,494108,290599,498091,287066,33.817833,0,1,6,...,False,False,False,1,0.5,0.866025,0.974928,-0.222521,1.000000,6.123234e-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610320,07-31 21:13,7,501300,288679,507568,286400,41.235333,1,0,21,...,False,False,True,0,-0.5,-0.866025,-0.781831,0.623490,-0.707107,7.071068e-01
610321,07-31 21:35,7,506206,292715,509093,287693,49.372783,1,0,21,...,False,False,True,0,-0.5,-0.866025,-0.781831,0.623490,-0.707107,7.071068e-01
610322,07-31 21:46,7,506180,285160,506010,304852,48.949617,1,0,21,...,False,False,False,0,-0.5,-0.866025,-0.781831,0.623490,-0.707107,7.071068e-01
610323,07-31 21:53,7,502745,286538,501997,285978,84.516000,1,0,21,...,False,False,False,0,-0.5,-0.866025,-0.781831,0.623490,-0.707107,7.071068e-01


In [7]:
# Display the column labels of the sin/cos frame to check which features it holds.
data_encoded_sin.columns

Index(['접수일시', '월', '호출지경도', '호출지위도', '목적지경도', '목적지위도', '총대기시간(분)', '주말',
       '공휴일', '접수시간', '요일', '거리', '출발지역_부산', '출발지역_부산 강서구', '출발지역_부산 금정구',
       '출발지역_부산 기장군', '출발지역_부산 남구', '출발지역_부산 동구', '출발지역_부산 동래구',
       '출발지역_부산 부산진구', '출발지역_부산 북구', '출발지역_부산 사상구', '출발지역_부산 사하구',
       '출발지역_부산 서구', '출발지역_부산 수영구', '출발지역_부산 연제구', '출발지역_부산 영도구', '출발지역_부산 중구',
       '출발지역_부산 해운대구', '목적지역_부산', '목적지역_부산 강서구', '목적지역_부산 금정구', '목적지역_부산 기장군',
       '목적지역_부산 남구', '목적지역_부산 동구', '목적지역_부산 동래구', '목적지역_부산 부산진구', '목적지역_부산 북구',
       '목적지역_부산 사상구', '목적지역_부산 사하구', '목적지역_부산 서구', '목적지역_부산 수영구',
       '목적지역_부산 연제구', '목적지역_부산 영도구', '목적지역_부산 중구', '목적지역_부산 해운대구', '대기시간_구간',
       'month_sin', 'month_cos', 'weekday_sin', 'weekday_cos', 'hour_sin',
       'hour_cos'],
      dtype='object')

In [8]:
# Remove the timestamp, the raw month / hour / weekday columns (they were given
# sin/cos columns in the cell above), the raw total waiting time and the
# distance column. drop() raises KeyError if any listed column is missing.
columns_to_remove = ['접수일시', '월', '총대기시간(분)', '접수시간', '요일', '거리']
data_encoded_sin = data_encoded_sin.drop(columns=columns_to_remove)
data_encoded_sin

Unnamed: 0,호출지경도,호출지위도,목적지경도,목적지위도,주말,공휴일,출발지역_부산,출발지역_부산 강서구,출발지역_부산 금정구,출발지역_부산 기장군,...,목적지역_부산 영도구,목적지역_부산 중구,목적지역_부산 해운대구,대기시간_구간,month_sin,month_cos,weekday_sin,weekday_cos,hour_sin,hour_cos
0,489507,285817,491872,284227,0,1,False,False,False,False,...,False,False,False,0,0.5,0.866025,0.974928,-0.222521,0.965926,2.588190e-01
1,498495,288086,495760,284851,0,1,False,False,False,False,...,False,False,False,2,0.5,0.866025,0.974928,-0.222521,0.965926,2.588190e-01
2,498923,286584,487815,279035,0,1,False,False,False,False,...,False,False,False,0,0.5,0.866025,0.974928,-0.222521,0.965926,2.588190e-01
3,493723,285383,499594,297423,0,1,False,False,False,False,...,False,False,False,0,0.5,0.866025,0.974928,-0.222521,1.000000,6.123234e-17
4,494108,290599,498091,287066,0,1,False,False,False,False,...,False,False,False,1,0.5,0.866025,0.974928,-0.222521,1.000000,6.123234e-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610320,501300,288679,507568,286400,1,0,False,False,False,False,...,False,False,True,0,-0.5,-0.866025,-0.781831,0.623490,-0.707107,7.071068e-01
610321,506206,292715,509093,287693,1,0,False,False,False,False,...,False,False,True,0,-0.5,-0.866025,-0.781831,0.623490,-0.707107,7.071068e-01
610322,506180,285160,506010,304852,1,0,False,False,False,False,...,False,False,False,0,-0.5,-0.866025,-0.781831,0.623490,-0.707107,7.071068e-01
610323,502745,286538,501997,285978,1,0,False,False,False,False,...,False,False,False,0,-0.5,-0.866025,-0.781831,0.623490,-0.707107,7.071068e-01


요일과 접수시간을 조합해 새로운 feature 만들기

In [None]:
# Combined weekday/hour feature: one category per (weekday, hour) pair,
# written as '<weekday>_<hour>', e.g. '2_5'.
#
# Start from a copy: `data_encoded_FE = data_encoded` would only alias the same
# DataFrame, leaving a string 'weekday_hour' column behind in `data_encoded`.
data_encoded_FE = data_encoded.copy()

# NOTE(review): reads the raw '요일' column, which is missing when the one-hot
# cell above lists '요일' in its categorical_columns — confirm before running.
data_encoded_FE['weekday_hour'] = (
    data_encoded['요일'].astype(str) + '_' + data_encoded['접수시간'].astype(str)
)

# One-hot encode the combined key; get_dummies returns a new frame in which
# 'weekday_hour' is replaced by one indicator column per observed pair.
data_encoded_FE = pd.get_dummies(data_encoded_FE, columns=['weekday_hour'])
data_encoded_FE

Unnamed: 0,접수일시,월,호출지경도,호출지위도,목적지경도,목적지위도,총대기시간(분),주말,공휴일,접수시간,...,weekday_hour_6_21,weekday_hour_6_22,weekday_hour_6_23,weekday_hour_6_3,weekday_hour_6_4,weekday_hour_6_5,weekday_hour_6_6,weekday_hour_6_7,weekday_hour_6_8,weekday_hour_6_9
0,01-01 05:06,1,489507,285817,491872,284227,41.345050,0,1,5,...,False,False,False,False,False,False,False,False,False,False
1,01-01 05:32,1,498495,288086,495760,284851,18.931717,0,1,5,...,False,False,False,False,False,False,False,False,False,False
2,01-01 05:57,1,498923,286584,487815,279035,82.792883,0,1,5,...,False,False,False,False,False,False,False,False,False,False
3,01-01 06:21,1,493723,285383,499594,297423,51.678117,0,1,6,...,False,False,False,False,False,False,False,False,False,False
4,01-01 06:33,1,494108,290599,498091,287066,33.817833,0,1,6,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610320,07-31 21:13,7,501300,288679,507568,286400,41.235333,1,0,21,...,True,False,False,False,False,False,False,False,False,False
610321,07-31 21:35,7,506206,292715,509093,287693,49.372783,1,0,21,...,True,False,False,False,False,False,False,False,False,False
610322,07-31 21:46,7,506180,285160,506010,304852,48.949617,1,0,21,...,True,False,False,False,False,False,False,False,False,False
610323,07-31 21:53,7,502745,286538,501997,285978,84.516000,1,0,21,...,True,False,False,False,False,False,False,False,False,False


In [13]:
# Remove the timestamp, the raw total waiting time, the raw hour and weekday
# columns and the distance column from the weekday/hour frame. Unlike the
# other variants, '월' is not in this list and therefore stays in the frame.
columns_to_remove = ['접수일시', '총대기시간(분)', '접수시간', '요일', '거리']
data_encoded_FE = data_encoded_FE.drop(columns=columns_to_remove)
data_encoded_FE

Unnamed: 0,접수일시,월,호출지경도,호출지위도,목적지경도,목적지위도,주말,공휴일,출발지역_부산,출발지역_부산 강서구,...,weekday_hour_6_21,weekday_hour_6_22,weekday_hour_6_23,weekday_hour_6_3,weekday_hour_6_4,weekday_hour_6_5,weekday_hour_6_6,weekday_hour_6_7,weekday_hour_6_8,weekday_hour_6_9
0,01-01 05:06,1,489507,285817,491872,284227,0,1,False,False,...,False,False,False,False,False,False,False,False,False,False
1,01-01 05:32,1,498495,288086,495760,284851,0,1,False,False,...,False,False,False,False,False,False,False,False,False,False
2,01-01 05:57,1,498923,286584,487815,279035,0,1,False,False,...,False,False,False,False,False,False,False,False,False,False
3,01-01 06:21,1,493723,285383,499594,297423,0,1,False,False,...,False,False,False,False,False,False,False,False,False,False
4,01-01 06:33,1,494108,290599,498091,287066,0,1,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610320,07-31 21:13,7,501300,288679,507568,286400,1,0,False,False,...,True,False,False,False,False,False,False,False,False,False
610321,07-31 21:35,7,506206,292715,509093,287693,1,0,False,False,...,True,False,False,False,False,False,False,False,False,False
610322,07-31 21:46,7,506180,285160,506010,304852,1,0,False,False,...,True,False,False,False,False,False,False,False,False,False
610323,07-31 21:53,7,502745,286538,501997,285978,1,0,False,False,...,True,False,False,False,False,False,False,False,False,False


요일+접수시간 조합 feature와 사인·코사인 변환(월, 접수시간, 요일)을 함께 적용

In [None]:
import numpy as np

# Variant that carries both feature sets: the sin/cos encoding of month,
# weekday and request hour, plus the one-hot encoded weekday/hour pair.
#
# Start from a copy: `data_encoded_sinFE = data_encoded` would only alias the
# same DataFrame, so every column added below would leak into `data_encoded`.
data_encoded_sinFE = data_encoded.copy()

# NOTE(review): reads the raw '요일' column, which is missing when the one-hot
# cell above lists '요일' in its categorical_columns — confirm before running.
# prefix -> (source column, length of the cycle):
#   month 1-12, weekday 0-6 (Monday-Sunday), request hour 0-23.
cyclical_features = {
    'month': ('월', 12),
    'weekday': ('요일', 7),
    'hour': ('접수시간', 24),
}

for prefix, (column, period) in cyclical_features.items():
    angle = 2 * np.pi * data_encoded[column] / period
    data_encoded_sinFE[f'{prefix}_sin'] = np.sin(angle)
    data_encoded_sinFE[f'{prefix}_cos'] = np.cos(angle)

# Combined key '<weekday>_<hour>', e.g. '2_5'.
data_encoded_sinFE['weekday_hour'] = (
    data_encoded['요일'].astype(str) + '_' + data_encoded['접수시간'].astype(str)
)

# One-hot encode the combined key; 'weekday_hour' is replaced by one indicator
# column per observed pair.
data_encoded_sinFE = pd.get_dummies(data_encoded_sinFE, columns=['weekday_hour'])
data_encoded_sinFE

Unnamed: 0,접수일시,월,호출지경도,호출지위도,목적지경도,목적지위도,총대기시간(분),주말,공휴일,접수시간,...,weekday_hour_6_21,weekday_hour_6_22,weekday_hour_6_23,weekday_hour_6_3,weekday_hour_6_4,weekday_hour_6_5,weekday_hour_6_6,weekday_hour_6_7,weekday_hour_6_8,weekday_hour_6_9
0,01-01 05:06,1,489507,285817,491872,284227,41.345050,0,1,5,...,False,False,False,False,False,False,False,False,False,False
1,01-01 05:32,1,498495,288086,495760,284851,18.931717,0,1,5,...,False,False,False,False,False,False,False,False,False,False
2,01-01 05:57,1,498923,286584,487815,279035,82.792883,0,1,5,...,False,False,False,False,False,False,False,False,False,False
3,01-01 06:21,1,493723,285383,499594,297423,51.678117,0,1,6,...,False,False,False,False,False,False,False,False,False,False
4,01-01 06:33,1,494108,290599,498091,287066,33.817833,0,1,6,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610320,07-31 21:13,7,501300,288679,507568,286400,41.235333,1,0,21,...,True,False,False,False,False,False,False,False,False,False
610321,07-31 21:35,7,506206,292715,509093,287693,49.372783,1,0,21,...,True,False,False,False,False,False,False,False,False,False
610322,07-31 21:46,7,506180,285160,506010,304852,48.949617,1,0,21,...,True,False,False,False,False,False,False,False,False,False
610323,07-31 21:53,7,502745,286538,501997,285978,84.516000,1,0,21,...,True,False,False,False,False,False,False,False,False,False


In [15]:
# Display the column labels of the combined sin/cos + weekday/hour frame.
data_encoded_sinFE.columns

Index(['접수일시', '월', '호출지경도', '호출지위도', '목적지경도', '목적지위도', '총대기시간(분)', '주말',
       '공휴일', '접수시간',
       ...
       'weekday_hour_6_21', 'weekday_hour_6_22', 'weekday_hour_6_23',
       'weekday_hour_6_3', 'weekday_hour_6_4', 'weekday_hour_6_5',
       'weekday_hour_6_6', 'weekday_hour_6_7', 'weekday_hour_6_8',
       'weekday_hour_6_9'],
      dtype='object', length=221)

In [None]:
# Remove the timestamp, the raw month / hour / weekday columns (they were given
# sin/cos columns in the cell above), the raw total waiting time and the
# distance column. drop() raises KeyError if any listed column is missing.
columns_to_remove = ['접수일시', '월', '총대기시간(분)', '접수시간', '요일', '거리']
data_encoded_sinFE = data_encoded_sinFE.drop(columns=columns_to_remove)
data_encoded_sinFE

Unnamed: 0,호출지경도,호출지위도,목적지경도,목적지위도,주말,공휴일,출발지역_부산,출발지역_부산 강서구,출발지역_부산 금정구,출발지역_부산 기장군,...,weekday_hour_6_21,weekday_hour_6_22,weekday_hour_6_23,weekday_hour_6_3,weekday_hour_6_4,weekday_hour_6_5,weekday_hour_6_6,weekday_hour_6_7,weekday_hour_6_8,weekday_hour_6_9
0,489507,285817,491872,284227,0,1,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,498495,288086,495760,284851,0,1,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,498923,286584,487815,279035,0,1,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,493723,285383,499594,297423,0,1,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,494108,290599,498091,287066,0,1,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610320,501300,288679,507568,286400,1,0,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
610321,506206,292715,509093,287693,1,0,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
610322,506180,285160,506010,304852,1,0,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
610323,502745,286538,501997,285978,1,0,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False


In [8]:
# Export `data_encoded` to an Excel workbook; index=False leaves the row index
# out of the file.
data_encoded.to_excel('Final_Basic_preprocessed_두리발_2020년1월_2022년7월_콜상세내역.xlsx', index=False)

In [9]:
# Export the fixed-threshold frame to its own workbook, without the row index.
# NOTE(review): this file name starts with 'Fina_' while the export of
# `data_encoded` above uses 'Final_' — possibly a typo; left unchanged because
# other code may read this exact path.
data_encoded_customize.to_excel('Fina_customize_Basic_preprocessed_두리발_2020년1월_2022년7월_콜상세내역.xlsx', index=False)

In [10]:
# Export `data_encoded` a second time under an 'AllOnehot' file name, without
# the row index.
# NOTE(review): this writes the same `data_encoded` variable as the
# 'Final_Basic_preprocessed_…' export above; presumably the notebook was re-run
# with a different encoding cell between the two saves — confirm. The 'Fina_'
# prefix also looks like a typo of 'Final_'; left unchanged because other code
# may read this exact path.
data_encoded.to_excel('Fina_AllOnehot_customize_Basic_preprocessed_두리발_2020년1월_2022년7월_콜상세내역.xlsx', index=False)