In [1539]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

np.random.seed(2)

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
import os
import logging

from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error

In [1540]:
import tensorflow as tf
print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))

2.4.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [1541]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Thu_Jun_11_22:26:48_Pacific_Daylight_Time_2020
Cuda compilation tools, release 11.0, V11.0.194
Build cuda_11.0_bu.relgpu_drvr445TC445_37.28540450_0


In [1542]:
weather = pd.read_csv("data/진주기상정보.csv", encoding = "cp949")
train = pd.read_csv("data/train.csv")
test = pd.read_csv("data/test.csv")

In [1543]:
weather.rename(columns = {"일시" : "일자"}, inplace = True)
train.head()

Unnamed: 0,일자,요일,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,조식메뉴,중식메뉴,석식메뉴,중식계,석식계
0,2016-02-01,월,2601,50,150,238,0.0,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 (쌀:국내산) 된장찌개 쥐...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 오징어찌개 쇠불고기 (쇠고기:호주산) 계란찜 ...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 육개장 자반고등어구이 두부조림 건파래무침 ...",1039.0,331.0
1,2016-02-02,화,2601,50,173,319,0.0,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 (쌀:국내산) 호박젓국찌...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무...","콩나물밥*양념장 (쌀,현미흑미:국내산) 어묵국 유산슬 (쇠고기:호주산) 아삭고추무...",867.0,560.0
2,2016-02-03,수,2601,56,180,111,0.0,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 (쌀:국내산) 콩나물국...,"카레덮밥 (쌀,현미흑미:국내산) 팽이장국 치킨핑거 (닭고기:국내산) 쫄면야채무침 ...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 청국장찌개 황태양념구이 (황태:러시아산) 고기...",1017.0,573.0
3,2016-02-04,목,2601,104,220,355,0.0,"모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 (쌀,닭:국내산) 근대국...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 쇠고기무국 주꾸미볶음 부추전 시금치나물 ...","미니김밥*겨자장 (쌀,현미흑미:국내산) 우동 멕시칸샐러드 군고구마 무피클 포...",978.0,525.0
4,2016-02-05,금,2601,278,181,34,0.0,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 (쌀:국내산) 재첩국 방...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 떡국 돈육씨앗강정 (돼지고기:국내산) 우엉잡채...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 차돌박이찌개 (쇠고기:호주산) 닭갈비 (닭고기:...",925.0,330.0


In [1544]:
# train_merge = pd.merge(train, weather, on = "일자", how = "left")
# test_merge = pd.merge(test, weather, on = "일자", how = "left")
# train_merge.describe()
train.drop(["조식메뉴"], inplace = True, axis = 1)
test.drop(["조식메뉴"], inplace = True, axis = 1)

In [1545]:
train_merge = train
test_merge = test

In [1546]:
import re
def reg_modi(s):
    s = re.sub(pattern = r'\[[\S]*\]', repl = " ", string = s)
    s = re.sub(pattern = r'\([\S]*\)', repl = " ", string = s)
    s = re.sub(pattern = r'\s\s+', repl = " ", string = s)
    s = re.sub(pattern = r'\*', repl = " ", string = s)
    s = re.sub(pattern = r' $', repl = "", string = s)
    if len(s) < 15 or "개발의" in s or "가정의" in s: # issue
        return "X"
    return s

for menu in ["중식메뉴", "석식메뉴"]:
    train_merge[menu] = train_merge[menu].apply(reg_modi)
    test_merge[menu] = test_merge[menu].apply(reg_modi)
train_merge.head()

Unnamed: 0,일자,요일,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식메뉴,석식메뉴,중식계,석식계
0,2016-02-01,월,2601,50,150,238,0.0,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0
1,2016-02-02,화,2601,50,173,319,0.0,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0
2,2016-02-03,수,2601,56,180,111,0.0,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0
3,2016-02-04,목,2601,104,220,355,0.0,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0
4,2016-02-05,금,2601,278,181,34,0.0,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0


In [1547]:
# del outliers
#train_merge = train_merge[train_merge["중식계"] > 0]
#train_merge = train_merge[train_merge["석식계"] > 0]

# keep submission source
submission = test_merge[["일자"]]

In [1548]:
day_lunch = train_merge[["일자", "중식메뉴", "중식계", "요일"]]
day_dinner = train_merge[["일자", "석식메뉴", "석식계", "요일"]]
day_lunch_group = day_lunch.groupby("요일").mean().reset_index()
day_dinner_group = day_dinner.groupby("요일").mean().reset_index()
#train_group_by = train_merge.groupby('요일').mean()

In [1549]:
lunch_mapping = {k : v for k, v in zip(day_lunch_group["요일"], day_lunch_group["중식계"])}
lunch_mapping

{'금': 653.6099585062241,
 '목': 823.9918032786885,
 '수': 905.2133891213389,
 '월': 1144.331950207469,
 '화': 925.6208333333333}

In [1550]:
dinner_mapping = {k : v for k, v in zip(day_dinner_group["요일"], day_dinner_group["석식계"])}
dinner_mapping

{'금': 404.9792531120332,
 '목': 480.4016393442623,
 '수': 363.6150627615063,
 '월': 538.9336099585062,
 '화': 520.1291666666667}

In [1551]:
def dic_mapping_lunch(s):
    return dinner_mapping[s]

def dic_mapping_dinner(s):
    return dinner_mapping[s]

train_merge["중식요일"] = train_merge["요일"].apply(dic_mapping_lunch)
train_merge["석식요일"] = train_merge["요일"].apply(dic_mapping_dinner)

test_merge["중식요일"] = train_merge["요일"].apply(dic_mapping_lunch)
test_merge["석식요일"] = train_merge["요일"].apply(dic_mapping_dinner)

train_merge

Unnamed: 0,일자,요일,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식메뉴,석식메뉴,중식계,석식계,중식요일,석식요일
0,2016-02-01,월,2601,50,150,238,0.0,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,538.933610,538.933610
1,2016-02-02,화,2601,50,173,319,0.0,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,520.129167,520.129167
2,2016-02-03,수,2601,56,180,111,0.0,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,363.615063,363.615063
3,2016-02-04,목,2601,104,220,355,0.0,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,480.401639,480.401639
4,2016-02-05,금,2601,278,181,34,0.0,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,404.979253,404.979253
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1200,2021-01-20,수,2983,75,198,4,391.0,쌀밥/흑미밥/찰현미밥 아욱국 수제함박스테이크 견과류마카로니범벅 생깻잎지 단호박물김치...,김치볶음밥 미니쫄우동 맛살튀김 브로콜리깨소스무침 계란후라이 고들빼기무침 겉절이김치,1093.0,421.0,363.615063,363.615063
1201,2021-01-21,목,2983,92,231,462,351.0,쌀밥/수수밥/찰현미밥 냉이된장국 동파육 봄동전 청경채/버섯숙회 초장 무생채 양상추샐...,흑미밥 쇠고기무국 삼치양념구이 비엔나채소볶음 숙주나물당근무침 포기김치,832.0,353.0,480.401639,480.401639
1202,2021-01-22,금,2983,255,248,1,303.0,전주비빔밥 약고추장 계란파국 요거닭 올방개묵무침 파프리카해초무침 포기김치 양상추샐러...,흑미밥 수제비국 수제맛쵸킹탕수육 유부채소겨자냉채 참나물무침 갓김치/겉절이김치,579.0,217.0,404.979253,404.979253
1203,2021-01-25,월,2983,107,153,616,327.0,쌀밥/흑미밥/찰현미밥 전주식콩나물해장국 돈육간장불고기 깐풍연근 연두부 달래양념장 봄...,흑미밥 열무된장국 장어강정 데리야끼소스 깻잎쌈 생강채 오이선 포기김치,1145.0,502.0,538.933610,538.933610


In [1552]:
train_merge.drop(["요일"], inplace = True, axis = 1)
test_merge.drop(["요일"], inplace = True, axis = 1)
train_merge.head(100)

Unnamed: 0,일자,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식메뉴,석식메뉴,중식계,석식계,중식요일,석식요일
0,2016-02-01,2601,50,150,238,0.0,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,538.933610,538.933610
1,2016-02-02,2601,50,173,319,0.0,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,520.129167,520.129167
2,2016-02-03,2601,56,180,111,0.0,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,363.615063,363.615063
3,2016-02-04,2601,104,220,355,0.0,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,480.401639,480.401639
4,2016-02-05,2601,278,181,34,0.0,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,404.979253,404.979253
...,...,...,...,...,...,...,...,...,...,...,...,...
95,2016-06-23,2637,53,271,346,0.0,쌀밥/잡곡밥 쇠고기무국 매콤낙지볶음 메추리알조림 숙주나물 포기김치,작은밥/맑은국 토마토스파게티 양송이스프 쇠고기퀘사디아 오이피클/음료 깍두기,877.0,588.0,480.401639,480.401639
96,2016-06-24,2637,169,284,52,0.0,쌀밥/잡곡밥 도토리묵냉국 목살데리야끼구이 골뱅이무침&소면 간장깻잎지 포기김치,쌀밥/잡곡밥 순두부찌개 해물누룽지탕 버섯메밀전 미역줄기 포기김치,805.0,475.0,404.979253,404.979253
97,2016-06-27,2637,96,227,300,0.0,쌀밥/잡곡밥 배추된장국 훈제오리구이 실곤약초무침 쌈무/야채스틱 포기김치,쌀밥/잡곡밥 김치찌개 모둠장조림 주꾸미초무침 파래김 양념장 깍두기,1142.0,609.0,538.933610,538.933610
98,2016-06-28,2637,63,253,226,0.0,쌀밥/잡곡밥 육개장 고등어구이 닭가슴살냉채 마늘쫑무침 포기김치,볶음밥/짜장소스 계란국 탕수만두 멕시칸샐러드 슈크림/단무지 포기김치,917.0,535.0,520.129167,520.129167


In [1553]:
!pip install lxml
import requests
from urllib import parse
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime

def getHoliday(year: int) -> pd.DataFrame:
    url = "http://apis.data.go.kr/B090041/openapi/service/SpcdeInfoService/getRestDeInfo"
    api_key_utf8 = "I2Mv6nH4CWf3VYYi83oy0EJO2upQgmp2GDI%2Fdbxh9ekXkRD4EjMSq0gWZxTy%2FNtAxnjV7soTkOW2xs1qYMdjBg%3D%3D"
    api_key_decode = parse.unquote(api_key_utf8)

    params = {
        "ServiceKey": api_key_decode,
        "solYear": year,
        "numOfRows": 100
    }

    response = requests.get(url, params=params)
    xml = BeautifulSoup(response.text, "html.parser")
    items = xml.find('items')
    item_list = []
    for item in items:
        item_dict = {
            "공휴일": 1, # 무슨 공휴일인지는 없어도 될 듯 하다
            "일자": datetime.strptime(item.find("locdate").text.strip(), '%Y%m%d')
        }
        item_list.append(item_dict)

    return pd.DataFrame(item_list)



In [1554]:
holidays = pd.concat([getHoliday(year) for year in range(2016, 2022)])
holidays.describe()

Unnamed: 0,공휴일
count,103.0
mean,1.0
std,0.0
min,1.0
25%,1.0
50%,1.0
75%,1.0
max,1.0


In [1555]:
from datetime import datetime, timedelta

def one_day_before(s):
    return str(datetime.strptime(str(s)[:10], "%Y-%m-%d").date() + timedelta(days = -1))

def one_day_after(s):
    return str(datetime.strptime(str(s)[:10], "%Y-%m-%d").date() + timedelta(days = 1))

holidays["공휴일전날"] = holidays["일자"].apply(one_day_before)
holidays["공휴일다음날"] = holidays["일자"].apply(one_day_after)

In [1556]:
new_holidays = pd.concat([holidays["공휴일전날"], holidays["공휴일다음날"]])

new_holidays = pd.DataFrame(new_holidays, columns = ["일자"])

new_holidays["휴일전후날여부"] = 1
new_holidays.head(130)

Unnamed: 0,일자,휴일전후날여부
0,2015-12-31,1
1,2016-02-06,1
2,2016-02-07,1
3,2016-02-08,1
4,2016-02-09,1
...,...,...
5,2017-03-02,1
6,2017-05-04,1
7,2017-05-06,1
8,2017-05-10,1


In [1557]:
train_merge = pd.merge(train_merge, new_holidays, how = 'left', on  = '일자')
test_merge = pd.merge(test_merge, new_holidays, how = 'left', on = '일자')

train_merge["휴일전후날여부"] = train_merge["휴일전후날여부"].fillna(0)
test_merge["휴일전후날여부"] = test_merge["휴일전후날여부"].fillna(0)

train_merge.head(20)

Unnamed: 0,일자,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식메뉴,석식메뉴,중식계,석식계,중식요일,석식요일,휴일전후날여부
0,2016-02-01,2601,50,150,238,0.0,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,538.93361,538.93361,0.0
1,2016-02-02,2601,50,173,319,0.0,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,520.129167,520.129167,0.0
2,2016-02-03,2601,56,180,111,0.0,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,363.615063,363.615063,0.0
3,2016-02-04,2601,104,220,355,0.0,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,480.401639,480.401639,0.0
4,2016-02-05,2601,278,181,34,0.0,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,404.979253,404.979253,0.0
5,2016-02-11,2601,383,143,417,0.0,쌀밥/잡곡밥 시래기국 훈제오리구이 도토리묵무침 쌈무/양파절임 요구르트 포기김치,참치회덮밥 맑은국 군만두 과일샐러드 락교 포기김치,1045.0,550.0,480.401639,480.401639,1.0
6,2016-02-12,2601,389,156,93,0.0,쌀밥/잡곡밥 꽃게탕 돈육굴소스볶음 옥수수전 유채나물 요구르트 포기김치,쌀밥/잡곡밥 김치콩나물국 미니함박 어묵볶음 물파래무침 깍두기,909.0,598.0,404.979253,404.979253,0.0
7,2016-02-15,2601,87,204,482,0.0,쌀밥/잡곡밥 시금치국 닭감자조림 연두부 양념장 콩나물무침 요구르트 포기김치,쌀밥/잡곡밥 홍합미역국 등갈비김치찜 임연수구이 브로컬리초장 포기김치,1268.0,672.0,538.93361,538.93361,0.0
8,2016-02-16,2601,72,236,526,0.0,쌀밥/잡곡밥 쇠고기무국 탕수어 오징어숙회무침 취나물 요구르트 포기김치,쌀밥/잡곡밥 된장찌개 쇠불고기 해파리겨자채 봄동무침 포기김치,1014.0,523.0,520.129167,520.129167,0.0
9,2016-02-17,2601,78,250,23,0.0,쌀밥/잡곡밥 냉이된장국 쇠고기장조림 통도라지구이 치커리무침 요구르트 포기김치,볶음밥 자장소스 맑은국 새우또띠아 쨔샤이무침 요플레 포기김치,916.0,588.0,363.615063,363.615063,0.0


In [1558]:
train_merge = pd.get_dummies(train_merge, columns = ["휴일전후날여부"], prefix = '휴일전후날여부')
test_merge = pd.get_dummies(test_merge, columns = ["휴일전후날여부"], prefix = '휴일전후날여부')

train_merge.head()

Unnamed: 0,일자,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식메뉴,석식메뉴,중식계,석식계,중식요일,석식요일,휴일전후날여부_0.0,휴일전후날여부_1.0
0,2016-02-01,2601,50,150,238,0.0,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,538.93361,538.93361,1,0
1,2016-02-02,2601,50,173,319,0.0,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,520.129167,520.129167,1,0
2,2016-02-03,2601,56,180,111,0.0,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,363.615063,363.615063,1,0
3,2016-02-04,2601,104,220,355,0.0,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,480.401639,480.401639,1,0
4,2016-02-05,2601,278,181,34,0.0,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,404.979253,404.979253,1,0


In [1559]:
train_merge["본사실근무자수"] = train_merge["본사정원수"] - train_merge["본사휴가자수"] - train_merge["본사출장자수"] - train_merge["현본사소속재택근무자수"]
test_merge["본사실근무자수"] = test_merge["본사정원수"] - test_merge["본사휴가자수"] - test_merge["본사출장자수"] - test_merge["현본사소속재택근무자수"]

#train_merge["중식비율"] = train_merge["중식계"] / train_merge["본사실근무자수"]
#test_merge["중식비율"] = test_merge["중식계"] / test_merge["본사실근무자수"]

#train_merge["석식비율"] = train_merge["석식계"] / train_merge["본사실근무자수"]
#test_merge["석식비율"] = test_merge["석식계"] / test_merge["본사실근무자수"]

del_features = ["본사정원수", "본사휴가자수", "본사출장자수", "현본사소속재택근무자수"]
train_merge.drop(del_features, inplace = True, axis = 1)
test_merge.drop(del_features, inplace = True, axis = 1)

In [1560]:
#train_merge = pd.get_dummies(train_merge, columns = ["요일"], prefix = '요일')
#test_merge = pd.get_dummies(test_merge, columns = ["요일"], prefix = '요일')
train_merge.head()

Unnamed: 0,일자,본사시간외근무명령서승인건수,중식메뉴,석식메뉴,중식계,석식계,중식요일,석식요일,휴일전후날여부_0.0,휴일전후날여부_1.0,본사실근무자수
0,2016-02-01,238,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,538.93361,538.93361,1,0,2401.0
1,2016-02-02,319,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,520.129167,520.129167,1,0,2378.0
2,2016-02-03,111,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,363.615063,363.615063,1,0,2365.0
3,2016-02-04,355,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,480.401639,480.401639,1,0,2277.0
4,2016-02-05,34,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,404.979253,404.979253,1,0,2142.0


In [1561]:
import fasttext.util
#fasttext.util.download_model('ko', if_exists = 'ignore')  # English
#ft_model = fasttext.load_model('cc.ko.300.bin')

In [1562]:
# # word2vec
# TRAIN_W2V = True
# model = 0
# try:
#     model = Word2Vec.load('food_embedding.model')
#     print("Model loaded")
# except:
#     if TRAIN_W2V:
#         print("Training w2v")
#         model = Word2Vec(sentences= foods, size = 300, window = 4, min_count = 0, workers = 4)
#         model.save('food_embedding.model')
#     else:
#         print("Model loading failed. Do not train.")

In [1563]:
menus = ["중식메뉴", "석식메뉴"]
# Convert the column of stringified dicts to dicts
# skip this line, if the column contains dicts
# for menu in menus:
#     train_merge = pd.concat([train_merge, train_merge[menu].apply(pd.Series)], axis = 1).drop(menu, axis = 1)
#     test_merge = pd.concat([test_merge, test_merge[menu].apply(pd.Series)], axis = 1).drop(menu, axis = 1)

# train_merge

In [1564]:
train_merge.head(100)

Unnamed: 0,일자,본사시간외근무명령서승인건수,중식메뉴,석식메뉴,중식계,석식계,중식요일,석식요일,휴일전후날여부_0.0,휴일전후날여부_1.0,본사실근무자수
0,2016-02-01,238,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,538.933610,538.933610,1,0,2401.0
1,2016-02-02,319,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,520.129167,520.129167,1,0,2378.0
2,2016-02-03,111,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,363.615063,363.615063,1,0,2365.0
3,2016-02-04,355,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,480.401639,480.401639,1,0,2277.0
4,2016-02-05,34,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,404.979253,404.979253,1,0,2142.0
...,...,...,...,...,...,...,...,...,...,...,...
95,2016-06-23,346,쌀밥/잡곡밥 쇠고기무국 매콤낙지볶음 메추리알조림 숙주나물 포기김치,작은밥/맑은국 토마토스파게티 양송이스프 쇠고기퀘사디아 오이피클/음료 깍두기,877.0,588.0,480.401639,480.401639,1,0,2313.0
96,2016-06-24,52,쌀밥/잡곡밥 도토리묵냉국 목살데리야끼구이 골뱅이무침&소면 간장깻잎지 포기김치,쌀밥/잡곡밥 순두부찌개 해물누룽지탕 버섯메밀전 미역줄기 포기김치,805.0,475.0,404.979253,404.979253,1,0,2184.0
97,2016-06-27,300,쌀밥/잡곡밥 배추된장국 훈제오리구이 실곤약초무침 쌈무/야채스틱 포기김치,쌀밥/잡곡밥 김치찌개 모둠장조림 주꾸미초무침 파래김 양념장 깍두기,1142.0,609.0,538.933610,538.933610,1,0,2314.0
98,2016-06-28,226,쌀밥/잡곡밥 육개장 고등어구이 닭가슴살냉채 마늘쫑무침 포기김치,볶음밥/짜장소스 계란국 탕수만두 멕시칸샐러드 슈크림/단무지 포기김치,917.0,535.0,520.129167,520.129167,1,0,2321.0


In [1565]:
when = ["중식", "석식"]
sub_menus = ["밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈", 
             "나머지"]

def dish_split_lunch(s):
    s = s.split()
    global sub_menus
    l = {"중식/" + sm : [] for sm in sub_menus}
    for ss in s:
        chk = False
        for cateogory in l:
            for c in cateogory.split("/"):
                if c in ss[-2:]:
                    l[cateogory].append(ss)
                    chk = True
                    break
            if chk:
                break
        if not chk:
            l["중식/나머지"].append(ss)
    
    for key in l:
        l[key] = ' '.join(l[key])
    return l

def dish_split_dinner(s):
    s = s.split()
    global sub_menus
    l = {"석식/" + sm : [] for sm in sub_menus}
    for ss in s:
        chk = False
        for cateogory in l:
            for c in cateogory.split("/"):
                if c in ss[-2:]:
                    l[cateogory].append(ss)
                    chk = True
                    break
            if chk:
                break
        if not chk:
            l["석식/나머지"].append(ss)
    
    for key in l:
        l[key] = ' '.join(l[key])
    return l

train_merge["중식메뉴"] = train_merge["중식메뉴"].apply(dish_split_lunch)
test_merge["중식메뉴"] = test_merge["중식메뉴"].apply(dish_split_lunch)

train_merge["석식메뉴"] = train_merge["석식메뉴"].apply(dish_split_dinner)
test_merge["석식메뉴"] = test_merge["석식메뉴"].apply(dish_split_dinner)

train_merge

Unnamed: 0,일자,본사시간외근무명령서승인건수,중식메뉴,석식메뉴,중식계,석식계,중식요일,석식요일,휴일전후날여부_0.0,휴일전후날여부_1.0,본사실근무자수
0,2016-02-01,238,{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,1039.0,331.0,538.933610,538.933610,1,0,2401.0
1,2016-02-02,319,{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,867.0,560.0,520.129167,520.129167,1,0,2378.0
2,2016-02-03,111,{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,1017.0,573.0,363.615063,363.615063,1,0,2365.0
3,2016-02-04,355,{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,978.0,525.0,480.401639,480.401639,1,0,2277.0
4,2016-02-05,34,{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,925.0,330.0,404.979253,404.979253,1,0,2142.0
...,...,...,...,...,...,...,...,...,...,...,...
1201,2021-01-20,4,{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,1093.0,421.0,363.615063,363.615063,1,0,2319.0
1202,2021-01-21,462,{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,832.0,353.0,480.401639,480.401639,1,0,2309.0
1203,2021-01-22,1,{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,579.0,217.0,404.979253,404.979253,1,0,2177.0
1204,2021-01-25,616,{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두...,1145.0,502.0,538.933610,538.933610,1,0,2396.0


In [1566]:
menus = ["중식메뉴", "석식메뉴"]
# Convert the column of stringified dicts to dicts
# skip this line, if the column contains dicts
for menu in menus:
    train_merge = pd.concat([train_merge, train_merge[menu].apply(pd.Series)], axis = 1).drop(menu, axis = 1)
    test_merge = pd.concat([test_merge, test_merge[menu].apply(pd.Series)], axis = 1).drop(menu, axis = 1)

train_merge

Unnamed: 0,일자,본사시간외근무명령서승인건수,중식계,석식계,중식요일,석식요일,휴일전후날여부_0.0,휴일전후날여부_1.0,본사실근무자수,중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈,중식/나머지,석식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈,석식/나머지
0,2016-02-01,238,1039.0,331.0,538.933610,538.933610,1,0,2401.0,쌀밥/잡곡밥 오징어찌개 요구르트,쇠불고기 계란찜 청포묵무침 포기김치,쌀밥/잡곡밥 육개장,자반고등어구이 두부조림 건파래무침 포기김치
1,2016-02-02,319,867.0,560.0,520.129167,520.129167,1,0,2378.0,쌀밥/잡곡밥 김치찌개 요구르트,가자미튀김 모둠소세지구이 마늘쫑무침 배추겉절이,콩나물밥 어묵국,양념장 유산슬 아삭고추무침 바나나 포기김치
2,2016-02-03,111,1017.0,573.0,363.615063,363.615063,1,0,2365.0,카레덮밥 팽이장국 요구르트,치킨핑거 쫄면야채무침 견과류조림 포기김치,쌀밥/잡곡밥 청국장찌개,황태양념구이 고기전 새송이버섯볶음 포기김치
3,2016-02-04,355,978.0,525.0,480.401639,480.401639,1,0,2277.0,쌀밥/잡곡밥 쇠고기무국 요구르트,주꾸미볶음 부추전 시금치나물 포기김치,미니김밥 우동 멕시칸샐러드 무피클,겨자장 군고구마 포기김치
4,2016-02-05,34,925.0,330.0,404.979253,404.979253,1,0,2142.0,쌀밥/잡곡밥 떡국 요구르트,돈육씨앗강정 우엉잡채 청경채무침 포기김치,쌀밥/잡곡밥 차돌박이찌개,닭갈비 감자소세지볶음 콩나물무침 포기김치
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201,2021-01-20,4,1093.0,421.0,363.615063,363.615063,1,0,2319.0,쌀밥/흑미밥/찰현미밥 아욱국 양상추샐러드,수제함박스테이크 견과류마카로니범벅 생깻잎지 단호박물김치 오리엔탈D,김치볶음밥 미니쫄우동,맛살튀김 브로콜리깨소스무침 계란후라이 고들빼기무침 겉절이김치
1202,2021-01-21,462,832.0,353.0,480.401639,480.401639,1,0,2309.0,쌀밥/수수밥/찰현미밥 냉이된장국 무생채 양상추샐러드,동파육 봄동전 청경채/버섯숙회 초장 파인D,흑미밥 쇠고기무국,삼치양념구이 비엔나채소볶음 숙주나물당근무침 포기김치
1203,2021-01-22,1,579.0,217.0,404.979253,404.979253,1,0,2177.0,전주비빔밥 계란파국 양상추샐러드,약고추장 요거닭 올방개묵무침 파프리카해초무침 포기김치 자몽D,흑미밥 수제비국,수제맛쵸킹탕수육 유부채소겨자냉채 참나물무침 갓김치/겉절이김치
1204,2021-01-25,616,1145.0,502.0,538.933610,538.933610,1,0,2396.0,쌀밥/흑미밥/찰현미밥 전주식콩나물해장국 양상추샐러드,돈육간장불고기 깐풍연근 연두부 달래양념장 봄동겉절이/양념김 오렌지D,흑미밥 열무된장국 깻잎쌈,장어강정 데리야끼소스 생강채 오이선 포기김치


In [1567]:
when = ["중식", "석식"]
sub_menus = ["밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈", 
             "나머지"]

def embedding(s):
    tmp = [np.array(ft_model[i]) for i in s.split()]
    if tmp:
        ans = sum(tmp)/len(tmp)
        return ans
    else:
        return np.zeros((300))

for w in when:
    for sm in sub_menus:
        train_merge[w + "/" + sm] = train_merge[w + "/" + sm].apply(embedding)
        test_merge[w + "/" + sm] = test_merge[w + "/" + sm].apply(embedding)

In [1568]:
train_merge

Unnamed: 0,일자,본사시간외근무명령서승인건수,중식계,석식계,중식요일,석식요일,휴일전후날여부_0.0,휴일전후날여부_1.0,본사실근무자수,중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈,중식/나머지,석식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈,석식/나머지
0,2016-02-01,238,1039.0,331.0,538.933610,538.933610,1,0,2401.0,"[-0.009021566, -0.0059680566, -0.0029994827, 0...","[-0.01597105, -0.028787985, 0.008258557, 0.015...","[-0.011328502, -0.03489685, -0.051722296, 0.02...","[-0.008463055, -0.0067860894, -0.0007102494, 0..."
1,2016-02-02,319,867.0,560.0,520.129167,520.129167,1,0,2378.0,"[-0.022630759, -0.021249453, -0.01211046, 0.06...","[-0.00024044956, -0.0059660394, 0.03746883, 0....","[0.014477678, -0.012611352, -0.012776874, 0.00...","[-0.0036032118, -0.046703767, -0.026835075, 0...."
2,2016-02-03,111,1017.0,573.0,363.615063,363.615063,1,0,2365.0,"[-0.020261256, -0.012177922, 0.0025198802, 0.0...","[-0.018608566, -0.0053525334, 0.011916319, -0....","[0.008841409, 0.0024761423, -0.04665175, -0.00...","[-0.0007222025, -0.0022683793, 0.011267977, 0...."
3,2016-02-04,355,978.0,525.0,480.401639,480.401639,1,0,2277.0,"[-0.01792589, -0.013907288, 0.0042366125, 0.04...","[-0.015220439, -0.012342174, -0.00089789205, 0...","[0.046877716, 0.073178165, 0.017126754, -0.018...","[-0.018222265, -0.03741447, -0.045719277, 0.02..."
4,2016-02-05,34,925.0,330.0,404.979253,404.979253,1,0,2142.0,"[-0.024799675, -0.05777968, -0.054803044, 0.06...","[-0.009853404, -0.0061942944, 0.012871876, -0....","[-0.00211759, -0.0013612653, 0.009182181, 0.01...","[-0.017457742, -0.030443257, -0.018935578, 0.0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201,2021-01-20,4,1093.0,421.0,363.615063,363.615063,1,0,2319.0,"[-0.013167973, 0.0015584858, 0.027612226, 0.00...","[-0.011329092, -0.010711754, -0.017086413, 0.0...","[-0.009000048, -0.01021237, -0.01420886, -0.00...","[-0.012123312, -0.003679589, -0.0027083256, 0...."
1202,2021-01-21,462,832.0,353.0,480.401639,480.401639,1,0,2309.0,"[-0.0070627416, -0.0021746943, 0.035651226, 0....","[0.044242457, -0.03205401, -0.027028954, 0.021...","[-0.004132853, -0.018846883, 0.07740514, 0.000...","[0.000657212, -0.011905553, 0.009328121, 0.007..."
1203,2021-01-22,1,579.0,217.0,404.979253,404.979253,1,0,2177.0,"[0.0094711585, -0.02476763, 0.031836003, 0.013...","[-0.009193643, -0.013219789, 0.008184872, -0.0...","[0.015309716, -0.029849533, 0.10077386, 0.0073...","[-0.00093624345, -0.0021879587, 0.006673075, 0..."
1204,2021-01-25,616,1145.0,502.0,538.933610,538.933610,1,0,2396.0,"[-0.0077912374, 0.00022890356, 0.039123062, 0....","[-0.00042859884, -0.00058587705, -0.0031435348...","[0.006461442, -0.0016984638, 0.043237325, 0.01...","[-0.0227213, -0.0023810572, 0.0100454, 0.01105..."


In [1569]:
# seperate each vecs to new cols
when = ["중식", "석식"]
sub_menus = ["밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈", 
             "나머지"]
for w in when:
    for sm in sub_menus:
        tmp_col = w + "/" + sm
        tmp = [tmp_col + str(i) for i in range(300)]
        train_merge[tmp] = pd.DataFrame(train_merge[tmp_col].to_list(), columns = tmp)
        test_merge[tmp] = pd.DataFrame(test_merge[tmp_col].to_list(), columns = tmp)
        train_merge.drop([tmp_col], inplace = True, axis = 1)
        test_merge.drop([tmp_col], inplace = True, axis = 1)

train_merge

Unnamed: 0,일자,본사시간외근무명령서승인건수,중식계,석식계,중식요일,석식요일,휴일전후날여부_0.0,휴일전후날여부_1.0,본사실근무자수,중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈0,...,석식/나머지290,석식/나머지291,석식/나머지292,석식/나머지293,석식/나머지294,석식/나머지295,석식/나머지296,석식/나머지297,석식/나머지298,석식/나머지299
0,2016-02-01,238,1039.0,331.0,538.933610,538.933610,1,0,2401.0,-0.009022,...,0.004170,0.010728,-0.017883,0.014925,0.024990,-0.011716,0.020715,0.000160,-0.004070,-0.001437
1,2016-02-02,319,867.0,560.0,520.129167,520.129167,1,0,2378.0,-0.022631,...,0.014311,0.025974,-0.019712,0.006741,0.058076,-0.035742,0.032093,-0.037256,-0.005234,0.035360
2,2016-02-03,111,1017.0,573.0,363.615063,363.615063,1,0,2365.0,-0.020261,...,-0.008009,0.025644,-0.021813,0.017591,0.017668,-0.014059,0.023668,-0.013707,-0.008314,-0.007854
3,2016-02-04,355,978.0,525.0,480.401639,480.401639,1,0,2277.0,-0.017926,...,0.004920,0.042633,-0.017416,0.040675,0.023234,0.010613,0.031908,0.035277,0.009209,-0.001624
4,2016-02-05,34,925.0,330.0,404.979253,404.979253,1,0,2142.0,-0.024800,...,-0.005859,0.020131,-0.003703,0.012683,0.020809,-0.028818,0.019825,-0.027615,0.009442,0.001985
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201,2021-01-20,4,1093.0,421.0,363.615063,363.615063,1,0,2319.0,-0.013168,...,0.000293,0.022048,-0.014772,-0.023563,0.023621,0.006152,0.002051,-0.008594,-0.010062,0.004908
1202,2021-01-21,462,832.0,353.0,480.401639,480.401639,1,0,2309.0,-0.007063,...,0.010505,0.009282,-0.008891,0.001210,0.020745,-0.005849,0.009055,-0.005254,-0.007279,-0.002749
1203,2021-01-22,1,579.0,217.0,404.979253,404.979253,1,0,2177.0,0.009471,...,0.001280,0.016546,-0.009470,-0.020122,0.009086,0.004731,0.008766,-0.001272,-0.002164,-0.004649
1204,2021-01-25,616,1145.0,502.0,538.933610,538.933610,1,0,2396.0,-0.007791,...,-0.000551,0.017068,-0.017832,0.004498,0.022398,0.001995,0.008492,-0.008337,-0.010093,0.003049


In [1570]:
from sklearn.decomposition import PCA

# PCA 300 -> 50
pca_target = 50
pca = PCA(n_components = pca_target)
for w in when:
    for sm in sub_menus:
        tmp_col = w + "/" + sm
        tmp = [tmp_col + str(i) for i in range(300)]
        target = [tmp_col + "_pca" + str(i) for i in range(pca_target)]
        train_merge[target] = pca.fit_transform(train_merge[tmp])
        test_merge[target] = pca.fit_transform(test_merge[tmp])
        
# del cols before PCA

for w in when:
    for sm in sub_menus:
        tmp_col = w + "/" + sm
        tmp = [tmp_col + str(i) for i in range(300)]
        train_merge.drop(tmp, inplace = True, axis = 1)
        test_merge.drop(tmp, inplace = True, axis = 1)
        
train_merge.head()

Unnamed: 0,일자,본사시간외근무명령서승인건수,중식계,석식계,중식요일,석식요일,휴일전후날여부_0.0,휴일전후날여부_1.0,본사실근무자수,중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈_pca0,...,석식/나머지_pca40,석식/나머지_pca41,석식/나머지_pca42,석식/나머지_pca43,석식/나머지_pca44,석식/나머지_pca45,석식/나머지_pca46,석식/나머지_pca47,석식/나머지_pca48,석식/나머지_pca49
0,2016-02-01,238,1039.0,331.0,538.93361,538.93361,1,0,2401.0,-0.0275,...,0.011053,0.009614,0.00071,0.001419,-0.007934,0.001879,-0.002761,0.006137,-0.005588,-0.002539
1,2016-02-02,319,867.0,560.0,520.129167,520.129167,1,0,2378.0,0.155666,...,0.003902,-0.011935,-0.015337,0.011155,0.015432,-0.009664,-0.00919,0.014253,-0.013929,-0.00993
2,2016-02-03,111,1017.0,573.0,363.615063,363.615063,1,0,2365.0,-0.024361,...,-0.008679,-0.035094,-0.002906,-0.002235,-0.016471,-0.002469,-0.003526,0.015461,-0.017884,0.005483
3,2016-02-04,355,978.0,525.0,480.401639,480.401639,1,0,2277.0,-0.021216,...,-0.077297,-0.043759,-0.014366,0.079209,-0.039015,-0.084219,0.015986,0.020593,-0.01476,-0.010496
4,2016-02-05,34,925.0,330.0,404.979253,404.979253,1,0,2142.0,0.716497,...,0.006683,-0.003866,0.018666,0.015034,-0.004305,0.002678,0.011171,0.013965,0.016844,-0.000543


In [1571]:
from sklearn.preprocessing import RobustScaler

target = []

# for w in when:
#     tmp_col = w
#     target += [tmp_col + "_pca" + str(i) for i in range(pca_target)]
        
scaled = ["본사시간외근무명령서승인건수", 
          #"최저기온(°C)", "평균기온(°C)", "최고기온(°C)", "평균 풍속(m/s)", 
          #"최대 풍속(m/s)", "일강수량(mm)", 
          "본사실근무자수", "중식요일", "석식요일"] # + target
train_merge[scaled] = RobustScaler().fit_transform(train_merge[scaled].values)
test_merge[scaled] = RobustScaler().fit_transform(test_merge[scaled].values)

for w in when:
    for sm in sub_menus:
        tmp_col = w + "/" + sm
        tmp = [tmp_col + "_pca" + str(i) for i in range(pca_target)]
        train_merge[tmp] = RobustScaler().fit_transform(train_merge[tmp].values)
        test_merge[tmp] = RobustScaler().fit_transform(test_merge[tmp].values)

In [1572]:
train_merge

Unnamed: 0,일자,본사시간외근무명령서승인건수,중식계,석식계,중식요일,석식요일,휴일전후날여부_0.0,휴일전후날여부_1.0,본사실근무자수,중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈_pca0,...,석식/나머지_pca40,석식/나머지_pca41,석식/나머지_pca42,석식/나머지_pca43,석식/나머지_pca44,석식/나머지_pca45,석식/나머지_pca46,석식/나머지_pca47,석식/나머지_pca48,석식/나머지_pca49
0,2016-02-01,-0.135045,1039.0,331.0,0.508311,0.508311,1,0,0.243767,0.275175,...,0.781986,0.744463,0.049982,0.135936,-0.597120,0.100504,-0.171094,0.442015,-0.432421,-0.210843
1,2016-02-02,0.045759,867.0,560.0,0.345007,0.345007,1,0,0.116343,1.231319,...,0.267233,-0.905811,-1.112188,0.905567,1.170538,-0.844445,-0.580975,1.053776,-1.070320,-0.891031
2,2016-02-03,-0.418527,1017.0,573.0,-1.014213,-1.014213,1,0,0.044321,0.291560,...,-0.638367,-2.679394,-0.211898,-0.152905,-1.242948,-0.255455,-0.219837,1.144810,-1.372845,0.527374
3,2016-02-04,0.126116,978.0,525.0,0.000000,0.000000,1,0,-0.443213,0.307978,...,-5.577554,-3.342967,-1.041840,6.285147,-2.948450,-6.947979,1.024262,1.531681,-1.133887,-0.943042
4,2016-02-05,-0.590402,925.0,330.0,-0.654993,-0.654993,1,0,-1.191136,4.158899,...,0.467409,-0.287924,1.350445,1.212152,-0.322580,0.165970,0.717238,1.032097,1.283173,-0.027130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201,2021-01-20,-0.657366,1093.0,421.0,-1.014213,-1.014213,1,0,-0.210526,0.278142,...,0.739541,1.264284,0.382012,0.000794,-0.184799,-0.961542,0.434689,-0.730755,-0.013455,-0.976263
1202,2021-01-21,0.364955,832.0,353.0,0.000000,0.000000,1,0,-0.265928,0.408865,...,0.328654,-0.482724,0.396323,0.715173,-0.422476,-0.032011,-0.785386,-0.619887,0.190642,-0.868560
1203,2021-01-22,-0.664062,579.0,217.0,-0.654993,-0.654993,1,0,-0.997230,0.264108,...,0.013462,0.521876,0.094031,0.602364,-0.534912,-0.708097,0.408065,-0.336409,-0.351600,-0.143974
1204,2021-01-25,0.708705,1145.0,502.0,0.508311,0.508311,1,0,0.216066,-0.257173,...,0.489270,0.559591,-0.478625,0.007169,0.793018,0.338856,0.969230,-1.158648,0.229460,-0.715571


In [1573]:

from sklearn.preprocessing import StandardScaler

# std scaling embedded vecs

# scaled_menus_train = {}
# scaled_menus_test = {}

# menus = ["중식메뉴", "석식메뉴"]

# for menu in menus:
#     scaled_menus_train[menu] = StandardScaler().fit_transform(train_merge[menu].to_list())
#     scaled_menus_test[menu] = StandardScaler().fit_transform(test_merge[menu].to_list())
#     for idx, val in enumerate(scaled_menus_train[menu]):
#         train_merge[menu][idx] = val
#     for idx, val in enumerate(scaled_menus_test[menu]):
#         test_merge[menu][idx] = val

In [1574]:
import pandas_profiling
from pandas_profiling import ProfileReport

# profile = train_merge[["본사시간외근무명령서승인건수", "중식계", "석식계", "최저기온(°C)", "평균기온(°C)", 
#                               "최고기온(°C)", "평균 풍속(m/s)", "최대 풍속(m/s)", "일강수량(mm)", "휴일전후날여부_0.0", "휴일전후날여부_1.0"]].profile_report()
# profile

In [1575]:
from datetime import datetime, timedelta

def date(s):
    return int(s.split("-")[2])

def month(s):
    return int(s.split("-")[1])

# train_merge["월"] = train_merge["일자"].apply(month)
train_merge["일"] = train_merge["일자"].apply(date)

# test_merge["월"] = test_merge["일자"].apply(month)
test_merge["일"] = test_merge["일자"].apply(date)

train_merge.drop(["일자"], inplace = True, axis = 1)
test_merge.drop(["일자"], inplace = True, axis = 1)
train_merge.head()

Unnamed: 0,본사시간외근무명령서승인건수,중식계,석식계,중식요일,석식요일,휴일전후날여부_0.0,휴일전후날여부_1.0,본사실근무자수,중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈_pca0,중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈_pca1,...,석식/나머지_pca41,석식/나머지_pca42,석식/나머지_pca43,석식/나머지_pca44,석식/나머지_pca45,석식/나머지_pca46,석식/나머지_pca47,석식/나머지_pca48,석식/나머지_pca49,일
0,-0.135045,1039.0,331.0,0.508311,0.508311,1,0,0.243767,0.275175,0.368416,...,0.744463,0.049982,0.135936,-0.59712,0.100504,-0.171094,0.442015,-0.432421,-0.210843,1
1,0.045759,867.0,560.0,0.345007,0.345007,1,0,0.116343,1.231319,-0.834697,...,-0.905811,-1.112188,0.905567,1.170538,-0.844445,-0.580975,1.053776,-1.07032,-0.891031,2
2,-0.418527,1017.0,573.0,-1.014213,-1.014213,1,0,0.044321,0.29156,0.176607,...,-2.679394,-0.211898,-0.152905,-1.242948,-0.255455,-0.219837,1.14481,-1.372845,0.527374,3
3,0.126116,978.0,525.0,0.0,0.0,1,0,-0.443213,0.307978,0.288335,...,-3.342967,-1.04184,6.285147,-2.94845,-6.947979,1.024262,1.531681,-1.133887,-0.943042,4
4,-0.590402,925.0,330.0,-0.654993,-0.654993,1,0,-1.191136,4.158899,7.614822,...,-0.287924,1.350445,1.212152,-0.32258,0.16597,0.717238,1.032097,1.283173,-0.02713,5


In [1576]:
train_merge

Unnamed: 0,본사시간외근무명령서승인건수,중식계,석식계,중식요일,석식요일,휴일전후날여부_0.0,휴일전후날여부_1.0,본사실근무자수,중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈_pca0,중식/밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장/우유/두유/주스/르트/음료/러드/피클/생채/쌈_pca1,...,석식/나머지_pca41,석식/나머지_pca42,석식/나머지_pca43,석식/나머지_pca44,석식/나머지_pca45,석식/나머지_pca46,석식/나머지_pca47,석식/나머지_pca48,석식/나머지_pca49,일
0,-0.135045,1039.0,331.0,0.508311,0.508311,1,0,0.243767,0.275175,0.368416,...,0.744463,0.049982,0.135936,-0.597120,0.100504,-0.171094,0.442015,-0.432421,-0.210843,1
1,0.045759,867.0,560.0,0.345007,0.345007,1,0,0.116343,1.231319,-0.834697,...,-0.905811,-1.112188,0.905567,1.170538,-0.844445,-0.580975,1.053776,-1.070320,-0.891031,2
2,-0.418527,1017.0,573.0,-1.014213,-1.014213,1,0,0.044321,0.291560,0.176607,...,-2.679394,-0.211898,-0.152905,-1.242948,-0.255455,-0.219837,1.144810,-1.372845,0.527374,3
3,0.126116,978.0,525.0,0.000000,0.000000,1,0,-0.443213,0.307978,0.288335,...,-3.342967,-1.041840,6.285147,-2.948450,-6.947979,1.024262,1.531681,-1.133887,-0.943042,4
4,-0.590402,925.0,330.0,-0.654993,-0.654993,1,0,-1.191136,4.158899,7.614822,...,-0.287924,1.350445,1.212152,-0.322580,0.165970,0.717238,1.032097,1.283173,-0.027130,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201,-0.657366,1093.0,421.0,-1.014213,-1.014213,1,0,-0.210526,0.278142,-0.285479,...,1.264284,0.382012,0.000794,-0.184799,-0.961542,0.434689,-0.730755,-0.013455,-0.976263,20
1202,0.364955,832.0,353.0,0.000000,0.000000,1,0,-0.265928,0.408865,-0.320067,...,-0.482724,0.396323,0.715173,-0.422476,-0.032011,-0.785386,-0.619887,0.190642,-0.868560,21
1203,-0.664062,579.0,217.0,-0.654993,-0.654993,1,0,-0.997230,0.264108,-0.327016,...,0.521876,0.094031,0.602364,-0.534912,-0.708097,0.408065,-0.336409,-0.351600,-0.143974,22
1204,0.708705,1145.0,502.0,0.508311,0.508311,1,0,0.216066,-0.257173,0.395537,...,0.559591,-0.478625,0.007169,0.793018,0.338856,0.969230,-1.158648,0.229460,-0.715571,25


In [1577]:
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor

import warnings

warnings.simplefilter(action = 'ignore')


train_target_lunch = train_merge['중식계']
train_target_dinner = train_merge['석식계']

lunch_features = [s for s in train_merge.columns if "중식" in s and "중식계" not in s]
dinner_features = [s for s in train_merge.columns if "석식" in s and "석식계" not in s] + ['본사시간외근무명령서승인건수']

train_lunch_features = train_merge.drop(['중식계', '석식계'] + dinner_features, axis=1)
train_dinner_features = train_merge.drop(['중식계', '석식계'] + lunch_features, axis=1)

test_lunch_features = test_merge.drop(dinner_features, axis=1)
test_dinner_features = test_merge.drop(lunch_features, axis=1)

In [1578]:
lunch_xgb = XGBRegressor(objective='reg:squarederror', eval_metric = 'mae')
dinner_xgb = XGBRegressor(objective='reg:squarederror', eval_metric = 'mae')

lunch_xgb_params = {
    'learning_rate': [0.01 * i for i in range(3, 7, 2)],
    'booster': ['gbtree', 'dart'],
    'max_depth' : [5, 6],
    'subsample' : [0.5, 0.7],
    'n_estimators' : [100, 300],
}

dinner_xgb_params = {
    'learning_rate': [0.01 * i for i in range(3, 7, 2)],
    'booster': ['gbtree', 'dart'],
    'max_depth' : [5, 6],
    'subsample' : [0.5, 0.7],
    'n_estimators' : [100, 300],
}

lunch_xgb_model = GridSearchCV(lunch_xgb, lunch_xgb_params, scoring = 'neg_mean_absolute_error', cv = 5, verbose = 4)
dinner_xgb_model = GridSearchCV(dinner_xgb, dinner_xgb_params, scoring = 'neg_mean_absolute_error', cv = 5, verbose = 4)

In [1579]:
list(lunch_xgb.get_params().keys())

['objective',
 'base_score',
 'booster',
 'colsample_bylevel',
 'colsample_bynode',
 'colsample_bytree',
 'gamma',
 'gpu_id',
 'importance_type',
 'interaction_constraints',
 'learning_rate',
 'max_delta_step',
 'max_depth',
 'min_child_weight',
 'missing',
 'monotone_constraints',
 'n_estimators',
 'n_jobs',
 'num_parallel_tree',
 'random_state',
 'reg_alpha',
 'reg_lambda',
 'scale_pos_weight',
 'subsample',
 'tree_method',
 'validate_parameters',
 'verbosity',
 'eval_metric']

In [1580]:
lunch_xgb_model.fit(train_lunch_features, train_target_lunch)
lunch_xgb_model_est = lunch_xgb_model.best_estimator_

Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV 1/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.5;, score=-121.738 total time=   0.2s
[CV 2/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.5;, score=-97.674 total time=   0.2s
[CV 3/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.5;, score=-102.151 total time=   0.2s
[CV 4/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.5;, score=-103.195 total time=   0.2s
[CV 5/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.5;, score=-140.560 total time=   0.2s
[CV 1/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.7;, score=-122.603 total time=   0.2s
[CV 2/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.7;, score=-96.693 total time=   0.2s
[CV 3/5] END booster=gbtree, l

[CV 4/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.5;, score=-114.144 total time=   0.2s
[CV 5/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.5;, score=-136.340 total time=   0.2s
[CV 1/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.7;, score=-100.719 total time=   0.2s
[CV 2/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.7;, score=-87.118 total time=   0.2s
[CV 3/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.7;, score=-94.077 total time=   0.2s
[CV 4/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.7;, score=-114.333 total time=   0.2s
[CV 5/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.7;, score=-132.821 total time=   0.2s
[CV 1/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=300, subsample=0.

[CV 3/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=100, subsample=0.7;, score=-98.254 total time=   1.1s
[CV 4/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=100, subsample=0.7;, score=-118.901 total time=   1.1s
[CV 5/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=100, subsample=0.7;, score=-130.116 total time=   1.2s
[CV 1/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=300, subsample=0.5;, score=-97.589 total time=   9.4s
[CV 2/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=300, subsample=0.5;, score=-87.454 total time=   9.4s
[CV 3/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=300, subsample=0.5;, score=-98.560 total time=   9.5s
[CV 4/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=300, subsample=0.5;, score=-122.391 total time=   9.6s
[CV 5/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=300, subsample=0.5;, score=-130.390

In [1581]:
xgb_preds_lunch = lunch_xgb_model_est.predict(test_lunch_features)

In [1582]:
print(f"best params : {lunch_xgb_model.best_params_}")
print(f"best estimator : {lunch_xgb_model.best_estimator_}")
print(f"best score : {-1 * lunch_xgb_model.best_score_}")

best params : {'booster': 'dart', 'learning_rate': 0.03, 'max_depth': 5, 'n_estimators': 300, 'subsample': 0.7}
best estimator : XGBRegressor(base_score=0.5, booster='dart', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, eval_metric='mae', gamma=0,
             gpu_id=-1, importance_type='gain', interaction_constraints='',
             learning_rate=0.03, max_delta_step=0, max_depth=5,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=300, n_jobs=16, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=0.7,
             tree_method='exact', validate_parameters=1, verbosity=None)
best score : 104.62283046339516


In [1583]:
submission["중식계"] = xgb_preds_lunch

In [1584]:
dinner_xgb_model.fit(train_dinner_features, train_target_dinner)
dinner_xgb_model_est = dinner_xgb_model.best_estimator_
xgb_preds_dinner = dinner_xgb_model_est.predict(test_dinner_features)

Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV 1/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.5;, score=-94.053 total time=   0.2s
[CV 2/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.5;, score=-54.287 total time=   0.2s
[CV 3/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.5;, score=-59.210 total time=   0.2s
[CV 4/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.5;, score=-60.005 total time=   0.2s
[CV 5/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.5;, score=-87.559 total time=   0.2s
[CV 1/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.7;, score=-93.350 total time=   0.2s
[CV 2/5] END booster=gbtree, learning_rate=0.03, max_depth=5, n_estimators=100, subsample=0.7;, score=-53.305 total time=   0.2s
[CV 3/5] END booster=gbtree, learni

[CV 5/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.5;, score=-95.490 total time=   0.2s
[CV 1/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.7;, score=-79.418 total time=   0.2s
[CV 2/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.7;, score=-53.880 total time=   0.2s
[CV 3/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.7;, score=-52.218 total time=   0.2s
[CV 4/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.7;, score=-61.419 total time=   0.2s
[CV 5/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=100, subsample=0.7;, score=-94.830 total time=   0.2s
[CV 1/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=300, subsample=0.5;, score=-76.763 total time=   0.8s
[CV 2/5] END booster=gbtree, learning_rate=0.05, max_depth=6, n_estimators=300, subsample=0.5;, s

[CV 5/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=100, subsample=0.7;, score=-94.970 total time=   1.1s
[CV 1/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=300, subsample=0.5;, score=-73.261 total time=   9.0s
[CV 2/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=300, subsample=0.5;, score=-56.775 total time=   8.9s
[CV 3/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=300, subsample=0.5;, score=-53.742 total time=   9.0s
[CV 4/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=300, subsample=0.5;, score=-62.622 total time=   9.0s
[CV 5/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=300, subsample=0.5;, score=-98.299 total time=   8.8s
[CV 1/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=300, subsample=0.7;, score=-74.552 total time=   9.0s
[CV 2/5] END booster=dart, learning_rate=0.05, max_depth=5, n_estimators=300, subsample=0.7;, score=-56.318 tot

In [1585]:
print(f"best params : {dinner_xgb_model.best_params_}")
print(f"best estimator : {dinner_xgb_model.best_estimator_}")
print(f"best score : {-1 * dinner_xgb_model.best_score_}")

best params : {'booster': 'dart', 'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 100, 'subsample': 0.7}
best estimator : XGBRegressor(base_score=0.5, booster='dart', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, eval_metric='mae', gamma=0,
             gpu_id=-1, importance_type='gain', interaction_constraints='',
             learning_rate=0.05, max_delta_step=0, max_depth=5,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=16, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=0.7,
             tree_method='exact', validate_parameters=1, verbosity=None)
best score : 67.60261525247843


In [1586]:
submission["석식계"] = xgb_preds_dinner
submission

Unnamed: 0,일자,중식계,석식계
0,2021-01-27,1162.596924,299.518768
1,2021-01-28,966.274658,469.759064
2,2021-01-29,856.507812,352.16391
3,2021-02-01,873.128357,511.132843
4,2021-02-02,730.102783,532.375488
5,2021-02-03,859.12616,279.089783
6,2021-02-04,713.90332,495.032288
7,2021-02-05,1093.410522,285.066437
8,2021-02-08,930.55542,545.406982
9,2021-02-09,917.424927,474.89502


In [1587]:
submission.to_csv('submission_성동현.csv', index = False, encoding = "UTF-8")

In [1588]:
submission

Unnamed: 0,일자,중식계,석식계
0,2021-01-27,1162.596924,299.518768
1,2021-01-28,966.274658,469.759064
2,2021-01-29,856.507812,352.16391
3,2021-02-01,873.128357,511.132843
4,2021-02-02,730.102783,532.375488
5,2021-02-03,859.12616,279.089783
6,2021-02-04,713.90332,495.032288
7,2021-02-05,1093.410522,285.066437
8,2021-02-08,930.55542,545.406982
9,2021-02-09,917.424927,474.89502
