In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

np.random.seed(2)

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
import os
import logging

from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error

In [4]:
# load embedding model

from gensim.models import fasttext

model = fasttext.load_facebook_model("wiki.ko/wiki.ko.bin")

In [61]:
import tensorflow as tf
print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))

2.4.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [62]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Thu_Jun_11_22:26:48_Pacific_Daylight_Time_2020
Cuda compilation tools, release 11.0, V11.0.194
Build cuda_11.0_bu.relgpu_drvr445TC445_37.28540450_0


In [63]:
weather = pd.read_csv("data/진주기상정보.csv", encoding = "cp949")
train = pd.read_csv("data/train.csv")
test = pd.read_csv("data/test.csv")

In [64]:
weather.rename(columns = {"일시" : "일자"}, inplace = True)
train.describe()

Unnamed: 0,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식계,석식계
count,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0
mean,2807.815768,157.913693,241.142739,274.117012,43.506224,890.33444,461.772614
std,171.264404,144.190572,43.532298,246.239651,109.9374,209.505057,139.179202
min,2601.0,23.0,41.0,0.0,0.0,296.0,0.0
25%,2645.0,71.0,217.0,4.0,0.0,758.0,406.0
50%,2760.0,105.0,245.0,299.0,0.0,879.0,483.0
75%,2962.0,185.0,272.0,452.0,0.0,1032.0,545.0
max,3305.0,1224.0,378.0,1044.0,533.0,1459.0,905.0


In [65]:
train_merge = pd.merge(train, weather, on = "일자", how = "left")
test_merge = pd.merge(test, weather, on = "일자", how = "left")
train_merge.describe()

Unnamed: 0,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm)
count,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0
mean,2807.815768,157.913693,241.142739,274.117012,43.506224,890.33444,461.772614,8.032697,13.680463,20.120913,1.072402,3.312365,12.259833
std,171.264404,144.190572,43.532298,246.239651,109.9374,209.505057,139.179202,10.318334,9.326583,8.893296,0.538097,1.171812,11.916519
min,2601.0,23.0,41.0,0.0,0.0,296.0,0.0,-15.4,-9.1,-1.9,0.2,1.2,0.0
25%,2645.0,71.0,217.0,4.0,0.0,758.0,406.0,-1.0,5.7,12.6,0.7,2.5,7.233019
50%,2760.0,105.0,245.0,299.0,0.0,879.0,483.0,7.6,13.8,20.6,1.0,3.1,12.346667
75%,2962.0,185.0,272.0,452.0,0.0,1032.0,545.0,17.6,21.9,27.8,1.3,3.9,13.8768
max,3305.0,1224.0,378.0,1044.0,533.0,1459.0,905.0,26.8,30.6,37.9,4.4,8.7,139.6


In [66]:
train_merge.head()

Unnamed: 0,일자,요일,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,조식메뉴,중식메뉴,석식메뉴,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm)
0,2016-02-01,월,2601,50,150,238,0.0,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 (쌀:국내산) 된장찌개 쥐...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 오징어찌개 쇠불고기 (쇠고기:호주산) 계란찜 ...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 육개장 자반고등어구이 두부조림 건파래무침 ...",1039.0,331.0,-4.1,-0.6,5.0,1.3,4.3,13.8768
1,2016-02-02,화,2601,50,173,319,0.0,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 (쌀:국내산) 호박젓국찌...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무...","콩나물밥*양념장 (쌀,현미흑미:국내산) 어묵국 유산슬 (쇠고기:호주산) 아삭고추무...",867.0,560.0,-9.1,-2.3,6.2,0.8,2.5,13.8768
2,2016-02-03,수,2601,56,180,111,0.0,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 (쌀:국내산) 콩나물국...,"카레덮밥 (쌀,현미흑미:국내산) 팽이장국 치킨핑거 (닭고기:국내산) 쫄면야채무침 ...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 청국장찌개 황태양념구이 (황태:러시아산) 고기...",1017.0,573.0,-9.3,-1.7,7.7,0.6,2.2,13.8768
3,2016-02-04,목,2601,104,220,355,0.0,"모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 (쌀,닭:국내산) 근대국...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 쇠고기무국 주꾸미볶음 부추전 시금치나물 ...","미니김밥*겨자장 (쌀,현미흑미:국내산) 우동 멕시칸샐러드 군고구마 무피클 포...",978.0,525.0,-7.7,-0.2,7.7,0.7,3.5,13.8768
4,2016-02-05,금,2601,278,181,34,0.0,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 (쌀:국내산) 재첩국 방...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 떡국 돈육씨앗강정 (돼지고기:국내산) 우엉잡채...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 차돌박이찌개 (쇠고기:호주산) 닭갈비 (닭고기:...",925.0,330.0,-6.5,1.3,9.6,1.0,2.9,13.8768


In [67]:
!pip install lxml
import requests
from urllib import parse
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime

def getHoliday(year: int) -> pd.DataFrame:
    url = "http://apis.data.go.kr/B090041/openapi/service/SpcdeInfoService/getRestDeInfo"
    api_key_utf8 = "I2Mv6nH4CWf3VYYi83oy0EJO2upQgmp2GDI%2Fdbxh9ekXkRD4EjMSq0gWZxTy%2FNtAxnjV7soTkOW2xs1qYMdjBg%3D%3D"
    api_key_decode = parse.unquote(api_key_utf8)

    params = {
        "ServiceKey": api_key_decode,
        "solYear": year,
        "numOfRows": 100
    }

    response = requests.get(url, params=params)
    xml = BeautifulSoup(response.text, "html.parser")
    items = xml.find('items')
    item_list = []
    for item in items:
        item_dict = {
            "공휴일": 1, # 무슨 공휴일인지는 없어도 될 듯 하다
            "일자": datetime.strptime(item.find("locdate").text.strip(), '%Y%m%d')
        }
        item_list.append(item_dict)

    return pd.DataFrame(item_list)



In [68]:
holidays = pd.concat([getHoliday(year) for year in range(2016, 2022)])
holidays.describe()

Unnamed: 0,공휴일
count,103.0
mean,1.0
std,0.0
min,1.0
25%,1.0
50%,1.0
75%,1.0
max,1.0


In [69]:
from datetime import datetime, timedelta

def one_day_before(s):
    return str(datetime.strptime(str(s)[:10], "%Y-%m-%d").date() + timedelta(days = -1))

def one_day_after(s):
    return str(datetime.strptime(str(s)[:10], "%Y-%m-%d").date() + timedelta(days = 1))

holidays["공휴일전날"] = holidays["일자"].apply(one_day_before)
holidays["공휴일다음날"] = holidays["일자"].apply(one_day_after)

In [70]:
new_holidays = pd.concat([holidays["공휴일전날"], holidays["공휴일다음날"]])

new_holidays = pd.DataFrame(new_holidays, columns = ["일자"])

new_holidays["휴일전후날여부"] = 1
new_holidays.head(110)

Unnamed: 0,일자,휴일전후날여부
0,2015-12-31,1
1,2016-02-06,1
2,2016-02-07,1
3,2016-02-08,1
4,2016-02-09,1
...,...,...
2,2016-02-09,1
3,2016-02-10,1
4,2016-02-11,1
5,2016-03-02,1


In [71]:
train_merge = pd.merge(train_merge, new_holidays, how = 'left', on  = '일자')
test_merge = pd.merge(test_merge, new_holidays, how = 'left', on = '일자')

train_merge["휴일전후날여부"] = train_merge["휴일전후날여부"].fillna(0)
test_merge["휴일전후날여부"] = test_merge["휴일전후날여부"].fillna(0)

# keep submission source
submission = test_merge[["일자"]]

train_merge.drop(["일자"], inplace = True, axis = 1)
test_merge.drop(["일자"], inplace = True, axis = 1)
train_merge.head(50)

Unnamed: 0,요일,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,조식메뉴,중식메뉴,석식메뉴,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),휴일전후날여부
0,월,2601,50,150,238,0.0,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 (쌀:국내산) 된장찌개 쥐...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 오징어찌개 쇠불고기 (쇠고기:호주산) 계란찜 ...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 육개장 자반고등어구이 두부조림 건파래무침 ...",1039.0,331.0,-4.1,-0.6,5.0,1.3,4.3,13.8768,0.0
1,화,2601,50,173,319,0.0,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 (쌀:국내산) 호박젓국찌...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무...","콩나물밥*양념장 (쌀,현미흑미:국내산) 어묵국 유산슬 (쇠고기:호주산) 아삭고추무...",867.0,560.0,-9.1,-2.3,6.2,0.8,2.5,13.8768,0.0
2,수,2601,56,180,111,0.0,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 (쌀:국내산) 콩나물국...,"카레덮밥 (쌀,현미흑미:국내산) 팽이장국 치킨핑거 (닭고기:국내산) 쫄면야채무침 ...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 청국장찌개 황태양념구이 (황태:러시아산) 고기...",1017.0,573.0,-9.3,-1.7,7.7,0.6,2.2,13.8768,0.0
3,목,2601,104,220,355,0.0,"모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 (쌀,닭:국내산) 근대국...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 쇠고기무국 주꾸미볶음 부추전 시금치나물 ...","미니김밥*겨자장 (쌀,현미흑미:국내산) 우동 멕시칸샐러드 군고구마 무피클 포...",978.0,525.0,-7.7,-0.2,7.7,0.7,3.5,13.8768,0.0
4,금,2601,278,181,34,0.0,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 (쌀:국내산) 재첩국 방...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 떡국 돈육씨앗강정 (돼지고기:국내산) 우엉잡채...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 차돌박이찌개 (쇠고기:호주산) 닭갈비 (닭고기:...",925.0,330.0,-6.5,1.3,9.6,1.0,2.9,13.8768,0.0
5,목,2601,383,143,417,0.0,팬케익/찐빵 우유/두유/주스 계란후라이 견과류죽/쌀밥 (쌀:국내산) 감자찌개 ...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 시래기국 훈제오리구이 도토리묵무침 쌈무/양...","참치회덮밥 (쌀,현미흑미:국내산) 맑은국 군만두 과일샐러드 락교 포기김치 (...",1045.0,550.0,-3.0,6.1,14.4,1.0,3.7,13.8768,1.0
6,금,2601,389,156,93,0.0,모닝롤/야채샌드 우유/두유/주스 계란후라이 고구마죽/쌀밥 (쌀:국내산) 봄동된...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 꽃게탕 돈육굴소스볶음 옥수수전 유채나물 ...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 김치콩나물국 미니함박 어묵볶음 물파래무침 ...",909.0,598.0,5.8,9.2,14.4,1.1,3.1,62.8,0.0
7,월,2601,87,204,482,0.0,모닝롤/치즈프레즐 우유/두유/주스 계란후라이 잣죽/쌀밥 (쌀:국내산) 민물새우...,쌀밥/잡곡밥 (쌀:국내산) 시금치국 닭감자조림 (닭고기:국내산) 연두부*양념장 ...,"쌀밥/잡곡밥 (쌀:국내산) 홍합미역국 등갈비김치찜 (돼지고기,김치:국내산) 임연수...",1268.0,672.0,-4.4,-1.5,2.3,1.8,4.7,13.8768,0.0
8,화,2601,72,236,526,0.0,모닝롤/마늘빵 우유/두유/주스 계란후라이 단호박죽/쌀밥 (쌀:국내산) 어묵국 ...,쌀밥/잡곡밥 (쌀:국내산) 쇠고기무국 (쇠고기:호주산) 탕수어 (동태:러시아산) 오...,쌀밥/잡곡밥 (쌀:국내산) 된장찌개 쇠불고기 (쇠고기:호주산) 해파리겨자채 봄동...,1014.0,523.0,-6.3,0.2,5.4,1.8,7.3,13.8768,0.0
9,수,2601,78,250,23,0.0,모닝롤/참치샌드 우유/두유/주스 계란후라이 흑임자죽/쌀밥 (쌀:국내산) 북어계...,쌀밥/잡곡밥 (쌀:국내산) 냉이된장국 쇠고기장조림 (쇠고기:호주산) 통도라지구이 ...,볶음밥*자장소스 (쌀:국내산) 맑은국 새우또띠아 쨔샤이무침 요플레 포기김치 ...,916.0,588.0,-3.4,1.7,8.1,1.1,3.4,13.8768,0.0


In [72]:
import re
def reg_modi(s):
    s = re.sub(pattern = r'\[[\S]*\]', repl = " ", string = s)
    s = re.sub(pattern = r'\([\S]*\)', repl = " ", string = s)
    s = re.sub(pattern = r'\s\s+', repl = " ", string = s)
    s = re.sub(pattern = r'\*', repl = " ", string = s)
    s = re.sub(pattern = r' $', repl = "", string = s)
    if len(s) < 15 or "개발의" in s or "가정의" in s: # issue
        return "메뉴없음"
    return s

for menu in ["조식메뉴", "중식메뉴", "석식메뉴"]:
    train_merge[menu] = train_merge[menu].apply(reg_modi)
    test_merge[menu] = test_merge[menu].apply(reg_modi)
train_merge.head()

Unnamed: 0,요일,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,조식메뉴,중식메뉴,석식메뉴,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),휴일전후날여부
0,월,2601,50,150,238,0.0,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 된장찌개 쥐어채무침 포기김치,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,-4.1,-0.6,5.0,1.3,4.3,13.8768,0.0
1,화,2601,50,173,319,0.0,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 호박젓국찌개 시래기조림 포기김치,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,-9.1,-2.3,6.2,0.8,2.5,13.8768,0.0
2,수,2601,56,180,111,0.0,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 콩나물국 느타리호박볶음 포기김치,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,-9.3,-1.7,7.7,0.6,2.2,13.8768,0.0
3,목,2601,104,220,355,0.0,모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 근대국 멸치볶음 포기김치,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,-7.7,-0.2,7.7,0.7,3.5,13.8768,0.0
4,금,2601,278,181,34,0.0,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 재첩국 방풍나물 포기김치,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,-6.5,1.3,9.6,1.0,2.9,13.8768,0.0


In [91]:
def split_str(s):
    s = s.split(" ")
    return s

menus = ["조식메뉴", "중식메뉴", "석식메뉴"]
foods = []
for menu in menus:
    foods += train_merge[menu].apply(split_str).to_list()
    
foods = [list(set(f)) for f in foods]
foods

AttributeError: 'list' object has no attribute 'split'

In [90]:
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
    
TRAIN_W2V = True
model = 0
try:
    model = Word2Vec.load('food_embedding.model')
    print("Model loaded")
except:
    if TRAIN_W2V:
        print("Training w2v")
        model = Word2Vec(sentences= foods, size = 300, window = 4, min_count = 0, workers = 4)
        model.save('food_embedding.model')
    else:
        print("Model loading failed. Do not train.")

Model loaded


In [75]:
# w2v demo
model.wv.most_similar("된장찌개")

[('쌀밥/잡곡밥', 0.9999106526374817),
 ('포기김치', 0.999906063079834),
 ('쌀밥/흑미밥/찰현미밥', 0.9999012351036072),
 ('양상추샐러드', 0.9998995661735535),
 ('흑미밥', 0.9998968243598938),
 ('알타리김치', 0.999894917011261),
 ('배추겉절이', 0.9998926520347595),
 ('깍두기', 0.9998902082443237),
 ('열무김치', 0.9998860955238342),
 ('석박지', 0.9998859763145447)]

In [76]:
train_merge["본사실근무자수"] = train_merge["본사정원수"] - train_merge["본사휴가자수"] - train_merge["본사출장자수"]/2 - train_merge["현본사소속재택근무자수"]
test_merge["본사실근무자수"] = test_merge["본사정원수"] - test_merge["본사휴가자수"] - test_merge["본사출장자수"]/2 - test_merge["현본사소속재택근무자수"]

del_features = ["본사정원수", "본사휴가자수", "본사출장자수", "현본사소속재택근무자수"]
train_merge.drop(del_features, inplace = True, axis = 1)
test_merge.drop(del_features, inplace = True, axis = 1)

In [77]:
train_merge = pd.get_dummies(train_merge, columns = ["요일"], prefix = '요일')
test_merge = pd.get_dummies(test_merge, columns = ["요일"], prefix = '요일')
train_merge.head()

Unnamed: 0,본사시간외근무명령서승인건수,조식메뉴,중식메뉴,석식메뉴,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),휴일전후날여부,본사실근무자수,요일_금,요일_목,요일_수,요일_월,요일_화
0,238,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 된장찌개 쥐어채무침 포기김치,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,-4.1,-0.6,5.0,1.3,4.3,13.8768,0.0,2476.0,0,0,0,1,0
1,319,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 호박젓국찌개 시래기조림 포기김치,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,-9.1,-2.3,6.2,0.8,2.5,13.8768,0.0,2464.5,0,0,0,0,1
2,111,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 콩나물국 느타리호박볶음 포기김치,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,-9.3,-1.7,7.7,0.6,2.2,13.8768,0.0,2455.0,0,0,1,0,0
3,355,모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 근대국 멸치볶음 포기김치,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,-7.7,-0.2,7.7,0.7,3.5,13.8768,0.0,2387.0,0,1,0,0,0
4,34,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 재첩국 방풍나물 포기김치,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,-6.5,1.3,9.6,1.0,2.9,13.8768,0.0,2232.5,1,0,0,0,0


In [78]:
train_merge = pd.get_dummies(train_merge, columns = ["휴일전후날여부"], prefix = '휴일전후날여부')
test_merge = pd.get_dummies(test_merge, columns = ["휴일전후날여부"], prefix = '휴일전후날여부')

In [79]:
from sklearn.preprocessing import RobustScaler

scaled = ["본사시간외근무명령서승인건수", 
          "최저기온(°C)", "평균기온(°C)", "최고기온(°C)", "평균 풍속(m/s)", 
          "최대 풍속(m/s)", "일강수량(mm)", "본사실근무자수"]
train_merge[scaled] = RobustScaler().fit_transform(train_merge[scaled].values)
test_merge[scaled] = RobustScaler().fit_transform(test_merge[scaled].values)

In [80]:
train_merge

Unnamed: 0,본사시간외근무명령서승인건수,조식메뉴,중식메뉴,석식메뉴,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),본사실근무자수,요일_금,요일_목,요일_수,요일_월,요일_화,휴일전후날여부_0.0,휴일전후날여부_1.0
0,-0.135045,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 된장찌개 쥐어채무침 포기김치,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,-0.629879,-0.891975,-1.029605,0.500000,0.857143,0.230311,0.011315,0,0,0,1,0,1,0
1,0.045759,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 호박젓국찌개 시래기조림 포기김치,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,-0.899058,-0.996914,-0.950658,-0.333333,-0.428571,0.230311,-0.053748,0,0,0,0,1,1,0
2,-0.418527,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 콩나물국 느타리호박볶음 포기김치,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,-0.909825,-0.959877,-0.851974,-0.666667,-0.642857,0.230311,-0.107496,0,0,1,0,0,1,0
3,0.126116,모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 근대국 멸치볶음 포기김치,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,-0.823688,-0.867284,-0.851974,-0.500000,0.285714,0.230311,-0.492221,0,1,0,0,0,1,0
4,-0.590402,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 재첩국 방풍나물 포기김치,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,-0.759085,-0.774691,-0.726974,0.000000,-0.142857,0.230311,-1.366337,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201,-0.657366,모닝롤/페퍼로니피자 우유/주스 계란후라이/찐계란 크루통크림스프/흑미밥 아귀지리 마늘...,쌀밥/흑미밥/찰현미밥 아욱국 수제함박스테이크 견과류마카로니범벅 생깻잎지 단호박물김치...,김치볶음밥 미니쫄우동 맛살튀김 브로콜리깨소스무침 계란후라이 고들빼기무침 겉절이김치,1093.0,421.0,-0.931359,-0.879630,-0.661184,-0.833333,-0.642857,-0.581028,-0.316832,0,0,1,0,0,1,0
1202,0.364955,모닝롤/생크림단팥빵 우유/주스 계란후라이/찐계란 누룽지탕/흑미밥 떡국 해물땡굴소스볶...,쌀밥/수수밥/찰현미밥 냉이된장국 동파육 봄동전 청경채/버섯숙회 초장 무생채 양상추샐...,흑미밥 쇠고기무국 삼치양념구이 비엔나채소볶음 숙주나물당근무침 포기김치,832.0,353.0,-0.549125,-0.533951,-0.529605,-0.666667,-0.714286,-1.602501,-0.280057,0,1,0,0,0,1,0
1203,-0.664062,모닝롤/BLT샌드위치 우유/주스 계란후라이/찐계란 흑임자죽/흑미밥 바지락살국 두부조...,전주비빔밥 약고추장 계란파국 요거닭 올방개묵무침 파프리카해초무침 포기김치 양상추샐러...,흑미밥 수제비국 수제맛쵸킹탕수육 유부채소겨자냉채 참나물무침 갓김치/겉절이김치,579.0,217.0,-0.134590,-0.422840,-0.713816,-0.833333,-1.285714,-1.677759,-0.978784,1,0,0,0,0,1,0
1204,0.708705,모닝롤/호박고구마오븐구이 우유/주스 계란후라이/찐계란 누룽지탕/흑미밥 감자양파국 분...,쌀밥/흑미밥/찰현미밥 전주식콩나물해장국 돈육간장불고기 깐풍연근 연두부 달래양념장 봄...,흑미밥 열무된장국 장어강정 데리야끼소스 깻잎쌈 생강채 오이선 포기김치,1145.0,502.0,-0.473755,-0.466049,-0.417763,0.000000,0.000000,-0.581028,-0.008487,0,0,0,1,0,1,0


In [52]:
when = ["조식", "중식", "석식"]
sub_menus = ["밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장", 
             "우유/두유/주스/음료/르트/러드/피클/생채/쌈", 
             "무침/자반/잡채/나물/강정/튀김/가스",
             "조림/볶음/구이/고기/김치/두기/박지/절이", 
             "나머지"]

def dish_split_breakfast(s):
    s = s.split()
    global sub_menus
    l = {"조식/" + sm : [] for sm in sub_menus}
    for ss in s:
        chk = False
        for cateogory in l:
            for c in cateogory.split("/"):
                if c in ss[-2:]:
                    l[cateogory].append(ss)
                    chk = True
                    break
            if chk:
                break
        if not chk:
            l["조식/나머지"].append(ss)
    
    for key in l:
        l[key] = ' '.join(l[key])
        
    return l

def dish_split_lunch(s):
    s = s.split()
    global sub_menus
    l = {"중식/" + sm : [] for sm in sub_menus}
    for ss in s:
        chk = False
        for cateogory in l:
            for c in cateogory.split("/"):
                if c in ss[-2:]:
                    l[cateogory].append(ss)
                    chk = True
                    break
            if chk:
                break
        if not chk:
            l["중식/나머지"].append(ss)
    
    for key in l:
        l[key] = ' '.join(l[key])
    return l

def dish_split_dinner(s):
    s = s.split()
    global sub_menus
    l = {"석식/" + sm : [] for sm in sub_menus}
    for ss in s:
        chk = False
        for cateogory in l:
            for c in cateogory.split("/"):
                if c in ss[-2:]:
                    l[cateogory].append(ss)
                    chk = True
                    break
            if chk:
                break
        if not chk:
            l["석식/나머지"].append(ss)
    
    for key in l:
        l[key] = ' '.join(l[key])
    return l

train_merge["조식메뉴"] = train_merge["조식메뉴"].apply(dish_split_breakfast)
test_merge["조식메뉴"] = test_merge["조식메뉴"].apply(dish_split_breakfast)

In [53]:
train_merge["중식메뉴"] = train_merge["중식메뉴"].apply(dish_split_lunch)
test_merge["중식메뉴"] = test_merge["중식메뉴"].apply(dish_split_lunch)
train_merge["석식메뉴"] = train_merge["석식메뉴"].apply(dish_split_dinner)
test_merge["석식메뉴"] = test_merge["석식메뉴"].apply(dish_split_dinner)

In [81]:
train_merge.head()

Unnamed: 0,본사시간외근무명령서승인건수,조식메뉴,중식메뉴,석식메뉴,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),본사실근무자수,요일_금,요일_목,요일_수,요일_월,요일_화,휴일전후날여부_0.0,휴일전후날여부_1.0
0,-0.135045,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 된장찌개 쥐어채무침 포기김치,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,-0.629879,-0.891975,-1.029605,0.5,0.857143,0.230311,0.011315,0,0,0,1,0,1,0
1,0.045759,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 호박젓국찌개 시래기조림 포기김치,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,-0.899058,-0.996914,-0.950658,-0.333333,-0.428571,0.230311,-0.053748,0,0,0,0,1,1,0
2,-0.418527,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 콩나물국 느타리호박볶음 포기김치,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,-0.909825,-0.959877,-0.851974,-0.666667,-0.642857,0.230311,-0.107496,0,0,1,0,0,1,0
3,0.126116,모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 근대국 멸치볶음 포기김치,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,-0.823688,-0.867284,-0.851974,-0.5,0.285714,0.230311,-0.492221,0,1,0,0,0,1,0
4,-0.590402,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 재첩국 방풍나물 포기김치,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,-0.759085,-0.774691,-0.726974,0.0,-0.142857,0.230311,-1.366337,1,0,0,0,0,1,0


In [57]:
# menus = ["조식메뉴", "중식메뉴", "석식메뉴"]
# # Convert the column of stringified dicts to dicts
# # skip this line, if the column contains dicts
# for menu in menus:
#     train_merge = pd.concat([train_merge, train_merge[menu].apply(pd.Series)], axis = 1).drop(menu, axis = 1)
#     test_merge = pd.concat([test_merge, test_merge[menu].apply(pd.Series)], axis = 1).drop(menu, axis = 1)

KeyError: '조식메뉴'

In [82]:
train_merge

Unnamed: 0,본사시간외근무명령서승인건수,조식메뉴,중식메뉴,석식메뉴,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),본사실근무자수,요일_금,요일_목,요일_수,요일_월,요일_화,휴일전후날여부_0.0,휴일전후날여부_1.0
0,-0.135045,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 된장찌개 쥐어채무침 포기김치,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,-0.629879,-0.891975,-1.029605,0.500000,0.857143,0.230311,0.011315,0,0,0,1,0,1,0
1,0.045759,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 호박젓국찌개 시래기조림 포기김치,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,-0.899058,-0.996914,-0.950658,-0.333333,-0.428571,0.230311,-0.053748,0,0,0,0,1,1,0
2,-0.418527,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 콩나물국 느타리호박볶음 포기김치,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,-0.909825,-0.959877,-0.851974,-0.666667,-0.642857,0.230311,-0.107496,0,0,1,0,0,1,0
3,0.126116,모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 근대국 멸치볶음 포기김치,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,-0.823688,-0.867284,-0.851974,-0.500000,0.285714,0.230311,-0.492221,0,1,0,0,0,1,0
4,-0.590402,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 재첩국 방풍나물 포기김치,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,-0.759085,-0.774691,-0.726974,0.000000,-0.142857,0.230311,-1.366337,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201,-0.657366,모닝롤/페퍼로니피자 우유/주스 계란후라이/찐계란 크루통크림스프/흑미밥 아귀지리 마늘...,쌀밥/흑미밥/찰현미밥 아욱국 수제함박스테이크 견과류마카로니범벅 생깻잎지 단호박물김치...,김치볶음밥 미니쫄우동 맛살튀김 브로콜리깨소스무침 계란후라이 고들빼기무침 겉절이김치,1093.0,421.0,-0.931359,-0.879630,-0.661184,-0.833333,-0.642857,-0.581028,-0.316832,0,0,1,0,0,1,0
1202,0.364955,모닝롤/생크림단팥빵 우유/주스 계란후라이/찐계란 누룽지탕/흑미밥 떡국 해물땡굴소스볶...,쌀밥/수수밥/찰현미밥 냉이된장국 동파육 봄동전 청경채/버섯숙회 초장 무생채 양상추샐...,흑미밥 쇠고기무국 삼치양념구이 비엔나채소볶음 숙주나물당근무침 포기김치,832.0,353.0,-0.549125,-0.533951,-0.529605,-0.666667,-0.714286,-1.602501,-0.280057,0,1,0,0,0,1,0
1203,-0.664062,모닝롤/BLT샌드위치 우유/주스 계란후라이/찐계란 흑임자죽/흑미밥 바지락살국 두부조...,전주비빔밥 약고추장 계란파국 요거닭 올방개묵무침 파프리카해초무침 포기김치 양상추샐러...,흑미밥 수제비국 수제맛쵸킹탕수육 유부채소겨자냉채 참나물무침 갓김치/겉절이김치,579.0,217.0,-0.134590,-0.422840,-0.713816,-0.833333,-1.285714,-1.677759,-0.978784,1,0,0,0,0,1,0
1204,0.708705,모닝롤/호박고구마오븐구이 우유/주스 계란후라이/찐계란 누룽지탕/흑미밥 감자양파국 분...,쌀밥/흑미밥/찰현미밥 전주식콩나물해장국 돈육간장불고기 깐풍연근 연두부 달래양념장 봄...,흑미밥 열무된장국 장어강정 데리야끼소스 깻잎쌈 생강채 오이선 포기김치,1145.0,502.0,-0.473755,-0.466049,-0.417763,0.000000,0.000000,-0.581028,-0.008487,0,0,0,1,0,1,0


In [89]:
def embedding(s):
    return [model.wv[ss] for ss in s.split()]

when = ["조식", "중식", "석식"]
sub_menus = ["밥/빵/죽/국수/닝롤/게티/이스/우동/찌개/탕/국/스프/계장/개장", 
             "우유/두유/주스/음료/르트/러드/피클/생채/쌈", 
             "무침/자반/잡채/나물/강정/튀김/가스",
             "조림/볶음/구이/고기/김치/두기/박지/절이", 
             "나머지"]

# for w in when:
#     for sm in sub_menus:
#         train_merge[w + "/" + sm] = train_merge[w + "/" + sm].apply(embedding)
#         test_merge[w + "/" + sm] = test_merge[w + "/" + sm].apply(embedding)
        
for menu in menus:
    train_merge[menu] = train_merge[menu].apply(embedding)
    test_merge[menu] = test_merge[menu].apply(embedding)

KeyError: "word '모닝롤/연유버터베이글' not in vocabulary"

In [85]:
train_merge

Unnamed: 0,본사시간외근무명령서승인건수,조식메뉴,중식메뉴,석식메뉴,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),본사실근무자수,요일_금,요일_목,요일_수,요일_월,요일_화,휴일전후날여부_0.0,휴일전후날여부_1.0
0,-0.135045,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 된장찌개 쥐어채무침 포기김치,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,-0.629879,-0.891975,-1.029605,0.500000,0.857143,0.230311,0.011315,0,0,0,1,0,1,0
1,0.045759,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 호박젓국찌개 시래기조림 포기김치,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,-0.899058,-0.996914,-0.950658,-0.333333,-0.428571,0.230311,-0.053748,0,0,0,0,1,1,0
2,-0.418527,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 콩나물국 느타리호박볶음 포기김치,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,-0.909825,-0.959877,-0.851974,-0.666667,-0.642857,0.230311,-0.107496,0,0,1,0,0,1,0
3,0.126116,모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 근대국 멸치볶음 포기김치,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,-0.823688,-0.867284,-0.851974,-0.500000,0.285714,0.230311,-0.492221,0,1,0,0,0,1,0
4,-0.590402,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 재첩국 방풍나물 포기김치,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,-0.759085,-0.774691,-0.726974,0.000000,-0.142857,0.230311,-1.366337,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201,-0.657366,모닝롤/페퍼로니피자 우유/주스 계란후라이/찐계란 크루통크림스프/흑미밥 아귀지리 마늘...,쌀밥/흑미밥/찰현미밥 아욱국 수제함박스테이크 견과류마카로니범벅 생깻잎지 단호박물김치...,김치볶음밥 미니쫄우동 맛살튀김 브로콜리깨소스무침 계란후라이 고들빼기무침 겉절이김치,1093.0,421.0,-0.931359,-0.879630,-0.661184,-0.833333,-0.642857,-0.581028,-0.316832,0,0,1,0,0,1,0
1202,0.364955,모닝롤/생크림단팥빵 우유/주스 계란후라이/찐계란 누룽지탕/흑미밥 떡국 해물땡굴소스볶...,쌀밥/수수밥/찰현미밥 냉이된장국 동파육 봄동전 청경채/버섯숙회 초장 무생채 양상추샐...,흑미밥 쇠고기무국 삼치양념구이 비엔나채소볶음 숙주나물당근무침 포기김치,832.0,353.0,-0.549125,-0.533951,-0.529605,-0.666667,-0.714286,-1.602501,-0.280057,0,1,0,0,0,1,0
1203,-0.664062,모닝롤/BLT샌드위치 우유/주스 계란후라이/찐계란 흑임자죽/흑미밥 바지락살국 두부조...,전주비빔밥 약고추장 계란파국 요거닭 올방개묵무침 파프리카해초무침 포기김치 양상추샐러...,흑미밥 수제비국 수제맛쵸킹탕수육 유부채소겨자냉채 참나물무침 갓김치/겉절이김치,579.0,217.0,-0.134590,-0.422840,-0.713816,-0.833333,-1.285714,-1.677759,-0.978784,1,0,0,0,0,1,0
1204,0.708705,모닝롤/호박고구마오븐구이 우유/주스 계란후라이/찐계란 누룽지탕/흑미밥 감자양파국 분...,쌀밥/흑미밥/찰현미밥 전주식콩나물해장국 돈육간장불고기 깐풍연근 연두부 달래양념장 봄...,흑미밥 열무된장국 장어강정 데리야끼소스 깻잎쌈 생강채 오이선 포기김치,1145.0,502.0,-0.473755,-0.466049,-0.417763,0.000000,0.000000,-0.581028,-0.008487,0,0,0,1,0,1,0


In [246]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# std scaling embedded vecs

# scaled_menus_train = {}
# scaled_menus_test = {}

# menus = ["조식메뉴", "중식메뉴", "석식메뉴"]

# for menu in menus:
#     scaled_menus_train[menu] = StandardScaler().fit_transform(train_merge[menu].to_list())
#     scaled_menus_test[menu] = StandardScaler().fit_transform(test_merge[menu].to_list())
#     for idx, val in enumerate(scaled_menus_train[menu]):
#         train_merge[menu][idx] = val
#     for idx, val in enumerate(scaled_menus_test[menu]):
#         test_merge[menu][idx] = val

In [247]:
# seperate each vecs to new cols

for w in when:
    for sm in sub_menus:
        tmp_col = w + "/" + sm
        tmp = [tmp_col + str(i) for i in range(300)]
        train_merge[tmp] = pd.DataFrame(train_merge[tmp_col].to_list(), columns = tmp)
        test_merge[tmp] = pd.DataFrame(test_merge[tmp_col].to_list(), columns = tmp)
        train_merge.drop([tmp_col], inplace = True, axis = 1)
        test_merge.drop([tmp_col], inplace = True, axis = 1)

In [248]:
from sklearn.decomposition import PCA

# PCA 300 -> 20
pca_target = 20
pca = PCA(n_components = pca_target)
for w in when:
    for sm in sub_menus:
        tmp_col = w + "/" + sm
        tmp = [tmp_col + str(i) for i in range(300)]
        target = [tmp_col + "_pca" + str(i) for i in range(pca_target)]
        train_merge[target] = pca.fit_transform(train_merge[tmp])
        test_merge[target] = pca.fit_transform(test_merge[tmp])
        

In [249]:
train_merge.describe()

Unnamed: 0,본사시간외근무명령서승인건수,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),본사실근무자수,...,석식/나머지_pca10,석식/나머지_pca11,석식/나머지_pca12,석식/나머지_pca13,석식/나머지_pca14,석식/나머지_pca15,석식/나머지_pca16,석식/나머지_pca17,석식/나머지_pca18,석식/나머지_pca19
count,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,...,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0
mean,-0.05485,889.913765,461.606965,0.023298,-0.01024929,-0.0344931,0.120708,0.151623,-0.014525,0.06177,...,-3.712251e-16,-3.057164e-15,8.310286e-15,3.255492e-15,1.688107e-15,-5.440409e-15,-1.677049e-14,-3.252696e-15,-1.223225e-14,1.249053e-15
std,0.549611,209.927051,139.240321,0.555265,0.575525,0.5849449,0.896457,0.836665,1.793603,1.03801,...,0.3017071,0.2957572,0.2924974,0.2802982,0.2748762,0.2609353,0.2521816,0.2409663,0.2343352,0.2312654
min,-0.666295,296.0,0.0,-1.238223,-1.416667,-1.483553,-1.333333,-1.357143,-1.85838,-6.118812,...,-0.920959,-0.8667396,-1.175237,-1.012571,-0.9252944,-1.252896,-1.036227,-1.170675,-1.23757,-0.8173613
25%,-0.657366,758.0,406.0,-0.462988,-0.5030864,-0.5296053,-0.5,-0.428571,-0.769689,-0.372702,...,-0.1438278,-0.1658436,-0.1609902,-0.1233358,-0.1135772,-0.1226179,-0.1093329,-0.1073054,-0.1023609,-0.09758221
50%,0.0,879.0,483.0,0.0,-5.486063e-17,1.16877e-16,0.0,0.0,0.0,0.0,...,2.292841e-06,-0.0002211939,-2.910799e-05,-0.000176293,-9.743002e-05,-0.000205652,0.0001106989,-5.202409e-05,-0.0001390889,2.808482e-05
75%,0.342634,1031.75,545.0,0.537012,0.4969136,0.4703947,0.5,0.571429,0.230311,0.627298,...,0.1169064,0.1082202,0.114371,0.09391193,0.1215069,0.1138734,0.119589,0.08115436,0.1121,0.08701845
max,1.664062,1459.0,905.0,1.033647,1.033951,1.134868,5.666667,4.0,19.153752,3.281471,...,1.119297,1.760377,2.216336,1.422239,1.203303,1.253968,1.40069,1.297416,1.371331,1.806431


In [250]:
# del cols before PCA

for w in when:
    for sm in sub_menus:
        tmp_col = w + "/" + sm
        tmp = [tmp_col + str(i) for i in range(300)]
        train_merge.drop(tmp, inplace = True, axis = 1)
        test_merge.drop(tmp, inplace = True, axis = 1)
train_merge.head()

Unnamed: 0,본사시간외근무명령서승인건수,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),본사실근무자수,...,석식/나머지_pca10,석식/나머지_pca11,석식/나머지_pca12,석식/나머지_pca13,석식/나머지_pca14,석식/나머지_pca15,석식/나머지_pca16,석식/나머지_pca17,석식/나머지_pca18,석식/나머지_pca19
0,-0.135045,1039.0,331.0,-0.629879,-0.891975,-1.029605,0.5,0.857143,0.230311,0.011315,...,2e-06,-0.000221,-2.9e-05,-0.000176,-9.7e-05,-0.000206,0.000111,-5.2e-05,-0.000139,2.8e-05
1,0.045759,867.0,560.0,-0.899058,-0.996914,-0.950658,-0.333333,-0.428571,0.230311,-0.053748,...,-0.132137,-0.120377,-0.128438,0.117918,0.284232,-0.323262,-0.082399,0.312139,-0.231465,-0.125826
2,-0.418527,1017.0,573.0,-0.909825,-0.959877,-0.851974,-0.666667,-0.642857,0.230311,-0.107496,...,-0.049206,0.014951,-0.470244,-0.173313,0.65719,-0.226722,0.213528,-0.028952,0.168393,0.139942
3,0.126116,978.0,525.0,-0.823688,-0.867284,-0.851974,-0.5,0.285714,0.230311,-0.492221,...,-0.016796,0.236707,-0.131426,-0.068332,0.224098,0.24656,0.238436,0.009001,-0.121933,-0.194368
4,-0.590402,925.0,330.0,-0.759085,-0.774691,-0.726974,0.0,-0.142857,0.230311,-1.366337,...,-0.920959,1.194244,-0.209457,-0.493682,-0.849843,0.313542,0.264411,-0.056793,0.675299,-0.036987


In [251]:
import pandas_profiling
from pandas_profiling import ProfileReport

# profile = train_merge[["본사시간외근무명령서승인건수", "중식계", "석식계", "최저기온(°C)", "평균기온(°C)", 
#                               "최고기온(°C)", "평균 풍속(m/s)", "최대 풍속(m/s)", "일강수량(mm)", "휴일전후날여부_0.0", "휴일전후날여부_1.0"]].profile_report()
# profile

In [252]:
train_merge.head()

Unnamed: 0,본사시간외근무명령서승인건수,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),본사실근무자수,...,석식/나머지_pca10,석식/나머지_pca11,석식/나머지_pca12,석식/나머지_pca13,석식/나머지_pca14,석식/나머지_pca15,석식/나머지_pca16,석식/나머지_pca17,석식/나머지_pca18,석식/나머지_pca19
0,-0.135045,1039.0,331.0,-0.629879,-0.891975,-1.029605,0.5,0.857143,0.230311,0.011315,...,2e-06,-0.000221,-2.9e-05,-0.000176,-9.7e-05,-0.000206,0.000111,-5.2e-05,-0.000139,2.8e-05
1,0.045759,867.0,560.0,-0.899058,-0.996914,-0.950658,-0.333333,-0.428571,0.230311,-0.053748,...,-0.132137,-0.120377,-0.128438,0.117918,0.284232,-0.323262,-0.082399,0.312139,-0.231465,-0.125826
2,-0.418527,1017.0,573.0,-0.909825,-0.959877,-0.851974,-0.666667,-0.642857,0.230311,-0.107496,...,-0.049206,0.014951,-0.470244,-0.173313,0.65719,-0.226722,0.213528,-0.028952,0.168393,0.139942
3,0.126116,978.0,525.0,-0.823688,-0.867284,-0.851974,-0.5,0.285714,0.230311,-0.492221,...,-0.016796,0.236707,-0.131426,-0.068332,0.224098,0.24656,0.238436,0.009001,-0.121933,-0.194368
4,-0.590402,925.0,330.0,-0.759085,-0.774691,-0.726974,0.0,-0.142857,0.230311,-1.366337,...,-0.920959,1.194244,-0.209457,-0.493682,-0.849843,0.313542,0.264411,-0.056793,0.675299,-0.036987


In [172]:
from pycaret.regression import *

when = ["조식", "중식", "석식"]
sub_menus = ["밥/빵/죽/국수/닝롤/게티/이스/우동", 
             "찌개/탕/국/스프/계장/개장", 
             "우유/두유/주스/음료", 
             "무침/자반/잡채/나물",
             "강정/튀김/가스",
             "조림/볶음/구이/고기", 
             "김치/깍두기/박지/절이", 
             "러드/피클/생채", 
             "나머지"]

exp_lunch = setup(train_merge, target = '중식계',
                 imputation_type='iterative',
                  ignore_features = ["일자"],
                 remove_perfect_collinearity = False,
                 )

TypeError: setup() got an unexpected keyword argument 'imputation_type'

In [None]:
best_4_lunch = compare_models(sort = 'MAE', n_select = 4)

In [None]:
blended_lunch = blend_models(estimator_list = best_4_lunch, fold = 5)

In [None]:
prediction_lunch = predict_model(blended_lunch)

In [None]:
final_lunch = finalize_model(blended_lunch)

In [None]:
final_prediction_lunch = predict_model(final_lunch, data = test_merge)

In [None]:
submission["중식계"] = final_prediction_lunch["Label"]

In [None]:
from pycaret.regression import *
exp_dinner = setup(train_merge, target = '석식계', 
                   ignore_features = ['중식계'])

In [None]:
best_4_dinner = compare_models(sort = 'MAE', n_select = 4)

In [None]:
blended_dinner = blend_models(estimator_list = best_4_dinner, fold = 5)

In [None]:
prediction_dinner = predict_model(blended_dinner)

In [None]:
final_dinner = finalize_model(blended_dinner)

In [None]:
final_prediction_dinner = predict_model(final_dinner, data = test_merge)

In [None]:
submission["석식계"] = final_prediction_dinner["Label"]

In [None]:
submission.head(20)

In [None]:
submission.to_csv('submission_성동현.csv', index = False, encoding = "UTF-8")

In [None]:
test.head(20)

In [None]:
list(train_merge.columns)

In [253]:
from sklearn.metrics import mean_absolute_error
from lightgbm import LGBMRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

warnings.simplefilter(action = 'ignore')

In [256]:
train_lunch = train_merge['중식계']
train_dinner = train_merge['석식계']

lunch_features = [s for s in train_merge.columns if "중식" in s and "중식계" not in s]
dinner_features = [s for s in train_merge.columns if "석식" in s and "석식계" not in s]

train_lunch_features = train_merge.drop(['중식계', '석식계'] + dinner_features, axis=1)
train_dinner_features = train_merge.drop(['중식계', '석식계'] + lunch_features, axis=1)

test_lunch_features = test_merge.drop(dinner_features, axis=1)
test_dinner_features = test_merge.drop(lunch_features, axis=1)

In [257]:
def print_best_params(model, params, train, train_features):
    grid_model = GridSearchCV(
        model, 
        param_grid = params,
        verbose = 4,
        cv = 5,
        scoring = 'neg_mean_absolute_error')

    grid_model.fit(train_features, train)
    score = -1 * grid_model.best_score_ # mae
    print(
        '{0} 5 CV 시 최적 평균 MAE 값 {1} \n최적 alpha:{2}'.format(model.__class__.__name__, np.round(score, 4), grid_model.best_params_))
    return grid_model.best_estimator_

In [258]:
# Simple LGBM Regressor w/o tuning

lgb_params_lunch = {
    'metric' : ['mae'],
    'objective':['mae'],
    'max_depth' : [10 + i for i in range(0, 12, 4)],
    'learning_rate' : [0.015, 0.020],
    'n_estimators':[400 + i for i in range(0, 500, 100)],
    'max_bin' : [400 + i for i in range(0, 300, 100)],
}


In [259]:
lgb_model_lunch = LGBMRegressor()
lgb_estimator_lunch = print_best_params(lgb_model_lunch, lgb_params_lunch, train_lunch, train_lunch_features)

Fitting 5 folds for each of 90 candidates, totalling 450 fits
[CV 1/5] END learning_rate=0.015, max_bin=400, max_depth=10, metric=mae, n_estimators=400, objective=mae;, score=-98.342 total time=   1.5s
[CV 2/5] END learning_rate=0.015, max_bin=400, max_depth=10, metric=mae, n_estimators=400, objective=mae;, score=-84.584 total time=   1.6s
[CV 3/5] END learning_rate=0.015, max_bin=400, max_depth=10, metric=mae, n_estimators=400, objective=mae;, score=-95.005 total time=   1.5s
[CV 4/5] END learning_rate=0.015, max_bin=400, max_depth=10, metric=mae, n_estimators=400, objective=mae;, score=-106.421 total time=   1.3s
[CV 5/5] END learning_rate=0.015, max_bin=400, max_depth=10, metric=mae, n_estimators=400, objective=mae;, score=-115.409 total time=   1.2s
[CV 1/5] END learning_rate=0.015, max_bin=400, max_depth=10, metric=mae, n_estimators=500, objective=mae;, score=-98.883 total time=   1.6s
[CV 2/5] END learning_rate=0.015, max_bin=400, max_depth=10, metric=mae, n_estimators=500, objec

[CV 4/5] END learning_rate=0.015, max_bin=400, max_depth=18, metric=mae, n_estimators=500, objective=mae;, score=-105.358 total time=   1.9s
[CV 5/5] END learning_rate=0.015, max_bin=400, max_depth=18, metric=mae, n_estimators=500, objective=mae;, score=-115.814 total time=   1.7s
[CV 1/5] END learning_rate=0.015, max_bin=400, max_depth=18, metric=mae, n_estimators=600, objective=mae;, score=-98.173 total time=   2.2s
[CV 2/5] END learning_rate=0.015, max_bin=400, max_depth=18, metric=mae, n_estimators=600, objective=mae;, score=-84.729 total time=   2.4s
[CV 3/5] END learning_rate=0.015, max_bin=400, max_depth=18, metric=mae, n_estimators=600, objective=mae;, score=-94.351 total time=   2.4s
[CV 4/5] END learning_rate=0.015, max_bin=400, max_depth=18, metric=mae, n_estimators=600, objective=mae;, score=-104.551 total time=   2.3s
[CV 5/5] END learning_rate=0.015, max_bin=400, max_depth=18, metric=mae, n_estimators=600, objective=mae;, score=-115.094 total time=   2.0s
[CV 1/5] END lea

[CV 3/5] END learning_rate=0.015, max_bin=500, max_depth=14, metric=mae, n_estimators=700, objective=mae;, score=-94.363 total time=   2.5s
[CV 4/5] END learning_rate=0.015, max_bin=500, max_depth=14, metric=mae, n_estimators=700, objective=mae;, score=-104.169 total time=   2.5s
[CV 5/5] END learning_rate=0.015, max_bin=500, max_depth=14, metric=mae, n_estimators=700, objective=mae;, score=-115.210 total time=   2.3s
[CV 1/5] END learning_rate=0.015, max_bin=500, max_depth=14, metric=mae, n_estimators=800, objective=mae;, score=-98.164 total time=   2.9s
[CV 2/5] END learning_rate=0.015, max_bin=500, max_depth=14, metric=mae, n_estimators=800, objective=mae;, score=-84.324 total time=   3.0s
[CV 3/5] END learning_rate=0.015, max_bin=500, max_depth=14, metric=mae, n_estimators=800, objective=mae;, score=-94.570 total time=   3.0s
[CV 4/5] END learning_rate=0.015, max_bin=500, max_depth=14, metric=mae, n_estimators=800, objective=mae;, score=-104.927 total time=   2.8s
[CV 5/5] END lear

[CV 2/5] END learning_rate=0.015, max_bin=600, max_depth=14, metric=mae, n_estimators=400, objective=mae;, score=-84.647 total time=   1.4s
[CV 3/5] END learning_rate=0.015, max_bin=600, max_depth=14, metric=mae, n_estimators=400, objective=mae;, score=-95.262 total time=   1.4s
[CV 4/5] END learning_rate=0.015, max_bin=600, max_depth=14, metric=mae, n_estimators=400, objective=mae;, score=-106.162 total time=   1.4s
[CV 5/5] END learning_rate=0.015, max_bin=600, max_depth=14, metric=mae, n_estimators=400, objective=mae;, score=-116.730 total time=   1.3s
[CV 1/5] END learning_rate=0.015, max_bin=600, max_depth=14, metric=mae, n_estimators=500, objective=mae;, score=-95.145 total time=   1.7s
[CV 2/5] END learning_rate=0.015, max_bin=600, max_depth=14, metric=mae, n_estimators=500, objective=mae;, score=-84.696 total time=   1.7s
[CV 3/5] END learning_rate=0.015, max_bin=600, max_depth=14, metric=mae, n_estimators=500, objective=mae;, score=-94.651 total time=   1.7s
[CV 4/5] END learn

[CV 1/5] END learning_rate=0.02, max_bin=400, max_depth=10, metric=mae, n_estimators=600, objective=mae;, score=-99.855 total time=   2.1s
[CV 2/5] END learning_rate=0.02, max_bin=400, max_depth=10, metric=mae, n_estimators=600, objective=mae;, score=-84.374 total time=   2.1s
[CV 3/5] END learning_rate=0.02, max_bin=400, max_depth=10, metric=mae, n_estimators=600, objective=mae;, score=-94.837 total time=   2.0s
[CV 4/5] END learning_rate=0.02, max_bin=400, max_depth=10, metric=mae, n_estimators=600, objective=mae;, score=-103.847 total time=   2.0s
[CV 5/5] END learning_rate=0.02, max_bin=400, max_depth=10, metric=mae, n_estimators=600, objective=mae;, score=-114.653 total time=   1.9s
[CV 1/5] END learning_rate=0.02, max_bin=400, max_depth=10, metric=mae, n_estimators=700, objective=mae;, score=-101.162 total time=   2.5s
[CV 2/5] END learning_rate=0.02, max_bin=400, max_depth=10, metric=mae, n_estimators=700, objective=mae;, score=-84.320 total time=   2.4s
[CV 3/5] END learning_ra

[CV 5/5] END learning_rate=0.02, max_bin=400, max_depth=18, metric=mae, n_estimators=700, objective=mae;, score=-114.556 total time=   1.7s
[CV 1/5] END learning_rate=0.02, max_bin=400, max_depth=18, metric=mae, n_estimators=800, objective=mae;, score=-99.265 total time=   2.1s
[CV 2/5] END learning_rate=0.02, max_bin=400, max_depth=18, metric=mae, n_estimators=800, objective=mae;, score=-84.523 total time=   2.2s
[CV 3/5] END learning_rate=0.02, max_bin=400, max_depth=18, metric=mae, n_estimators=800, objective=mae;, score=-94.910 total time=   2.3s
[CV 4/5] END learning_rate=0.02, max_bin=400, max_depth=18, metric=mae, n_estimators=800, objective=mae;, score=-105.633 total time=   2.5s
[CV 5/5] END learning_rate=0.02, max_bin=400, max_depth=18, metric=mae, n_estimators=800, objective=mae;, score=-114.433 total time=   2.2s
[CV 1/5] END learning_rate=0.02, max_bin=500, max_depth=10, metric=mae, n_estimators=400, objective=mae;, score=-96.995 total time=   1.0s
[CV 2/5] END learning_ra

[CV 4/5] END learning_rate=0.02, max_bin=500, max_depth=18, metric=mae, n_estimators=400, objective=mae;, score=-105.728 total time=   1.3s
[CV 5/5] END learning_rate=0.02, max_bin=500, max_depth=18, metric=mae, n_estimators=400, objective=mae;, score=-116.853 total time=   1.3s
[CV 1/5] END learning_rate=0.02, max_bin=500, max_depth=18, metric=mae, n_estimators=500, objective=mae;, score=-96.374 total time=   1.8s
[CV 2/5] END learning_rate=0.02, max_bin=500, max_depth=18, metric=mae, n_estimators=500, objective=mae;, score=-84.187 total time=   1.8s
[CV 3/5] END learning_rate=0.02, max_bin=500, max_depth=18, metric=mae, n_estimators=500, objective=mae;, score=-95.022 total time=   1.9s
[CV 4/5] END learning_rate=0.02, max_bin=500, max_depth=18, metric=mae, n_estimators=500, objective=mae;, score=-103.604 total time=   1.8s
[CV 5/5] END learning_rate=0.02, max_bin=500, max_depth=18, metric=mae, n_estimators=500, objective=mae;, score=-116.145 total time=   1.6s
[CV 1/5] END learning_r

[CV 3/5] END learning_rate=0.02, max_bin=600, max_depth=14, metric=mae, n_estimators=600, objective=mae;, score=-95.503 total time=   2.2s
[CV 4/5] END learning_rate=0.02, max_bin=600, max_depth=14, metric=mae, n_estimators=600, objective=mae;, score=-104.951 total time=   2.1s
[CV 5/5] END learning_rate=0.02, max_bin=600, max_depth=14, metric=mae, n_estimators=600, objective=mae;, score=-115.306 total time=   2.1s
[CV 1/5] END learning_rate=0.02, max_bin=600, max_depth=14, metric=mae, n_estimators=700, objective=mae;, score=-96.912 total time=   2.3s
[CV 2/5] END learning_rate=0.02, max_bin=600, max_depth=14, metric=mae, n_estimators=700, objective=mae;, score=-84.608 total time=   2.4s
[CV 3/5] END learning_rate=0.02, max_bin=600, max_depth=14, metric=mae, n_estimators=700, objective=mae;, score=-94.602 total time=   2.5s
[CV 4/5] END learning_rate=0.02, max_bin=600, max_depth=14, metric=mae, n_estimators=700, objective=mae;, score=-104.939 total time=   2.6s
[CV 5/5] END learning_ra

In [260]:
lgb_preds_lunch = lgb_estimator_lunch.predict(test_lunch_features, axis=1)

In [261]:
submission["중식계"] = lgb_preds_lunch

In [262]:
# Simple LGBM Regressor w/o tuning

lgb_params_dinner = {
    'metric' : ['mae'],
    'objective':['mae'],
    'max_depth' : [15 + i for i in range(0, 12, 4)],
    'learning_rate' : [0.015, 0.020, 0.025],
    'n_estimators':[300 + i for i in range(0, 400, 100)],
    'max_bin' : [200 + i for i in range(0, 400, 100)],
}

In [263]:
lgb_model_dinner = LGBMRegressor()
lgb_estimator_dinner = print_best_params(lgb_model_dinner, lgb_params_dinner, train_dinner, train_dinner_features)

Fitting 5 folds for each of 144 candidates, totalling 720 fits
[CV 1/5] END learning_rate=0.01, max_bin=200, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-72.012 total time=   1.0s
[CV 2/5] END learning_rate=0.01, max_bin=200, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-59.439 total time=   0.9s
[CV 3/5] END learning_rate=0.01, max_bin=200, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-58.393 total time=   1.0s
[CV 4/5] END learning_rate=0.01, max_bin=200, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-66.361 total time=   1.0s
[CV 5/5] END learning_rate=0.01, max_bin=200, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-111.452 total time=   0.9s
[CV 1/5] END learning_rate=0.01, max_bin=200, max_depth=15, metric=mae, n_estimators=400, objective=mae;, score=-71.724 total time=   1.4s
[CV 2/5] END learning_rate=0.01, max_bin=200, max_depth=15, metric=mae, n_estimators=400, objective=ma

[CV 5/5] END learning_rate=0.01, max_bin=200, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-112.900 total time=   1.8s
[CV 1/5] END learning_rate=0.01, max_bin=300, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-72.013 total time=   1.1s
[CV 2/5] END learning_rate=0.01, max_bin=300, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-58.310 total time=   1.1s
[CV 3/5] END learning_rate=0.01, max_bin=300, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-57.947 total time=   1.2s
[CV 4/5] END learning_rate=0.01, max_bin=300, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-65.425 total time=   1.1s
[CV 5/5] END learning_rate=0.01, max_bin=300, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-110.692 total time=   1.0s
[CV 1/5] END learning_rate=0.01, max_bin=300, max_depth=15, metric=mae, n_estimators=400, objective=mae;, score=-71.944 total time=   1.5s
[CV 2/5] END learning_rat

[CV 4/5] END learning_rate=0.01, max_bin=300, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-62.263 total time=   2.4s
[CV 5/5] END learning_rate=0.01, max_bin=300, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-109.110 total time=   2.0s
[CV 1/5] END learning_rate=0.01, max_bin=400, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-72.264 total time=   1.0s
[CV 2/5] END learning_rate=0.01, max_bin=400, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-58.786 total time=   1.1s
[CV 3/5] END learning_rate=0.01, max_bin=400, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-57.873 total time=   1.1s
[CV 4/5] END learning_rate=0.01, max_bin=400, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-65.111 total time=   1.2s
[CV 5/5] END learning_rate=0.01, max_bin=400, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-110.352 total time=   1.1s
[CV 1/5] END learning_rat

[CV 3/5] END learning_rate=0.01, max_bin=400, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-55.403 total time=   2.2s
[CV 4/5] END learning_rate=0.01, max_bin=400, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-63.336 total time=   2.3s
[CV 5/5] END learning_rate=0.01, max_bin=400, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-111.095 total time=   2.0s
[CV 1/5] END learning_rate=0.01, max_bin=500, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-71.629 total time=   1.1s
[CV 2/5] END learning_rate=0.01, max_bin=500, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-58.940 total time=   1.0s
[CV 3/5] END learning_rate=0.01, max_bin=500, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-57.712 total time=   1.2s
[CV 4/5] END learning_rate=0.01, max_bin=500, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-64.977 total time=   1.1s
[CV 5/5] END learning_rate

[CV 2/5] END learning_rate=0.01, max_bin=500, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-57.099 total time=   1.7s
[CV 3/5] END learning_rate=0.01, max_bin=500, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-55.286 total time=   1.7s
[CV 4/5] END learning_rate=0.01, max_bin=500, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-62.111 total time=   1.5s
[CV 5/5] END learning_rate=0.01, max_bin=500, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-112.415 total time=   1.3s
[CV 1/5] END learning_rate=0.015, max_bin=200, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-71.921 total time=   0.6s
[CV 2/5] END learning_rate=0.015, max_bin=200, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-56.745 total time=   0.6s
[CV 3/5] END learning_rate=0.015, max_bin=200, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-56.395 total time=   0.7s
[CV 4/5] END learning_r

[CV 1/5] END learning_rate=0.015, max_bin=200, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-72.720 total time=   1.9s
[CV 2/5] END learning_rate=0.015, max_bin=200, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-55.519 total time=   1.9s
[CV 3/5] END learning_rate=0.015, max_bin=200, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-55.032 total time=   2.0s
[CV 4/5] END learning_rate=0.015, max_bin=200, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-62.880 total time=   2.0s
[CV 5/5] END learning_rate=0.015, max_bin=200, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-107.543 total time=   1.8s
[CV 1/5] END learning_rate=0.015, max_bin=300, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-71.696 total time=   1.2s
[CV 2/5] END learning_rate=0.015, max_bin=300, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-57.564 total time=   1.1s
[CV 3/5] END learni

[CV 5/5] END learning_rate=0.015, max_bin=300, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-109.456 total time=   1.7s
[CV 1/5] END learning_rate=0.015, max_bin=300, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-73.427 total time=   2.2s
[CV 2/5] END learning_rate=0.015, max_bin=300, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-55.406 total time=   2.4s
[CV 3/5] END learning_rate=0.015, max_bin=300, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-55.498 total time=   2.4s
[CV 4/5] END learning_rate=0.015, max_bin=300, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-62.282 total time=   2.6s
[CV 5/5] END learning_rate=0.015, max_bin=300, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-110.213 total time=   2.1s
[CV 1/5] END learning_rate=0.015, max_bin=400, max_depth=15, metric=mae, n_estimators=300, objective=mae;, score=-71.413 total time=   1.1s
[CV 2/5] END learn

[CV 4/5] END learning_rate=0.015, max_bin=400, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-63.620 total time=   2.0s
[CV 5/5] END learning_rate=0.015, max_bin=400, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-107.473 total time=   1.6s
[CV 1/5] END learning_rate=0.015, max_bin=400, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-72.142 total time=   2.2s
[CV 2/5] END learning_rate=0.015, max_bin=400, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-55.558 total time=   2.4s
[CV 3/5] END learning_rate=0.015, max_bin=400, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-55.956 total time=   2.4s
[CV 4/5] END learning_rate=0.015, max_bin=400, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-63.256 total time=   2.7s
[CV 5/5] END learning_rate=0.015, max_bin=400, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-106.676 total time=   2.2s
[CV 1/5] END learn

[CV 3/5] END learning_rate=0.015, max_bin=500, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-55.422 total time=   1.8s
[CV 4/5] END learning_rate=0.015, max_bin=500, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-62.893 total time=   1.8s
[CV 5/5] END learning_rate=0.015, max_bin=500, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-110.798 total time=   1.6s
[CV 1/5] END learning_rate=0.015, max_bin=500, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-72.944 total time=   2.2s
[CV 2/5] END learning_rate=0.015, max_bin=500, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-55.743 total time=   2.1s
[CV 3/5] END learning_rate=0.015, max_bin=500, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-55.376 total time=   2.2s
[CV 4/5] END learning_rate=0.015, max_bin=500, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-62.792 total time=   2.2s
[CV 5/5] END learni

[CV 2/5] END learning_rate=0.02, max_bin=200, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-55.731 total time=   1.3s
[CV 3/5] END learning_rate=0.02, max_bin=200, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-55.777 total time=   1.2s
[CV 4/5] END learning_rate=0.02, max_bin=200, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-62.648 total time=   1.3s
[CV 5/5] END learning_rate=0.02, max_bin=200, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-109.394 total time=   1.1s
[CV 1/5] END learning_rate=0.02, max_bin=200, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-72.760 total time=   1.4s
[CV 2/5] END learning_rate=0.02, max_bin=200, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-55.711 total time=   1.4s
[CV 3/5] END learning_rate=0.02, max_bin=200, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-55.706 total time=   1.4s
[CV 4/5] END learning_rate

[CV 1/5] END learning_rate=0.02, max_bin=300, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-74.461 total time=   2.0s
[CV 2/5] END learning_rate=0.02, max_bin=300, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-54.651 total time=   2.1s
[CV 3/5] END learning_rate=0.02, max_bin=300, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-56.643 total time=   2.1s
[CV 4/5] END learning_rate=0.02, max_bin=300, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-62.134 total time=   2.0s
[CV 5/5] END learning_rate=0.02, max_bin=300, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-107.831 total time=   1.7s
[CV 1/5] END learning_rate=0.02, max_bin=300, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-73.677 total time=   2.3s
[CV 2/5] END learning_rate=0.02, max_bin=300, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-54.984 total time=   2.5s
[CV 3/5] END learning_rate

[CV 5/5] END learning_rate=0.02, max_bin=400, max_depth=23, metric=mae, n_estimators=400, objective=mae;, score=-111.104 total time=   1.3s
[CV 1/5] END learning_rate=0.02, max_bin=400, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-74.650 total time=   1.8s
[CV 2/5] END learning_rate=0.02, max_bin=400, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-55.627 total time=   2.0s
[CV 3/5] END learning_rate=0.02, max_bin=400, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-55.021 total time=   2.0s
[CV 4/5] END learning_rate=0.02, max_bin=400, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-63.027 total time=   2.0s
[CV 5/5] END learning_rate=0.02, max_bin=400, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-107.748 total time=   1.7s
[CV 1/5] END learning_rate=0.02, max_bin=400, max_depth=23, metric=mae, n_estimators=600, objective=mae;, score=-73.672 total time=   2.3s
[CV 2/5] END learning_rat

[CV 4/5] END learning_rate=0.02, max_bin=500, max_depth=23, metric=mae, n_estimators=400, objective=mae;, score=-62.893 total time=   1.5s
[CV 5/5] END learning_rate=0.02, max_bin=500, max_depth=23, metric=mae, n_estimators=400, objective=mae;, score=-106.433 total time=   1.3s
[CV 1/5] END learning_rate=0.02, max_bin=500, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-74.333 total time=   1.7s
[CV 2/5] END learning_rate=0.02, max_bin=500, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-55.267 total time=   1.8s
[CV 3/5] END learning_rate=0.02, max_bin=500, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-54.772 total time=   1.9s
[CV 4/5] END learning_rate=0.02, max_bin=500, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-63.350 total time=   1.9s
[CV 5/5] END learning_rate=0.02, max_bin=500, max_depth=23, metric=mae, n_estimators=500, objective=mae;, score=-108.578 total time=   1.5s
[CV 1/5] END learning_rat

In [264]:
lgb_preds_dinner = lgb_estimator_dinner.predict(test_dinner_features, axis=1)

In [265]:
submission["석식계"] = lgb_preds_dinner
submission

Unnamed: 0,일자,중식계,석식계
0,2021-01-27,928.381263,283.134622
1,2021-01-28,853.453838,277.143326
2,2021-01-29,678.667068,207.382407
3,2021-02-01,1196.758527,462.385261
4,2021-02-02,945.413024,515.805604
5,2021-02-03,905.850266,287.259701
6,2021-02-04,909.356555,449.689777
7,2021-02-05,689.190698,122.060203
8,2021-02-08,1241.901178,552.012936
9,2021-02-09,962.706138,456.444371


In [266]:
submission.to_csv('submission_성동현_Word2Vec.csv', index = False, encoding = "UTF-8")