In [178]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

np.random.seed(2)

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
import os
import logging

In [179]:
# load embedding model

from gensim.models import fasttext

model = fasttext.load_facebook_model("wiki.ko/wiki.ko.bin")

In [226]:
import tensorflow as tf
print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))

2.4.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [227]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Thu_Jun_11_22:26:48_Pacific_Daylight_Time_2020
Cuda compilation tools, release 11.0, V11.0.194
Build cuda_11.0_bu.relgpu_drvr445TC445_37.28540450_0


In [228]:
weather = pd.read_csv("data/진주기상정보.csv", encoding = "cp949")
train = pd.read_csv("data/train.csv")
test = pd.read_csv("data/test.csv")

In [229]:
weather.rename(columns = {"일시" : "일자"}, inplace = True)
train.describe()

Unnamed: 0,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식계,석식계
count,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0
mean,2807.815768,157.913693,241.142739,274.117012,43.506224,890.33444,461.772614
std,171.264404,144.190572,43.532298,246.239651,109.9374,209.505057,139.179202
min,2601.0,23.0,41.0,0.0,0.0,296.0,0.0
25%,2645.0,71.0,217.0,4.0,0.0,758.0,406.0
50%,2760.0,105.0,245.0,299.0,0.0,879.0,483.0
75%,2962.0,185.0,272.0,452.0,0.0,1032.0,545.0
max,3305.0,1224.0,378.0,1044.0,533.0,1459.0,905.0


In [230]:
train_merge = pd.merge(train, weather, on = "일자", how = "left")
test_merge = pd.merge(test, weather, on = "일자", how = "left")
train_merge.describe()

Unnamed: 0,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm)
count,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0,1205.0
mean,2807.815768,157.913693,241.142739,274.117012,43.506224,890.33444,461.772614,8.032697,13.680463,20.120913,1.072402,3.312365,12.259833
std,171.264404,144.190572,43.532298,246.239651,109.9374,209.505057,139.179202,10.318334,9.326583,8.893296,0.538097,1.171812,11.916519
min,2601.0,23.0,41.0,0.0,0.0,296.0,0.0,-15.4,-9.1,-1.9,0.2,1.2,0.0
25%,2645.0,71.0,217.0,4.0,0.0,758.0,406.0,-1.0,5.7,12.6,0.7,2.5,7.233019
50%,2760.0,105.0,245.0,299.0,0.0,879.0,483.0,7.6,13.8,20.6,1.0,3.1,12.346667
75%,2962.0,185.0,272.0,452.0,0.0,1032.0,545.0,17.6,21.9,27.8,1.3,3.9,13.8768
max,3305.0,1224.0,378.0,1044.0,533.0,1459.0,905.0,26.8,30.6,37.9,4.4,8.7,139.6


In [231]:
train_merge.head()

Unnamed: 0,일자,요일,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,조식메뉴,중식메뉴,석식메뉴,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm)
0,2016-02-01,월,2601,50,150,238,0.0,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 (쌀:국내산) 된장찌개 쥐...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 오징어찌개 쇠불고기 (쇠고기:호주산) 계란찜 ...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 육개장 자반고등어구이 두부조림 건파래무침 ...",1039.0,331.0,-4.1,-0.6,5.0,1.3,4.3,13.8768
1,2016-02-02,화,2601,50,173,319,0.0,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 (쌀:국내산) 호박젓국찌...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무...","콩나물밥*양념장 (쌀,현미흑미:국내산) 어묵국 유산슬 (쇠고기:호주산) 아삭고추무...",867.0,560.0,-9.1,-2.3,6.2,0.8,2.5,13.8768
2,2016-02-03,수,2601,56,180,111,0.0,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 (쌀:국내산) 콩나물국...,"카레덮밥 (쌀,현미흑미:국내산) 팽이장국 치킨핑거 (닭고기:국내산) 쫄면야채무침 ...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 청국장찌개 황태양념구이 (황태:러시아산) 고기...",1017.0,573.0,-9.3,-1.7,7.7,0.6,2.2,13.8768
3,2016-02-04,목,2601,104,220,355,0.0,"모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 (쌀,닭:국내산) 근대국...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 쇠고기무국 주꾸미볶음 부추전 시금치나물 ...","미니김밥*겨자장 (쌀,현미흑미:국내산) 우동 멕시칸샐러드 군고구마 무피클 포...",978.0,525.0,-7.7,-0.2,7.7,0.7,3.5,13.8768
4,2016-02-05,금,2601,278,181,34,0.0,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 (쌀:국내산) 재첩국 방...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 떡국 돈육씨앗강정 (돼지고기:국내산) 우엉잡채...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 차돌박이찌개 (쇠고기:호주산) 닭갈비 (닭고기:...",925.0,330.0,-6.5,1.3,9.6,1.0,2.9,13.8768


In [232]:
!pip install lxml
import requests
from urllib import parse
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime

def getHoliday(year: int) -> pd.DataFrame:
    url = "http://apis.data.go.kr/B090041/openapi/service/SpcdeInfoService/getRestDeInfo"
    api_key_utf8 = "I2Mv6nH4CWf3VYYi83oy0EJO2upQgmp2GDI%2Fdbxh9ekXkRD4EjMSq0gWZxTy%2FNtAxnjV7soTkOW2xs1qYMdjBg%3D%3D"
    api_key_decode = parse.unquote(api_key_utf8)

    params = {
        "ServiceKey": api_key_decode,
        "solYear": year,
        "numOfRows": 100
    }

    response = requests.get(url, params=params)
    xml = BeautifulSoup(response.text, "html.parser")
    items = xml.find('items')
    item_list = []
    for item in items:
        item_dict = {
            "공휴일": 1, # 무슨 공휴일인지는 없어도 될 듯 하다
            "일자": datetime.strptime(item.find("locdate").text.strip(), '%Y%m%d')
        }
        item_list.append(item_dict)

    return pd.DataFrame(item_list)



In [233]:
holidays = pd.concat([getHoliday(year) for year in range(2016, 2022)])
holidays.describe()

Unnamed: 0,공휴일
count,103.0
mean,1.0
std,0.0
min,1.0
25%,1.0
50%,1.0
75%,1.0
max,1.0


In [234]:
from datetime import datetime, timedelta

def one_day_before(s):
    return str(datetime.strptime(str(s)[:10], "%Y-%m-%d").date() + timedelta(days = -1))

def one_day_after(s):
    return str(datetime.strptime(str(s)[:10], "%Y-%m-%d").date() + timedelta(days = 1))

holidays["공휴일전날"] = holidays["일자"].apply(one_day_before)
holidays["공휴일다음날"] = holidays["일자"].apply(one_day_after)

In [235]:
new_holidays = pd.concat([holidays["공휴일전날"], holidays["공휴일다음날"]])

In [236]:
new_holidays = pd.DataFrame(new_holidays, columns = ["일자"])

In [237]:
new_holidays["휴일전후날여부"] = 1
new_holidays.head(110)

Unnamed: 0,일자,휴일전후날여부
0,2015-12-31,1
1,2016-02-06,1
2,2016-02-07,1
3,2016-02-08,1
4,2016-02-09,1
...,...,...
2,2016-02-09,1
3,2016-02-10,1
4,2016-02-11,1
5,2016-03-02,1


In [238]:
train_merge = pd.merge(train_merge, new_holidays, how = 'left', on  = '일자')
test_merge = pd.merge(test_merge, new_holidays, how = 'left', on = '일자')


In [239]:
train_merge["휴일전후날여부"] = train_merge["휴일전후날여부"].fillna(0)
test_merge["휴일전후날여부"] = test_merge["휴일전후날여부"].fillna(0)

train_merge.head(50)

Unnamed: 0,일자,요일,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,조식메뉴,중식메뉴,석식메뉴,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),휴일전후날여부
0,2016-02-01,월,2601,50,150,238,0.0,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 (쌀:국내산) 된장찌개 쥐...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 오징어찌개 쇠불고기 (쇠고기:호주산) 계란찜 ...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 육개장 자반고등어구이 두부조림 건파래무침 ...",1039.0,331.0,-4.1,-0.6,5.0,1.3,4.3,13.8768,0.0
1,2016-02-02,화,2601,50,173,319,0.0,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 (쌀:국내산) 호박젓국찌...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무...","콩나물밥*양념장 (쌀,현미흑미:국내산) 어묵국 유산슬 (쇠고기:호주산) 아삭고추무...",867.0,560.0,-9.1,-2.3,6.2,0.8,2.5,13.8768,0.0
2,2016-02-03,수,2601,56,180,111,0.0,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 (쌀:국내산) 콩나물국...,"카레덮밥 (쌀,현미흑미:국내산) 팽이장국 치킨핑거 (닭고기:국내산) 쫄면야채무침 ...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 청국장찌개 황태양념구이 (황태:러시아산) 고기...",1017.0,573.0,-9.3,-1.7,7.7,0.6,2.2,13.8768,0.0
3,2016-02-04,목,2601,104,220,355,0.0,"모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 (쌀,닭:국내산) 근대국...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 쇠고기무국 주꾸미볶음 부추전 시금치나물 ...","미니김밥*겨자장 (쌀,현미흑미:국내산) 우동 멕시칸샐러드 군고구마 무피클 포...",978.0,525.0,-7.7,-0.2,7.7,0.7,3.5,13.8768,0.0
4,2016-02-05,금,2601,278,181,34,0.0,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 (쌀:국내산) 재첩국 방...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 떡국 돈육씨앗강정 (돼지고기:국내산) 우엉잡채...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 차돌박이찌개 (쇠고기:호주산) 닭갈비 (닭고기:...",925.0,330.0,-6.5,1.3,9.6,1.0,2.9,13.8768,0.0
5,2016-02-11,목,2601,383,143,417,0.0,팬케익/찐빵 우유/두유/주스 계란후라이 견과류죽/쌀밥 (쌀:국내산) 감자찌개 ...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 시래기국 훈제오리구이 도토리묵무침 쌈무/양...","참치회덮밥 (쌀,현미흑미:국내산) 맑은국 군만두 과일샐러드 락교 포기김치 (...",1045.0,550.0,-3.0,6.1,14.4,1.0,3.7,13.8768,1.0
6,2016-02-12,금,2601,389,156,93,0.0,모닝롤/야채샌드 우유/두유/주스 계란후라이 고구마죽/쌀밥 (쌀:국내산) 봄동된...,"쌀밥/잡곡밥 (쌀,현미흑미:국내산) 꽃게탕 돈육굴소스볶음 옥수수전 유채나물 ...","쌀밥/잡곡밥 (쌀,현미흑미:국내산) 김치콩나물국 미니함박 어묵볶음 물파래무침 ...",909.0,598.0,5.8,9.2,14.4,1.1,3.1,62.8,0.0
7,2016-02-15,월,2601,87,204,482,0.0,모닝롤/치즈프레즐 우유/두유/주스 계란후라이 잣죽/쌀밥 (쌀:국내산) 민물새우...,쌀밥/잡곡밥 (쌀:국내산) 시금치국 닭감자조림 (닭고기:국내산) 연두부*양념장 ...,"쌀밥/잡곡밥 (쌀:국내산) 홍합미역국 등갈비김치찜 (돼지고기,김치:국내산) 임연수...",1268.0,672.0,-4.4,-1.5,2.3,1.8,4.7,13.8768,0.0
8,2016-02-16,화,2601,72,236,526,0.0,모닝롤/마늘빵 우유/두유/주스 계란후라이 단호박죽/쌀밥 (쌀:국내산) 어묵국 ...,쌀밥/잡곡밥 (쌀:국내산) 쇠고기무국 (쇠고기:호주산) 탕수어 (동태:러시아산) 오...,쌀밥/잡곡밥 (쌀:국내산) 된장찌개 쇠불고기 (쇠고기:호주산) 해파리겨자채 봄동...,1014.0,523.0,-6.3,0.2,5.4,1.8,7.3,13.8768,0.0
9,2016-02-17,수,2601,78,250,23,0.0,모닝롤/참치샌드 우유/두유/주스 계란후라이 흑임자죽/쌀밥 (쌀:국내산) 북어계...,쌀밥/잡곡밥 (쌀:국내산) 냉이된장국 쇠고기장조림 (쇠고기:호주산) 통도라지구이 ...,볶음밥*자장소스 (쌀:국내산) 맑은국 새우또띠아 쨔샤이무침 요플레 포기김치 ...,916.0,588.0,-3.4,1.7,8.1,1.1,3.4,13.8768,0.0


In [240]:
import re
def reg_modi(s):
    s = re.sub(pattern = r'\[[\S]*\]', repl = " ", string = s)
    s = re.sub(pattern = r'\([\S]*\)', repl = " ", string = s)
    s = re.sub(pattern = r'\s\s+', repl = " ", string = s)
    s = re.sub(pattern = r'\*', repl = " ", string = s)
    if len(s) < 20: # issue
        return "메뉴없음"
    return s

for menu in ["조식메뉴", "중식메뉴", "석식메뉴"]:
    train_merge[menu] = train_merge[menu].apply(reg_modi)
    test_merge[menu] = test_merge[menu].apply(reg_modi)
train_merge.head()

Unnamed: 0,일자,요일,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,조식메뉴,중식메뉴,석식메뉴,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),휴일전후날여부
0,2016-02-01,월,2601,50,150,238,0.0,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 된장찌개 쥐어채무침 포기김치,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,331.0,-4.1,-0.6,5.0,1.3,4.3,13.8768,0.0
1,2016-02-02,화,2601,50,173,319,0.0,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 호박젓국찌개 시래기조림 포기김치,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,560.0,-9.1,-2.3,6.2,0.8,2.5,13.8768,0.0
2,2016-02-03,수,2601,56,180,111,0.0,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 콩나물국 느타리호박볶음 포...,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,573.0,-9.3,-1.7,7.7,0.6,2.2,13.8768,0.0
3,2016-02-04,목,2601,104,220,355,0.0,모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 근대국 멸치볶음 포기김치,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,525.0,-7.7,-0.2,7.7,0.7,3.5,13.8768,0.0
4,2016-02-05,금,2601,278,181,34,0.0,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 재첩국 방풍나물 포기김치,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,330.0,-6.5,1.3,9.6,1.0,2.9,13.8768,0.0


In [241]:
train_merge["본사실근무자수"] = train_merge["본사정원수"] - train_merge["본사휴가자수"] - train_merge["본사출장자수"]/2 - train_merge["현본사소속재택근무자수"]
test_merge["본사실근무자수"] = test_merge["본사정원수"] - test_merge["본사휴가자수"] - test_merge["본사출장자수"]/2 - test_merge["현본사소속재택근무자수"]

del_features = ["본사정원수", "본사휴가자수", "본사출장자수", "현본사소속재택근무자수"]
train_merge.drop(del_features, inplace = True, axis = 1)
test_merge.drop(del_features, inplace = True, axis = 1)

In [242]:
train_merge = pd.get_dummies(train_merge, columns = ["요일"], prefix = '요일')
test_merge = pd.get_dummies(test_merge, columns = ["요일"], prefix = '요일')
train_merge.head()

Unnamed: 0,일자,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,조식메뉴,중식메뉴,석식메뉴,중식계,...,평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),휴일전후날여부,본사실근무자수,요일_금,요일_목,요일_수,요일_월,요일_화
0,2016-02-01,2601,50,150,238,0.0,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 된장찌개 쥐어채무침 포기김치,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,...,1.3,4.3,13.8768,0.0,2476.0,0,0,0,1,0
1,2016-02-02,2601,50,173,319,0.0,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 호박젓국찌개 시래기조림 포기김치,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,...,0.8,2.5,13.8768,0.0,2464.5,0,0,0,0,1
2,2016-02-03,2601,56,180,111,0.0,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 콩나물국 느타리호박볶음 포...,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,...,0.6,2.2,13.8768,0.0,2455.0,0,0,1,0,0
3,2016-02-04,2601,104,220,355,0.0,모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 근대국 멸치볶음 포기김치,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,...,0.7,3.5,13.8768,0.0,2387.0,0,1,0,0,0
4,2016-02-05,2601,278,181,34,0.0,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 재첩국 방풍나물 포기김치,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,...,1.0,2.9,13.8768,0.0,2232.5,1,0,0,0,0


In [243]:
from sklearn.preprocessing import RobustScaler

scaled = ["본사정원수", "본사휴가자수", "본사출장자수", "본사시간외근무명령서승인건수", 
            "현본사소속재택근무자수", "최저기온(°C)", "평균기온(°C)", "최고기온(°C)", "평균 풍속(m/s)", 
            "최대 풍속(m/s)", "일강수량(mm)", "본사실근무자수"]
train_merge[scaled] = RobustScaler().fit_transform(train_merge[scaled].values)
test_merge[scaled] = RobustScaler().fit_transform(test_merge[scaled].values)

In [244]:
train_merge

Unnamed: 0,일자,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,조식메뉴,중식메뉴,석식메뉴,중식계,...,평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),휴일전후날여부,본사실근무자수,요일_금,요일_목,요일_수,요일_월,요일_화
0,2016-02-01,-0.501577,-0.479303,-1.727273,-0.135045,0.0,모닝롤/찐빵 우유/두유/주스 계란후라이 호두죽/쌀밥 된장찌개 쥐어채무침 포기김치,쌀밥/잡곡밥 오징어찌개 쇠불고기 계란찜 청포묵무침 요구르트 포기김치,쌀밥/잡곡밥 육개장 자반고등어구이 두부조림 건파래무침 포기김치,1039.0,...,0.500000,0.857143,0.230311,0.0,0.011315,0,0,0,1,0
1,2016-02-02,-0.501577,-0.479303,-1.309091,0.045759,0.0,모닝롤/단호박샌드 우유/두유/주스 계란후라이 팥죽/쌀밥 호박젓국찌개 시래기조림 포기김치,쌀밥/잡곡밥 김치찌개 가자미튀김 모둠소세지구이 마늘쫑무침 요구르트 배추겉절이,콩나물밥 양념장 어묵국 유산슬 아삭고추무침 바나나 포기김치,867.0,...,-0.333333,-0.428571,0.230311,0.0,-0.053748,0,0,0,0,1
2,2016-02-03,-0.501577,-0.427015,-1.181818,-0.418527,0.0,모닝롤/베이글 우유/두유/주스 계란후라이 표고버섯죽/쌀밥 콩나물국 느타리호박볶음 포...,카레덮밥 팽이장국 치킨핑거 쫄면야채무침 견과류조림 요구르트 포기김치,쌀밥/잡곡밥 청국장찌개 황태양념구이 고기전 새송이버섯볶음 포기김치,1017.0,...,-0.666667,-0.642857,0.230311,0.0,-0.107496,0,0,1,0,0
3,2016-02-04,-0.501577,-0.008715,-0.454545,0.126116,0.0,모닝롤/토마토샌드 우유/두유/주스 계란후라이 닭죽/쌀밥 근대국 멸치볶음 포기김치,쌀밥/잡곡밥 쇠고기무국 주꾸미볶음 부추전 시금치나물 요구르트 포기김치,미니김밥 겨자장 우동 멕시칸샐러드 군고구마 무피클 포기김치,978.0,...,-0.500000,0.285714,0.230311,0.0,-0.492221,0,1,0,0,0
4,2016-02-05,-0.501577,1.507625,-1.163636,-0.590402,0.0,모닝롤/와플 우유/두유/주스 계란후라이 쇠고기죽/쌀밥 재첩국 방풍나물 포기김치,쌀밥/잡곡밥 떡국 돈육씨앗강정 우엉잡채 청경채무침 요구르트 포기김치,쌀밥/잡곡밥 차돌박이찌개 닭갈비 감자소세지볶음 콩나물무침 포기김치,925.0,...,0.000000,-0.142857,0.230311,0.0,-1.366337,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201,2021-01-20,0.703470,-0.261438,-0.854545,-0.657366,391.0,모닝롤/페퍼로니피자 우유/주스 계란후라이/찐계란 크루통크림스프/흑미밥 아귀지리 마늘...,쌀밥/흑미밥/찰현미밥 아욱국 수제함박스테이크 견과류마카로니범벅 생깻잎지 단호박물김치...,김치볶음밥 미니쫄우동 맛살튀김 브로콜리깨소스무침 계란후라이 고들빼기무침 겉절이김치,1093.0,...,-0.833333,-0.642857,-0.581028,0.0,-0.316832,0,0,1,0,0
1202,2021-01-21,0.703470,-0.113290,-0.254545,0.364955,351.0,모닝롤/생크림단팥빵 우유/주스 계란후라이/찐계란 누룽지탕/흑미밥 떡국 해물땡굴소스볶...,쌀밥/수수밥/찰현미밥 냉이된장국 동파육 봄동전 청경채/버섯숙회 초장 무생채 양상추샐...,흑미밥 쇠고기무국 삼치양념구이 비엔나채소볶음 숙주나물당근무침 포기김치,832.0,...,-0.666667,-0.714286,-1.602501,0.0,-0.280057,0,1,0,0,0
1203,2021-01-22,0.703470,1.307190,0.054545,-0.664062,303.0,모닝롤/BLT샌드위치 우유/주스 계란후라이/찐계란 흑임자죽/흑미밥 바지락살국 두부조...,전주비빔밥 약고추장 계란파국 요거닭 올방개묵무침 파프리카해초무침 포기김치 양상추샐러...,흑미밥 수제비국 수제맛쵸킹탕수육 유부채소겨자냉채 참나물무침 갓김치/겉절이김치,579.0,...,-0.833333,-1.285714,-1.677759,0.0,-0.978784,1,0,0,0,0
1204,2021-01-25,0.703470,0.017429,-1.672727,0.708705,327.0,모닝롤/호박고구마오븐구이 우유/주스 계란후라이/찐계란 누룽지탕/흑미밥 감자양파국 분...,쌀밥/흑미밥/찰현미밥 전주식콩나물해장국 돈육간장불고기 깐풍연근 연두부 달래양념장 봄...,흑미밥 열무된장국 장어강정 데리야끼소스 깻잎쌈 생강채 오이선 포기김치,1145.0,...,0.000000,0.000000,-0.581028,0.0,-0.008487,0,0,0,1,0


In [245]:
when = ["조식", "중식", "석식"]
sub_menus = ["밥/빵/죽/국수/닝롤/게티/이스/우동", 
             "찌개/탕/국/스프/계장/개장", 
             "우유/두유/주스/음료", 
             "무침/자반/잡채/나물",
             "강정/튀김/가스",
             "조림/볶음/구이/고기", 
             "김치/깍두기/박지/절이", 
             "러드/피클/생채", 
             "나머지"]

def dish_split_breakfast(s):
    s = s.split()
    global sub_menus
    l = {"조식/" + sm : [] for sm in sub_menus}
    for ss in s:
        chk = False
        for cateogory in l:
            for c in cateogory.split("/"):
                if c in ss[-2:]:
                    l[cateogory].append(ss)
                    chk = True
                    break
            if chk:
                break
        if not chk:
            l["조식/나머지"].append(ss)
    
    for key in l:
        l[key] = ' '.join(l[key])
        
    return l

def dish_split_lunch(s):
    s = s.split()
    global sub_menus
    l = {"중식/" + sm : [] for sm in sub_menus}
    for ss in s:
        chk = False
        for cateogory in l:
            for c in cateogory.split("/"):
                if c in ss[-2:]:
                    l[cateogory].append(ss)
                    chk = True
                    break
            if chk:
                break
        if not chk:
            l["중식/나머지"].append(ss)
    
    for key in l:
        l[key] = ' '.join(l[key])
    return l

def dish_split_dinner(s):
    s = s.split()
    global sub_menus
    l = {"석식/" + sm : [] for sm in sub_menus}
    for ss in s:
        chk = False
        for cateogory in l:
            for c in cateogory.split("/"):
                if c in ss[-2:]:
                    l[cateogory].append(ss)
                    chk = True
                    break
            if chk:
                break
        if not chk:
            l["석식/나머지"].append(ss)
    
    for key in l:
        l[key] = ' '.join(l[key])
    return l

train_merge["조식메뉴"] = train_merge["조식메뉴"].apply(dish_split_breakfast)
test_merge["조식메뉴"] = test_merge["조식메뉴"].apply(dish_split_breakfast)

In [246]:
train_merge["중식메뉴"] = train_merge["중식메뉴"].apply(dish_split_lunch)
test_merge["중식메뉴"] = test_merge["중식메뉴"].apply(dish_split_lunch)
train_merge["석식메뉴"] = train_merge["석식메뉴"].apply(dish_split_dinner)
test_merge["석식메뉴"] = test_merge["석식메뉴"].apply(dish_split_dinner)

In [247]:
train_merge.head()

Unnamed: 0,일자,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,조식메뉴,중식메뉴,석식메뉴,중식계,...,평균 풍속(m/s),최대 풍속(m/s),일강수량(mm),휴일전후날여부,본사실근무자수,요일_금,요일_목,요일_수,요일_월,요일_화
0,2016-02-01,-0.501577,-0.479303,-1.727273,-0.135045,0.0,"{'조식/밥/빵/죽/국수/닝롤/게티/이스/우동': '모닝롤/찐빵 호두죽/쌀밥', '...","{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동': '쌀밥/잡곡밥', '중식/찌개/탕...","{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동': '쌀밥/잡곡밥', '석식/찌개/탕...",1039.0,...,0.5,0.857143,0.230311,0.0,0.011315,0,0,0,1,0
1,2016-02-02,-0.501577,-0.479303,-1.309091,0.045759,0.0,"{'조식/밥/빵/죽/국수/닝롤/게티/이스/우동': '팥죽/쌀밥', '조식/찌개/탕/...","{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동': '쌀밥/잡곡밥', '중식/찌개/탕...","{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동': '콩나물밥', '석식/찌개/탕/국...",867.0,...,-0.333333,-0.428571,0.230311,0.0,-0.053748,0,0,0,0,1
2,2016-02-03,-0.501577,-0.427015,-1.181818,-0.418527,0.0,"{'조식/밥/빵/죽/국수/닝롤/게티/이스/우동': '표고버섯죽/쌀밥', '조식/찌개...","{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동': '카레덮밥', '중식/찌개/탕/국...","{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동': '쌀밥/잡곡밥', '석식/찌개/탕...",1017.0,...,-0.666667,-0.642857,0.230311,0.0,-0.107496,0,0,1,0,0
3,2016-02-04,-0.501577,-0.008715,-0.454545,0.126116,0.0,"{'조식/밥/빵/죽/국수/닝롤/게티/이스/우동': '닭죽/쌀밥', '조식/찌개/탕/...","{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동': '쌀밥/잡곡밥', '중식/찌개/탕...","{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동': '미니김밥 우동', '석식/찌개/...",978.0,...,-0.5,0.285714,0.230311,0.0,-0.492221,0,1,0,0,0
4,2016-02-05,-0.501577,1.507625,-1.163636,-0.590402,0.0,"{'조식/밥/빵/죽/국수/닝롤/게티/이스/우동': '쇠고기죽/쌀밥', '조식/찌개/...","{'중식/밥/빵/죽/국수/닝롤/게티/이스/우동': '쌀밥/잡곡밥', '중식/찌개/탕...","{'석식/밥/빵/죽/국수/닝롤/게티/이스/우동': '쌀밥/잡곡밥', '석식/찌개/탕...",925.0,...,0.0,-0.142857,0.230311,0.0,-1.366337,1,0,0,0,0


In [248]:
menus = ["조식메뉴", "중식메뉴", "석식메뉴"]
# Convert the column of stringified dicts to dicts
# skip this line, if the column contains dicts
for menu in menus:
    train_merge = pd.concat([train_merge, train_merge[menu].apply(pd.Series)], axis = 1).drop(menu, axis = 1)
    test_merge = pd.concat([test_merge, test_merge[menu].apply(pd.Series)], axis = 1).drop(menu, axis = 1)

In [249]:
train_merge

Unnamed: 0,일자,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식계,석식계,최저기온(°C),평균기온(°C),...,중식/나머지,석식/밥/빵/죽/국수/닝롤/게티/이스/우동,석식/찌개/탕/국/스프/계장/개장,석식/우유/두유/주스/음료,석식/무침/자반/잡채/나물,석식/강정/튀김/가스,석식/조림/볶음/구이/고기,석식/김치/깍두기/박지/절이,석식/러드/피클/생채,석식/나머지
0,2016-02-01,-0.501577,-0.479303,-1.727273,-0.135045,0.0,1039.0,331.0,-0.629879,-0.891975,...,계란찜 요구르트,쌀밥/잡곡밥,육개장,,건파래무침,,자반고등어구이 두부조림,포기김치,,
1,2016-02-02,-0.501577,-0.479303,-1.309091,0.045759,0.0,867.0,560.0,-0.899058,-0.996914,...,요구르트,콩나물밥,어묵국,,아삭고추무침,,,포기김치,,양념장 유산슬 바나나
2,2016-02-03,-0.501577,-0.427015,-1.181818,-0.418527,0.0,1017.0,573.0,-0.909825,-0.959877,...,치킨핑거 요구르트,쌀밥/잡곡밥,청국장찌개,,,,황태양념구이 새송이버섯볶음,포기김치,,고기전
3,2016-02-04,-0.501577,-0.008715,-0.454545,0.126116,0.0,978.0,525.0,-0.823688,-0.867284,...,부추전 요구르트,미니김밥 우동,,,,,,포기김치,멕시칸샐러드 무피클,겨자장 군고구마
4,2016-02-05,-0.501577,1.507625,-1.163636,-0.590402,0.0,925.0,330.0,-0.759085,-0.774691,...,요구르트,쌀밥/잡곡밥,차돌박이찌개,,콩나물무침,,감자소세지볶음,포기김치,,닭갈비
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201,2021-01-20,0.703470,-0.261438,-0.854545,-0.657366,391.0,1093.0,421.0,-0.931359,-0.879630,...,수제함박스테이크 견과류마카로니범벅 생깻잎지 오리엔탈D,김치볶음밥 미니쫄우동,,,브로콜리깨소스무침 고들빼기무침,맛살튀김,,겉절이김치,,계란후라이
1202,2021-01-21,0.703470,-0.113290,-0.254545,0.364955,351.0,832.0,353.0,-0.549125,-0.533951,...,동파육 봄동전 청경채/버섯숙회 초장 파인D,흑미밥,쇠고기무국,,숙주나물당근무침,,삼치양념구이 비엔나채소볶음,포기김치,,
1203,2021-01-22,0.703470,1.307190,0.054545,-0.664062,303.0,579.0,217.0,-0.134590,-0.422840,...,약고추장 요거닭 자몽D,흑미밥,수제비국,,참나물무침,,,갓김치/겉절이김치,,수제맛쵸킹탕수육 유부채소겨자냉채
1204,2021-01-25,0.703470,0.017429,-1.672727,0.708705,327.0,1145.0,502.0,-0.473755,-0.466049,...,깐풍연근 연두부 달래양념장 봄동겉절이/양념김 오렌지D,흑미밥,열무된장국,,,장어강정,,포기김치,,데리야끼소스 깻잎쌈 생강채 오이선


In [250]:
def embedding(s):
    return model.wv[s]

when = ["조식", "중식", "석식"]
sub_menus = ["밥/빵/죽/국수/닝롤/게티/이스/우동", 
             "찌개/탕/국/스프/계장/개장", 
             "우유/두유/주스/음료", 
             "무침/자반/잡채/나물",
             "강정/튀김/가스",
             "조림/볶음/구이/고기", 
             "김치/깍두기/박지/절이", 
             "러드/피클/생채", 
             "나머지"]
for w in when:
    for sm in sub_menus:
        train_merge[w + "/" + sm] = train_merge[w + "/" + sm].apply(embedding)
        test_merge[w + "/" + sm] = test_merge[w + "/" + sm].apply(embedding)
        
# for menu in menus:
#     for sm in sub_menus:
#         train_merge[menu + "_" + sm] = train_merge[menu].apply(dish_split)
#         test_merge[menu] = test_merge[menu].apply(dish_split)

In [251]:
train_merge

Unnamed: 0,일자,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식계,석식계,최저기온(°C),평균기온(°C),...,중식/나머지,석식/밥/빵/죽/국수/닝롤/게티/이스/우동,석식/찌개/탕/국/스프/계장/개장,석식/우유/두유/주스/음료,석식/무침/자반/잡채/나물,석식/강정/튀김/가스,석식/조림/볶음/구이/고기,석식/김치/깍두기/박지/절이,석식/러드/피클/생채,석식/나머지
0,2016-02-01,-0.501577,-0.479303,-1.727273,-0.135045,0.0,1039.0,331.0,-0.629879,-0.891975,...,"[0.06123111, 0.24215324, -0.23535733, 0.049104...","[0.039512787, 0.011914028, -0.19772388, 0.3084...","[0.42454818, 0.19554946, -0.41583803, 0.105381...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.0711304, 0.050526805, -0.052438974, 0.03571...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.110009655, 0.037517834, -0.11880417, -0.039...","[0.17553972, 0.05005726, 0.080552146, 0.199080...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[18.42995, -5.236802, -10.946865, -18.501957, ..."
1,2016-02-02,-0.501577,-0.479303,-1.309091,0.045759,0.0,867.0,560.0,-0.899058,-0.996914,...,"[0.18864071, 0.2874925, -0.40239516, 0.0505694...","[-0.06617575, 0.19931826, -0.19965002, 0.30812...","[-0.06871803, 0.03420794, -0.17168728, 0.15848...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.054877255, 0.1008328, -0.15888143, 0.057159...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.17553972, 0.05005726, 0.080552146, 0.199080...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.013614337, 0.083734766, -0.112081654, 0.136..."
2,2016-02-03,-0.501577,-0.427015,-1.181818,-0.418527,0.0,1017.0,573.0,-0.909825,-0.959877,...,"[0.033488862, 0.18660572, -0.399411, 0.0566318...","[0.039512787, 0.011914028, -0.19772388, 0.3084...","[0.006723575, -0.09356738, 0.15689348, 0.42875...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.03827649, 0.080033176, -0.14924063, 0.08249...","[0.17553972, 0.05005726, 0.080552146, 0.199080...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.2831434, 0.09640077, 0.108128935, 0.2683646..."
3,2016-02-04,-0.501577,-0.008715,-0.454545,0.126116,0.0,978.0,525.0,-0.823688,-0.867284,...,"[-0.013874763, 0.24031845, -0.33599004, 0.1554...","[-0.0147821745, 0.1817861, -0.5566889, 0.32566...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.17553972, 0.05005726, 0.080552146, 0.199080...","[0.036774207, 0.19492507, -0.15549977, 0.09515...","[-0.025903594, 0.082926884, -0.2789544, 0.0926..."
4,2016-02-05,-0.501577,1.507625,-1.163636,-0.590402,0.0,925.0,330.0,-0.759085,-0.774691,...,"[0.18864071, 0.2874925, -0.40239516, 0.0505694...","[0.039512787, 0.011914028, -0.19772388, 0.3084...","[-0.043565013, -0.053065214, -0.17032932, -0.0...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.0076644006, 0.22639814, -0.038856167, 0.124...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.0025133418, -0.097640745, -0.097693644, 0.1...","[0.17553972, 0.05005726, 0.080552146, 0.199080...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[-0.07495984, 0.18634775, -0.14925787, 0.13923..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201,2021-01-20,0.703470,-0.261438,-0.854545,-0.657366,391.0,1093.0,421.0,-0.931359,-0.879630,...,"[0.037776615, 0.08447772, -0.26715237, 0.02527...","[0.003475602, 0.15442632, -0.34556776, 0.24280...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.09540566, 0.14371218, -0.2208508, -0.041702...","[-0.15285681, 0.22897917, -0.32374302, 0.20328...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.12015064, 0.037288763, 0.09474858, 0.202008...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.078759074, 0.06197614, -0.5235041, -0.12553..."
1202,2021-01-21,0.703470,-0.113290,-0.254545,0.364955,351.0,832.0,353.0,-0.549125,-0.533951,...,"[0.03587674, 0.06961125, -0.19207256, 0.075264...","[-0.23638344, -0.14273831, -0.62805855, 0.1867...","[0.25999764, 0.01204755, 0.2204451, 0.3623595,...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.011127616, 0.29098678, 0.0058502913, 0.1886...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.04606033, 0.08580454, -0.13922659, 0.229882...","[0.17553972, 0.05005726, 0.080552146, 0.199080...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[18.42995, -5.236802, -10.946865, -18.501957, ..."
1203,2021-01-22,0.703470,1.307190,0.054545,-0.664062,303.0,579.0,217.0,-0.134590,-0.422840,...,"[-0.10242853, 0.0016085552, -0.21431574, 0.142...","[-0.23638344, -0.14273831, -0.62805855, 0.1867...","[0.08117783, 0.10598267, 0.07819572, 0.3265746...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.11265408, 0.21106789, 0.028074576, -0.02937...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.11358119, 0.15188475, 0.061553266, 0.176443...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.062350705, -0.010787534, -0.050887395, 0.14..."
1204,2021-01-25,0.703470,0.017429,-1.672727,0.708705,327.0,1145.0,502.0,-0.473755,-0.466049,...,"[0.057585523, 0.06254523, -0.2235472, 0.030020...","[-0.23638344, -0.14273831, -0.62805855, 0.1867...","[0.09355379, 0.12145938, -0.08495324, 0.253330...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.15787554, -0.05186699, -0.09553395, -0.1396...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.17553972, 0.05005726, 0.080552146, 0.199080...","[18.42995, -5.236802, -10.946865, -18.501957, ...","[0.02545427, 0.13196276, -0.19843788, -0.06703..."


In [252]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# std scaling embedded vecs

# scaled_menus_train = {}
# scaled_menus_test = {}

# menus = ["조식메뉴", "중식메뉴", "석식메뉴"]

# for menu in menus:
#     scaled_menus_train[menu] = StandardScaler().fit_transform(train_merge[menu].to_list())
#     scaled_menus_test[menu] = StandardScaler().fit_transform(test_merge[menu].to_list())
#     for idx, val in enumerate(scaled_menus_train[menu]):
#         train_merge[menu][idx] = val
#     for idx, val in enumerate(scaled_menus_test[menu]):
#         test_merge[menu][idx] = val

In [253]:
# seperate each vecs to new cols

for w in when:
    for sm in sub_menus:
        tmp_col = w + "/" + sm
        tmp = [tmp_col + str(i) for i in range(300)]
        train_merge[tmp] = pd.DataFrame(train_merge[tmp_col].to_list(), columns = tmp)
        test_merge[tmp] = pd.DataFrame(test_merge[tmp_col].to_list(), columns = tmp)
        train_merge.drop([tmp_col], inplace = True, axis = 1)
        test_merge.drop([tmp_col], inplace = True, axis = 1)

In [254]:
list(train_merge.columns)

['일자',
 '본사정원수',
 '본사휴가자수',
 '본사출장자수',
 '본사시간외근무명령서승인건수',
 '현본사소속재택근무자수',
 '중식계',
 '석식계',
 '최저기온(°C)',
 '평균기온(°C)',
 '최고기온(°C)',
 '평균 풍속(m/s)',
 '최대 풍속(m/s)',
 '일강수량(mm)',
 '휴일전후날여부',
 '본사실근무자수',
 '요일_금',
 '요일_목',
 '요일_수',
 '요일_월',
 '요일_화',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동0',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동1',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동2',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동3',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동4',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동5',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동6',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동7',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동8',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동9',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동10',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동11',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동12',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동13',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동14',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동15',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동16',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동17',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동18',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동19',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동20',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동21',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동22',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동23',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동24',
 '조식/밥/빵/죽/국수/닝롤/게티

In [255]:
from sklearn.decomposition import PCA

# PCA 300 -> 20
pca_target = 20
pca = PCA(n_components = pca_target)
for w in when:
    for sm in sub_menus:
        tmp_col = w + "/" + sm
        tmp = [tmp_col + str(i) for i in range(300)]
        target = [tmp_col + "_pca" + str(i) for i in range(pca_target)]
        train_merge[target] = pca.fit_transform(train_merge[tmp])
        test_merge[target] = pca.fit_transform(test_merge[tmp])
        

In [256]:
train_merge.describe()

Unnamed: 0,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식계,석식계,최저기온(°C),평균기온(°C),최고기온(°C),...,석식/나머지_pca10,석식/나머지_pca11,석식/나머지_pca12,석식/나머지_pca13,석식/나머지_pca14,석식/나머지_pca15,석식/나머지_pca16,석식/나머지_pca17,석식/나머지_pca18,석식/나머지_pca19
count,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,...,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0,1206.0
mean,0.150392,0.468825,-0.073149,-0.05485,43.470149,889.913765,461.606965,0.023298,-0.01024929,-0.0344931,...,-3.322075e-15,6.371332e-15,5.802925e-15,-1.336758e-14,-1.059323e-14,-4.39125e-15,-9.613961e-15,9.693324e-15,-5.482589e-15,2.767008e-15
std,0.540265,1.284216,0.798077,0.549611,109.898914,209.927051,139.240321,0.555265,0.575525,0.5849449,...,0.3080067,0.29431,0.2925154,0.285154,0.2714847,0.2631044,0.2526804,0.2433675,0.2337963,0.2256521
min,-0.501577,-0.714597,-3.709091,-0.666295,0.0,296.0,0.0,-1.238223,-1.416667,-1.483553,...,-0.9007512,-1.016102,-1.018988,-0.9130234,-1.120406,-1.010482,-1.107076,-0.9694092,-1.253943,-0.949683
25%,-0.362776,-0.296296,-0.509091,-0.657366,0.0,758.0,406.0,-0.462988,-0.5030864,-0.5296053,...,-0.1807644,-0.1562152,-0.1406833,-0.1529045,-0.08248253,-0.1087329,-0.1001035,-0.1008346,-0.08211277,-0.08955595
50%,0.0,0.0,0.0,0.0,0.0,879.0,483.0,0.0,-5.486063e-17,1.16877e-16,...,6.848527e-05,-0.0001074603,-0.0003226199,8.002243e-05,3.261128e-05,-0.0001280674,-0.0002540042,-1.469317e-05,-1.17954e-05,-6.376249e-06
75%,0.637224,0.703704,0.490909,0.342634,0.0,1031.75,545.0,0.537012,0.4969136,0.4703947,...,0.1178332,0.1088095,0.1100736,0.08099198,0.1145211,0.1158508,0.1165836,0.1180238,0.1125372,0.08175675
max,1.719243,9.751634,2.418182,1.664062,533.0,1459.0,905.0,1.033647,1.033951,1.134868,...,1.398756,1.191849,1.87737,1.231177,1.290568,1.155561,1.410831,1.171737,1.417686,1.086008


In [257]:
# del cols before PCA

for w in when:
    for sm in sub_menus:
        tmp_col = w + "/" + sm
        tmp = [tmp_col + str(i) for i in range(300)]
        train_merge.drop(tmp, inplace = True, axis = 1)
        test_merge.drop(tmp, inplace = True, axis = 1)
train_merge.head()

Unnamed: 0,일자,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식계,석식계,최저기온(°C),평균기온(°C),...,석식/나머지_pca10,석식/나머지_pca11,석식/나머지_pca12,석식/나머지_pca13,석식/나머지_pca14,석식/나머지_pca15,석식/나머지_pca16,석식/나머지_pca17,석식/나머지_pca18,석식/나머지_pca19
0,2016-02-01,-0.501577,-0.479303,-1.727273,-0.135045,0.0,1039.0,331.0,-0.629879,-0.891975,...,6.8e-05,-0.000107,-0.000323,8e-05,3.3e-05,-0.000128,-0.000254,-1.5e-05,-1.2e-05,-6e-06
1,2016-02-02,-0.501577,-0.479303,-1.309091,0.045759,0.0,867.0,560.0,-0.899058,-0.996914,...,-0.158692,0.05435,-0.175273,-0.180672,-0.042417,0.379392,-0.137232,0.011974,-0.372467,-0.05472
2,2016-02-03,-0.501577,-0.427015,-1.181818,-0.418527,0.0,1017.0,573.0,-0.909825,-0.959877,...,1.043669,-0.017744,-0.080671,-0.506048,0.593968,0.287644,-0.351895,0.118024,0.079964,0.079414
3,2016-02-04,-0.501577,-0.008715,-0.454545,0.126116,0.0,978.0,525.0,-0.823688,-0.867284,...,0.04345,0.0881,0.235829,-0.196727,0.226014,-0.033359,0.067479,0.275412,0.010523,-0.182771
4,2016-02-05,-0.501577,1.507625,-1.163636,-0.590402,0.0,925.0,330.0,-0.759085,-0.774691,...,-0.246735,1.191849,0.718062,0.053783,-0.209676,-1.010482,0.110405,0.081126,0.429128,0.221087


In [258]:
list(train_merge.columns)

['일자',
 '본사정원수',
 '본사휴가자수',
 '본사출장자수',
 '본사시간외근무명령서승인건수',
 '현본사소속재택근무자수',
 '중식계',
 '석식계',
 '최저기온(°C)',
 '평균기온(°C)',
 '최고기온(°C)',
 '평균 풍속(m/s)',
 '최대 풍속(m/s)',
 '일강수량(mm)',
 '휴일전후날여부',
 '본사실근무자수',
 '요일_금',
 '요일_목',
 '요일_수',
 '요일_월',
 '요일_화',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca0',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca1',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca2',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca3',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca4',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca5',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca6',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca7',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca8',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca9',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca10',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca11',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca12',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca13',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca14',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca15',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca16',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca17',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca18',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca19',
 '조식/찌개/탕/국/스프/계장/개장_pca0',
 '조식/찌개/탕/국/스프/계장/개장_pca1',
 '조식/찌개/탕/국/스프/계장/개장_pca2',
 '조식/

In [259]:
import pandas_profiling
from pandas_profiling import ProfileReport

# profile = train_merge.profile_report()
# profile

In [260]:
train_merge.head()

Unnamed: 0,일자,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,중식계,석식계,최저기온(°C),평균기온(°C),...,석식/나머지_pca10,석식/나머지_pca11,석식/나머지_pca12,석식/나머지_pca13,석식/나머지_pca14,석식/나머지_pca15,석식/나머지_pca16,석식/나머지_pca17,석식/나머지_pca18,석식/나머지_pca19
0,2016-02-01,-0.501577,-0.479303,-1.727273,-0.135045,0.0,1039.0,331.0,-0.629879,-0.891975,...,6.8e-05,-0.000107,-0.000323,8e-05,3.3e-05,-0.000128,-0.000254,-1.5e-05,-1.2e-05,-6e-06
1,2016-02-02,-0.501577,-0.479303,-1.309091,0.045759,0.0,867.0,560.0,-0.899058,-0.996914,...,-0.158692,0.05435,-0.175273,-0.180672,-0.042417,0.379392,-0.137232,0.011974,-0.372467,-0.05472
2,2016-02-03,-0.501577,-0.427015,-1.181818,-0.418527,0.0,1017.0,573.0,-0.909825,-0.959877,...,1.043669,-0.017744,-0.080671,-0.506048,0.593968,0.287644,-0.351895,0.118024,0.079964,0.079414
3,2016-02-04,-0.501577,-0.008715,-0.454545,0.126116,0.0,978.0,525.0,-0.823688,-0.867284,...,0.04345,0.0881,0.235829,-0.196727,0.226014,-0.033359,0.067479,0.275412,0.010523,-0.182771
4,2016-02-05,-0.501577,1.507625,-1.163636,-0.590402,0.0,925.0,330.0,-0.759085,-0.774691,...,-0.246735,1.191849,0.718062,0.053783,-0.209676,-1.010482,0.110405,0.081126,0.429128,0.221087


In [261]:
test_merge.head()

Unnamed: 0,일자,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,최저기온(°C),평균기온(°C),최고기온(°C),평균 풍속(m/s),...,석식/나머지_pca10,석식/나머지_pca11,석식/나머지_pca12,석식/나머지_pca13,석식/나머지_pca14,석식/나머지_pca15,석식/나머지_pca16,석식/나머지_pca17,석식/나머지_pca18,석식/나머지_pca19
0,2021-01-27,0.176471,-0.118143,-0.298182,-0.677206,0.763158,-0.271429,-0.559748,-0.362667,-0.102564,...,-0.208838,-0.54074,-0.297357,-0.271108,0.17709,-0.156607,0.035215,-0.347656,0.154269,-0.112276
1,2021-01-28,0.176471,0.151899,0.138182,-0.083088,0.631579,-0.528571,-0.899371,-0.437333,1.333333,...,-1.4e-05,4e-05,2e-06,-6.3e-05,-0.00012,-6.7e-05,0.000107,-1.8e-05,6.3e-05,4.3e-05
2,2021-01-29,0.176471,2.953586,0.676364,-0.684559,-0.078947,-0.957143,-1.36478,-1.258667,1.128205,...,-0.131708,-0.087818,0.17444,-0.089296,0.279166,-0.138615,0.053319,-0.158324,-0.101789,0.219498
3,2021-02-01,-0.980392,0.219409,-0.705455,0.106618,0.289474,0.514286,-0.132075,-0.448,0.0,...,-1.4e-05,4e-05,2e-06,-6.3e-05,-0.00012,-6.7e-05,0.000107,-1.8e-05,6.3e-05,4.3e-05
4,2021-02-02,-0.980392,-0.556962,-0.24,-0.015441,0.184211,-0.842857,-1.025157,-0.778667,1.333333,...,-1.4e-05,4e-05,2e-06,-6.3e-05,-0.00012,-6.7e-05,0.000107,-1.8e-05,6.3e-05,4.3e-05


In [262]:
submission = test_merge[["일자"]]
submission.head()

Unnamed: 0,일자
0,2021-01-27
1,2021-01-28
2,2021-01-29
3,2021-02-01
4,2021-02-02


In [263]:
from pycaret.regression import *

when = ["조식", "중식", "석식"]
sub_menus = ["밥/빵/죽/국수/닝롤/게티/이스/우동", 
             "찌개/탕/국/스프/계장/개장", 
             "우유/두유/주스/음료", 
             "무침/자반/잡채/나물",
             "강정/튀김/가스",
             "조림/볶음/구이/고기", 
             "김치/깍두기/박지/절이", 
             "러드/피클/생채", 
             "나머지"]

exp_lunch = setup(train_merge, target = '중식계', 
                  ignore_features = ['석식계'])

Unnamed: 0,Description,Value
0,session_id,7833
1,Target,중식계
2,Original Data,"(1206, 561)"
3,Missing Values,False
4,Numeric Features,557
5,Categorical Features,1
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(844, 571)"


In [264]:
best_4_lunch = compare_models(sort = 'MAE', n_select = 4)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
gbr,Gradient Boosting Regressor,75.8026,10076.3677,99.922,0.7692,0.1264,0.0944,0.374
lasso,Lasso Regression,78.0775,10796.6929,103.331,0.7528,0.1292,0.0955,0.021
et,Extra Trees Regressor,79.0343,11373.611,106.2231,0.7407,0.1346,0.0984,0.491
lightgbm,Light Gradient Boosting Machine,80.3215,11192.3941,105.4372,0.743,0.1324,0.0991,0.234
rf,Random Forest Regressor,82.4554,12109.9955,109.5381,0.7238,0.1388,0.1032,0.709
ada,AdaBoost Regressor,85.3708,11864.2213,108.5136,0.7283,0.1369,0.1064,0.178
omp,Orthogonal Matching Pursuit,88.0626,14073.602,118.1048,0.6789,0.1519,0.1071,0.244
llar,Lasso Least Angle Regression,90.7787,13697.9331,116.7052,0.6868,0.1492,0.1147,0.376
en,Elastic Net,103.5923,16933.5288,129.9169,0.6117,0.1608,0.1284,0.019
dt,Decision Tree Regressor,113.1468,22797.3475,150.675,0.4746,0.1919,0.1378,0.034


In [265]:
blended_lunch = blend_models(estimator_list = best_4_lunch, fold = 5)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,72.4227,8903.216,94.3569,0.7719,0.1087,0.0867
1,76.359,8966.4806,94.6915,0.7933,0.112,0.089
2,66.4717,8078.5324,89.8807,0.8228,0.1144,0.0828
3,74.7194,9771.3121,98.8499,0.7826,0.1397,0.0994
4,76.7286,11587.4089,107.6448,0.75,0.1374,0.0955
Mean,73.3403,9461.39,97.0848,0.7841,0.1225,0.0907
SD,3.7554,1190.361,5.995,0.0241,0.0133,0.006


In [266]:
prediction_lunch = predict_model(blended_lunch)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,71.5521,9078.7369,95.2824,0.7939,0.1145,0.0861


In [267]:
final_lunch = finalize_model(blended_lunch)

In [268]:
final_prediction_lunch = predict_model(final_lunch, data = test_merge)

In [269]:
submission["중식계"] = final_prediction_lunch["Label"]

In [270]:
from pycaret.regression import *
exp_dinner = setup(train_merge, target = '석식계', 
                   ignore_features = ['중식계'])

Unnamed: 0,Description,Value
0,session_id,793
1,Target,석식계
2,Original Data,"(1206, 561)"
3,Missing Values,False
4,Numeric Features,557
5,Categorical Features,1
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(844, 572)"


In [271]:
best_4_dinner = compare_models(sort = 'MAE', n_select = 4)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,52.3923,5391.8921,72.4445,0.7043,0.4898,0.1273,0.461
gbr,Gradient Boosting Regressor,55.3174,5840.84,75.7338,0.6791,0.6467,0.1327,0.381
lightgbm,Light Gradient Boosting Machine,55.3363,5886.4911,76.257,0.6754,0.5118,0.1349,0.223
rf,Random Forest Regressor,55.9422,6176.6663,77.8333,0.6588,0.4817,0.1364,0.699
lasso,Lasso Regression,58.2889,6017.9754,76.9542,0.6688,0.7254,0.137,0.021
ada,AdaBoost Regressor,65.1512,7328.5151,85.0992,0.5974,0.7887,0.1539,0.165
omp,Orthogonal Matching Pursuit,66.3398,7865.0786,87.8159,0.5696,0.714,0.1555,0.02
en,Elastic Net,67.5438,8128.5458,89.7193,0.552,0.8238,0.1595,0.018
llar,Lasso Least Angle Regression,77.4428,10370.7874,101.3232,0.4449,0.977,0.1741,0.02
dt,Decision Tree Regressor,79.6557,11734.8377,107.7802,0.3509,0.4637,0.19,0.035


In [272]:
blended_dinner = blend_models(estimator_list = best_4_dinner, fold = 5)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,52.0473,4790.2573,69.2117,0.7785,0.7584,0.1434
1,49.2283,4776.4404,69.1118,0.718,0.3819,0.1059
2,52.8764,5203.4255,72.1348,0.7411,0.6029,0.1404
3,58.7946,6801.1445,82.4691,0.6606,0.7125,0.1257
4,57.3905,6907.905,83.1138,0.5995,0.3707,0.1408
Mean,54.0674,5695.8346,75.2082,0.6995,0.5653,0.1313
SD,3.53,959.0253,6.2895,0.063,0.1624,0.0141


In [273]:
prediction_dinner = predict_model(blended_dinner)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,50.4052,4466.7099,66.8334,0.771,0.7478,0.1214


In [274]:
final_dinner = finalize_model(blended_dinner)

In [275]:
final_prediction_dinner = predict_model(final_dinner, data = test_merge)

In [276]:
submission["석식계"] = final_prediction_dinner["Label"]

In [277]:
submission.head(20)

Unnamed: 0,일자,중식계,석식계
0,2021-01-27,905.919532,427.03951
1,2021-01-28,780.890351,474.448908
2,2021-01-29,529.935072,348.885352
3,2021-02-01,1242.7255,544.433086
4,2021-02-02,978.048003,537.721934
5,2021-02-03,976.119536,496.408024
6,2021-02-04,931.965744,519.918156
7,2021-02-05,673.750562,375.913919
8,2021-02-08,1213.420807,547.155117
9,2021-02-09,953.415957,520.741236


In [278]:
submission.to_csv('submission_성동현.csv', index = False, encoding = "UTF-8")

In [279]:
test.head(20)

Unnamed: 0,일자,요일,본사정원수,본사휴가자수,본사출장자수,본사시간외근무명령서승인건수,현본사소속재택근무자수,조식메뉴,중식메뉴,석식메뉴
0,2021-01-27,수,2983,88,182,5,358.0,모닝롤/연유버터베이글 우유/주스 계란후라이/찐계란 단호박죽/흑미밥 우거지국 고기완자...,쌀밥/흑미밥/찰현미밥 대구지리 매운돈갈비찜 오꼬노미계란말이 상추무침 포기김치 양상추...,흑미밥 얼큰순두부찌개 쇠고기우엉볶음 버섯햄볶음 (New)아삭이고추무절임 포기김치
1,2021-01-28,목,2983,104,212,409,348.0,모닝롤/대만샌드위치 우유/주스 계란후라이/찐계란 누룽지탕/흑미밥 황태국 시래기지짐 ...,쌀밥/보리밥/찰현미밥 우렁된장찌개 오리주물럭 청양부추전 수제삼색무쌈 겉절이김치 양상...,충무김밥 우동국물 오징어무침 꽃맛살샐러드 얼갈이쌈장무침 석박지
2,2021-01-29,금,2983,270,249,0,294.0,모닝롤/핫케익 우유/주스 계란후라이/찐계란 오곡죽/흑미밥 매생이굴국 고구마순볶음 양...,쌀밥/흑미밥/찰현미밥 팽이장국 수제돈까스*소스 가자미조림 동초나물무침 포기김치 양상...,흑미밥 물만둣국 카레찜닭 숯불양념꼬지어묵 꼬시래기무침 포기김치
3,2021-02-01,월,2924,108,154,538,322.0,모닝롤/촉촉한치즈케익 우유/주스 계란후라이/찐계란 누룽지탕/흑미밥 두부김칫국 새우완...,쌀밥/흑미밥/찰현미밥 배추들깨국 오리대패불고기 시금치프리타타 부추고추장무침 포기김치...,흑미밥 동태탕 돈육꽈리고추장조림 당면채소무침 모자반무침 포기김치
4,2021-02-02,화,2924,62,186,455,314.0,모닝롤/토마토샌드 우유/주스 계란후라이/찐계란 채소죽/흑미밥 호박맑은국 오이생채 양...,쌀밥/팥밥/찰현미밥 부대찌개 닭살데리야끼조림 버섯탕수 세발나물무침 알타리김치/사과푸...,흑미밥 바지락살국 쇠고기청경채볶음 두부구이*볶은김치 머위된장무침 백김치
5,2021-02-03,수,2924,59,199,5,286.0,모닝롤/게살모닝샌드 우유/주스 계란후라이/찐계란 소고기죽/흑미밥 시래기된장국 베이컨...,쌀밥/흑미밥/찰현미밥 아욱국 매콤해물볶음 감자조림 미나리나물 포기김치 콥샐러드*렌치D,오므라이스 가쓰오장국 빌소세지구이*구운채소 단감치커리무침 양념고추지 겉절이김치
6,2021-02-04,목,2924,61,211,476,288.0,모닝롤/사과파이 우유/주스 계란후라이/찐계란 누룽지탕/흑미밥 아욱국 새송이버섯곤약장...,쌀밥/차조밥/찰현미밥 설렁탕 고등어김치말이찜 볼어묵굴소스볶음 브로콜리숙회*초장 석박...,흑미밥 계란파국 돈육두루치기 감자채파프리카볶음 세발나물오리엔탈무침 포기김치
7,2021-02-05,금,2924,169,252,0,256.0,모닝롤/앙버터모닝빵 우유/주스 계란후라이/찐계란 고구마죽/흑미밥 옹심이국 머위나물무...,쌀밥/흑미밥/찰현미밥 북엇국 닭볶음탕 채소전*장 솎음열무나물무침 포기김치 양상추샐러...,유부초밥/추가밥 온메밀소바 국물떡볶이 순대찜*소금 청경채겉절이 포기김치
8,2021-02-08,월,2924,88,174,690,329.0,모닝롤/스콘 우유/주스 계란후라이/찐계란 누룽지탕/흑미밥 꽃게탕 근대나물무침 연두부...,쌀밥/흑미밥/찰현미밥 감자양파국 돈수육*씨앗쌈장 매콤어묵볶음 콩나물파채무침 포기김치...,흑미밥 냉이국 반반치킨 꼬막채소무침 청경채찜 포기김치
9,2021-02-09,화,2924,94,183,542,329.0,모닝롤/치즈팡샌드 우유/주스 계란후라이/찐계란 팥죽/흑미밥 맑은버섯국 시금치나물무침...,쌀밥/기장밥/찰현미밥 장각백숙 적어양념장구이 채소스틱*쌈장 도라지오이초무침 겉절이김...,흑미밥 미역국 매운소불고기 단호박두부탕수 메추리알장조림 석박지


In [282]:
list(train_merge.columns)

['일자',
 '본사정원수',
 '본사휴가자수',
 '본사출장자수',
 '본사시간외근무명령서승인건수',
 '현본사소속재택근무자수',
 '중식계',
 '석식계',
 '최저기온(°C)',
 '평균기온(°C)',
 '최고기온(°C)',
 '평균 풍속(m/s)',
 '최대 풍속(m/s)',
 '일강수량(mm)',
 '휴일전후날여부',
 '본사실근무자수',
 '요일_금',
 '요일_목',
 '요일_수',
 '요일_월',
 '요일_화',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca0',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca1',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca2',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca3',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca4',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca5',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca6',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca7',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca8',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca9',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca10',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca11',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca12',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca13',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca14',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca15',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca16',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca17',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca18',
 '조식/밥/빵/죽/국수/닝롤/게티/이스/우동_pca19',
 '조식/찌개/탕/국/스프/계장/개장_pca0',
 '조식/찌개/탕/국/스프/계장/개장_pca1',
 '조식/찌개/탕/국/스프/계장/개장_pca2',
 '조식/

In [284]:
from sklearn.linear_model import ElasticNet, Lasso,  BayesianRidge, LassoLarsIC
from sklearn.ensemble import RandomForestRegressor,  GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error
import lightgbm as lgb

#Validation function
n_folds = 5

def rmsle_cv(model):
    kf = KFold(n_folds, shuffle=True, random_state=42).get_n_splits(train.values)
    rmse= np.sqrt(-cross_val_score(model, train.values, y_train, scoring="neg_mean_squared_error", cv = kf))
    return(rmse)

GBoost = GradientBoostingRegressor(n_estimators=3000, learning_rate=0.05,
                                   max_depth=4, max_features='sqrt',
                                   min_samples_leaf=15, min_samples_split=10, 
                                   loss='huber', random_state =5)

In [285]:
score = rmsle_cv(GBoost)
print("Gradient Boosting score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))

NameError: name 'y_train' is not defined