In [1]:
from typing import Tuple

import pandas as pd


def load_dataset(path: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    train = pd.read_csv(path + "train.csv")
    test = pd.read_csv(path + "test.csv")

    drops = ["조식메뉴", "중식메뉴", "석식메뉴"]

    train = train.drop(drops, axis=1)
    test = test.drop(drops, axis=1)

    train["월"] = pd.DatetimeIndex(train["일자"]).month
    test["월"] = pd.DatetimeIndex(test["일자"]).month

    train["일"] = pd.DatetimeIndex(train["일자"]).day
    test["일"] = pd.DatetimeIndex(test["일자"]).day

    weekday = {"월": 1, "화": 2, "수": 3, "목": 4, "금": 5}

    train["요일"] = train["요일"].map(weekday)
    test["요일"] = test["요일"].map(weekday)

    train["식사가능자수"] = train["본사정원수"] - train["본사휴가자수"] - train["현본사소속재택근무자수"]
    test["식사가능자수"] = test["본사정원수"] - test["본사휴가자수"] - test["현본사소속재택근무자수"]

    train["중식참여율"] = train["중식계"] / train["식사가능자수"]
    train["석식참여율"] = train["석식계"] / train["식사가능자수"]

    features = ["월", "일", "요일", "식사가능자수", "본사출장자수", "본사시간외근무명령서승인건수"]
    labels = ["중식계", "석식계", "중식참여율", "석식참여율"]

    train = train[features + labels]
    test = test[features]

    # 요일을 석식 rank에 맞춰 mapping한 요일(석식) 칼럼 만들기.

    weekday_rank4dinner = {
        1: 1,
        2: 2,
        3: 5,
        4: 3,
        5: 4,
    }

    train["요일(석식)"] = train["요일"].map(weekday_rank4dinner)
    test["요일(석식)"] = test["요일"].map(weekday_rank4dinner)

    return train, test


In [2]:
import warnings

import neptune.new as neptune
import pandas as pd
from neptune.new.integrations.xgboost import NeptuneCallback
from xgboost import XGBRegressor

warnings.filterwarnings("ignore")

In [3]:
from sklearn.model_selection import train_test_split

train, test = load_dataset("../input/predict-meals/")


X_lunch = train[["월", "일", "요일", "식사가능자수", "본사출장자수", "본사시간외근무명령서승인건수"]]
y_lunch = train["중식계"]
X_test = test[["월", "일", "요일", "식사가능자수", "본사출장자수", "본사시간외근무명령서승인건수"]]

x_train, x_valid, y_train, y_valid = train_test_split(
    X_lunch, y_lunch, test_size=0.15, random_state=42
)

In [4]:
run = neptune.init(
    project="ds-wook/predict-meals",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJmM2QyYmRjMy1mYmU5LTQ3ZmEtYTg0YS1hNWFhZmE1ZmVmZTQifQ==",
)
neptune_callback = NeptuneCallback(run=run, log_tree=[0, 1, 2, 3])

Info (NVML): NVML Shared Library Not Found. GPU usage metrics may not be reported. For more information, see https://docs-legacy.neptune.ai/logging-and-managing-experiment-results/logging-experiment-data.html#hardware-consumption 


https://app.neptune.ai/ds-wook/predict-meals/e/PRED-7
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [5]:
lunch_params = {
    "learning_rate": 0.09,
    "eval_metric": "mae",
}

lunch_model = XGBRegressor(**lunch_params)
lunch_model.fit(
    x_train,
    y_train,
    eval_set=[(x_train, y_train), (x_valid, y_valid)],
    early_stopping_rounds=100,
    verbose=100,
    callbacks=[neptune_callback],
)

[0]	validation_0-mae:811.55786	validation_1-mae:802.60211
[99]	validation_0-mae:36.03213	validation_1-mae:76.14484


XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, eval_metric='mae', gamma=0,
             gpu_id=-1, importance_type='gain', interaction_constraints='',
             learning_rate=0.09, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=8, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [6]:
X_dinner = train[["월", "일", "요일(석식)", "식사가능자수", "본사출장자수", "본사시간외근무명령서승인건수"]]
y_dinner = train["석식계"]
run = neptune.init(
    project="ds-wook/predict-meals",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJmM2QyYmRjMy1mYmU5LTQ3ZmEtYTg0YS1hNWFhZmE1ZmVmZTQifQ==",
)
neptune_callback = NeptuneCallback(run=run, log_tree=[0, 1, 2, 3])

https://app.neptune.ai/ds-wook/predict-meals/e/PRED-8
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [7]:
x_train, x_valid, y_train, y_valid = train_test_split(
    X_dinner, y_dinner, test_size=0.15, random_state=42
)

dinner_params = {
    "learning_rate": 0.089,
    "eval_metric": "mae",
}

dinner_model = XGBRegressor(**dinner_params)
dinner_model.fit(
    x_train,
    y_train,
    eval_set=[(x_train, y_train), (x_valid, y_valid)],
    early_stopping_rounds=100,
    verbose=100,
    callbacks=[neptune_callback],
)

[0]	validation_0-mae:421.87366	validation_1-mae:418.33295
[99]	validation_0-mae:24.93787	validation_1-mae:57.77809


XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, eval_metric='mae', gamma=0,
             gpu_id=-1, importance_type='gain', interaction_constraints='',
             learning_rate=0.089, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=8, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)