In [29]:
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

path_gothic = "/usr/share/fonts/truetype/nanum/NanumGothic.ttf"
fontprop = fm.FontProperties(fname=path_gothic, size=20)

In [30]:
path = "../input/predict-meals/"
train = pd.read_csv(path + "train.csv")
test = pd.read_csv(path + "test.csv")
submit = pd.read_csv(path + "sample_submission.csv")

In [31]:
train[["현본사소속재택근무자수", "중식계", "석식계"]] = train[["현본사소속재택근무자수", "중식계", "석식계"]].astype(
    "int"
)
test["현본사소속재택근무자수"] = test["현본사소속재택근무자수"].astype("int")

train["일자"] = pd.to_datetime(train["일자"])
test["일자"] = pd.to_datetime(test["일자"])

train["년"] = train["일자"].dt.year
train["월"] = train["일자"].dt.month
train["일"] = train["일자"].dt.day
train["주"] = train["일자"].dt.week
train["요일"] = train["일자"].dt.weekday
train["출근"] = train["본사정원수"] - (
    train["본사휴가자수"] + train["본사출장자수"] + train["현본사소속재택근무자수"]
)
train["휴가비율"] = train["본사휴가자수"] / train["본사정원수"]
train["출장비율"] = train["본사출장자수"] / train["본사정원수"]
train["야근비율"] = train["본사시간외근무명령서승인건수"] / train["출근"]
train["재택비율"] = train["현본사소속재택근무자수"] / train["본사정원수"]
train["식사가능자수"] = train["본사정원수"] - train["본사휴가자수"] - train["현본사소속재택근무자수"]

test["식사가능자수"] = test["본사정원수"] - test["본사휴가자수"] - test["현본사소속재택근무자수"]
test["년"] = test["일자"].dt.year
test["월"] = test["일자"].dt.month
test["일"] = test["일자"].dt.day
test["주"] = test["일자"].dt.week
test["요일"] = test["일자"].dt.weekday
test["출근"] = test["본사정원수"] - (test["본사휴가자수"] + test["본사출장자수"] + test["현본사소속재택근무자수"])
test["휴가비율"] = test["본사휴가자수"] / test["본사정원수"]
test["출장비율"] = test["본사출장자수"] / test["본사정원수"]
test["야근비율"] = test["본사시간외근무명령서승인건수"] / test["출근"]
test["재택비율"] = test["현본사소속재택근무자수"] / test["본사정원수"]

  train["주"] = train["일자"].dt.week
  test["주"] = test["일자"].dt.week


In [4]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import BayesianRidge
# 대회 규칙
# 평가산식 : MAE(Mean Absolute Error)
lunch_model = RandomForestRegressor(criterion="mae", verbose=True)
dinner_model = BayesianRidge(n_iter=300, verbose=True)

In [5]:
x = train[
    [
        "년",
        "월",
        "일",
        "식사가능자수",
        "주",
        "요일",
        "출근",
        "휴가비율",
        "출장비율",
        "야근비율",
        "재택비율",
    ]
]
y = train["중식계"]
lunch_model.fit(x, y)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   10.5s finished


RandomForestRegressor(criterion='mae', verbose=True)

In [6]:
test_x = test[
    [
        "년",
        "월",
        "일",
        "주",
        "요일",
        "식사가능자수",
        "출근",
        "휴가비율",
        "출장비율",
        "야근비율",
        "재택비율",
    ]
]
y_pred = lunch_model.predict(test_x)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [7]:
submit["중식계"] = y_pred

In [8]:
weekday_rank4dinner = {
    0: 0,
    1: 1,
    2: 4,
    3: 2,
    4: 3,
}

train["요일(석식)"] = train["요일"].map(weekday_rank4dinner)
test["요일(석식)"] = test["요일"].map(weekday_rank4dinner)

In [9]:
x = train[
    [
        "년",
        "월",
        "일",
        "요일(석식)",
        "주",
        "식사가능자수",
        "출근",
        "휴가비율",
        "출장비율",
        "야근비율",
        "재택비율",
    ]
]
y = train["석식계"]
dinner_model.fit(x, y)

Convergence after  11  iterations


BayesianRidge(verbose=True)

In [10]:
test_x = test[
    [
        "년",
        "월",
        "일",
        "요일(석식)",
        "주",
        "식사가능자수",
        "출근",
        "휴가비율",
        "출장비율",
        "야근비율",
        "재택비율",
    ]
]
y_pred = dinner_model.predict(test_x)
submit["석식계"] = y_pred

In [11]:
submit.head()

Unnamed: 0,일자,중식계,석식계
0,2021-01-27,751.26,306.32712
1,2021-01-28,850.17,433.161461
2,2021-01-29,623.17,274.084131
3,2021-02-01,869.31,538.020333
4,2021-02-02,889.18,507.284151


In [13]:
submit

Unnamed: 0,일자,중식계,석식계
0,2021-01-27,751.26,306.32712
1,2021-01-28,850.17,433.161461
2,2021-01-29,623.17,274.084131
3,2021-02-01,869.31,538.020333
4,2021-02-02,889.18,507.284151
5,2021-02-03,770.57,351.758944
6,2021-02-04,887.15,499.320365
7,2021-02-05,671.54,331.240307
8,2021-02-08,877.49,569.251954
9,2021-02-09,868.12,515.371264


In [12]:
submit.to_csv("baseline_submit.csv", index=False)