## Import

In [None]:
import random
import pandas as pd
import numpy as np
import os

from sklearn.ensemble import RandomForestRegressor

import warnings
warnings.filterwarnings(action='ignore') 

## Fix Random Seed

In [2]:
def seed_everything(seed):
    """Seed the random number sources this notebook relies on.

    Applies ``seed`` to Python's ``random`` module and to NumPy's global
    generator, and stores its string form in the ``PYTHONHASHSEED``
    environment variable.

    Args:
        seed: Integer seed value shared by every generator.
    """
    # NOTE(review): setting PYTHONHASHSEED at runtime presumably only affects
    # Python processes started afterwards, not the running kernel — confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for reseed in (random.seed, np.random.seed):
        reseed(seed)


seed_everything(42)  # pin the seed for this run

## Load Data

In [3]:
# Read the training and test tables from CSV files in the working directory.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv')
)

## Train Data Pre-Processing

In [4]:
# Replace every missing value in the training table with 0.
train_df = train_df.fillna(value=0)

In [5]:
# Split the timestamp string into month / day / hour columns so the model can use
# the time-series structure of the data.
# Assumes '일시' holds strings laid out as 'YYYYMMDD HH' (chars 4-5 month,
# 6-7 day, 9-10 hour) — TODO confirm against train.csv.
# Vectorised .str slicing replaces the per-row Python lambdas; the resulting
# integer values are the same.
train_df['month'] = train_df['일시'].str[4:6].astype('int64')
train_df['day'] = train_df['일시'].str[6:8].astype('int64')
train_df['time'] = train_df['일시'].str[9:11].astype('int64')

In [6]:
# Target: the electricity-consumption column.
train_y = train_df['전력소비량(kWh)']

# Features: everything except the row identifier, the raw timestamp string, the
# target itself, and the sunshine-duration / solar-radiation columns.
# NOTE(review): '일조(hr)' and '일사(MJ/m2)' are presumably dropped because
# test.csv does not provide them — confirm.
train_x = train_df.drop(
    columns=['num_date_time', '일시', '일조(hr)', '일사(MJ/m2)', '전력소비량(kWh)'],
)

## Regression Model Fit

In [7]:
# Fit a random-forest regressor with scikit-learn's default hyper-parameters.
# random_state is passed explicitly so the fitted forest does not depend on the
# state of NumPy's global RNG when this cell runs (for example when the cell is
# re-executed without re-running the seeding cell).
model = RandomForestRegressor(random_state=42)
model.fit(train_x, train_y)

RandomForestRegressor()

## Test Data Pre-Processing

In [8]:
# Derive the same month / day / hour columns for the test table as for training.
# Assumes '일시' holds strings laid out as 'YYYYMMDD HH' (chars 4-5 month,
# 6-7 day, 9-10 hour) — TODO confirm against test.csv.
# Vectorised .str slicing replaces the per-row Python lambdas; the resulting
# integer values are the same.
test_df['month'] = test_df['일시'].str[4:6].astype('int64')
test_df['day'] = test_df['일시'].str[6:8].astype('int64')
test_df['time'] = test_df['일시'].str[9:11].astype('int64')

In [9]:
# Build the test feature matrix: drop the row identifier and the raw timestamp
# string, then fill missing values with 0. The training table is imputed with 0
# before fitting, so the test features are imputed the same way to keep inference
# inputs consistent with what the model was trained on.
test_x = test_df.drop(columns=['num_date_time', '일시']).fillna(0)

## Inference

In [10]:
# Run the fitted model over every row of the test feature matrix.
preds = model.predict(X=test_x)

## Submission

In [11]:
# Load the submission template from the working directory and display it.
submission = pd.read_csv(filepath_or_buffer='./sample_submission.csv')
submission

Unnamed: 0,num_date_time,answer
0,1_20220825 00,0
1,1_20220825 01,0
2,1_20220825 02,0
3,1_20220825 03,0
4,1_20220825 04,0
...,...,...
16795,100_20220831 19,0
16796,100_20220831 20,0
16797,100_20220831 21,0
16798,100_20220831 22,0


In [12]:
# Replace the placeholder 'answer' column with the model predictions, then
# display the filled-in table.
submission = submission.assign(answer=preds)
submission

Unnamed: 0,num_date_time,answer
0,1_20220825 00,2127.2688
1,1_20220825 01,2090.5008
2,1_20220825 02,2009.9712
3,1_20220825 03,1981.9440
4,1_20220825 04,1946.7744
...,...,...
16795,100_20220831 19,893.0712
16796,100_20220831 20,784.7448
16797,100_20220831 21,748.2216
16798,100_20220831 22,654.1728


In [13]:
# Write the filled-in submission to disk, leaving out the row index.
submission.to_csv(path_or_buf='./baseline_submission.csv', index=False)