In [53]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.callbacks import EarlyStopping

In [54]:
total = pd.read_csv("../CSV/train_data.csv")
test = pd.read_csv("../CSV/pred_test.csv")

print("total shape : {}, test shape : {}".format(total.shape, test.shape))

total shape : (368088, 12), test shape : (15120, 12)


In [55]:
def nmae(true_df, pred_df):
    target_idx = true_df.iloc[:,0]
    pred_df = pred_df[pred_df.iloc[:,0].isin(target_idx)]
    pred_df = pred_df.sort_values(by=[pred_df.columns[0]], ascending=[True])
    true_df = true_df.sort_values(by=[true_df.columns[0]], ascending=[True])
    
    true = true_df.iloc[:,1].to_numpy()
    pred = pred_df.iloc[:,1].to_numpy()
    
    score = np.mean((np.abs(true-pred))/true)
    
    return score

In [56]:
train_years = [2013,2014,2015,2016,2017]
val_years = [2018]

train = total[total['year'].isin(train_years)]
val = total[total['year'].isin(val_years)]

# "year", "month", "day", "hour", "weekday", "구분_int", "기온(°C)"
features = ["month", "hour", "구분_int", "기온(°C)"]
# features = ["year", "month", "day", "hour", "weekday", "구분_int", "기온(°C)"]
train_x = train[features]
train_y = train['공급량']

val_x = val[features]
val_y = val['공급량']

X = total[features]
y = total["공급량"]

In [57]:
print(train_x.shape, train_y.shape)
print(val_x.shape, val_y.shape)
print(test.shape)

(306768, 4) (306768,)
(61320, 4) (61320,)
(15120, 12)


In [58]:
model = Sequential()
model.add(Dense(units = 16, input_dim = 4, activation = "relu"))
model.add(Dense(32, activation = "relu"))
model.add(Dense(64, activation = "relu"))
model.add(Dense(32, activation = "relu"))
model.add(Dense(16, activation = "relu"))
model.add(Dense(1))

model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_24 (Dense)            (None, 16)                80        
                                                                 
 dense_25 (Dense)            (None, 32)                544       
                                                                 
 dense_26 (Dense)            (None, 64)                2112      
                                                                 
 dense_27 (Dense)            (None, 32)                2080      
                                                                 
 dense_28 (Dense)            (None, 16)                528       
                                                                 
 dense_29 (Dense)            (None, 1)                 17        
                                                                 
Total params: 5,361
Trainable params: 5,361
Non-traina

In [59]:
model.compile(loss = "mean_squared_error", 
              optimizer = "rmsprop", 
              metrics = ["accuracy"])

early_stopping = EarlyStopping(patience = 30)

hist = model.fit(train_x, train_y, 
                 epochs = 100, 
                 batch_size = 10, 
                 validation_data = (val_x, val_y), 
                 callbacks = [early_stopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100


Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


In [60]:
model.evaluate(val_x, val_y)



[32059.9921875, 0.0]

In [61]:
pred = model.predict(val_x)

true_y = pd.DataFrame({"y" : val_y})
pred_y = pd.DataFrame(pred)

true_y.reset_index(drop = True, inplace = True)
true_y.reset_index(inplace = True)
pred_y.reset_index(inplace = True)

print("NMAE : {:.6f}".format(nmae(true_y, pred_y)))

NMAE : 0.486311


In [63]:
pred_y

Unnamed: 0,index,0
0,0,1972.101318
1,1,1756.432861
2,2,1608.161865
3,3,1694.288208
4,4,1804.988403
...,...,...
61315,61315,649.565613
61316,61316,657.562561
61317,61317,634.136536
61318,61318,610.956726


In [64]:
true_y

Unnamed: 0,y
0,1765.008
1,1679.186
2,1610.885
3,1604.123
4,1711.506
...,...
61315,681.033
61316,669.961
61317,657.941
61318,610.953


In [65]:
submission = pd.read_csv('../CSV/가스공급량 수요예측 모델개발 data/sample_submission.csv')

In [66]:
test_x = test[features]
preds = model.predict(test_x)
submission['공급량'] = preds
submission.to_csv("../CSV/Submission/12_4_features_DeepLearning_model.csv", index = False)