In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping

In [29]:
total = pd.read_csv("../CSV/train_data.csv")
test = pd.read_csv("../CSV/pred_test.csv")

print("total shape : {}, test shape : {}".format(total.shape, test.shape))

total shape : (368088, 12), test shape : (15120, 12)


In [30]:
def nmae(true_df, pred_df):
    target_idx = true_df.iloc[:,0]
    pred_df = pred_df[pred_df.iloc[:,0].isin(target_idx)]
    pred_df = pred_df.sort_values(by=[pred_df.columns[0]], ascending=[True])
    true_df = true_df.sort_values(by=[true_df.columns[0]], ascending=[True])
    
    true = true_df.iloc[:,1].to_numpy()
    pred = pred_df.iloc[:,1].to_numpy()
    
    score = np.mean((np.abs(true-pred))/true)
    
    return score

In [31]:
train_years = [2013,2014,2015,2016,2017]
val_years = [2018]

train = total[total['year'].isin(train_years)]
val = total[total['year'].isin(val_years)]

# "year", "month", "day", "hour", "weekday", "구분_int", "기온(°C)"
features = ["month", "hour", "구분_int", "기온(°C)"]
# features = ["year", "month", "day", "hour", "weekday", "구분_int", "기온(°C)"]
train_x = train[features]
train_y = train['공급량']

val_x = val[features]
val_y = val['공급량']

X = total[features]
y = total["공급량"]

In [32]:
print(train_x.shape, train_y.shape)
print(val_x.shape, val_y.shape)
print(test.shape)

(306768, 4) (306768,)
(61320, 4) (61320,)
(15120, 12)


In [33]:
model = Sequential()
model.add(Dense(units = 16, input_dim = 4, activation = "relu"))
model.add(Dense(32, activation = "relu"))
model.add(Dense(64, activation = "relu"))
model.add(Dense(32, activation = "relu"))
model.add(Dense(16, activation = "relu"))
model.add(Dense(1))

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 16)                80        
                                                                 
 dense_13 (Dense)            (None, 32)                544       
                                                                 
 dense_14 (Dense)            (None, 64)                2112      
                                                                 
 dense_15 (Dense)            (None, 32)                2080      
                                                                 
 dense_16 (Dense)            (None, 16)                528       
                                                                 
 dense_17 (Dense)            (None, 1)                 17        
                                                                 
Total params: 5,361
Trainable params: 5,361
Non-traina

In [34]:
model.compile(loss = "mean_squared_error", 
              optimizer = "rmsprop", 
              metrics = ["accuracy"])

early_stopping = EarlyStopping(patience = 30)

hist = model.fit(train_x, train_y, 
                 epochs = 200, 
                 batch_size = 100, 
                 validation_data = (val_x, val_y), 
                 callbacks = [early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200


Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200


In [35]:
model.evaluate(val_x, val_y)



[32716.486328125, 0.0]

In [36]:
pred = model.predict(val_x)

true_y = pd.DataFrame({"y" : val_y})
pred_y = pd.DataFrame(pred)

true_y.reset_index(drop = True, inplace = True)
true_y.reset_index(inplace = True)
pred_y.reset_index(inplace = True)

print("NMAE : {:.6f}".format(nmae(true_y, pred_y)))

NMAE : 0.528145


In [24]:
pred_y

Unnamed: 0,index,0
0,0,2071.359375
1,1,1840.957764
2,2,1671.306763
3,3,1761.066162
4,4,1912.756104
...,...,...
61315,61315,629.390991
61316,61316,608.711670
61317,61317,592.297058
61318,61318,558.401062


In [25]:
true_y

Unnamed: 0,index,y
0,0,1765.008
1,1,1679.186
2,2,1610.885
3,3,1604.123
4,4,1711.506
...,...,...
61315,61315,681.033
61316,61316,669.961
61317,61317,657.941
61318,61318,610.953


In [26]:
submission = pd.read_csv('../CSV/가스공급량 수요예측 모델개발 data/sample_submission.csv')

In [27]:
test_x = test[features]
preds = model.predict(test_x)
submission['공급량'] = preds
submission.to_csv("../CSV/Submission/19_4_features_DeepLearning_model.csv", index = False)