In [1]:
import pandas as pd
import numpy as np
import lightgbm
from tqdm import tqdm
import warnings

import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, LSTM, Dropout, Activation

%matplotlib inline
# Silence warnings: disable pandas' chained-assignment check and all Python warnings.
pd.set_option('mode.chained_assignment', None)
warnings.filterwarnings(action='ignore')

# Directory (relative to the notebook) that holds the public competition data.
path = './data/public_data/'

In [2]:
# Load the training table from the data directory and preview its first five rows.
train = pd.read_csv(path +'train.csv')
train.head(5)

Unnamed: 0,date,요일,배추_거래량(kg),배추_가격(원/kg),무_거래량(kg),무_가격(원/kg),양파_거래량(kg),양파_가격(원/kg),건고추_거래량(kg),건고추_가격(원/kg),...,청상추_거래량(kg),청상추_가격(원/kg),백다다기_거래량(kg),백다다기_가격(원/kg),애호박_거래량(kg),애호박_가격(원/kg),캠벨얼리_거래량(kg),캠벨얼리_가격(원/kg),샤인마스캇_거래량(kg),샤인마스캇_가격(원/kg)
0,2016-01-01,금요일,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2016-01-02,토요일,80860.0,329.0,80272.0,360.0,122787.5,1281.0,3.0,11000.0,...,5125.0,9235.0,434.0,2109.0,19159.0,2414.0,880.0,2014.0,0.0,0.0
2,2016-01-03,일요일,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2016-01-04,월요일,1422742.5,478.0,1699653.7,382.0,2315079.0,1235.0,699.0,4464.0,...,38525.5,7631.0,500702.0,2046.0,620539.0,2018.0,2703.8,3885.0,0.0,0.0
4,2016-01-05,화요일,1167241.0,442.0,1423482.3,422.0,2092960.1,1213.0,1112.6,4342.0,...,32615.0,6926.0,147638.0,2268.0,231958.0,2178.0,8810.0,2853.0,0.0,0.0


In [28]:
def preprocessing(temp_df, pum) :
    """Build the feature/target table for a single item.

    Parameters
    ----------
    temp_df : pandas.DataFrame
        Frame containing the columns ``date``, ``{pum}_거래량(kg)`` (traded
        volume) and ``{pum}_가격(원/kg)`` (price). The caller's frame is not
        modified.
    pum : str
        Item name used as the prefix of the volume and price columns.

    Returns
    -------
    pandas.DataFrame
        Only the rows whose price and volume are both positive, re-indexed
        from 0, with ``date`` removed. Columns, in order: volume, price,
        ``p_ma5/10/20``, ``q_ma5/10/20``, ``p_lag_1, q_lag_1, ..., p_lag_5,
        q_lag_5`` and finally the targets ``1_week``, ``2_week``, ``4_week``.

    Notes
    -----
    * Moving averages are 0 until a full window is available; lag columns
      hold -1 where no earlier row exists; targets hold 0 where no later row
      exists.
    * Target columns take the dtype of the price column, so fractional prices
      are kept as-is.
    * NOTE(review): lags and targets are offsets in *remaining rows*, not
      calendar days, because non-trading days are removed first. ``1_week``
      is therefore the price 7 trading rows ahead - confirm this is intended.
    """
    price_col = f'{pum}_가격(원/kg)'
    volume_col = f'{pum}_거래량(kg)'

    # Drop days without any trading; reset_index yields a fresh frame, so the
    # column assignments below never touch the caller's data.
    traded = (temp_df[price_col] > 0) & (temp_df[volume_col] > 0)
    temp_df = temp_df[traded].reset_index(drop = True)

    price = temp_df[price_col]
    volume = temp_df[volume_col]

    # Moving averages over 5, 10 and 20 rows (price first, then volume).
    for prefix, series in (('p', price), ('q', volume)) :
        for window in (5, 10, 20) :
            temp_df[f'{prefix}_ma{window}'] = series.rolling(window=window).mean().fillna(0)

    # Price / volume of the previous 1..5 rows; -1 marks "no earlier row".
    for lag in range(1,6) :
        temp_df[f'p_lag_{lag}'] = price.shift(lag, fill_value=-1)
        temp_df[f'q_lag_{lag}'] = volume.shift(lag, fill_value=-1)

    # Prediction targets: price 7, 14 and 28 rows ahead; 0 marks "no later row".
    for week in ['1_week','2_week','4_week'] :
        n_week = int(week[0])
        temp_df[week] = price.shift(-7 * n_week, fill_value=0)

    # The date column is not a model feature.
    temp_df = temp_df.drop(['date'], axis=1)

    return temp_df

In [29]:
# Example use of the preprocessing function on a single item.
pum = '배추'
temp_df = train[['date',f'{pum}_거래량(kg)', f'{pum}_가격(원/kg)']]
pp = preprocessing(temp_df, pum)

In [31]:
# Inspect the last 30 preprocessed rows, where the forward-looking targets run out of data.
pp.tail(30)

Unnamed: 0,배추_거래량(kg),배추_가격(원/kg),p_ma5,p_ma10,p_ma20,q_ma5,q_ma10,q_ma20,p_lag_1,q_lag_1,...,q_lag_2,p_lag_3,q_lag_3,p_lag_4,q_lag_4,p_lag_5,q_lag_5,1_week,2_week,4_week
1450,1020033.2,1561.0,1457.8,1385.6,1244.9,1007047.9,1013679.31,1040226.37,1373.0,1124756.3,...,1119462.3,1538.0,1081785.4,1401.0,689202.3,1290.0,965833.7,1358,1748,3066
1451,763266.0,1564.0,1490.4,1393.3,1272.55,1021860.64,989366.01,1021159.44,1561.0,1020033.2,...,1124756.3,1416.0,1119462.3,1538.0,1081785.4,1401.0,689202.3,1329,2042,1867
1452,760499.0,1476.0,1478.0,1410.4,1299.4,957603.36,958628.44,1002757.87,1564.0,763266.0,...,1020033.2,1373.0,1124756.3,1416.0,1119462.3,1538.0,1081785.4,1614,2017,0
1453,1441152.8,1133.0,1421.4,1405.5,1300.8,1021941.46,992168.42,1030357.39,1476.0,760499.0,...,763266.0,1561.0,1020033.2,1373.0,1124756.3,1416.0,1119462.3,1994,1939,0
1454,1279591.6,1093.0,1365.4,1384.5,1302.45,1052908.52,1024558.26,1044450.16,1133.0,1441152.8,...,760499.0,1564.0,763266.0,1561.0,1020033.2,1373.0,1124756.3,1585,1983,0
1455,1144746.8,1150.0,1283.2,1370.5,1309.35,1077851.24,1042449.57,1036374.48,1093.0,1279591.6,...,1441152.8,1476.0,760499.0,1564.0,763266.0,1561.0,1020033.2,1542,1839,0
1456,895628.0,1445.0,1259.4,1374.9,1325.25,1104323.64,1063092.14,1025506.785,1150.0,1144746.8,...,1279591.6,1133.0,1441152.8,1476.0,760499.0,1564.0,763266.0,1576,1812,0
1457,698187.5,1358.0,1235.8,1356.9,1338.1,1091861.34,1024732.35,1019214.04,1445.0,895628.0,...,1144746.8,1093.0,1279591.6,1133.0,1441152.8,1476.0,760499.0,1748,2925,0
1458,1104424.8,1329.0,1275.0,1348.2,1344.8,1024515.74,1023228.6,1015096.78,1358.0,698187.5,...,895628.0,1150.0,1144746.8,1093.0,1279591.6,1133.0,1441152.8,2042,1813,0
1459,975020.2,1614.0,1379.2,1372.3,1364.85,963601.46,1008254.99,1006309.11,1329.0,1104424.8,...,698187.5,1445.0,895628.0,1150.0,1144746.8,1093.0,1279591.6,2017,1838,0


In [4]:
def nmae(week_answer, week_submission):
    """Normalised mean absolute error over the non-zero targets.

    Parameters
    ----------
    week_answer : array-like
        True values. Entries equal to 0 are treated as "no target" and
        excluded from the score.
    week_submission : array-like
        Predicted values, positionally aligned with ``week_answer``.

    Returns
    -------
    float
        ``mean(|true - pred| / true)`` over the positions where the true
        value is non-zero, or NaN when there is no such position.
    """
    # Coerce to ndarrays so lists and pandas Series (whatever their index)
    # are indexed by position, exactly like plain numpy inputs.
    answer = np.asarray(week_answer, dtype=float)
    submission = np.asarray(week_submission, dtype=float)

    target_idx = np.where(answer!=0)
    true = answer[target_idx]
    pred = submission[target_idx]

    # Nothing to score: return NaN explicitly instead of a mean-of-empty warning.
    if true.size == 0:
        return np.nan

    score = np.mean(np.abs(true-pred)/true)

    return score


def at_nmae(pred, dataset):
    """Average the NMAE of three interleaved prediction horizons.

    ``dataset.get_label()`` and ``pred`` are read with a stride of 3:
    positions 0, 3, 6, ... are scored as the 1-week horizon, 1, 4, 7, ...
    as the 2-week horizon and 2, 5, 8, ... as the 4-week horizon.

    Returns the tuple ``('score', value, False)``.
    NOTE(review): this looks like a LightGBM custom-metric signature
    (name, value, is_higher_better) - confirm against the caller.
    """
    labels = dataset.get_label()

    # One NMAE per horizon, in the order 1 week, 2 weeks, 4 weeks.
    horizon_scores = [
        nmae(labels[offset::3], pred[offset::3])
        for offset in range(3)
    ]

    score = (horizon_scores[0] + horizon_scores[1] + horizon_scores[2]) / 3

    return 'score', score, False

In [5]:
# Item names. Each name is used as the prefix of that item's
# `{name}_거래량(kg)` (volume) and `{name}_가격(원/kg)` (price) columns.
unique_pum = [
    '배추', '무', '양파', '건고추','마늘',
    '대파', '얼갈이배추', '양배추', '깻잎',
    '시금치', '미나리', '당근',
    '파프리카', '새송이', '팽이버섯', '토마토',
]

# Variety names, used as column prefixes in the same way as the items above.
unique_kind = [
    '청상추', '백다다기', '애호박', '캠벨얼리', '샤인마스캇'
]

In [6]:
def make_dataset(data, label, window_size=4):
    """Slice a table into fixed-length windows paired with the following label.

    Parameters
    ----------
    data : object supporting ``len`` and ``.iloc`` slicing (e.g. DataFrame)
        Feature rows.
    label : object supporting ``.iloc`` (e.g. Series or DataFrame)
        Labels, positionally aligned with ``data``.
    window_size : int, default 4
        Number of consecutive rows in each window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Window ``s`` holds rows ``s .. s + window_size - 1`` of ``data`` and
        is paired with ``label`` at position ``s + window_size``. Both arrays
        are empty when ``data`` has no more than ``window_size`` rows.
    """
    starts = range(len(data) - window_size)
    windows = [np.array(data.iloc[s:s + window_size]) for s in starts]
    targets = [np.array(label.iloc[s + window_size]) for s in starts]
    return np.array(windows), np.array(targets)

In [35]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Fitted models and feature scalers, keyed by '{item}_model_{weeks}'.
model_dict = {}
ts = 28
data_dim = 18
scaler_dict = {}

for pum in tqdm(unique_pum + unique_kind):
    # Per-item / per-variety preprocessing. The slice drops the first 20 and
    # the last 28 preprocessed rows before training.
    item_columns = ['date', f'{pum}_거래량(kg)', f'{pum}_가격(원/kg)']
    temp_df = preprocessing(train[item_columns], pum).iloc[20:-28,:]

    # One model per forecast horizon (1, 2 and 4 weeks).
    for week_num in [1,2,4] :
        model_key = f'{pum}_model_{week_num}'

        # Keep only rows that have a positive target for this horizon;
        # the last three columns are the targets, everything before is a feature.
        has_target = temp_df[f'{week_num}_week'] > 0
        labelled = temp_df[has_target]
        x = labelled.iloc[:,:-3]
        y = labelled[f'{week_num}_week']

        # NOTE(review): the scaler is fit on every row, including the rows
        # that end up in the validation split below.
        scaler = StandardScaler()
        X_scaler = scaler.fit(x)
        scaler_dict[model_key] = X_scaler
        x = pd.DataFrame(X_scaler.transform(x))

        X, Y = make_dataset(x, y, 4)

        # Chronological train / validation split (no shuffling).
        x_train, x_valid, y_train, y_valid = train_test_split(
            X, Y, test_size=0.2, shuffle = False, stratify = None
        )
        print(x_train.shape, x_valid.shape, y_train.shape, y_valid.shape)

        window_shape = (X.shape[1], X.shape[2])
        layers = [
            LSTM(128, input_shape=window_shape, activation='relu', return_sequences=True),
            Dense(128),
            LSTM(128, input_shape=window_shape, activation='relu'),
            Dense(1),
        ]
        model = tf.keras.models.Sequential(layers)

        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(
            x_train, y_train,
            epochs = 10,
            batch_size = 7,
            validation_data = (x_valid, y_valid),
        )
        model_dict[model_key] = model

  0%|                                                                                           | 0/21 [00:00<?, ?it/s]

(1142, 4, 18) (286, 4, 18) (1142,) (286,)
Train on 1142 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1142, 4, 18) (286, 4, 18) (1142,) (286,)
Train on 1142 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1142, 4, 18) (286, 4, 18) (1142,) (286,)
Train on 1142 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  5%|███▉                                                                              | 1/21 [02:02<40:47, 122.39s/it]

(1141, 4, 18) (286, 4, 18) (1141,) (286,)
Train on 1141 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1141, 4, 18) (286, 4, 18) (1141,) (286,)
Train on 1141 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1141, 4, 18) (286, 4, 18) (1141,) (286,)
Train on 1141 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 10%|███████▊                                                                          | 2/21 [03:52<36:30, 115.28s/it]

(1140, 4, 18) (286, 4, 18) (1140,) (286,)
Train on 1140 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1140, 4, 18) (286, 4, 18) (1140,) (286,)
Train on 1140 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1140, 4, 18) (286, 4, 18) (1140,) (286,)
Train on 1140 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 14%|███████████▋                                                                      | 3/21 [05:52<35:09, 117.20s/it]

(1125, 4, 18) (282, 4, 18) (1125,) (282,)
Train on 1125 samples, validate on 282 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1125, 4, 18) (282, 4, 18) (1125,) (282,)
Train on 1125 samples, validate on 282 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1125, 4, 18) (282, 4, 18) (1125,) (282,)
Train on 1125 samples, validate on 282 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 19%|███████████████▌                                                                  | 4/21 [07:47<32:57, 116.33s/it]

(1132, 4, 18) (284, 4, 18) (1132,) (284,)
Train on 1132 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1132, 4, 18) (284, 4, 18) (1132,) (284,)
Train on 1132 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1132, 4, 18) (284, 4, 18) (1132,) (284,)
Train on 1132 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 24%|███████████████████▌                                                              | 5/21 [09:44<31:08, 116.81s/it]

(1141, 4, 18) (286, 4, 18) (1141,) (286,)
Train on 1141 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1141, 4, 18) (286, 4, 18) (1141,) (286,)
Train on 1141 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1141, 4, 18) (286, 4, 18) (1141,) (286,)
Train on 1141 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 29%|███████████████████████▍                                                          | 6/21 [11:31<28:19, 113.29s/it]

(1136, 4, 18) (284, 4, 18) (1136,) (284,)
Train on 1136 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1136, 4, 18) (284, 4, 18) (1136,) (284,)
Train on 1136 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1136, 4, 18) (284, 4, 18) (1136,) (284,)
Train on 1136 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 33%|███████████████████████████▎                                                      | 7/21 [13:27<26:41, 114.38s/it]

(1138, 4, 18) (285, 4, 18) (1138,) (285,)
Train on 1138 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1138, 4, 18) (285, 4, 18) (1138,) (285,)
Train on 1138 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1138, 4, 18) (285, 4, 18) (1138,) (285,)
Train on 1138 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 38%|███████████████████████████████▏                                                  | 8/21 [15:24<24:54, 114.99s/it]

(1141, 4, 18) (286, 4, 18) (1141,) (286,)
Train on 1141 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1141, 4, 18) (286, 4, 18) (1141,) (286,)
Train on 1141 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1141, 4, 18) (286, 4, 18) (1141,) (286,)
Train on 1141 samples, validate on 286 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 43%|███████████████████████████████████▏                                              | 9/21 [17:07<22:15, 111.28s/it]

(1139, 4, 18) (285, 4, 18) (1139,) (285,)
Train on 1139 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1139, 4, 18) (285, 4, 18) (1139,) (285,)
Train on 1139 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1139, 4, 18) (285, 4, 18) (1139,) (285,)
Train on 1139 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 48%|██████████████████████████████████████▌                                          | 10/21 [19:07<20:53, 113.93s/it]

(1136, 4, 18) (285, 4, 18) (1136,) (285,)
Train on 1136 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1136, 4, 18) (285, 4, 18) (1136,) (285,)
Train on 1136 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1136, 4, 18) (285, 4, 18) (1136,) (285,)
Train on 1136 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 52%|██████████████████████████████████████████▍                                      | 11/21 [21:02<19:02, 114.23s/it]

(1135, 4, 18) (284, 4, 18) (1135,) (284,)
Train on 1135 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1135, 4, 18) (284, 4, 18) (1135,) (284,)
Train on 1135 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1135, 4, 18) (284, 4, 18) (1135,) (284,)
Train on 1135 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 57%|██████████████████████████████████████████████▎                                  | 12/21 [22:57<17:10, 114.46s/it]

(1132, 4, 18) (284, 4, 18) (1132,) (284,)
Train on 1132 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1132, 4, 18) (284, 4, 18) (1132,) (284,)
Train on 1132 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1132, 4, 18) (284, 4, 18) (1132,) (284,)
Train on 1132 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 62%|██████████████████████████████████████████████████▏                              | 13/21 [24:49<15:11, 113.98s/it]

(1138, 4, 18) (285, 4, 18) (1138,) (285,)
Train on 1138 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1138, 4, 18) (285, 4, 18) (1138,) (285,)
Train on 1138 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1138, 4, 18) (285, 4, 18) (1138,) (285,)
Train on 1138 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 67%|██████████████████████████████████████████████████████                           | 14/21 [26:43<13:17, 113.87s/it]

(1132, 4, 18) (284, 4, 18) (1132,) (284,)
Train on 1132 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1132, 4, 18) (284, 4, 18) (1132,) (284,)
Train on 1132 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1132, 4, 18) (284, 4, 18) (1132,) (284,)
Train on 1132 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 71%|█████████████████████████████████████████████████████████▊                       | 15/21 [28:40<11:28, 114.83s/it]

(1138, 4, 18) (285, 4, 18) (1138,) (285,)
Train on 1138 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1138, 4, 18) (285, 4, 18) (1138,) (285,)
Train on 1138 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1138, 4, 18) (285, 4, 18) (1138,) (285,)
Train on 1138 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 76%|█████████████████████████████████████████████████████████████▋                   | 16/21 [30:41<09:42, 116.50s/it]

(1137, 4, 18) (285, 4, 18) (1137,) (285,)
Train on 1137 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1137, 4, 18) (285, 4, 18) (1137,) (285,)
Train on 1137 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1137, 4, 18) (285, 4, 18) (1137,) (285,)
Train on 1137 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 81%|█████████████████████████████████████████████████████████████████▌               | 17/21 [32:39<07:48, 117.11s/it]

(1134, 4, 18) (284, 4, 18) (1134,) (284,)
Train on 1134 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1134, 4, 18) (284, 4, 18) (1134,) (284,)
Train on 1134 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1134, 4, 18) (284, 4, 18) (1134,) (284,)
Train on 1134 samples, validate on 284 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 86%|█████████████████████████████████████████████████████████████████████▍           | 18/21 [34:21<05:37, 112.57s/it]

(1138, 4, 18) (285, 4, 18) (1138,) (285,)
Train on 1138 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1138, 4, 18) (285, 4, 18) (1138,) (285,)
Train on 1138 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1138, 4, 18) (285, 4, 18) (1138,) (285,)
Train on 1138 samples, validate on 285 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 90%|█████████████████████████████████████████████████████████████████████████▎       | 19/21 [36:17<03:47, 113.58s/it]

(1100, 4, 18) (276, 4, 18) (1100,) (276,)
Train on 1100 samples, validate on 276 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1100, 4, 18) (276, 4, 18) (1100,) (276,)
Train on 1100 samples, validate on 276 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(1100, 4, 18) (276, 4, 18) (1100,) (276,)
Train on 1100 samples, validate on 276 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 95%|█████████████████████████████████████████████████████████████████████████████▏   | 20/21 [38:13<01:54, 114.25s/it]

(696, 4, 18) (174, 4, 18) (696,) (174,)
Train on 696 samples, validate on 174 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(696, 4, 18) (174, 4, 18) (696,) (174,)
Train on 696 samples, validate on 174 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(696, 4, 18) (174, 4, 18) (696,) (174,)
Train on 696 samples, validate on 174 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


100%|█████████████████████████████████████████████████████████████████████████████████| 21/21 [39:28<00:00, 112.77s/it]


In [36]:
submission = pd.read_csv('./data/sample_submission.csv')
# Base dates of the 2020 rows: the part of '예측대상일자' before the '+'.
public_date_list = submission[submission['예측대상일자'].str.contains('2020')]['예측대상일자'].str.split('+').str[0].unique()
# ['2020-09-29', ...]

for date in tqdm(public_date_list) :
    test = pd.read_csv(f'./data/public_data/test_files/test_{date}.csv')

    # The combined history does not depend on the item, so build it once per
    # date instead of once per (date, item).
    base_row = pd.DataFrame([{'date' : date}]) # forecast base date
    combined = pd.concat([train, test, base_row], sort=False).reset_index(drop=True)

    for pum in unique_pum + unique_kind:
        # Preprocess the most recent 28 rows for this item.
        # NOTE(review): the appended base-date row has no price/volume, becomes
        # 0 after fillna(0) and is then removed by preprocessing's
        # "positive price and volume" filter - confirm this is intended.
        alldata = combined[['date', f'{pum}_거래량(kg)', f'{pum}_가격(원/kg)']].fillna(0)
        alldata = alldata.iloc[-28:].reset_index(drop=True)
        alldata = preprocessing(alldata, pum)
        temp_test = alldata

        # Last three columns are the targets; everything before is a feature.
        x = temp_test.iloc[:,:-3]
        y = temp_test.iloc[:,-3:]

        # Predict the price 1, 2 and 4 weeks ahead with the per-horizon models.
        for week_num in [1,2,4] :
            sc = scaler_dict[f'{pum}_model_{week_num}']
            # Scale into a separate variable: overwriting `x` here would feed
            # already-scaled features to the next horizon's scaler.
            x_scaled = pd.DataFrame(sc.transform(x))
            X, Y = make_dataset(x_scaled, y, 4)

            temp_model = model_dict[f'{pum}_model_{week_num}']
            result = temp_model.predict(X)

            condition = (submission['예측대상일자']==f'{date}+{week_num}week')
            idx = submission[condition].index
            # NOTE(review): result[0] is the prediction for the OLDEST window
            # in X; confirm whether the most recent window (result[-1]) was meant.
            submission.loc[idx, f'{pum}_가격(원/kg)'] = result[0]

100%|██████████████████████████████████████████████████████████████████████████████████| 38/38 [03:19<00:00,  5.24s/it]


In [37]:
# Write the filled-in submission table to CSV, omitting the index column.
submission.to_csv('baseline_1028.csv',index=False)