In [None]:
import warnings
# Install a global "ignore" filter: every warning raised after this point is suppressed.
# NOTE(review): this also hides deprecation notices from the libraries imported below.
warnings.filterwarnings(action='ignore')

In [None]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder, RobustScaler, MinMaxScaler
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score, recall_score, roc_curve, precision_score, f1_score, auc
import matplotlib.pyplot as plt
from datetime import datetime
import tensorflow as tf
import keras
import keras.backend as K
from keras import metrics
from tensorflow.keras.models import Sequential, Model,load_model
from tensorflow.keras.layers import Permute,multiply,Add,Multiply,BatchNormalization,Dropout, Conv1D, Input, Flatten, Bidirectional, MaxPooling1D, Activation, Flatten, Dense, Dropout, BatchNormalization, LSTM, TimeDistributed, SpatialDropout1D, GaussianNoise
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers.merge import concatenate
from sklearn.metrics import mean_absolute_error

# 데이터 불러오기

In [None]:
# Read the training CSV into a DataFrame.
train=pd.read_csv('/content/drive/MyDrive/dacon/solar/train/train.csv')

In [None]:
# Load the 81 test files (0.csv ... 80.csv) into the module-level variables
# test_0 ... test_80. Assigning through globals() creates exactly the names
# the previous exec() call created, without assembling and executing Python
# source code from strings.
for i in range(0,81):
    globals()['test_%d'%i] = pd.read_csv('/content/drive/MyDrive/dacon/solar/test/%d.csv'%i)

In [None]:
# Features: drop Day/Hour/Minute/WS/RH and keep every other column of train
X_train = train.drop(columns=['Day','Hour','Minute','WS','RH'])
# Labels: TARGET kept as a one-column DataFrame (note the double brackets)
y_train = train[['TARGET']]

# 데이터 전처리

In [None]:
# Scaling: fit a RobustScaler on the feature frame and overwrite its columns
# with the scaled values. The fitted `scaling` object is kept so the same
# transform can be applied to other frames later.
# NOTE(review): X_train is mutated in place, so re-running this cell re-fits
# the scaler on already-scaled data.
scaling=RobustScaler()
X_train[X_train.columns]=scaling.fit_transform(X_train)

In [None]:
# Split the data into X (7 days of history), y1 (day-1 targets) and y2 (day-2 targets)
def multivariate_split(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False, day_size=48):
  """Cut a time-ordered frame into sliding input windows and their future labels.

  Rows are selected with ``.loc``, i.e. by index *label*, and ``.loc`` slices
  include both end points. ``dataset`` and ``target`` are therefore expected
  to share an integer index whose labels equal the row positions (such as the
  default RangeIndex).

  Args:
    dataset: DataFrame of input features.
    target: DataFrame of label columns, aligned row-for-row with ``dataset``.
    start_index: first row that may be used as history; the first window ends
      ``history_size`` rows after it.
    end_index: exclusive upper bound for the window end ``i``. ``None`` means
      ``len(dataset) - target_size``.
    history_size: number of rows spanned by each input window.
    target_size: number of future rows reserved for labels after each window.
    step: stride used when sampling rows inside the history window.
    single_step: when True, only the single row at ``i + target_size - 1`` is
      used as the label of each window.
    day_size: offset (in rows) at which the second label window starts. The
      default of 48 reproduces the previous hard-coded split.

  Returns:
    Tuple ``(data, labels1, labels2)`` of numpy arrays. In multi-step mode
    ``labels1`` covers rows ``i .. i+target_size-1-day_size`` and ``labels2``
    covers rows ``i+day_size .. i+target_size-1``. In single-step mode
    ``labels1`` holds the single-row labels and ``labels2`` is empty.
  """
  data = []
  labels1 = []
  labels2 = []

  start_index = start_index + history_size
  if end_index is None:
    end_index = len(dataset) - target_size

  for i in tqdm(range(start_index, end_index)):
    # history window: the rows strictly before i
    indices = range(i-history_size, i, step)
    data.append(dataset.loc[indices])

    if single_step:
      # previously appended to an undefined name `labels` (NameError)
      labels1.append(target.loc[i+target_size-1,:])
    else:
      labels1.append(target.loc[i:i+target_size-1-day_size,:])
      labels2.append(target.loc[i+day_size:i+target_size-1,:])

  return np.array(data), np.array(labels1), np.array(labels2)

In [None]:
# Window configuration: 336 input rows, 96 label rows, no sub-sampling inside a window
history_size = 336
target_size = 96
step = 1

# Build the input windows and both label windows in a single pass.
# The upper bound leaves target_size (= 96) rows free for the labels of the last window.
X_train1, y_train1, y_train2 = multivariate_split(
    dataset=X_train,
    target=y_train,
    start_index=0,
    end_index=len(X_train)-target_size,
    history_size=history_size,
    target_size=target_size,
    step=step,
)

HBox(children=(FloatProgress(value=0.0, max=52128.0), HTML(value='')))




In [None]:
# Shuffle the consecutive sequence windows. One permutation is applied to the
# inputs and to both label arrays so every window stays aligned with its labels.
# A fixed seed makes the ordering reproducible across kernel restarts
# (the previous np.random.shuffle call was unseeded).
s = np.random.default_rng(42).permutation(X_train1.shape[0])

X_train_1 = X_train1[s]
y_train_1 = y_train1[s]
y_train_2 = y_train2[s]

In [None]:
# Sanity check: shapes of the shuffled inputs and of the two label arrays
print(X_train_1.shape,y_train_1.shape,y_train_2.shape)

(52128, 336, 4) (52128, 48, 1) (52128, 48, 1)


# CNN Modeling

In [None]:
# Custom loss: quantile (pinball) loss
def quantile_loss(q,y,f):
    """Return the pinball loss for quantile ``q``, averaged over the last axis.

    ``y`` holds the true values and ``f`` the predictions. A positive residual
    ``y - f`` is weighted by ``q`` and a negative one by ``q - 1``; taking the
    element-wise maximum of the two products selects the applicable penalty.
    """
    residual = y - f
    weighted_if_positive = q * residual
    weighted_if_negative = (q - 1) * residual
    pinball = K.maximum(weighted_if_positive, weighted_if_negative)
    return K.mean(pinball, axis=-1)

In [None]:
def solar_model():
  """Build the (uncompiled) dilated 1-D CNN used for the forecasts.

  The network takes windows of shape (336, 4) and returns a flat vector of
  48 values: the six 'valid' convolutions each shorten the sequence by 48
  steps (336 -> 48) and the final 1x1 convolution reduces the channels to one.

  Returns:
    A Keras ``Model`` mapping a (336, 4) input to a 48-element output.
  """
  num=64
  model_input = Input(shape=(336,4))
  # 'same' padding: both layers keep the sequence length at 336
  model = Conv1D(num,3,padding='same',activation='relu')(model_input)
  model = Conv1D(num,7,padding='same',activation='relu',dilation_rate=48)(model)
  # kernel 2 with dilation 48 and 'valid' padding removes 48 steps per layer.
  # Filter counts use integer division: num/2 and num/4 produced floats
  # (32.0, 16.0), but `filters` is documented as an integer.
  for filters in (num, num, num, num, num//2, num//4):
    model = Conv1D(filters,2,padding='valid',activation='relu',dilation_rate=48)(model)
  # 1x1 convolution collapses the channels to one; relu keeps the outputs non-negative
  model = Conv1D(1,1,activation='relu',dilation_rate=48)(model)
  model = Flatten()(model)
  model = Model(inputs=model_input, outputs = model)
  return model

In [None]:
qs = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
prediction=[]


def _fit_quantile_model(q, labels, tag, checkpoint_path):
  """Train one solar_model() on `labels` for quantile `q` and restore its best checkpoint.

  Returns the model (with the weights of the lowest val_loss epoch loaded)
  and the Keras History object of the fit.
  """
  print('='*20 + ' ' + str(q) + tag +' ' + '='*20)
  model = solar_model()
  # The model is built from tensorflow.keras layers, so the optimizer is taken
  # from tf.keras as well; `learning_rate` replaces the deprecated `lr` alias.
  adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
  # q is a parameter of this helper, so each loss closes over its own quantile
  model.compile(loss=lambda y, f: quantile_loss(q, y, f), optimizer=adam)
  es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
  mc = ModelCheckpoint(checkpoint_path, monitor='val_loss', mode='min', verbose=1, save_best_only=True)
  # NOTE(review): validation_split holds out the tail of the already-shuffled,
  # overlapping windows, so val_loss is probably optimistic -- consider a
  # chronological hold-out instead.
  fit_history = model.fit(X_train_1, labels, epochs=30, callbacks=[es,mc], validation_split=0.2, batch_size=128)
  # Load from the same path the checkpoint was written to (previously saved to
  # a relative path but loaded from a hard-coded '/content/...' path).
  model.load_weights(checkpoint_path)
  return model, fit_history


# The test windows do not depend on the quantile: read and scale the 81 files
# once instead of once per quantile.
test_inputs = []
for i in range(0,81):
  tmp = pd.read_csv(f'/content/drive/MyDrive/dacon/solar/test/{i}.csv')
  tmp = tmp.drop(['Day','Hour','Minute','WS','RH'],axis=1)
  test_inputs.append(np.array(scaling.transform(tmp)).reshape(336,4))
test_inputs = np.stack(test_inputs)  # (81, 336, 4)

for q in tqdm(qs):
  model_1, history = _fit_quantile_model(q, y_train_1, 'day1', 'best_model_1.h5')
  model_2, history = _fit_quantile_model(q, y_train_2, 'day2', 'best_model_2.h5')

  # One batched predict per model. Concatenating along axis 1 yields, for each
  # file, its 48 day-1 values followed by its 48 day-2 values -- the same
  # ordering the per-file loop produced.
  pred_day1 = model_1.predict(test_inputs)
  pred_day2 = model_2.predict(test_inputs)
  prediction.append(np.concatenate([pred_day1, pred_day2], axis=1).reshape(96*81))


HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))

Epoch 1/30

Epoch 00001: val_loss improved from inf to 1.41153, saving model to best_model_1.h5
Epoch 2/30

Epoch 00002: val_loss improved from 1.41153 to 1.38446, saving model to best_model_1.h5
Epoch 3/30

Epoch 00003: val_loss improved from 1.38446 to 1.36184, saving model to best_model_1.h5
Epoch 4/30

Epoch 00004: val_loss improved from 1.36184 to 1.34079, saving model to best_model_1.h5
Epoch 5/30

Epoch 00005: val_loss improved from 1.34079 to 1.31618, saving model to best_model_1.h5
Epoch 6/30

Epoch 00006: val_loss improved from 1.31618 to 1.30389, saving model to best_model_1.h5
Epoch 7/30

Epoch 00007: val_loss improved from 1.30389 to 1.26806, saving model to best_model_1.h5
Epoch 8/30

Epoch 00008: val_loss improved from 1.26806 to 1.23054, saving model to best_model_1.h5
Epoch 9/30

Epoch 00009: val_loss improved from 1.23054 to 1.20001, saving model to best_model_1.h5
Epoch 10/30

Epoch 00010: val_loss improved from 1.20001 to 1.17585, saving model to best_model_1.h5
Epo

In [None]:
# Assemble the predictions and create the submission file.
# `prediction` holds one flat vector per quantile; transposing puts each quantile in its own column.
df_final = pd.DataFrame(np.array(prediction).T)
submission = pd.read_csv('/content/drive/MyDrive/dacon/solar/sample_submission.csv')
# every column after the first one receives a column of predictions
cols = list(submission.columns[1:])
submission[cols] = df_final.to_numpy()
submission.to_csv('/content/drive/MyDrive/dacon/solar/submission.csv', index=False)