In [1]:
import numpy as np
import pandas as pd
import gc
import time
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings(action='ignore')
%matplotlib inline

In [2]:
# baseline
import pandas as pd
import numpy as np

from glob import glob
from tqdm import tqdm
from scipy import interpolate

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU, AveragePooling1D, GlobalAveragePooling1D

In [3]:
w_list = sorted(glob("Dataset/water_data/*.csv"))
w_list

['Dataset/water_data\\data_2012.csv',
 'Dataset/water_data\\data_2013.csv',
 'Dataset/water_data\\data_2014.csv',
 'Dataset/water_data\\data_2015.csv',
 'Dataset/water_data\\data_2016.csv',
 'Dataset/water_data\\data_2017.csv',
 'Dataset/water_data\\data_2018.csv',
 'Dataset/water_data\\data_2019.csv',
 'Dataset/water_data\\data_2020.csv',
 'Dataset/water_data\\data_2021.csv',
 'Dataset/water_data\\data_2022.csv']

In [4]:
pd.read_csv(w_list[0]).shape

(26496, 15)

In [5]:
pd.read_csv(w_list[0]).head(4)

Unnamed: 0,ymdhm,swl,inf,sfw,ecpc,tototf,tide_level,wl_1018662,fw_1018662,wl_1018680,fw_1018680,wl_1018683,fw_1018683,wl_1019630,fw_1019630
0,2012-05-01 00:00,24.8,555.0,219.07,24.93,555.0,445.0,310.7,469.05,300.2,0.0,290.0,729.8,275.3,540.18
1,2012-05-01 00:10,24.794,464.6,218.86,25.15,562.9,449.0,314.7,498.0,300.2,0.0,290.0,731.48,275.3,540.18
2,2012-05-01 00:20,24.789,478.1,218.69,25.31,576.4,451.0,313.7,490.68,301.2,0.0,290.0,726.42,275.3,540.18
3,2012-05-01 00:30,24.789,464.8,218.69,25.31,563.1,452.0,311.7,476.21,301.2,0.0,290.0,726.42,276.3,552.17


In [6]:
with tf.device("/device:GPU:0"):
    train_data = []
    train_label = []
    num = 0

    for i in w_list[:-1]:

        tmp = pd.read_csv(i)
        tmp = tmp.replace(" ", np.nan)
        tmp = tmp.interpolate(method = 'values')
        tmp = tmp.fillna(0)

        for j in tqdm(range(len(tmp)-432)):
            train_data.append(np.array(tmp.loc[j:j + 431, ["swl", "inf", "sfw", "ecpc",
                                                           "tototf", "tide_level",
                                                           "fw_1018662", "fw_1018680",
                                                           "fw_1018683", "fw_1019630"]]).astype(float))

            train_label.append(np.array(tmp.loc[j + 432, ["wl_1018662", "wl_1018680",
                                                          "wl_1018683", "wl_1019630"]]).astype(float))

100%|██████████| 26064/26064 [00:17<00:00, 1481.58it/s]
100%|██████████| 26064/26064 [00:17<00:00, 1491.09it/s]
100%|██████████| 26064/26064 [00:17<00:00, 1477.88it/s]
100%|██████████| 26064/26064 [00:17<00:00, 1468.53it/s]
100%|██████████| 26064/26064 [00:17<00:00, 1456.58it/s]
100%|██████████| 26064/26064 [00:18<00:00, 1415.29it/s]
100%|██████████| 26064/26064 [00:18<00:00, 1433.91it/s]
100%|██████████| 26064/26064 [00:18<00:00, 1422.71it/s]
100%|██████████| 26064/26064 [00:18<00:00, 1402.70it/s]
100%|██████████| 26064/26064 [00:18<00:00, 1389.04it/s]


In [7]:
train_data = np.array(train_data)
train_label = np.array(train_label)

print(train_data.shape)
print(train_label.shape)

(260640, 432, 10)
(260640, 4)


In [8]:
input_shape = (train_data[0].shape[0], train_data[0].shape[1])

model = Sequential()
model.add(GRU(256, input_shape=input_shape))
model.add(Dense(4, activation = 'relu'))

optimizer = tf.optimizers.RMSprop(0.001)

model.compile(optimizer=optimizer,loss='mse', metrics=['mae'])

In [9]:
with tf.device("/device:GPU:0"):
    model.fit(train_data, train_label, epochs=5, batch_size=256)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [10]:
with tf.device("/device:GPU:0"):
    test_data = []
    test_label = []

    tmp = pd.read_csv(w_list[-1])
    tmp = tmp.replace(" ", np.nan)
    # 이전값을 사용
    tmp = tmp.fillna(method = 'pad')
    tmp = tmp.fillna(0)

    #tmp.loc[:, ["wl_1018662", "wl_1018680", "wl_1018683", "wl_1019630"]] = tmp.loc[:, ["wl_1018662", "wl_1018680", "wl_1018683", "wl_1019630"]]*100

    for j in tqdm(range(4032, len(tmp)-432)):
        test_data.append(np.array(tmp.loc[j:j + 431, ["swl", "inf", "sfw", "ecpc",
                                                        "tototf", "tide_level",
                                                        "fw_1018662", "fw_1018680",
                                                        "fw_1018683", "fw_1019630"]]).astype(float))

        test_label.append(np.array(tmp.loc[j + 432, ["wl_1018662", "wl_1018680",
                                                        "wl_1018683", "wl_1019630"]]).astype(float))

100%|██████████| 6912/6912 [00:05<00:00, 1367.36it/s]


In [11]:
test_data = np.array(test_data)
test_label = np.array(test_label)

print(test_data.shape)
print(test_label.shape)

(6912, 432, 10)
(6912, 4)


In [12]:
with tf.device("/device:GPU:0"):
    pred = model.predict(test_data)



In [13]:
pred = pd.DataFrame(pred)

In [14]:
sample_submission = pd.read_csv("Dataset/sample_submission.csv")

sample_submission["wl_1018662"] = pred[0]
sample_submission["wl_1018680"] = pred[1]
sample_submission["wl_1018683"] = pred[2]
sample_submission["wl_1019630"] = pred[3]

In [15]:
sample_submission.to_csv("Dataset/baseline.csv", index = False)

### score : 30.01091