In [1]:
import os

import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
train_data = pd.read_csv("data/train_data.csv")
sample_submission = pd.read_csv("data/sample_submission.csv")

# Back-fill missing values with the next valid observation in file order.
# DataFrame.bfill() is the supported spelling of fillna(method='bfill'),
# which newer pandas releases deprecate.
# NOTE(review): the fill runs over the whole frame, so if rows are grouped by
# turbine a gap at the end of one turbine's rows is filled from the next
# turbine's rows, and trailing NaNs at the very end stay NaN -- confirm this
# is acceptable.
train_data = train_data.bfill()
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3859200 entries, 0 to 3859199
Data columns (total 13 columns):
 #   Column   Dtype  
---  ------   -----  
 0   TurbID   int64  
 1   Day      int64  
 2   Tmstamp  object 
 3   Wspd     float64
 4   Wdir     float64
 5   Etmp     float64
 6   Itmp     float64
 7   Ndir     float64
 8   Pab1     float64
 9   Pab2     float64
 10  Pab3     float64
 11  Prtv     float64
 12  Patv     float64
dtypes: float64(10), int64(2), object(1)
memory usage: 382.8+ MB


In [3]:
int_cols = ['TurbID', 'Day']
float_cols = train_data.columns.drop(int_cols + ['Tmstamp'])

# Cast the id/day columns to int32 and every remaining non-timestamp column
# to float32 in a single astype call driven by a per-column mapping.
narrow_dtypes = dict.fromkeys(int_cols, np.int32)
narrow_dtypes.update(dict.fromkeys(float_cols, np.float32))
train_data = train_data.astype(narrow_dtypes)

In [4]:
def make_train_data(data, in_days=5, out_days=2, n_days=200):
    """Slice every turbine's history into sliding (input, target) windows.

    Turbines are processed in ascending ``TurbID`` order. For each turbine and
    each start day ``j`` one sample is produced:

    * input  -- the rows whose ``Day`` is in ``j .. j + in_days - 1``, with
      every column except ``TurbID`` and ``Day``;
    * target -- the ``Patv`` values of the rows whose ``Day`` is in
      ``j + in_days .. j + in_days + out_days - 1``.

    Start days run from 1 up to the last day for which the target window still
    ends on or before ``n_days``. With the defaults (5 input days, 2 target
    days, 200 days) that is start days 1..194.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``TurbID``, ``Day`` and ``Patv``. Rows keep
        their original order inside every window.
    in_days : int, default 5
        Number of consecutive days in the input window.
    out_days : int, default 2
        Number of consecutive days in the target window.
    n_days : int, default 200
        Last day number that a target window may reach.

    Returns
    -------
    (list of numpy.ndarray, list of numpy.ndarray)
        ``train_x`` (one 2-D array per window) and ``train_y`` (one 1-D array
        per window), aligned index by index.
    """
    window_days = in_days + out_days
    last_start = n_days - window_days + 1

    train_x, train_y = [], []
    for i in tqdm(sorted(pd.unique(data['TurbID']))):
        tmp_data = data[data['TurbID'] == i]

        # Convert to NumPy once per turbine; only the row masks change from
        # window to window, so there is no need to re-filter the DataFrame
        # and re-drop columns for every start day.
        days = np.array(tmp_data['Day'])
        features = np.array(tmp_data.drop(["TurbID", "Day"], axis=1))
        target = np.array(tmp_data['Patv'])

        for j in range(1, last_start + 1):
            input_days = np.arange(j, j + in_days)
            label_days = np.arange(j + in_days, j + window_days)

            train_x.append(features[np.isin(days, input_days)])
            train_y.append(target[np.isin(days, label_days)])
    return train_x, train_y

In [5]:
# Encode every Tmstamp value as its 1-based position in order of first appearance.
tms_list = list(pd.unique(train_data['Tmstamp']))
# A dict lookup replaces list.index(), which rescanned the list for every row.
tms_index = {tms: pos for pos, tms in enumerate(tms_list, start=1)}
train_data['Tmstamp'] = train_data['Tmstamp'].map(tms_index)
train_x, train_y = make_train_data(train_data)

100%|██████████| 134/134 [00:23<00:00,  5.79it/s]


In [6]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM, GRU

# Turn on memory growth for every GPU that TensorFlow reports.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)

2022-07-07 10:19:29.558953: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-07 10:19:29.562589: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-07 10:19:29.562754: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [7]:
# Pack the per-window arrays into float32 tensors:
# train_x -> (n_windows, rows_per_window, n_features), train_y -> (n_windows, n_targets).
window_rows, window_cols = train_x[0].shape
train_x = np.array(train_x, dtype=np.float32).reshape((-1, window_rows, window_cols))
train_y = np.array(train_y, dtype=np.float32)

train_x.shape, train_y.shape

((25996, 720, 11), (25996, 288))

In [8]:
# GRU over the input window followed by two dense layers; the last layer has
# 288 units, matching the width of train_y.
model = Sequential()
model.add(GRU(256, input_shape=train_x[0].shape))
model.add(Dense(516, activation='relu'))
model.add(Dense(288, activation='relu'))

optimizer = tf.optimizers.RMSprop(1e-3)
model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])

2022-07-07 10:19:29.889396: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-07 10:19:29.890004: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-07 10:19:29.890164: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-07 10:19:29.890257: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [9]:
# Train on every window; the trailing semicolon hides the returned History repr.
model.fit(
    x=train_x,
    y=train_y,
    batch_size=128,
    epochs=10,
);

2022-07-07 10:19:30.441021: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 823553280 exceeds 10% of free system memory.
2022-07-07 10:19:30.813504: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 823553280 exceeds 10% of free system memory.


Epoch 1/10


2022-07-07 10:19:32.520144: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8101


  3/204 [..............................] - ETA: 7s - loss: 297132.5312 - mae: 350.3201  

2022-07-07 10:19:33.042891: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# Use days 196-200 of the training frame as the model input for the forecast.
test_data_list = list(range(196, 201))
is_test_day = train_data["Day"].isin(test_data_list)
test_frame = train_data[is_test_day].drop(columns=["TurbID", "Day"])

# Reshape to the same (rows_per_window, n_features) layout as the training windows.
# NOTE(review): the reshape relies on rows being ordered turbine by turbine -- confirm.
rows_per_window, n_features = train_x[0].shape
test_data = np.array(test_frame).reshape(-1, rows_per_window, n_features)
test_data.shape

(134, 720, 11)

In [11]:
# Flatten the model output row by row into a single column.
# NOTE(review): assumes sample_submission rows are ordered turbine by turbine,
# then by forecast step -- confirm against the submission format.
predictions = model.predict(test_data)
sample_submission['Patv'] = predictions.reshape(-1)

# to_csv does not create missing directories; make sure the target folder
# exists so the finished run is not lost on a fresh checkout.
os.makedirs("output", exist_ok=True)
sample_submission.to_csv("output/baseline1.csv", index=False)

