# Import packages

In [1]:
import os
import sys
# Add the parent of the current working directory to the import path
# (presumably where the project-local `common` module lives — confirm).
sys.path.append(os.path.dirname(os.getcwd()))

# Star import: the cells below use pd, np, join, PATH, RANDOM_STATE,
# make_train_data and make_val_data without importing them anywhere else,
# so all of those names are expected to come from `common`.
from common import *

# 1. Load dataset

In [2]:
# Read the training table and the submission template from the input directory.
train_data = pd.read_csv(join(PATH.input, "train_data.csv"))
sample_submission = pd.read_csv(join(PATH.input, "sample_submission.csv"))
# Back-fill missing values with the next valid observation in row order.
# DataFrame.bfill() is the direct replacement for fillna(method='bfill'),
# which recent pandas releases deprecate.
# NOTE(review): the fill is applied to the whole frame, not per turbine, so a
# gap at the end of one turbine's rows would presumably be filled from the
# following rows of another turbine — confirm this is intended.
train_data = train_data.bfill()
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3859200 entries, 0 to 3859199
Data columns (total 13 columns):
 #   Column   Dtype  
---  ------   -----  
 0   TurbID   int64  
 1   Day      int64  
 2   Tmstamp  object 
 3   Wspd     float64
 4   Wdir     float64
 5   Etmp     float64
 6   Itmp     float64
 7   Ndir     float64
 8   Pab1     float64
 9   Pab2     float64
 10  Pab3     float64
 11  Prtv     float64
 12  Patv     float64
dtypes: float64(10), int64(2), object(1)
memory usage: 382.8+ MB


# 2. Preprocessing

## 1) Training set

In [3]:
# Encode every timestamp label as a 1-based integer, numbered in order of
# first appearance in the column.
tms_list = list(pd.unique(train_data['Tmstamp']))
# A dict lookup per row replaces the former tms_list.index(x) call, which
# rescanned the list of unique labels for each of the frame's rows.
tms_codes = {label: position for position, label in enumerate(tms_list, start=1)}
train_data['Tmstamp'] = train_data['Tmstamp'].map(tms_codes)

In [4]:
# Build the training inputs/targets with the project helper.
# NOTE(review): the meaning of the arguments 5 and 198 is defined by
# make_train_data in `common` — confirm before changing them.
train_x, train_y = make_train_data(train_data, 5, 198)
# Collapse everything in front of the per-sample axes into one sample axis;
# the two trailing axes are taken from the shape of the first sample.
train_x = train_x.reshape((-1, *train_x[0].shape[:2]))
train_x.shape, train_y.shape

100%|██████████| 134/134 [00:21<00:00,  6.24it/s]


((25728, 720, 11), (25728, 288))

## 2) Validation set

In [5]:
# Build the validation inputs/targets with the project helper.
# NOTE(review): the meaning of the argument 5 is defined by make_val_data in
# `common` — confirm before changing it.
val_x, val_y = make_val_data(train_data, 5)
# Give the validation arrays the same per-sample layout as the training
# arrays: inputs follow one training sample's shape, targets follow the
# training target width.
val_x = val_x.reshape((-1, *train_x[0].shape[:2]))
val_y = val_y.reshape((-1, train_y.shape[-1]))
val_x.shape, val_y.shape

((134, 720, 11), (134, 288))

# 3. Modeling

In [6]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM, GRU

# Seed the random number generators covered by set_random_seed so that
# weight initialisation and shuffling are repeatable across runs.
tf.keras.utils.set_random_seed(RANDOM_STATE)
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up
# front. list_physical_devices is called through its stable tf.config path
# rather than the older tf.config.experimental alias.
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

2022-07-11 06:08:28.358129: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-11 06:08:28.361627: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-11 06:08:28.361789: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [7]:
# GRU encoder over one input sample, followed by two dense layers.
# The output width is taken from the training targets (288 columns in the
# run shown in this notebook) instead of being hard-coded, so the model
# always matches the shape of train_y.
# NOTE(review): the ReLU on the output layer restricts predictions to
# non-negative values — confirm that is intended for the target.
model = Sequential([
    GRU(256, input_shape=train_x[0].shape),
    Dense(516, activation='relu'),
    Dense(train_y.shape[-1], activation='relu')
])
# RMSprop with a learning rate of 0.001; mean squared error is the training
# loss and mean absolute error is reported alongside it.
optimizer = tf.optimizers.RMSprop(0.001)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

2022-07-11 06:08:28.450426: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-11 06:08:28.451222: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-11 06:08:28.451488: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-11 06:08:28.451674: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

# 4. Training

In [8]:
from tensorflow.keras.callbacks import EarlyStopping

# Train for up to 1000 epochs in batches of 128, evaluating on the validation
# set each epoch. EarlyStopping (left on its default monitored quantity)
# stops after 10 epochs without improvement and restores the best weights.
# The trailing semicolon suppresses the returned History object's repr.
model.fit(
    train_x,
    train_y,
    validation_data=(val_x, val_y),
    epochs=1000,
    batch_size=128,
    callbacks=[EarlyStopping(patience=10, restore_best_weights=True)],
);

Epoch 1/1000


2022-07-11 06:08:30.813263: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8101


  3/201 [..............................] - ETA: 6s - loss: 320064.9062 - mae: 369.9096  

2022-07-11 06:08:31.342565: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000


# 5. Prepare test input

In [11]:
# Days 196 through 200 (inclusive) form the model input for the submission.
test_data_list = list(range(196, 201))
# Keep only the rows of those days, then drop the two identifier columns so
# the remaining columns are the model's input features.
test_data = train_data.loc[train_data["Day"].isin(test_data_list)]
test_data = test_data.drop(columns=["TurbID", "Day"])
# Convert to an array and lay it out like the training samples.
# NOTE(review): the reshape relies on the rows already being ordered so that
# each consecutive run of rows forms one sample — confirm against the data.
test_data = np.array(test_data).reshape((-1, *train_x[0].shape[:2]))
test_data.shape

(134, 720, 11)

# 6. Generate submission

In [12]:
# Predict on the test samples and flatten the 2-D prediction array row by
# row into the single 'Patv' column of the submission template.
sample_submission['Patv'] = model.predict(test_data).ravel()
# Write the filled-in template to the output directory without the index.
sample_submission.to_csv(
    join(PATH.output, "proposed2.csv"),
    index=False,
)

