In [1]:
from datetime import datetime
import json
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime, timedelta

%matplotlib inline


class Config:
    """Plain attribute container for notebook hyper-parameters.

    Instances start empty; `set_config` fills them from a dictionary so that
    settings can be read as `config.timesteps` instead of `config['timesteps']`.
    """


def set_config(config_dict):
    """Build a `Config` whose attributes mirror the entries of `config_dict`.

    Args:
        config_dict: Mapping of setting name -> value.

    Returns:
        A new `Config` instance exposing every key of `config_dict` as an
        attribute.
    """
    config = Config()
    # Copy the entries instead of rebinding `__dict__` to the caller's object:
    # sharing the same dict would make later attribute writes on the config
    # silently mutate `config_dict` (and the other way round).
    vars(config).update(config_dict)
    return config


In [17]:
# Experiment settings, grouped by purpose. Keys and values are exactly the
# ones the rest of the notebook reads from `config`.
config = set_config(dict(
    # Date window (inclusive on both ends) selected from the CSV.
    row_start='2004-10-16',
    row_end='2004-12-14',
    # Sliding-window length used when building the training samples.
    timesteps=2,
    # Network / optimiser hyper-parameters.
    max_batch_size=1,
    layer_size=1,
    unit_size=1,
    dropout=0,
    learning_rate=0.1,
    max_epochs=1,
    # Column holding the date strings, and the column(s) fed to the model.
    time_col='tanggal',
    feature=['rr'],
))


def train_test_split(dataset, time_step=1):
    """Turn a series into (window, next value) supervised-learning pairs.

    Despite its name, this does not divide data into train and test parts:
    for every position `i` it emits the `time_step` consecutive values starting
    at `i` as the input and the value right after that window as the target.

    Only column 0 of `dataset` is read, so `dataset` must be 2-D
    (rows x columns); any additional columns are ignored.

    Args:
        dataset: 2-D array-like indexed as `dataset[row, column]`.
        time_step: Number of consecutive rows in each input window.

    Returns:
        Tuple `(X, y)` of NumPy arrays. `X` has shape
        `(len(dataset) - time_step, time_step)` and `y` has shape
        `(len(dataset) - time_step,)`. When `dataset` is too short to yield a
        single pair, `X` is an empty `(0, time_step)` array and `y` an empty
        `(0,)` array.
    """
    dataX, dataY = [], []
    for i in range(len(dataset) - time_step):
        dataX.append(dataset[i:(i + time_step), 0])
        dataY.append(dataset[i + time_step, 0])
    if not dataX:
        # np.array([]) would be 1-D with shape (0,), which breaks callers that
        # read X.shape[1]; keep the window axis even when there are no rows.
        return np.empty((0, max(time_step, 0))), np.empty((0,))
    return np.array(dataX), np.array(dataY)

def proyeksi_split(dataset, time_step=1):
    """Build every sliding window of `time_step` rows from column 0.

    Unlike `train_test_split`, no target value is produced and the final
    window (the one ending on the last row) is included.

    Only column 0 of `dataset` is read, so `dataset` must be 2-D
    (rows x columns); any additional columns are ignored.

    Args:
        dataset: 2-D array-like indexed as `dataset[row, column]`.
        time_step: Number of consecutive rows in each window.

    Returns:
        NumPy array of shape `(len(dataset) - time_step + 1, time_step)`.
        When `dataset` has fewer than `time_step` rows the result is an empty
        `(0, time_step)` array.
    """
    dataX = [
        dataset[i:(i + time_step), 0]
        for i in range(len(dataset) - time_step + 1)
    ]
    if not dataX:
        # Keep the window axis so callers can still read `.shape[1]`.
        return np.empty((0, max(time_step, 0)))
    return np.array(dataX)

In [18]:
# Load the observations and turn the values 8888, 9999 and 2555 into NaN in
# every column so that they can be interpolated below.
# NOTE(review): presumably these are the dataset's missing-value codes —
# confirm against the data provider's documentation.
DATASETS = pd.read_csv('../Data/1985-2021.csv').replace(to_replace=[8888, 9999, 2555], value=np.nan)

# Keep only the rows inside the configured window (both bounds inclusive).
# The comparison is done on strings; it orders correctly because the dates are
# zero-padded 'YYYY-MM-DD' (the format parsed below).
# `interpolate()` is chained instead of called with `inplace=True`, so the
# frame derived from the `.loc` selection is never mutated in place and
# re-running the cell always starts from the freshly filtered rows.
DATASETS = DATASETS.loc[
    (DATASETS[config.time_col] >= config.row_start)
    & (DATASETS[config.time_col] <= config.row_end)
].interpolate()

# Dates of the selected rows as `datetime.date` objects, aligned with DATASETS.
datelist = np.array([datetime.strptime(date, '%Y-%m-%d').date() for date in list(DATASETS[config.time_col])])

In [21]:
# Spot check: display the fourth-from-last date of the selected window.
datelist[-4]

datetime.date(2004, 12, 11)

In [4]:
# Select the model input column(s) and work on the underlying NumPy array.
featureset = DATASETS[config.feature]
vector_featureset = featureset.values

# NOTE(review): the scaler is fitted on the whole series, so the held-out
# tail also influences the scaling applied to the training rows. Fit on the
# training rows only if the test portion must stay unseen.
scaller = MinMaxScaler()
vector_featureset_scaled = scaller.fit_transform(vector_featureset)

# First 90% of the rows (in time order) are used for training.
# Use the row count rather than `.size`: `.size` counts every element
# (rows x columns) and would overshoot once more than one feature is selected.
train_size = int(vector_featureset_scaled.shape[0] * 0.9)
trainset, testset = vector_featureset_scaled[:train_size], vector_featureset_scaled[train_size:]
traindateset, testdateset = datelist[:train_size], datelist[train_size:]

# Build (window, next value) pairs, then reshape the windows to
# (samples, number of features, timesteps) to match the LSTM input shape.
# NOTE(review): train_test_split reads column 0 only, so this reshape is only
# valid for a single feature — revisit before adding more columns.
X_train, y_train = train_test_split(trainset, time_step=config.timesteps)
X_train = np.reshape(X_train, (X_train.shape[0], len(config.feature), X_train.shape[1]))

In [16]:
# Scratch check: `+` on two Python lists concatenates them (it is not an
# element-wise addition).
[1,2,3]+[4,5,6]

[1, 2, 3, 4, 5, 6]

In [5]:
# y_pred_loss = list()
# y_true_loss = list()
# loss = list()


def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences between prediction and target.

    Args:
        y_true: Target values.
        y_pred: Predicted values.

    Returns:
        The mean over all elements of `(y_pred - y_true) ** 2`, computed with
        the Keras backend ops.
    """
    backend = tf.keras.backend
    squared_error = backend.square(y_pred - y_true)
    return backend.mean(squared_error)


def root_mean_squared_error(y_true, y_pred):
    """Return the square root of `mean_squared_error(y_true, y_pred)`.

    Args:
        y_true: Target values.
        y_pred: Predicted values.

    Returns:
        The square root of the mean squared error, computed with the Keras
        backend ops.
    """
    # Pass the arguments in the order the helper declares them. The previous
    # swapped call gave the same number only because the squared difference
    # is symmetric.
    return tf.keras.backend.sqrt(mean_squared_error(y_true, y_pred))

# Stack `config.layer_size` LSTM layers into a sequential model.
model = tf.keras.models.Sequential()

for i in range(config.layer_size):
    model.add(
        tf.keras.layers.LSTM(
            units=config.unit_size,
            # Every layer except the last one hands its full output sequence
            # to the next LSTM; the last one returns only its final output.
            return_sequences=i != config.layer_size - 1,
            # Fixed batch size; each sample is (number of features, timesteps).
            batch_input_shape=(config.max_batch_size, len(config.feature), config.timesteps),
            go_backwards=True,
            dropout=config.dropout,
            # An earlier experiment pinned the initial weights through the
            # `weights=[...]` argument (kernels filled with 0.5774, zero bias);
            # it was disabled, so the default initialisers are used.
        )
    )

# Compile and summarise once the stack is complete. This used to sit in the
# loop's `else:` clause, which always runs when the loop has no `break`; plain
# statements after the loop do the same thing without the misleading construct.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=config.learning_rate),
    loss=mean_squared_error,
    run_eagerly=True
)
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (1, 1)                    16        
                                                                 
Total params: 16
Trainable params: 16
Non-trainable params: 0
_________________________________________________________________


In [6]:
# from tensorflow.keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

# Collector for per-batch losses. Nothing fills it at the moment: the
# LambdaCallback that appended `logs['loss']` on every batch end is disabled.
batch_loss = []

# Train in chronological order (no shuffling), one configured batch at a time.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=config.max_batch_size,
    epochs=config.max_epochs,
    verbose=1,
    shuffle=False,
    # To record per-batch losses again, pass:
    # callbacks=[
    #     tf.keras.callbacks.LambdaCallback(
    #         on_batch_end=lambda batch, logs=None: batch_loss.append(logs['loss']),
    #     )
    # ]
)



In [7]:
# X_test, y_test = train_test_split(testset, time_step=config.timesteps)
# X_test = np.reshape(X_test, (X_test.shape[0], len(config.feature), X_test.shape[1]))

# results = model.evaluate(
#     X_test,
#     y_test,
#     verbose=1,
#     batch_size=config.max_batch_size,
# )

In [None]:
# Plot the projected values against the historical ones.
# NOTE(review): PREDICTIONS, HISTORY and START_DATE_FOR_PLOTTING are not
# defined in the cells shown before this one — make sure the cells that create
# them run first (Restart Kernel -> Run All should succeed).

# Use the rcParams exposed by the pyplot module imported at the top of the
# notebook instead of a late `from pylab import rcParams`.
plt.rcParams['figure.figsize'] = (14, 5)

plt.plot(PREDICTIONS.loc[START_DATE_FOR_PLOTTING:].index, PREDICTIONS.loc[START_DATE_FOR_PLOTTING:][config.feature], color='orange', label='Proyeksi')
plt.plot(HISTORY.loc[START_DATE_FOR_PLOTTING:].index, HISTORY.loc[START_DATE_FOR_PLOTTING:][config.feature], color='b', label='Histori')

# Dashed vertical line at the first projected index value.
plt.axvline(x=min(PREDICTIONS.index), color='green', linewidth=2, linestyle='--')
plt.grid(which='major', color='#cccccc', alpha=0.5)

plt.title('Prediksi dan Histori Curah Hujan', family='Arial', fontsize=12)
plt.xlabel('Timeline', family='Arial', fontsize=10)
plt.ylabel('Tingkat Curah Hujan', family='Arial', fontsize=10)
plt.xticks(rotation=45, fontsize=8)
plt.legend(shadow=True)
plt.show()

NameError: name 'plt' is not defined