In [None]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import plotly.graph_objs as go
import plotly.offline as py

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from keras.models import Sequential
from keras.layers import Dense, LSTM, GRU, Dropout
from keras.optimizers import RMSprop,Adam
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from keras.metrics import RootMeanSquaredError

import warnings

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries imported above.
warnings.filterwarnings("ignore")

In [None]:
# Absolute paths of the training and test CSV files under /kaggle/input.
# NOTE(review): despite the *_DIR suffix, both constants point at files, not directories.
TRAIN_DIR = '/kaggle/input/praktikum-2-rnn-if4074-2023/train_LTC.csv'
TEST_DIR = '/kaggle/input/praktikum-2-rnn-if4074-2023/test_LTC.csv'

In [None]:
# Load the training split; 'nan' and '?' cells are parsed as missing values.
# low_memory=False makes pandas parse the file in one pass, which avoids
# mixed-type column inference.
# The former infer_datetime_format=True argument was dropped: no parse_dates is
# given, so it never had any effect, and pandas 2.x deprecates it.
df_train = pd.read_csv(
    TRAIN_DIR,
    low_memory=False,
    na_values=['nan', '?'],
)

In [None]:
# Load the test split with the same parsing options as the training split;
# 'nan' and '?' cells are parsed as missing values.
# The former infer_datetime_format=True argument was dropped: no parse_dates is
# given, so it never had any effect, and pandas 2.x deprecates it.
df_test = pd.read_csv(
    TEST_DIR,
    low_memory=False,
    na_values=['nan', '?'],
)

In [None]:
# Print column names, dtypes and non-null counts of the training frame.
df_train.info()

In [None]:
# Show the (rows, columns) size of the training frame.
df_train.shape

In [None]:
# Preview the first rows of the training frame.
df_train.head()

In [None]:
# Preview the first rows of the test frame.
df_test.head()

In [None]:
# One line trace per column, plotted against the row index.
# Only the 'Date' column is excluded; any other column is plotted as-is.
traces = [
    go.Scatter(x=df_train.index, y=df_train[column], name=column)
    for column in df_train.columns
    if column != 'Date'
]

# Render every trace in a single interactive figure.
py.iplot(traces)

In [None]:
# Drop the Date column so that only the remaining feature columns are scaled.
# errors="ignore" keeps this cell re-runnable: df_train is reassigned here, so a
# second execution would otherwise raise KeyError because 'Date' is already gone.
df_train = df_train.drop(columns="Date", errors="ignore")

# Normalize data: MinMaxScaler rescales each column to its default [0, 1] range.
# The fitted scaler is reused by create_prediction to map model outputs back to
# the original scale.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df_train)

In [None]:
# Inspect the scaled training array produced by scaler.fit_transform.
scaled_data

In [None]:
# Default number of past rows fed to the model for each sample.
LOOKBACK = 5


def create_dataset(dataset, lookback):
    """Slice a 2-D array into overlapping input windows and next-step targets.

    For every row index ``i`` from ``lookback`` up to the end of ``dataset``,
    the input sample is the ``lookback`` rows immediately before ``i`` (all
    columns) and the target is the first four columns of row ``i``.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_columns).
        lookback: Number of consecutive past rows in each input window; must
            be at least 1.

    Returns:
        Tuple ``(X, y)`` of numpy arrays. With ``n_rows > lookback`` the shapes
        are ``(n_rows - lookback, lookback, n_columns)`` and
        ``(n_rows - lookback, 4)``. Both arrays are empty when the dataset has
        no more than ``lookback`` rows.

    Raises:
        ValueError: If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError("lookback must be a positive integer")

    dataset = np.asarray(dataset)
    X, y = [], []
    # Bound the loop by the array that was passed in. The previous version used
    # the global `scaled_data`, which breaks for any other input.
    for i in range(lookback, len(dataset)):
        X.append(dataset[i - lookback:i])
        # Columns 0..3 are the regression targets.
        y.append(dataset[i, 0:4])

    return np.array(X), np.array(y)

# Windowed samples and targets for the default LOOKBACK.
# NOTE(review): X and y are not referenced by any later cell visible in this notebook.
X, y = create_dataset(scaled_data, LOOKBACK)

# Main & Alternative Models

Kami mencoba tiga arsitektur berupa:
1. LSTM
2. GRU
3. LSTM with dropout

Dari hasil analisis, didapat bahwa dropout dapat membantu mencegah overfitting, tetapi dapat memperlambat konvergensi saat pelatihan. Di sisi lain, GRU relatif lebih cepat dilatih dibandingkan LSTM, tetapi tidak seefektif LSTM dalam mempertahankan dependensi jangka panjang (long-term dependencies).

Adapun kami juga membuat model yang teroptimisasi di bagian paling akhir.

In [None]:
def build_model(input_shape):
    """Create and compile the baseline stacked-LSTM regressor.

    Architecture: LSTM(50, returning the full sequence) -> LSTM(50, returning
    only the last step) -> Dense(25) -> Dense(4).

    Args:
        input_shape: Shape of one input sample, passed to the first LSTM layer.
            Callers in this notebook pass ``(X.shape[1], X.shape[2])``.

    Returns:
        A Sequential model compiled with the Adam optimizer, mean squared error
        loss and a root-mean-squared-error metric named ``'rmse'``.
    """
    layers = [
        LSTM(50, input_shape=input_shape, return_sequences=True),
        LSTM(50, return_sequences=False),
        Dense(25),
        Dense(4),
    ]
    network = Sequential(layers)
    network.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=[RootMeanSquaredError(name='rmse')],
    )
    return network

def build_model_gru(input_shape):
    """Create and compile the GRU counterpart of the baseline model.

    Architecture: GRU(50, returning the full sequence) -> GRU(50, returning
    only the last step) -> Dense(25) -> Dense(4).

    Args:
        input_shape: Shape of one input sample, passed to the first GRU layer.
            Callers in this notebook pass ``(X.shape[1], X.shape[2])``.

    Returns:
        A Sequential model compiled with the Adam optimizer, mean squared error
        loss and a root-mean-squared-error metric named ``'rmse'``.
    """
    model = Sequential()
    model.add(GRU(units=50, return_sequences=True, input_shape=input_shape, activation='tanh'))
    # The last recurrent layer must collapse the time axis. With
    # return_sequences=True the Dense layers would run per time step and the
    # model would emit (batch, timesteps, 4), which matches neither the
    # (batch, 4) targets from create_dataset nor the 2-D array that
    # create_prediction passes to scaler.inverse_transform.
    model.add(GRU(units=50, return_sequences=False, activation='tanh'))
    model.add(Dense(units=25))
    model.add(Dense(units=4))
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=[RootMeanSquaredError(name='rmse')])
    return model
    
def build_model_dropout(input_shape):
    """Create and compile the baseline LSTM stack with dropout after each LSTM.

    Architecture: LSTM(50, full sequence) -> Dropout(0.3) -> LSTM(50, last step
    only) -> Dropout(0.3) -> Dense(25) -> Dense(4).

    Args:
        input_shape: Shape of one input sample, passed to the first LSTM layer.
            Callers in this notebook pass ``(X.shape[1], X.shape[2])``.

    Returns:
        A Sequential model compiled with the Adam optimizer, mean squared error
        loss and a root-mean-squared-error metric named ``'rmse'``.
    """
    stack = (
        LSTM(50, input_shape=input_shape, return_sequences=True),
        Dropout(0.3),
        LSTM(50, return_sequences=False),
        Dropout(0.3),
        Dense(25),
        Dense(4),
    )
    regressor = Sequential()
    for layer in stack:
        regressor.add(layer)
    regressor.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=[RootMeanSquaredError(name='rmse')],
    )
    return regressor

## Using Base Model to Create 5 Time Series

Pada eksperimen ini, kami mencoba lima panjang jendela masukan (lookback, yaitu jumlah time step per sampel): 1, 5, 10, 20, dan 50. Dari kelima nilai tersebut, lookback 20 menghasilkan loss paling kecil.

In [None]:
# Build one (windows, targets) pair per candidate lookback length, in the
# order 1, 5, 10, 20, 50.
(X_1, y_1), (X_5, y_5), (X_10, y_10), (X_20, y_20), (X_50, y_50) = (
    create_dataset(scaled_data, window_length)
    for window_length in (1, 5, 10, 20, 50)
)

In [None]:
# Instantiate one untrained baseline model per lookback length; each model's
# input shape is taken from the matching windowed dataset.
base_model_1, base_model_5, base_model_10, base_model_20, base_model_50 = (
    build_model((windows.shape[1], windows.shape[2]))
    for windows in (X_1, X_5, X_10, X_20, X_50)
)

In [None]:
# Train each baseline model on its own windowed dataset for 10 epochs and keep
# the object returned by fit() for later inspection.
# batch_size=1 puts a single sample in every batch, so these runs are slow.
# NOTE(review): no validation data is passed, so the reported loss is training loss only.
history_1 = base_model_1.fit(X_1, y_1, batch_size=1, epochs=10)
history_5 = base_model_5.fit(X_5, y_5, batch_size=1, epochs=10)
history_10 = base_model_10.fit(X_10, y_10, batch_size=1, epochs=10)
history_20 = base_model_20.fit(X_20, y_20, batch_size=1, epochs=10)
history_50 = base_model_50.fit(X_50, y_50, batch_size=1, epochs=10)

In [None]:
def create_prediction(model, lookback, train_df=None, test_df=None, fitted_scaler=None):
    """Forecast Open/High/Low/Close for every test date by recursive one-step prediction.

    The last ``lookback`` rows of the training frame seed a sliding window.
    For each test date the window is scaled, passed to ``model``, and the
    output is mapped back to the original scale and recorded. The prediction
    is then appended to the window while its oldest row is dropped, so later
    steps are conditioned on earlier predictions.

    Args:
        model: Object exposing ``predict(batch, verbose=...)``. It receives a
            batch of shape ``(1, lookback, n_columns)`` and must return one row
            with as many columns as the training frame (four, matching the
            output column names).
        lookback: Number of most recent rows fed to the model at each step;
            must be at least 1.
        train_df: Feature frame without the Date column. Defaults to the
            notebook-level ``df_train``.
        test_df: Frame providing the ``'Date'`` column to forecast for.
            Defaults to the notebook-level ``df_test``.
        fitted_scaler: Fitted object with ``transform`` and
            ``inverse_transform``. Defaults to the notebook-level ``scaler``.

    Returns:
        DataFrame with columns ``Date, Open, High, Low, Close`` and one row per
        test date.

    Raises:
        ValueError: If ``lookback`` is smaller than 1 or the training frame has
            fewer than ``lookback`` rows.
    """
    # Fall back to the notebook globals so existing two-argument calls keep working.
    train_df = df_train if train_df is None else train_df
    test_df = df_test if test_df is None else test_df
    fitted_scaler = scaler if fitted_scaler is None else fitted_scaler

    # iloc[-0:] would silently select the whole frame, so reject it explicitly.
    if lookback < 1:
        raise ValueError("lookback must be a positive integer")
    if len(train_df) < lookback:
        raise ValueError(
            f"need at least {lookback} training rows to seed the window, got {len(train_df)}"
        )

    dates = test_df['Date'].values
    window = train_df.iloc[-lookback:].values
    forecasts = []

    for _ in dates:
        scaled_window = fitted_scaler.transform(window)

        # verbose=0 suppresses the per-call progress output of predict().
        scaled_step = model.predict(np.array([scaled_window]), verbose=0)

        step = fitted_scaler.inverse_transform(scaled_step)
        forecasts.append(step[0])

        # Slide the window: drop the oldest row and append the new prediction.
        window = np.vstack((window[1:], step))

    predicted_df = pd.DataFrame(forecasts, columns=['Open', 'High', 'Low', 'Close'])
    predicted_df['Date'] = dates

    return predicted_df[['Date', 'Open', 'High', 'Low', 'Close']]

In [None]:
# Forecast the test dates with the baseline model trained on 20-step windows.
df_predicted_20 = create_prediction(base_model_20, 20)

In [None]:
# Display the forecast frame returned by create_prediction.
df_predicted_20

In [None]:
# Instantiate the GRU variant with the input shape of the 20-step windows.
gru_model = build_model_gru((X_20.shape[1], X_20.shape[2]))

In [None]:
# Train the GRU variant on the 20-step windows with the same batch size and
# epoch count as the baseline runs.
gru_history = gru_model.fit(X_20, y_20, batch_size=1, epochs=10)

In [None]:
# Instantiate the LSTM-with-dropout variant with the input shape of the 20-step windows.
dropout_model = build_model_dropout((X_20.shape[1], X_20.shape[2]))

In [None]:
# Train the dropout variant on the 20-step windows with the same batch size and
# epoch count as the baseline runs.
dropout_history = dropout_model.fit(X_20, y_20, batch_size=1, epochs=10)

In [None]:
def build_model_optimized(input_shape):
    """Create and compile the tuned LSTM model used for the final submission.

    Architecture: LSTM(50, full sequence) -> Dropout(0.2) -> LSTM(50, last step
    only) -> Dropout(0.2) -> Dense(50) -> Dense(4).

    Args:
        input_shape: Shape of one input sample, passed to the first LSTM layer.
            The caller in this notebook passes ``(X_20.shape[1], X_20.shape[2])``.

    Returns:
        A Sequential model compiled with the Adam optimizer, mean squared error
        loss and a root-mean-squared-error metric named ``'rmse'``.
    """
    model = Sequential()
    model.add(LSTM(50, input_shape=input_shape, return_sequences=True))
    model.add(Dropout(0.2))
    # The second LSTM collapses the time axis so the Dense head emits one
    # 4-value row per sample.
    model.add(LSTM(50, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(50))
    model.add(Dense(4))
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=[RootMeanSquaredError(name='rmse')])
    return model

# Instantiate the tuned model with the input shape of the 20-step windows.
optimized = build_model_optimized((X_20.shape[1], X_20.shape[2]))

In [None]:
# Train the tuned model on the 20-step windows, using larger batches (10) and
# more epochs (50) than the earlier runs in this notebook.
optimized.fit(X_20, y_20, batch_size=10, epochs=50)

In [None]:
# Forecast the test dates with the tuned model, seeding from the last 20 training rows.
optimized_predicted = create_prediction(optimized, 20)

In [None]:
# Write the forecast to the Kaggle working directory, omitting the row index.
optimized_predicted.to_csv('/kaggle/working/submission.csv', index=False)