In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense, Bidirectional

# Load the dataset
data = pd.read_csv('new_data2.csv')

# Assume 'date(dd/mm/yy)' column is present and needs to be dropped along with the target separation
data = data.drop(columns=['date(dd/mm/yy)'])
X = data.drop(columns=['HYB'])
y = data['HYB']

# Function to remove outliers
def remove_outliers_iqr(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    return df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]

# Remove outliers from the target column
data_cleaned = remove_outliers_iqr(data, 'HYB')
# Optionally remove outliers from feature columns
for col in X.columns:
    data_cleaned = remove_outliers_iqr(data_cleaned, col)

# Prepare features and target after cleaning
X = data_cleaned.drop(columns=['HYB']).values
y = data_cleaned['HYB'].values

# Scaling features and target
scaler_X = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
scaler_y = MinMaxScaler()
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

# Reshape input to be [samples, time steps, features] which is required for RNN
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Define a function to create models
def create_model(model_type='simple_rnn'):
    model = Sequential()
    if model_type == 'simple_rnn':
        model.add(SimpleRNN(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
    elif model_type == 'bi_rnn':
        model.add(Bidirectional(SimpleRNN(50, activation='relu'), input_shape=(X_train.shape[1], X_train.shape[2])))
    elif model_type == 'lstm':
        model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
    elif model_type == 'bi_lstm':
        model.add(Bidirectional(LSTM(50, activation='relu'), input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Create, train and evaluate models
models = ['simple_rnn', 'bi_rnn', 'lstm', 'bi_lstm']
for model_type in models:
    model = create_model(model_type)
    model.fit(X_train, y_train, epochs=50, validation_data=(X_test, y_test), verbose=0)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"Results for {model_type.upper()}: MSE = {mse:.4f}, MAPE = {mape:.4f}, R^2 = {r2:.4f}")


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 270ms/step
Results for SIMPLE_RNN: MSE = 0.0686, MAPE = 0.5495, R^2 = -0.1006


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 527ms/step
Results for BI_RNN: MSE = 0.0624, MAPE = 0.5147, R^2 = -0.0009


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 334ms/step
Results for LSTM: MSE = 0.0551, MAPE = 0.4821, R^2 = 0.1156


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 706ms/step
Results for BI_LSTM: MSE = 0.0587, MAPE = 0.4909, R^2 = 0.0585
