In [8]:
# Import necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Conv1D, MaxPooling1D, Flatten, Dense
from keras.callbacks import EarlyStopping

# Load the data from Excel
data = pd.read_excel('../data/Data_for_ML_TT-DB0.xlsx', sheet_name="Data", header=0, usecols=[1])
# data_feature = pd.read_excel('../data/Data_for_ML_TT-DB0.xlsx', header=0, usecols='A:B')
data_target = pd.read_excel('../data/Data_for_ML_TT-DB0.xlsx', header=0, usecols='C')

# Data preprocessing
# Assuming you have already preprocessed the data and included relevant features in X.
# X should be a 3D array (samples, time steps, features) for LSTM-CNN model.
# In this case, extract the features and target variable (LC)
X = data.values
y = data_target.values


# Define the look-back window (number of time steps to consider for prediction)
look_back = 12

# Convert the data into sequences
X = []
for i in range(len(data) - look_back):
    X.append(data.iloc[i:i+look_back].values)
X = np.array(X)

# Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
train_size = int(len(data) * 0.7)
test_size = len(data) - train_size
train_data, test_data = data[0:train_size,:], data[train_size:len(data),:]

X_train, y_train = train_data[:, :-1], train_data[:, -1]
X_test, y_test = test_data[:, :-1], test_data[:, -1]

# Reshape data to fit the LSTM-CNN model input requirements
# The LSTM-CNN model expects data in the format of (samples, time steps, features)
# Here, we assume time steps (look_back) is 1 for simplicity, as we are only using the current data point.
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Normalize the data
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# Build the LSTM-CNN model
model = Sequential()
# model.add(Conv1D(filters=64, kernel_size=3, activation="relu", input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Conv1D(filters=64, kernel_size=3, activation="relu", input_shape=(look_back, X_train.shape[1])))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(50, activation="relu"))
model.add(Dense(1))  # Output layer with 1 neuron for regression

# Compile the model
model.compile(loss="mean_squared_error", optimizer="adam")

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(patience=10, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Evaluate the model
loss = model.evaluate(X_test, y_test)

# Make predictions
# y_pred = model.predict(X_test)
train_predictions = model.predict(X_train)
test_predictions = model.predict(X_test)

# Inverse transform predictions and actual values to their original scale
# y_pred_original = scaler.inverse_transform(y_pred).flatten()
# y_test_original = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
train_predictions_original = scaler.inverse_transform(X_train).flatten()
test_predictions_original = scaler.inverse_transform(X_test.reshape(-1, 1)).flatten()

# Print the predictions
print("Training predictions:", train_predictions_original)
print("Testing predictions:", test_predictions_original)

# Calculate the accuracy metrics (e.g., Mean Squared Error, Mean Absolute Error, R-squared)
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# mse = mean_squared_error(y_test_original, y_pred_original)
# mae = mean_absolute_error(y_test_original, y_pred_original)
# r2 = r2_score(y_test_original, y_pred_original)
mse = mean_squared_error(test_predictions_original, train_predictions_original)
mae = mean_absolute_error(test_predictions_original, train_predictions_original)
r2 = r2_score(test_predictions_original, train_predictions_original)


print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R-squared:", r2)


InvalidIndexError: (slice(0, 339, None), slice(None, None, None))