In [None]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
import pandas as pd
import utm

from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics import r2_score, confusion_matrix, accuracy_score, mean_squared_error, classification_report
from sklearn.model_selection import train_test_split



import tensorflow as tf
import keras
from keras import optimizers
from tensorflow.keras.optimizers import RMSprop
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense, TimeDistributed, Input, Activation, concatenate
from keras.callbacks import History
from keras.models import Model

# The values have around 15 decimals, so to make them readable we increase the number of decimals pandas displays
pd.set_option('display.float_format', '{:.15f}'.format)

# 1. Importing the data

In [None]:
# Relative path of the CSV that is read into `df_sensor_and_loc` in the next cell.
sensor_and_loc_file = '../data/Processed/full_sensor_data_no_interpol_and_location.csv'

In [None]:
# Read the CSV referenced by `sensor_and_loc_file`; its first column becomes the index.
df_sensor_and_loc = pd.read_csv(
    sensor_and_loc_file,
    index_col=0,
)
df_sensor_and_loc

In [None]:
# Fill the gaps in the coordinate columns by linear interpolation.
# The result is assigned back to the frame on purpose: calling
# `.interpolate(inplace=True)` on a `.loc[:, col]` selection operates on a
# temporary object, so under pandas Copy-on-Write the frame is never updated
# (and pandas 2.x emits a chained-assignment warning for it).
coord_cols = ['lat', 'long']
df_sensor_and_loc[coord_cols] = df_sensor_and_loc[coord_cols].interpolate(method='linear')

# Drop every row that still has a missing value in any column.
df_sensor_and_loc = df_sensor_and_loc.dropna()
df_sensor_and_loc

In [None]:
# Work on an independent deep copy of the cleaned frame.
data = df_sensor_and_loc.copy(deep=True)
# A step of 1 keeps every row; a larger step would subsample the series.
data = data[::1]
data.shape

In [None]:
# Plot the 'lat' column against the 'long' column, with labelled axes so the
# figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(data['long'], data['lat'])
ax.set_xlabel('long')
ax.set_ylabel('lat');

# 2. Creating the LSTM model

## 2.1. Scaling the data

In [None]:
# --- Window / feature configuration ---
backcandles = 30        # number of past rows looked at for each prediction
attributes_cols = 12    # number of leading columns treated as input features
pca_components = 3      # number of principal components kept after PCA

# Column offset shared by both slices below. It is negative whenever `data`
# has more than `attributes_cols` columns, i.e. it counts from the right edge.
num_cols_to_eliminate = attributes_cols - data.shape[1]

# Features (X): every row, all columns before the offset.
attributes = data.iloc[:, :num_cols_to_eliminate]
# Targets (y): the columns from the offset onwards (assumed to be the last two
# columns), skipping the first `backcandles` rows.
targets = data.iloc[:, num_cols_to_eliminate:].iloc[backcandles:]
attributes.columns, targets.columns

In [None]:
# Rescale every feature column with a MinMaxScaler (default feature range).
# NOTE(review): the scaler is fitted on all rows, including the ones that are
# later assigned to the test/validation splits - consider fitting on the
# training rows only to avoid leaking their statistics.
scaler = MinMaxScaler().fit(attributes)
X_scaled = scaler.transform(attributes)

In [None]:

# Reduce the scaled features to `pca_components` principal components.
# NOTE(review): like the scaler, the PCA is fitted on every row, before the
# train/test/validation split is made.
pca = PCA(n_components=pca_components)
X_pca = pca.fit_transform(X_scaled)
# Resulting shape: (rows, pca_components)
X_pca.shape

In [None]:
# Build one look-back window per target row: the window that ends just before
# row `end` holds the `backcandles` preceding rows of every PCA component.
X = np.stack([
    X_pca[end - backcandles:end]
    for end in range(backcandles, X_pca.shape[0])
])

# Layout expected by the LSTM: (samples, backcandles, pca_components)
X.shape

In [None]:
# Materialise the windows and the targets as plain NumPy arrays.
X = np.array(X)
y = np.array(targets)

for caption, array in (('X Shape: ', X), ('y shape: ', y)):
    print(caption, array.shape)

In [None]:
# Contiguous (unshuffled) split along the sample axis:
# first 70 % -> train, next 20 % -> test, final 10 % -> val.
n_samples = len(X)
splitlimit_test = int(n_samples * 0.7)
splitlimit_val = int(n_samples * 0.9)
print(splitlimit_test, splitlimit_val)

split_points = [splitlimit_test, splitlimit_val]
X_train, X_test, X_val = np.split(X, split_points)
y_train, y_test, y_val = np.split(y, split_points)

# Report the shape of every partition, features first and targets second.
for part in (X_train, X_test, X_val, y_train, y_test, y_val):
    print(part.shape)

In [None]:
# Stacked LSTM regressor: two 64-unit LSTM layers, each followed by 20 %
# dropout, and a 2-unit dense output (one unit per target column).
model = Sequential([
    # First LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(64, return_sequences=True, input_shape=(backcandles, pca_components)),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),
    Dense(2),
])
model.compile(loss='mse', optimizer=RMSprop(learning_rate=0.005))

In [None]:
class PlotLearning(keras.callbacks.Callback):
    """
    Callback to plot the learning curves of the model during training.

    After every epoch the values found in ``logs`` are appended to
    ``self.metrics`` (a dict mapping metric name -> list of per-epoch values)
    and one subplot per training metric is redrawn. The matching
    ``val_<metric>`` curve is overlaid when values for it have been recorded.
    """
    def on_train_begin(self, logs=None):
        """Reset the stored history at the start of training.

        Args:
            logs: optional dict of metric values; each key gets an empty history.
        """
        # `None` replaces the mutable `{}` default, which would be shared
        # between calls.
        self.metrics = {metric: [] for metric in (logs or {})}

    def on_epoch_end(self, epoch, logs=None):
        """Record the metrics of the finished epoch and redraw the curves.

        Args:
            epoch: index of the epoch that just ended (unused for plotting; the
                x-axis is derived from the number of stored values).
            logs: dict of metric name -> value for this epoch.
        """
        logs = logs or {}

        # Storing metrics
        for metric, value in logs.items():
            self.metrics.setdefault(metric, []).append(value)

        # Plotting: one subplot per training metric; validation metrics are
        # drawn on the subplot of their training counterpart.
        metrics = [name for name in logs if not name.startswith('val_')]
        if not metrics:
            return

        # squeeze=False keeps `axs` two-dimensional even for a single subplot;
        # without it `plt.subplots(1, 1)` returns a bare Axes and indexing fails.
        f, axs = plt.subplots(1, len(metrics), figsize=(15, 5), squeeze=False)
        clear_output(wait=True)

        for ax, metric in zip(axs[0], metrics):
            train_values = self.metrics[metric]
            ax.plot(range(1, len(train_values) + 1),
                    train_values,
                    label=metric)

            # Look the validation curve up in the stored history, so training
            # without validation data (or a validation value of 0.0) is handled.
            val_values = self.metrics.get('val_' + metric)
            if val_values:
                ax.plot(range(1, len(val_values) + 1),
                        val_values,
                        label='val_' + metric)

            ax.legend()
            ax.grid()

        plt.tight_layout()
        plt.show()
        # Release the figure so one figure per epoch does not accumulate.
        plt.close(f)

In [None]:
batch_size=100
epochs=30

# Train the LSTM model.
# Validation curves are computed on the dedicated validation split
# (X_val / y_val), which keeps the test split untouched until the final
# evaluation. The returned History object is kept for later inspection.
history = model.fit(X_train,
                    y_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_data=(X_val, y_val),
                    callbacks=[PlotLearning()])

In [None]:
y_pred = model.predict(X_test)

# Preview up to the first 10 predictions next to their true targets
# (slicing avoids an IndexError when the test split has fewer than 10 rows).
for predicted_row, true_row in zip(y_pred[:10], y_test[:10]):
    print(predicted_row, true_row)

# Show how many predictions and targets are being compared.
print(len(y_pred), len(y_test))

# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error (MSE):", mse)

# Calculate R-squared
r2 = r2_score(y_test, y_pred)
print("R-squared:", r2)

y_pred, y_test

In [None]:
# Scatter the true test targets: target column 1 on x, target column 0 on y.
# Axis names are taken from `targets` so they always match the plotted columns.
fig, ax = plt.subplots()
ax.scatter(y_test[:, 1], y_test[:, 0], color='black', label='Test')
# Uncomment to overlay the model predictions:
# ax.scatter(y_pred[:, 1], y_pred[:, 0], color='blue', label='Pred')
ax.set_xlabel(targets.columns[1])
ax.set_ylabel(targets.columns[0])
# The legend call is what actually renders the `label=` entries.
ax.legend();