# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import psutil
import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs.
np.random.seed(42)

# Read the semicolon-separated, header-less file and label its seven columns.
data = pd.read_csv('C:/Users/Storm/Downloads/SN_m_tot_V2.0.csv', sep=';', header=None)
data.columns = [
    'Year',
    'Month',
    'Date',
    'Monthly Mean Total Sunspot Number',
    'Uncertainty',
    'Observations',
    'Definitive/Provisional',
]

# The monthly mean column is the series being modelled; keep it as a single-column 2-D array
# because the scaler works on (n_samples, n_features) input.
target = data['Monthly Mean Total Sunspot Number'].to_numpy()[:, np.newaxis]

# Standardize the series.
# NOTE(review): the scaler is fitted on the full series before any fold split, so every
# validation fold contributes to the mean/std used for scaling.
scaler = StandardScaler().fit(target)
target_scaled = scaler.transform(target)

# Fold layout: equally sized contiguous folds; samples beyond num_folds * fold_size
# never land in a validation slice.
num_folds = 5
fold_size = target_scaled.shape[0] // num_folds

# Per-fold records: evaluation metrics and resource measurements.
eval_metrics, execution_times, cpu_usages, memory_usages = [], [], [], []

# Running record of the fold with the lowest validation RMSE seen so far.
best_fold, best_metric = 0, float('inf')

# Perform k-fold cross-validation
def calculate_smape(y_true, y_pred):
    """Return the Symmetric Mean Absolute Percentage Error (SMAPE) in percent.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, broadcastable against ``y_true``.

    Returns:
        Mean of ``|y_pred - y_true| / ((|y_pred| + |y_true|) / 2)`` times 100.
        Points where both the observed and the predicted value are exactly
        zero count as zero error instead of producing a 0/0 NaN that would
        poison the whole mean.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    abs_error = np.abs(y_pred - y_true)
    denominator = (np.abs(y_pred) + np.abs(y_true)) / 2
    # Divide only where the denominator is non-zero; the remaining slots keep the 0 from `out`.
    ratio = np.divide(abs_error, denominator, out=np.zeros_like(abs_error), where=denominator != 0)
    return np.mean(ratio) * 100


def train_evaluate_model(train_data, val_data, fold_label=None, seam_index=None):
    """Train a stacked LSTM for one-step-ahead prediction and evaluate it.

    Each sample uses the value at step t as input and the value at step t + 1
    as target. Metrics are computed after undoing the scaling with the
    module-level ``scaler``, printed, and appended to the module-level
    ``eval_metrics`` list.

    Args:
        train_data: Scaled training series, shape (n, 1).
        val_data: Scaled validation series, shape (m, 1).
        fold_label: Fold number shown in the printed report. When omitted the
            module-level loop variable ``fold`` is used, which is what the
            function read before it took this parameter.
        seam_index: Position in ``train_data`` of the first sample that follows
            a gap (two non-adjacent segments joined together). The single
            (input, target) pair that straddles the gap is dropped, because
            those two values are not consecutive in the real series.

    Returns:
        Tuple ``(train_rmse, val_rmse, y_train, train_predictions, y_val,
        val_predictions)`` with all arrays in the original (unscaled) units.
    """
    if fold_label is None:
        fold_label = fold  # fall back to the module-level loop variable

    X_train, y_train = train_data[:-1], train_data[1:]
    if seam_index is not None:
        # Pair (seam_index - 1 -> seam_index) crosses the removed validation fold.
        X_train = np.delete(X_train, seam_index - 1, axis=0)
        y_train = np.delete(y_train, seam_index - 1, axis=0)
    X_val, y_val = val_data[:-1], val_data[1:]

    # Make the (samples, timesteps, features) layout explicit to match input_shape=(1, 1).
    X_train = X_train.reshape(-1, 1, 1)
    X_val = X_val.reshape(-1, 1, 1)

    # Define and train the Stacked LSTM model
    model = Sequential()
    model.add(LSTM(64, return_sequences=True, input_shape=(1, 1)))
    model.add(LSTM(64))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')
    model.fit(X_train, y_train, epochs=50, verbose=0)

    # Make predictions
    train_predictions = model.predict(X_train)
    val_predictions = model.predict(X_val)

    # Invert the scaling so metrics are reported in the original units
    train_predictions = scaler.inverse_transform(train_predictions)
    y_train = scaler.inverse_transform(y_train)
    val_predictions = scaler.inverse_transform(val_predictions)
    y_val = scaler.inverse_transform(y_val)

    # Calculate evaluation metrics
    train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
    val_rmse = np.sqrt(mean_squared_error(y_val, val_predictions))
    train_mae = mean_absolute_error(y_train, train_predictions)
    val_mae = mean_absolute_error(y_val, val_predictions)
    train_r2 = r2_score(y_train, train_predictions)
    val_r2 = r2_score(y_val, val_predictions)
    train_smape = calculate_smape(y_train, train_predictions)
    val_smape = calculate_smape(y_val, val_predictions)

    # Print evaluation metrics for the current fold
    print(f"\nFold {fold_label}:")
    print(f"Train RMSE: {train_rmse:.5f}")
    print(f"Validation RMSE: {val_rmse:.5f}")
    print(f"Train MAE: {train_mae:.5f}")
    print(f"Validation MAE: {val_mae:.5f}")
    print(f"Train R2: {train_r2:.5f}")
    print(f"Validation R2: {val_r2:.5f}")
    print(f"Train SMAPE: {train_smape:.5f}")
    print(f"Validation SMAPE: {val_smape:.5f}")

    # Store evaluation metrics for the current fold
    eval_metrics.append((train_rmse, val_rmse, train_mae, val_mae, train_r2, val_r2, train_smape, val_smape))

    return train_rmse, val_rmse, y_train, train_predictions, y_val, val_predictions


# Perform k-fold cross-validation over contiguous slices of the scaled series
for fold in range(1, num_folds + 1):
    # Define the training and validation data for the current fold
    start_index = (fold - 1) * fold_size
    end_index = fold * fold_size
    val_data = target_scaled[start_index:end_index]
    train_data = np.concatenate((target_scaled[:start_index], target_scaled[end_index:]))

    # The training series has a gap only when data exists on both sides of the validation slice.
    has_gap = 0 < start_index and end_index < len(target_scaled)
    seam_index = start_index if has_gap else None

    # Prime psutil's CPU counter: cpu_percent() reports usage since the previous call, so this
    # makes the reading taken after training cover the training window (the result is discarded).
    psutil.cpu_percent()

    # Train and evaluate the Stacked LSTM model; perf_counter is a monotonic clock for durations
    start_time = time.perf_counter()
    train_rmse, val_rmse, y_train, train_predictions, y_val, val_predictions = train_evaluate_model(
        train_data, val_data, fold_label=fold, seam_index=seam_index
    )
    computation_time = time.perf_counter() - start_time

    # System-wide CPU utilisation since the priming call above
    cpu_usage = psutil.cpu_percent()

    # System-wide memory in use, in MB; sampled once so the stored and printed values agree
    memory_used_mb = psutil.virtual_memory().used / 1024 / 1024

    # Record computation calculations
    execution_times.append(computation_time)
    cpu_usages.append(cpu_usage)
    memory_usages.append(memory_used_mb)

    # Check if current fold has the best (lowest) validation RMSE
    if val_rmse < best_metric:
        best_metric = val_rmse
        best_fold = fold

    # Print computation calculations for the current fold
    print(f"\nComputation calculations for Fold {fold}:")
    print(f"Execution Time: {computation_time:.2f} seconds")
    print(f"CPU Usage: {cpu_usage}%")
    print(f"Memory Used: {memory_used_mb:.2f} MB")

# Print the evaluation metrics for the best fold
# Pull out the metric tuple recorded for the best fold (folds are 1-based, the list is 0-based).
(
    best_train_rmse,
    best_val_rmse,
    best_train_mae,
    best_val_mae,
    best_train_r2,
    best_val_r2,
    best_train_smape,
    best_val_smape,
) = eval_metrics[best_fold - 1]

# Report the best fold's metrics, one line per metric.
print(f"\nBest Fold: {best_fold}")
best_fold_report = (
    ("Train RMSE", best_train_rmse),
    ("Validation RMSE", best_val_rmse),
    ("Train MAE", best_train_mae),
    ("Validation MAE", best_val_mae),
    ("Train R2", best_train_r2),
    ("Validation R2", best_val_r2),
    ("Train SMAPE", best_train_smape),
    ("Validation SMAPE", best_val_smape),
)
for metric_label, metric_value in best_fold_report:
    print(f"Best {metric_label}: {metric_value:.5f}")

# Draw the per-fold resource measurements side by side in the top row of a 2x3 grid,
# marking the best fold with a red dot on each panel.
plt.figure(figsize=(12, 6))
plt.suptitle('Stacked LSTM Model Evaluation', fontsize=16, fontweight='bold')

fold_numbers = range(1, num_folds + 1)
panels = (
    (execution_times, 'Execution Time for Each Fold', 'Time (seconds)'),
    (cpu_usages, 'CPU Usage for Each Fold', 'CPU Usage (%)'),
    (memory_usages, 'Memory Usage for Each Fold', 'Memory Used (MB)'),
)
for panel_position, (series, panel_title, y_label) in enumerate(panels, start=1):
    plt.subplot(2, 3, panel_position)
    plt.plot(fold_numbers, series, 'o-')
    plt.plot(best_fold, series[best_fold - 1], 'ro')
    plt.title(panel_title)
    plt.xlabel('Fold')
    plt.ylabel(y_label)

plt.tight_layout()
plt.show()
