In [1]:
!pip install ewtpy tensorflow tensorflow-docs scikit-learn pandas numpy matplotlib



In [2]:
!pip install --upgrade tensorflow



In [3]:
# Import necessary libraries
import math
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import ewtpy


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from math import sqrt

In [4]:
!python --version
!pip list | grep PyEMD


Python 3.11.11


In [5]:
from google.colab import drive
drive.mount('/content/drive')
path = "/content/drive/My Drive/datasets/haute.csv"
df = pd.read_csv(path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
#convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

In [7]:
# SVR Model Function
def svr_model(new_data, i, look_back, data_partition, cap):
    x = i
    data1 = new_data.loc[new_data['Month'].isin(x)]
    data1 = data1.reset_index(drop=True).dropna()
    datas = data1['P_avg']
    datas_wind = pd.DataFrame(datas)
    dfs = datas
    s = dfs.values

    datasetss2 = pd.DataFrame(s)
    datasets = datasetss2.values

    train_size = int(len(datasets) * data_partition)
    test_size = len(datasets) - train_size
    train, test = datasets[0:train_size], datasets[train_size:len(datasets)]

    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    X_train = pd.DataFrame(trainX)
    Y_train = pd.DataFrame(trainY)
    X_test = pd.DataFrame(testX)
    Y_test = pd.DataFrame(testY)

    sc_X = StandardScaler()
    sc_y = StandardScaler()

    X = sc_X.fit_transform(X_train)
    y = sc_y.fit_transform(Y_train).ravel()
    X1 = sc_X.transform(X_test)
    y1 = sc_y.transform(Y_test).ravel()

    # Set random seed for reproducibility
    np.random.seed(1234)
    tf.random.set_seed(1234)

    grid = SVR(kernel='rbf')
    grid.fit(X, y)

    y_pred_train_svr = grid.predict(X)
    y_pred_test_svr = grid.predict(X1)

    # Inverse transform predictions
    y_pred_train_svr = sc_y.inverse_transform(y_pred_train_svr.reshape(-1, 1))
    y_pred_test_svr = sc_y.inverse_transform(y_pred_test_svr.reshape(-1, 1))

    y_train = sc_y.inverse_transform(y.reshape(-1, 1))
    y_test = sc_y.inverse_transform(y1.reshape(-1, 1))

    # Convert to DataFrame if needed
    y_pred_train_svr = pd.DataFrame(y_pred_train_svr)
    y_pred_test_svr = pd.DataFrame(y_pred_test_svr)
    y_test = pd.DataFrame(y_test)

    # Summarize the fit of the model
    mape = np.mean((np.abs(y_test - y_pred_test_svr)) / cap) * 100
    rmse = sqrt(mean_squared_error(y_test, y_pred_test_svr))
    mae = metrics.mean_absolute_error(y_test, y_pred_test_svr)

    print('MAPE:', mape)
    print('RMSE:', rmse)
    print('MAE:', mae)


In [8]:
# ANN Model Function
def ann_model(new_data, i, look_back, data_partition, cap):
    data1 = new_data.loc[new_data['Month'].isin(i)].reset_index(drop=True).dropna()
    datas = data1['P_avg']
    datasets = datas.values.reshape(-1, 1)

    train_size = int(len(datasets) * data_partition)
    train, test = datasets[:train_size], datasets[train_size:]

    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)

    sc_X = StandardScaler()
    sc_y = StandardScaler()

    X_train = sc_X.fit_transform(trainX)
    y_train = sc_y.fit_transform(trainY.reshape(-1, 1)).ravel()
    X_test = sc_X.transform(testX)
    y_test = sc_y.transform(testY.reshape(-1, 1)).ravel()

    # Ensure data is float32 for TensorFlow
    X_train = np.array(X_train, dtype=np.float32)
    y_train = np.array(y_train, dtype=np.float32)
    X_test = np.array(X_test, dtype=np.float32)
    y_test = np.array(y_test, dtype=np.float32)

    # ✅ Keep 2D shape (no extra dimension)
    trainX1 = X_train
    testX1 = X_test

    # Set random seed for reproducibility
    np.random.seed(1234)
    tf.random.set_seed(1234)
    os.environ['PYTHONHASHSEED'] = '0'
    tf.config.experimental.enable_op_determinism()

    # ✅ Correct Input Shape for ANN
    neuron = 128
    model = Sequential()
    model.add(Dense(units=neuron, activation='relu', input_shape=(trainX1.shape[1],)))  # Fixed input shape
    model.add(Dense(1))

    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam())
    model.fit(trainX1, y_train, verbose=0)

    # Make predictions
    y_pred_train = model.predict(trainX1)
    y_pred_test = model.predict(testX1).ravel()

    # Inverse transform predictions
    y_pred_test = sc_y.inverse_transform(y_pred_test.reshape(-1, 1))
    y_test = sc_y.inverse_transform(y_test.reshape(-1, 1))

    # Summarize the fit of the model
    mape = np.mean((np.abs(y_test - y_pred_test)) / cap) * 100
    rmse = sqrt(mean_squared_error(y_test, y_pred_test))
    mae = mean_absolute_error(y_test, y_pred_test)

    print('MAPE:', mape)
    print('RMSE:', rmse)
    print('MAE:', mae)

In [9]:
# RF Model Function
def rf_model(new_data, i, look_back, data_partition, cap):
    data1 = new_data.loc[new_data['Month'].isin(i)].reset_index(drop=True).dropna()
    datas = data1['P_avg']
    datasets = datas.values.reshape(-1, 1)

    train_size = int(len(datasets) * data_partition)
    train, test = datasets[:train_size], datasets[train_size:]

    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)

    sc_X = StandardScaler()
    sc_y = StandardScaler()

    X_train = sc_X.fit_transform(trainX)
    y_train = sc_y.fit_transform(trainY.reshape(-1, 1)).ravel()
    X_test = sc_X.transform(testX)  # Ensure only transform() is applied
    y_test = sc_y.transform(testY.reshape(-1, 1)).ravel()

    # Ensure data is float32 for TensorFlow
    X_train = np.array(X_train, dtype=np.float32)
    y_train = np.array(y_train, dtype=np.float32)
    X_test = np.array(X_test, dtype=np.float32)
    y_test = np.array(y_test, dtype=np.float32)

    # Set random seed for reproducibility
    np.random.seed(1234)
    tf.random.set_seed(1234)
    os.environ['PYTHONHASHSEED'] = '0'

    # Define the RF model
    grid = RandomForestRegressor()
    grid.fit(X_train, y_train)

    # Make predictions
    y_pred_train_rf = grid.predict(X_train)
    y_pred_test_rf = grid.predict(X_test)

    # Inverse transform predictions
    y_pred_test_rf = sc_y.inverse_transform(y_pred_test_rf.reshape(-1, 1))
    y_test = sc_y.inverse_transform(y_test.reshape(-1, 1))

    # Summarize the fit of the model
    mape = np.mean((np.abs(y_test - y_pred_test_rf)) / cap) * 100
    rmse = sqrt(mean_squared_error(y_test, y_pred_test_rf))
    mae = mean_absolute_error(y_test, y_pred_test_rf)

    print('MAPE:', mape)
    print('RMSE:', rmse)
    print('MAE:', mae)

In [10]:
df

Unnamed: 0,Wind_turbine_name,Date_time,Ba_avg,Ba_min,Ba_max,Ba_std,Rt_avg,Rt_min,Rt_max,Rt_std,...,Pas_max,Pas_std,Wa_c_avg,Wa_c_min,Wa_c_max,Wa_c_std,Na_c_avg,Na_c_min,Na_c_max,Na_c_std
0,R80711,1/1/2017 0:00,-0.99,-0.99,-0.90,0.00,12.00,12.0,12.00,0.00,...,,,,,,,,,,
1,R80711,1/1/2017 0:10,-0.99,-0.99,-0.99,0.00,12.00,12.0,12.00,0.00,...,,,,,,,,,,
2,R80711,1/1/2017 0:20,-0.99,-0.99,-0.70,0.01,12.00,12.0,12.00,0.00,...,,,,,,,,,,
3,R80711,1/1/2017 0:30,-0.99,-0.99,-0.99,0.00,12.00,12.0,12.00,0.00,...,,,,,,,,,,
4,R80711,1/1/2017 0:40,-0.99,-0.99,-0.99,0.00,12.00,12.0,12.00,0.00,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53842,R80711,1/12/2018 23:20,-0.83,-0.99,0.00,0.32,13.01,13.0,13.78,0.06,...,,,,,,,,,,
53843,R80711,1/12/2018 23:30,-0.15,-0.49,0.23,0.23,13.00,13.0,13.84,0.04,...,,,,,,,,,,
53844,R80711,1/12/2018 23:40,0.03,-0.49,0.50,0.26,13.09,13.0,14.00,0.20,...,,,,,,,,,,
53845,R80711,1/12/2018 23:50,0.48,0.00,0.50,0.10,13.42,13.0,14.00,0.38,...,,,,,,,,,,


In [11]:
df['Date'] = pd.to_datetime(df['Date_time'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
new_data=df[['Month','Year','Date','P_avg']]
new_data=new_data[new_data.Year == 2017]

cap=max(new_data['P_avg'])

In [12]:
i=[1,2] #for fold-1
look_back=6
data_partition=0.5257331291956189

In [13]:
CUDA_VISIBLE_DEVICES=""

In [14]:
svr_model(new_data,i,look_back,data_partition,cap)

MAPE: 4.177064091203618
RMSE: 130.61584545659562
MAE: 85.65821372843024


In [15]:
ann_model(new_data,i,look_back,data_partition,cap)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
MAPE: 4.8182178
RMSE: 151.92779432184224
MAE: 98.80622100830078


In [16]:
rf_model(new_data,i,look_back,data_partition,cap)

MAPE: 3.994839370142407
RMSE: 134.50734290614565
MAE: 81.92136800079696


In [17]:
!pip install EMD-signal



In [18]:
from PyEMD import CEEMDAN

In [19]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt
import ewtpy
from PyEMD import CEEMDAN

# Proposed Hybrid CEEMDAN-EWT LSTM Model
def proposed_method(new_data, i, look_back, data_partition, cap):
    data1 = new_data.loc[new_data['Month'].isin(i)].reset_index(drop=True).dropna()
    datas = data1['P_avg']

    # Ensure `datas` is 1D before reshaping
    print("Original shape of datas:", datas.shape)
    datasets = datas.values.reshape(-1, 1)  # Reshape to (N, 1)

    # CEEMDAN Decomposition
    emd = CEEMDAN(epsilon=0.05)
    emd.noise_seed(12345)
    IMFs = emd(datasets.flatten())
    ceemdan1 = pd.DataFrame(IMFs).T
    imf1 = ceemdan1.iloc[:, 0]

    # EWT Decomposition
    ewt, _, _ = ewtpy.EWT1D(imf1, N=3)
    df_ewt = pd.DataFrame(ewt)

    # Drop unwanted component, ensuring shape consistency
    if df_ewt.shape[1] > 2:
        df_ewt.drop(columns=[2], inplace=True)

    denoised = df_ewt.sum(axis=1)
    ceemdan_without_imf1 = ceemdan1.iloc[:, 1:]
    new_ceemdan = pd.concat([denoised, ceemdan_without_imf1], axis=1)

    # **Ensure 2D shape before converting to DataFrame**
    print(f"new_ceemdan shape before pd.DataFrame(): {new_ceemdan.shape}")
    if len(new_ceemdan.shape) == 3:
        print(f"Fixing new_ceemdan shape from {new_ceemdan.shape} to 2D")
        new_ceemdan = new_ceemdan.reshape(new_ceemdan.shape[0], -1)
    print(f"new_ceemdan shape after reshape: {new_ceemdan.shape}")

    pred_test, test_ori, pred_train, train_ori = [], [], [], []
    epoch, batch_size, lr = 100, 64, 0.001

    # Iterate over each IMF for LSTM Training
    for col in new_ceemdan:
        dataset = new_ceemdan[[col]].values

        # **Ensure dataset is 2D**
        print(f"Dataset shape before reshape: {dataset.shape}")
        if len(dataset.shape) == 3:
            print(f"Fixing dataset shape from {dataset.shape} to 2D")
            dataset = dataset.reshape(dataset.shape[0], -1)
        print(f"Dataset shape after reshape: {dataset.shape}")

        train_size = int(len(dataset) * data_partition)
        train, test = dataset[:train_size], dataset[train_size:]

        trainX, trainY = create_dataset(train, look_back)
        testX, testY = create_dataset(test, look_back)

        # Debugging Shape
        print(f"Feature: {col}, TrainX Shape: {trainX.shape}, TrainY Shape: {trainY.shape}")

        sc_X, sc_y = StandardScaler(), StandardScaler()

        # Ensure StandardScaler gets 2D input
        X_train = sc_X.fit_transform(trainX.reshape(trainX.shape[0], -1))
        y_train = sc_y.fit_transform(trainY.reshape(-1, 1)).ravel()
        X_test = sc_X.transform(testX.reshape(testX.shape[0], -1))
        y_test = sc_y.fit_transform(testY.reshape(-1, 1)).ravel()

        # Ensure LSTM input is 3D
        trainX = X_train.reshape(X_train.shape[0], look_back, -1)
        testX = X_test.reshape(X_test.shape[0], look_back, -1)

        print("Final TrainX shape for LSTM:", trainX.shape)  # Should be (samples, timesteps, features)
        print("Final TestX shape for LSTM:", testX.shape)

        # Set random seed for reproducibility
        np.random.seed(1234)
        tf.random.set_seed(1234)
        os.environ['PYTHONHASHSEED'] = '0'

        # Build and train LSTM model
        model = Sequential()
        model.add(tf.keras.layers.Input(shape=(trainX.shape[1], trainX.shape[2])))
        model.add(LSTM(units=128))
        model.add(Dense(1))

        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=lr))
        model.fit(trainX, y_train, epochs=epoch, batch_size=batch_size, verbose=0)

        # Predictions
        y_pred_train = model.predict(trainX).ravel()
        y_pred_test = model.predict(testX).ravel()

        # Inverse transform predictions
        y_pred_train1 = sc_y.inverse_transform(y_pred_train.reshape(-1, 1))
        y_pred_test1 = sc_y.inverse_transform(y_pred_test.reshape(-1, 1))
        y_train = sc_y.inverse_transform(y_train.reshape(-1, 1))
        y_test = sc_y.inverse_transform(y_test.reshape(-1, 1))

        # **Ensure predictions are 2D before appending**
        y_pred_test1 = np.array(y_pred_test1)
        y_pred_train1 = np.array(y_pred_train1)
        y_test = np.array(y_test)
        y_train = np.array(y_train)

        pred_test.append(y_pred_test1)
        test_ori.append(y_test)
        pred_train.append(y_pred_train1)
        train_ori.append(y_train)

    # **Final Fix: Use Mean Instead of Sum for IMFs**
    print(f"Final pred_test shape before DataFrame: {np.array(pred_test).shape}")
    print(f"Final pred_train shape before DataFrame: {np.array(pred_train).shape}")

    pred_test = np.array(pred_test).reshape(len(pred_test), -1)  # Convert to (IMFs, Samples)
    pred_train = np.array(pred_train).reshape(len(pred_train), -1)  # Convert to (IMFs, Samples)
    test_ori = np.array(test_ori).reshape(len(test_ori), -1)

    # Take Mean Instead of Sum Over IMFs
    result_pred_test = pd.DataFrame(pred_test.mean(axis=0))  # Fix: Using mean
    result_pred_train = pd.DataFrame(pred_train.mean(axis=0))
    y_test = pd.DataFrame(test_ori.mean(axis=0))

    print(f"Final result_pred_test shape: {result_pred_test.shape}")
    print(f"Final result_pred_train shape: {result_pred_train.shape}")
    print(f"Final y_test shape: {y_test.shape}")

    # Error Metrics Calculation
    mape = np.mean((np.abs(y_test - result_pred_test)) / cap) * 100
    rmse = sqrt(mean_squared_error(y_test, result_pred_test))
    mae = mean_absolute_error(y_test, result_pred_test)

    print('MAPE:', mape)
    print('RMSE:', rmse)
    print('MAE:', mae)


In [None]:
proposed_method(new_data,i,look_back,data_partition,cap)

Original shape of datas: (8491,)
new_ceemdan shape before pd.DataFrame(): (8491, 11)
new_ceemdan shape after reshape: (8491, 11)
Dataset shape before reshape: (8491, 1)
Dataset shape after reshape: (8491, 1)
Feature: 0, TrainX Shape: (4457, 6), TrainY Shape: (4457,)
Final TrainX shape for LSTM: (4457, 6, 1)
Final TestX shape for LSTM: (4020, 6, 1)
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Dataset shape before reshape: (8491, 1)
Dataset shape after reshape: (8491, 1)
Feature: 1, TrainX Shape: (4457, 6), TrainY Shape: (4457,)
Final TrainX shape for LSTM: (4457, 6, 1)
Final TestX shape for LSTM: (4020, 6, 1)
