In [1]:
import pandas as pd
import numpy as np
from pyhht.emd import EMD
from sklearn.ensemble import RandomForestRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Conv1D, Flatten, Dense, Attention
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN

# Read the data from the CSV file; the "timestamp" column is parsed as dates
# and becomes the index.
data = pd.read_csv("Office_Garman.csv", index_col="timestamp", parse_dates=True)

# Remove duplicate entries.
# De-duplicate on the timestamp index (keeping the first reading), not on the
# row values: DataFrame.drop_duplicates() ignores the index, so it would drop
# legitimate readings that merely repeat an earlier value while leaving
# repeated timestamps in place, and asfreq() below cannot reindex an index
# that contains duplicate labels.
data = data[~data.index.duplicated(keep="first")]

# Set the frequency to hourly; hours missing from the file become NaN rows.
# ('H' is the hourly alias; recent pandas releases prefer the spelling 'h'.)
data = data.asfreq('H')

# Fill missing values using forward fill (the last known reading is carried
# forward). ffill() replaces the deprecated fillna(method='ffill') spelling.
data = data.ffill()

# Z-score normalization: every column is scaled to zero mean / unit variance.
# The fitted scaler is kept so values can be mapped back to the original scale.
scaler = StandardScaler()
normalized_data = scaler.fit_transform(data.values)

# Perform Empirical Mode Decomposition on the data.
# NOTE: the EMD class imported from pyhht performs plain EMD, not CEEMDAN.
# pyhht takes the signal in the constructor and returns the components from
# decompose(); calling EMD() with no signal and then calling the instance is
# not part of its API.
emd = EMD(normalized_data[:, 0])  # Replace 0 with the appropriate column index for decomposition
# One component per row; the last row is expected to be the residue.
imfs = emd.decompose()

# Plotting the original signal followed by one panel per decomposed component.
# NOTE(review): the panel titles mention "Fuzzy Entropy", but no entropy value
# is computed anywhere in this cell - only the waveforms are drawn.
fig, axs = plt.subplots(len(imfs)+1, figsize=(10, 12))
axs[0].plot(normalized_data[:, 0], label='Original Data')
axs[0].set_ylabel('Amplitude')
axs[0].set_title('Decomposed Waveform')
for i, imf in enumerate(imfs):
    # Panel 0 holds the original signal, so component i goes to panel i + 1.
    axs[i+1].plot(imf, label=f'IMF {i+1}')
    axs[i+1].set_ylabel('Amplitude')
    axs[i+1].set_title(f'IMF {i+1} Fuzzy Entropy')
axs[-1].set_xlabel('Time')
plt.tight_layout()
plt.show()

# Clustering using DBSCAN.
# `imfs` holds one component per row, so `imfs.T` has one row per time sample
# and one column per component: every time sample receives a cluster label
# (-1 marks samples DBSCAN treats as noise).
dbscan = DBSCAN(eps=0.5, min_samples=5)
clusters = dbscan.fit_predict(imfs.T)

# Plotting DBSCAN Clustering Effect: IMF 1 against IMF 2, one point per time
# sample, coloured by cluster label.
# The rows imfs[0] / imfs[1] are used (not imfs[:, 0] / imfs[:, 1]) so that
# x, y and the colour array all have one entry per time sample.
# Requires the decomposition to have produced at least two components.
plt.figure(figsize=(10, 6))
plt.scatter(imfs[0], imfs[1], c=clusters)
plt.xlabel('IMF 1')
plt.ylabel('IMF 2')
plt.title('DBSCAN Clustering Effect')
plt.show()

# Component-wise one-step-ahead forecasting.
#
# The high-frequency part of the decomposition is modelled with a Random
# Forest, the low-frequency part with a Conv1D + GRU + self-attention network,
# and the two in-sample predictions are added to rebuild the signal.
#
# The functional Keras API is needed for the attention layer (see below), so
# the two extra names are imported here to keep the cell self-contained.
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

# Look-back window size: each training sample is `m` consecutive points of a
# series and the target is the point that follows. 24 is one day of hourly
# data - a starting point to tune, not a validated choice.
m = 24
# Number of leading rows of `imfs` treated as high-frequency components.
# Assumes the decomposition lists the fastest oscillations first and the
# residue last; splitting in half is only a starting point to tune.
n = max(1, len(imfs) // 2)


def _make_windows(series, window):
    """Return (X, y) with X[i] = series[i:i + window] and y[i] = series[i + window]."""
    series = np.asarray(series, dtype=float)
    if len(series) <= window:
        raise ValueError(
            f"series of length {len(series)} is too short for a window of {window}"
        )
    X = np.array([series[i:i + window] for i in range(len(series) - window)])
    return X, series[window:]


# Aggregate each group of components into a single series (rows are
# components, so the sum runs over axis 0).
high_freq = imfs[:n].sum(axis=0)
low_freq = imfs[n:].sum(axis=0)

# High-frequency component prediction with Random Forest (RF)
X_high, y_high = _make_windows(high_freq, m)
rf_model = RandomForestRegressor()
rf_model.fit(X_high, y_high)
train_rf_pred = rf_model.predict(X_high)

# Low-frequency component prediction with GRU and self-attention
X_low, y_low = _make_windows(low_freq, m)
X_low = X_low[..., np.newaxis]  # Conv1D expects (samples, steps, channels)

gru_input = Input(shape=(m, 1))
hidden = Conv1D(filters=64, kernel_size=3, activation='relu')(gru_input)
hidden = GRU(units=128, return_sequences=True)(hidden)
# The Attention layer must be called on [query, value]; passing the same
# sequence for both makes it self-attention. It cannot be stacked in a
# Sequential model, which would hand it a single tensor.
hidden = Attention()([hidden, hidden])
hidden = Flatten()(hidden)
gru_output = Dense(units=1)(hidden)
gru_model = Model(gru_input, gru_output)
gru_model.compile(optimizer='adam', loss='mean_squared_error')
gru_model.fit(X_low, y_low, epochs=10, batch_size=32)
train_gru_pred = gru_model.predict(X_low)

# Combine predictions from RF and GRU models.
# Both cover the same time steps: every sample after the first `m`, so
# train_pred has len(signal) - m entries.
train_pred = train_rf_pred + train_gru_pred.flatten()

# Plotting Prediction waveform.
# The reference curve is the signal that was decomposed (normalized_data[:, 0]).
# `imfs` stores one component per row, so imfs[:, 0] would be the first time
# sample of every component rather than a waveform over time.
# A model that predicts from a look-back window yields no output for the first
# window, so the reference is aligned to the most recent len(train_pred) samples
# (assumes train_pred covers the end of the series - confirm against the model
# section).
actual_signal = normalized_data[:, 0][-len(train_pred):]

plt.figure(figsize=(10, 6))
plt.plot(train_pred, label='Predicted')
plt.plot(actual_signal, label='Actual')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.title('Prediction Waveform')
plt.legend()
plt.show()

# Calculate the RMSE of the combined prediction.
# The prediction is compared with the signal that was decomposed
# (normalized_data[:, 0]); `imfs` stores one component per row, so imfs[:, 0]
# is not a time series. The reference is aligned to the most recent
# len(train_pred) samples (assumes train_pred covers the end of the series).
actual_signal = normalized_data[:, 0][-len(train_pred):]
train_rmse = np.sqrt(mean_squared_error(actual_signal, train_pred))

# Plotting the RMSE.
# Only the combined prediction exists at this point, so there is exactly one
# score; the axis labels say so instead of advertising a per-IMF breakdown
# that is never computed.
plt.figure(figsize=(10, 6))
plt.plot([train_rmse], marker='o')
plt.xlabel('Prediction')
plt.ylabel('RMSE')
plt.title('RMSE of the Combined Prediction')
plt.xticks([0], ['RF + GRU'])
plt.show()

def calculate_mape(actual, predicted):
    """Return the mean absolute percentage error, in percent.

    Args:
        actual: Reference values (array-like or scalar).
        predicted: Predicted values, broadcastable against ``actual``.

    Returns:
        The mean of ``|actual - predicted| / |actual|`` times 100, taken over
        the entries whose actual value is non-zero. ``nan`` when no entry has
        a non-zero actual value (including empty input).
    """
    # Accept plain sequences as well as arrays, and keep scalar broadcasting.
    actual, predicted = np.broadcast_arrays(
        np.asarray(actual, dtype=float), np.asarray(predicted, dtype=float)
    )
    # A percentage error is undefined where the reference is 0; skip those
    # entries so one zero does not turn the whole score into inf/nan.
    valid = actual != 0
    if not valid.any():
        return np.nan
    return np.mean(np.abs((actual[valid] - predicted[valid]) / actual[valid])) * 100

# Calculate the MAPE of the combined prediction.
# The prediction is compared with the signal that was decomposed
# (normalized_data[:, 0]); `imfs` stores one component per row, so imfs[:, 0]
# is not a time series. The reference is aligned to the most recent
# len(train_pred) samples (assumes train_pred covers the end of the series).
# NOTE(review): the reference is z-scored, so it takes values at or near zero
# where percentage errors become very large; consider computing MAPE on the
# original scale (scaler.inverse_transform) instead.
actual_signal = normalized_data[:, 0][-len(train_pred):]
train_mape = calculate_mape(actual_signal, train_pred)

# Plotting the MAPE.
# Only the combined prediction exists at this point, so there is exactly one
# score; the axis labels say so instead of advertising a per-IMF breakdown
# that is never computed.
plt.figure(figsize=(10, 6))
plt.plot([train_mape], marker='o')
plt.xlabel('Prediction')
plt.ylabel('MAPE')
plt.title('MAPE of the Combined Prediction')
plt.xticks([0], ['RF + GRU'])
plt.show()


ModuleNotFoundError: No module named 'tensorflow'