# In [1]:
import numpy as np
import pandas as pd
from pyhht import EMD
from sklearn.cluster import DBSCAN
from scipy.interpolate import lagrange
from pyentrp import entropy

# Load the meter readings, indexed by their parsed timestamps.
newData = pd.read_csv("Office_Garman.csv", index_col="timestamp", parse_dates=True)
# Drop repeated timestamps, keeping the first reading for each one.
# DataFrame.drop_duplicates() compares row *values* and ignores the index, so
# it would discard legitimate repeated readings while leaving duplicate
# timestamps in place (which the reindexing done by asfreq() cannot handle).
newData = newData[~newData.index.duplicated(keep="first")]
# Put the data on a regular hourly grid; hours absent from the file become NaN.
newData = newData.asfreq('H')
# Carry the last observed reading forward into those gaps.
# fillna(method='ffill') is deprecated in recent pandas; ffill() is equivalent.
newData = newData.ffill()

# Last calendar day that belongs to the training set.
split_date = '2015-09-30'

# Split chronologically. A label slice on a DatetimeIndex includes the whole
# of split_date, so the test set is taken as every row not already in `train`
# instead of slicing from split_date again (both sets would share that day).
train = newData.loc[:split_date]
test = newData.drop(index=train.index)

# Decompose the full series with empirical mode decomposition (EMD).
# pyhht's EMD receives the signal in its constructor and returns the modes
# from decompose(); the code below treats each row as one component.
emd = EMD(newData['Office_Garman'].values)
components = emd.decompose()

# Run DBSCAN over the timestamps: each sample is one column of `components`
# (the value of every component at that timestamp), so `labels` holds one
# entry per timestamp, with -1 for points DBSCAN leaves unclustered.
dbscan = DBSCAN(eps=0.5, min_samples=5)
labels = dbscan.fit_predict(components.T)

# Lagrange interpolation to fill missing values
def lagrange_interpolation(data):
    """Fill the NaN entries of a 1-D float array by Lagrange interpolation.

    A single Lagrange polynomial is fitted through every non-NaN sample,
    using the sample positions (0, 1, 2, ...) as x-coordinates, and is then
    evaluated at the positions of the NaN entries.

    Args:
        data: 1-D NumPy float array. It is modified in place.

    Returns:
        The same array object, with its NaN entries replaced.

    Note:
        SciPy documents its Lagrange implementation as numerically unstable
        beyond roughly 20 points, so results for rows with many known
        samples should be treated with caution.
    """
    missing_mask = np.isnan(data)
    if not missing_mask.any():
        # Nothing to fill. Returning here avoids building a polynomial through
        # every sample, which is very slow for long rows and whose result
        # would be thrown away anyway.
        return data

    missing_positions = np.flatnonzero(missing_mask)
    known_positions = np.flatnonzero(~missing_mask)
    polynomial = lagrange(known_positions, data[~missing_mask])
    data[missing_mask] = polynomial(missing_positions)
    return data

# Fill NaN entries component by component (axis=1 hands each row to the helper).
interpolated_components = np.apply_along_axis(lagrange_interpolation, axis=1, arr=components)

# Fuzzy entropy of every component; 2 and 1 are passed to fuzzy_entropy as-is.
# NOTE(review): confirm that the installed pyentrp exposes `fuzzy_entropy`
# and what its second and third positional arguments mean.
entropy_values = [entropy.fuzzy_entropy(component, 2, 1) for component in interpolated_components]

# Split using the DBSCAN labels. `labels` has one entry per timestamp (DBSCAN
# was fitted on components.T), so it has to select columns; using it to index
# rows raises IndexError or picks unrelated components.
# NOTE(review): the variable names suggest a per-component high/low-frequency
# split (e.g. driven by entropy_values) -- confirm which split is intended.
noise_mask = labels == -1
high_freq_components = interpolated_components[:, noise_mask]
low_freq_components = interpolated_components[:, ~noise_mask]

# Add all components together (equal weights) at each timestamp to obtain the
# energy consumption series.
actual_energy_consumption = np.sum(components, axis=0)

# Print or use the high-frequency and low-frequency components, entropy values, and actual energy consumption as needed


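# Recorded output of this cell: ModuleNotFoundError: No module named 'pyhht'
# (the pyhht package was not installed in the environment where it was run).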