In [11]:
import numpy as np
import pandas as pd
from scipy.interpolate import lagrange
from pyentrp import entropy
from statsmodels.tsa.seasonal import seasonal_decompose
from tqdm import tqdm

# Read the data from CSV file
newData = pd.read_csv("Office_Garman.csv", index_col="timestamp", parse_dates=True)
# Remove duplicate entries
newData = newData.drop_duplicates()
# Set the frequency to hourly
newData = newData.asfreq('H')
# Fill missing values using forward fill method
newData = newData.fillna(method='ffill')

# Define the split date
split_date = '2015-09-30'

# Split the data into training and testing sets
train = newData.loc[:split_date]
test = newData.loc[split_date:]

# Perform seasonal decomposition using EMD
decomposition = seasonal_decompose(train['Office_Garman'], model='additive')

# Access the components
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# Lagrange interpolation to fill missing values
def lagrange_interpolation(data):
    if np.isscalar(data):
        return data
    missing_indexes = np.isnan(data)
    missing_values = np.where(missing_indexes)[0]
    known_values = np.where(~missing_indexes)[0]
    interpolated_values = lagrange(known_values, data[~missing_indexes])(missing_values)
    data[missing_indexes] = interpolated_values
    return data

# Apply Lagrange interpolation to trend with a progress bar
interpolated_trend = []
for value in tqdm(trend, desc='Interpolating Trend'):
    interpolated_trend.append(lagrange_interpolation(value))
interpolated_trend = pd.Series(interpolated_trend, index=trend.index)

# Apply Lagrange interpolation to seasonal and residual components
interpolated_seasonal = seasonal.progress_apply(lagrange_interpolation)
interpolated_residual = residual.progress_apply(lagrange_interpolation)

# Calculate sample entropy using pyentrp's sample_entropy function
trend_entropy = entropy.sample_entropy(interpolated_trend, sample_length=2)
seasonal_entropy = entropy.sample_entropy(interpolated_seasonal, sample_length=2)
residual_entropy = entropy.sample_entropy(interpolated_residual, sample_length=2)

# Split the components into high-frequency and low-frequency components
high_freq_components = interpolated_trend + interpolated_residual
low_freq_components = interpolated_seasonal

# Calculate sum of equal weights for each timestamp to obtain actual energy consumption
actual_energy_consumption = train['Office_Garman']

# Print or use the high-frequency and low-frequency components, entropy values, and actual energy consumption as needed



Interpolating Trend: 100%|█████████████| 7296/7296 [00:00<00:00, 2142672.03it/s][A


  0%|          | 0/7296 [00:00<?, ?it/s]

  0%|          | 0/7296 [00:00<?, ?it/s]

  sampen = -np.log(Ntemp[1:] / Ntemp[:-1])
  sampen = -np.log(Ntemp[1:] / Ntemp[:-1])
