# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import DBSCAN
from scipy.interpolate import lagrange
import pywt
from entropy import sample_entropy
from tqdm import tqdm
import warnings

# Set Plotting Styles
plt.style.use('ggplot')

# Ignore all runtime warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Read the data from CSV file, using the parsed "timestamp" column as the index
newData = pd.read_csv("Office_Garman.csv", index_col="timestamp", parse_dates=True)
# Remove duplicate timestamps, keeping the first reading for each one.
# DataFrame.drop_duplicates() compares row *values* and ignores the index, so it
# would delete legitimate readings that happen to repeat an earlier value while
# leaving repeated timestamps in place (which then break the reindex in asfreq).
newData = newData[~newData.index.duplicated(keep='first')]
# Set the frequency to hourly; timestamps missing from the file become NaN rows.
# An offset object is used instead of the 'H' string alias, which newer pandas
# versions deprecate.
newData = newData.asfreq(pd.offsets.Hour())
# Fill missing values using forward fill method
# (fillna(method='ffill') is deprecated; .ffill() is the direct equivalent)
newData = newData.ffill()

# Visualizing Energy Consumption Data
plt.plot(newData.index, newData['Office_Garman'])
plt.title('Energy Consumption')
plt.ylabel('Energy Consumption (MW)')
plt.show()

# Define the split date (last day that belongs to the training set)
split_date = '2015-09-30'

# Split the data into training and testing sets.
# Label slicing with a date string is inclusive on both ends, so slicing
# `[:split_date]` and `[split_date:]` would put every row of the split day in
# both sets. Train keeps everything up to and including the split day; test is
# whatever follows it positionally, so the two sets never overlap.
# Both are explicit copies so later column assignments do not write into a
# view of newData.
train = newData.loc[:split_date].copy()
test = newData.iloc[len(train):].copy()

# DBSCAN detection
# Cluster the consumption values (as a single feature column). DBSCAN assigns
# the label -1 to samples that belong to no cluster; those are treated as the
# outliers to repair below.
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan.fit(train['Office_Garman'].values.reshape(-1, 1))
labels = dbscan.labels_
outlier_mask = labels == -1

# Lagrange interpolation
# Only the detected outliers are replaced. Each one is re-estimated from a
# small window of its nearest non-outlier neighbours: a single Lagrange
# polynomial through every point of the series is numerically unstable and,
# evaluated at its own nodes, would merely reproduce the input.
train = train.copy()  # work on an owned frame so the assignment below is safe
timestamps = np.asarray(train.index.to_julian_date(), dtype=float)
values = train['Office_Garman'].to_numpy(dtype=float)
interpolated_values = values.copy()

neighbours_per_side = 3  # up to 6 support points -> polynomial degree <= 5
valid_positions = np.flatnonzero(~outlier_mask)
outlier_positions = np.flatnonzero(outlier_mask)

# Progress bar
with tqdm(total=len(outlier_positions), desc="Interpolating") as pbar:
    for i in outlier_positions:
        # Index in valid_positions of the first clean sample located after i.
        cut = np.searchsorted(valid_positions, i)
        support = np.concatenate((
            valid_positions[max(cut - neighbours_per_side, 0):cut],
            valid_positions[cut:cut + neighbours_per_side],
        ))
        # With fewer than two clean neighbours there is nothing to
        # interpolate from, so the original reading is kept.
        if len(support) >= 2:
            # Offsets in hours relative to the target keep the abscissae small;
            # raw Julian dates (~2.4e6) make the polynomial badly conditioned.
            offsets = (timestamps[support] - timestamps[i]) * 24.0
            interpolated_values[i] = lagrange(offsets, values[support])(0.0)
        pbar.update(1)

train['Office_Garman'] = interpolated_values

# DWT decomposition
# wavedec returns [cA5, cD5, cD4, cD3, cD2, cD1]: one approximation band
# followed by the detail bands from the coarsest to the finest level.
coeffs = pywt.wavedec(train['Office_Garman'].values, 'db4', level=5)

# Entropy calculation: one sample-entropy value per coefficient band.
# NOTE(review): this step was labelled "FuzzyEn", but the function actually
# called is sample_entropy -- confirm which measure is intended.
entropy_values = [
    sample_entropy(band) for band in tqdm(coeffs, desc="Calculating Entropy")
]

# Divide components into low-frequency and high-frequency.
# The approximation band carries the slow (low-frequency) trend and the detail
# bands carry the faster (high-frequency) fluctuations; the two names were
# previously assigned the other way round.
low_frequency = coeffs[0]    # Approximation coefficients
high_frequency = coeffs[1:]  # Detail coefficients

# Calculate actual energy consumption value
# waverec needs the complete coefficient list in wavedec order; passing a single
# array is rejected. The reconstruction can come back one sample longer than
# the input when the input length is odd, so it is trimmed to the series length.
actual_energy_consumption = pywt.waverec(coeffs, 'db4')[:len(train)]

# Update the code above with the specific modifications required for your dataset and algorithms.
