In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels
import tensorflow as tf
from numpy import ndarray
from pandas import DataFrame, Series
from scipy.signal import savgol_filter

In [None]:
# Location of the hourly electricity CSV. The default is the original,
# machine-specific path; set the ELECTRICITY_CSV environment variable to run
# the notebook on another machine without editing this cell.
file_path: str = os.environ.get(
    'ELECTRICITY_CSV',
    r'C:\Users\Lorenzo\PycharmProjects\TFT\outputs\data\electricity\data\electricity\hourly_electricity.csv',
)

In [None]:
# Read the CSV, using the file's first column as the frame's index.
raw_data: DataFrame = pd.read_csv(
    filepath_or_buffer=file_path,
    index_col=0,
)

In [None]:
# Preview the first rows of the loaded frame.
raw_data.head()

In [None]:
# Cast the 'date' column to second-resolution datetimes and order the rows
# chronologically. The frame is rebound rather than mutated in place.
raw_data = (
    raw_data
    .assign(date=raw_data['date'].astype('datetime64[s]'))
    .sort_values(by='date', ascending=True)
)

In [None]:
# sns.lineplot(data=raw_data, x='date', y='power_usage', hue='id')

Plot the power usage of a single consumption point (MT_001)

In [None]:
# Keep only the rows belonging to consumer MT_001.
mt_001 = raw_data.loc[raw_data['id'] == 'MT_001']

# Draw the series on an explicit axes object instead of the pyplot state machine.
fig = plt.figure(figsize=(12, 10))
usage_axes = fig.gca()
usage_axes.plot(mt_001['date'], mt_001['power_usage'])
usage_axes.set_title('MT_001 Power usage')
usage_axes.set_xlabel('Time')
usage_axes.set_ylabel('Power usage[MW]')
plt.show()

DECOMPOSING TIME-SERIES INTO TREND AND SEASONALITY

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose, DecomposeResult

In [None]:
# Build the date-indexed MT_001 series straight from `raw_data` so this cell
# can be re-run safely. Deriving it from the previous `mt_001` fails on a
# second run, because 'date' has already been moved into the index.
mt_001 = (
    raw_data
    .loc[raw_data['id'] == 'MT_001', ['date', 'power_usage']]
    .set_index('date', drop=True)
)

# No explicit period is passed, so seasonal_decompose has to derive it from the
# DatetimeIndex.
decomposition: DecomposeResult = seasonal_decompose(mt_001)

In [None]:
# Pull the three components out of the decomposition result in one step.
trend: Series
seasonal: Series
residual: Series
trend, seasonal, residual = (
    decomposition.trend,
    decomposition.seasonal,
    decomposition.resid,
)

In [None]:
# Four stacked panels: the observed series followed by its trend, seasonal and
# residual components.
fig, ax = plt.subplots(figsize=(12, 10), nrows=4)
mt_001.plot(kind='line', ax=ax[0])
trend.plot(kind='line', colormap='RdBu', ax=ax[1])
seasonal.plot(kind='line', colormap='Dark2', ax=ax[2])
residual.plot(kind='line', colormap='Pastel2', ax=ax[3])

# Title goes on the top panel only (typo "comsumption" corrected).
ax[0].set_title('Electricity consumption for MT_001 decomposed into Trend and Seasonality')

# Label each panel with the component it shows.
for panel, label in zip(ax, ('Original', 'Trend', 'Seasonal', 'Residual')):
    panel.set_ylabel(label, fontsize=14)

plt.tight_layout()
plt.show()

# FOURIER TRANSFORM

In [None]:
def plot_fourier(data: DataFrame, user: str, param: str, y_max: float = 60000):
    """Plot the magnitude spectrum of one consumer's series on the current axes.

    The rows of ``data`` whose ``'id'`` equals ``user`` are selected, the real
    FFT of column ``param`` is taken, and its magnitude is drawn as a step plot
    against frequency expressed in cycles per year (log-scaled x axis).

    Args:
        data: Frame with an ``'id'`` column and the column named by ``param``.
        user: Value of ``'id'`` selecting the series to analyse.
        param: Name of the numeric column to transform.
        y_max: Upper limit of the y axis. Defaults to 60000, the value that was
            previously hard-coded.

    Raises:
        ValueError: If no row of ``data`` has ``'id' == user``.
    """
    series = data.loc[data['id'] == user, param]
    if series.empty:
        raise ValueError(f"no rows found for id {user!r}")

    # Mean length of a Gregorian year in days (the former 365.2524 had two
    # digits transposed). Samples are assumed to be hourly -- TODO confirm.
    days_per_year = 365.2425
    hours_per_year = 24 * days_per_year

    # NumPy's real FFT is used instead of TensorFlow's: same spectrum, and it
    # also accepts integer-typed columns.
    spectrum = np.fft.rfft(series.to_numpy())

    # Bin k of the spectrum corresponds to k cycles over the whole record;
    # dividing by the record length in years gives cycles per year.
    cycles_per_dataset = np.arange(len(spectrum))
    years_per_dataset = len(series) / hours_per_year
    cycles_per_year = cycles_per_dataset / years_per_dataset

    plt.step(cycles_per_year, np.abs(spectrum))
    plt.xscale('log')
    plt.ylim(0, y_max)
    # Drop everything below 0.1 cycles/year, keep the automatic upper bound.
    plt.xlim([0.1, max(plt.xlim())])
    plt.xticks([1, days_per_year], labels=['1/Year', '1/day'])
    plt.xlabel('Frequency (log scale)')

In [None]:
# Spectrum of the power usage recorded for consumer MT_300.
plot_fourier(
    data=raw_data,
    user='MT_300',
    param='power_usage',
)

# AUTOCORRELATION

In [None]:
def autocorr(x: ndarray) -> ndarray:
    """Return the unnormalised autocorrelation of ``x`` for non-negative lags.

    Args:
        x: One-dimensional signal.

    Returns:
        The second half of ``np.correlate(x, x, mode='full')``: element ``k``
        is the correlation of ``x`` with itself shifted by ``k`` samples, so
        element 0 is the zero-lag value.
    """
    full_correlation: ndarray = np.correlate(x, x, mode='full')
    # The full correlation is symmetric around its middle element (lag 0).
    zero_lag_index = full_correlation.size // 2
    return full_correlation[zero_lag_index:]

In [None]:
def get_autocorr_values(y_values: ndarray, T: float, N: int, f_s: float) -> (ndarray, ndarray):
    """Compute the autocorrelation of a signal together with its lag axis.

    Args:
        y_values: One-dimensional signal samples.
        T: Spacing between consecutive samples; lag ``k`` is reported as ``k * T``.
        N: Maximum number of lags to return.
        f_s: Sampling frequency. Unused; kept so existing callers keep working.

    Returns:
        ``(x_values, autocorr_values)``, two arrays of equal length
        ``min(N, len(y_values))``: the lag axis and the autocorrelation at
        those lags. Previously the lag axis always had ``N`` entries while the
        autocorrelation had ``len(y_values)``, so the pair could not be plotted
        against each other unless the two happened to match.
    """
    autocorr_values: ndarray = autocorr(y_values)
    # Never report more lags than were requested or than exist.
    n_lags = max(0, min(N, len(autocorr_values)))
    x_values: ndarray = T * np.arange(n_lags)
    return x_values, autocorr_values[:n_lags]

In [None]:
# Shape of the MT_001 power-usage series, i.e. the number of samples available.
mt_001['power_usage'].shape

In [None]:
# Sampling description of the MT_001 series, derived from the data itself.
# The previous constants (N = 6000, T = 0.6 s) did not match the series: the
# lag axis had a different length from the autocorrelation and its values in
# seconds were wrong for what appears to be hourly data (see the file name).
T = 3600.0          # sampling period in seconds (one hour)
N = len(mt_001)     # number of samples in the series
t_n = N * T         # total duration covered by the series, in seconds
f_s = 1 / T         # sampling frequency in Hz

In [None]:
# Autocorrelation of the MT_001 power usage and the matching lag axis.
t_values, autocorr_values = get_autocorr_values(
    y_values=mt_001['power_usage'].values,
    T=T,
    N=N,
    f_s=f_s,
)

In [None]:
# Autocorrelation amplitude against lag, drawn on the current axes.
acf_axes = plt.gca()
acf_axes.plot(t_values, autocorr_values, linestyle='-', color='blue')
acf_axes.set_xlabel('time delay [s]')
acf_axes.set_ylabel('Autocorrelation amplitude')
plt.show()