# Feature Engineering

In [195]:
from src.modelling import training
from sklearn.mixture import GaussianMixture as GM
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np
from src import utils

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [196]:
# Project root directory; used further down as the base for the feature output path.
project_root = utils.get_proj_root()
# Load the project configuration.
# NOTE(review): the path is relative — presumably utils.get_config resolves it
# against the project root rather than the kernel's working directory; confirm.
config = utils.get_config('config/config.ini')

## Hourly temperature profiles

In [198]:
# Build a long-format frame with one row per reading, tagged with its hour and calendar date.
preprocessed_data = training.get_training_data()
load_data = preprocessed_data[['datetime', 'load']].assign(
    hour=preprocessed_data['datetime'].dt.hour.values,
    date=preprocessed_data['datetime'].dt.date.values,
)

# One row per date, one column per hour of the day.
load_data_pivoted = load_data.pivot_table(index='date', columns='hour', values='load')

# Keep the columns from hour 3 onward: hour 2 is null on some dates.
X = load_data_pivoted.loc[:, 3:].to_numpy()

# The same count is used for the PCA dimensions and for the number of mixture components.
n_components = 2
X_pca = PCA(n_components=n_components).fit_transform(X)

# Assign every date to a Gaussian-mixture cluster in the reduced space.
gmm = GM(n_components=n_components, covariance_type='full', random_state=0)
cluster_label = gmm.fit(X_pca).predict(X_pca)

# Diagnostic: there must be exactly one label per pivoted date.
print(len(load_data_pivoted), len(cluster_label))
load_data_pivoted['cluster'] = cluster_label

# Attach each reading's cluster via its date, then take the median of the numeric
# columns for every (cluster, time-of-day) pair.
temp_df = (
    load_data
    .join(load_data_pivoted['cluster'], on='date')
    .pipe(
        lambda joined: joined
        .groupby(['cluster', joined['datetime'].dt.time])
        .median(numeric_only=True)
    )
)


def normalize(x: np.ndarray) -> np.ndarray:
    """Standardize ``x`` to zero mean and unit standard deviation (z-score).

    Parameters
    ----------
    x : np.ndarray
        Values to standardize. The mean and standard deviation are taken
        over all elements via ``x.mean()`` and ``x.std()``.

    Returns
    -------
    np.ndarray
        ``(x - x.mean()) / x.std()``. When the standard deviation is exactly
        zero (constant input) the centred values, which are all zero, are
        returned instead of dividing by zero and producing NaNs.
    """
    centered = x - x.mean()
    scale = x.std()
    # A constant profile has zero spread; dividing would yield NaN plus a RuntimeWarning.
    if scale == 0:
        return centered
    return centered / scale

# Median load values of each cluster, in the row order of temp_df.
cluster_0_profile = temp_df.loc[0]['load'].values
cluster_1_profile = temp_df.loc[1]['load'].values

# For each cluster, map hour (as int) -> normalized median load.
daily_load_profile_feature = {
    profile_name: dict(
        zip(
            temp_df.loc[cluster_id]['hour'].values.astype(int),
            normalize(cluster_profile),
        )
    )
    for profile_name, cluster_id, cluster_profile in (
        ('profile_1', 0, cluster_0_profile),
        ('profile_2', 1, cluster_1_profile),
    )
}

# Save the lookup at the path configured under [modelling_paths], relative to the project root.
file_path = project_root.joinpath(
    config['modelling_paths']['daily_load_profile_feature']
)
utils.save_value(daily_load_profile_feature, fname=file_path)


1095 1095
