In [13]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler

In [14]:
import sys
# Extend the module search path so that the `src.pulsed_power_ml...` imports
# used by this notebook can be resolved (presumably "../../../" is the
# repository root that contains `src` — confirm).
# The entry is relative, so it depends on the kernel's working directory.
sys.path.append("../../../")

In [15]:
from src.pulsed_power_ml.model_framework.training_data_labelling import trainingdata_switch_detector
from src.pulsed_power_ml.model_framework.data_io import read_training_files
from src.pulsed_power_ml.models.gupta_model.gupta_utils import read_parameters
from src.pulsed_power_ml.model_framework.training_data_labelling import get_switch_features
from src.pulsed_power_ml.model_framework.training_data_labelling import make_labeled_training_data
from src.pulsed_power_ml.model_framework.training_data_labelling import explode_to_complete_label_vector

In [16]:
# Notebook configuration: data locations, file-name fragments and output folder.
# All paths are relative (NOTE(review): presumably to the notebook's own
# directory / the kernel's working directory — confirm).
# Gotcha: the folder constants end with "/" while the cells that consume
# TRAINING_DATA_BASE_FOLDER and OUTPUT_PATH insert another "/" in their
# f-strings, so the resulting paths contain a double slash.

# Folder with the raw measurement data (not referenced in the cells shown here).
RAW_DATA_BASE_FOLDER = "../../../../data/raw_data/2022-11-16_training_data/"
# Folder with prepared training data; features.csv / labels.csv are read from
# its "100_0_split" sub-folder.
TRAINING_DATA_BASE_FOLDER = "../../../../data/training_data/one_class_per_appliance_training_data/"
# File-name prefix / postfix — presumably for the apparent-power binary files
# in the raw data folder; TODO confirm against the code that uses them.
APPARENT_POWER_BINARY_PREFIX = "FFTApparentPower_"
APPARENT_POWER_BINARY_POSTFIX = "OffOn_FFTSize131072"
# YAML parameter file (presumably consumed by `read_parameters` — confirm).
PARAMETER_FILE = "../models/gupta_model/parameters_appliance_types.yml"
# Destination folder for the PDF figures written by the plotting cells.
OUTPUT_PATH = "../../assets/pulsed_power_ml/model_evaluation/gupta_model/model_optimization_intermediate_results/"

In [17]:
# Read the feature matrix and the label matrix of the "100_0_split" data set
# from comma-separated text files.
features = np.loadtxt(
    fname=f"{TRAINING_DATA_BASE_FOLDER}/100_0_split/features.csv",
    delimiter=",",
)
labels = np.loadtxt(
    fname=f"{TRAINING_DATA_BASE_FOLDER}/100_0_split/labels.csv",
    delimiter=",",
)
# Collapse every label row to the index of its largest entry, giving one
# integer class id per sample (presumably the rows are one-hot — confirm).
nice_labels = labels.argmax(axis=1)

In [18]:
# Project the raw (unscaled) feature vectors onto their first two principal
# components so they can be drawn in a 2-D scatter plot.
# random_state is pinned so that the projection is reproducible across
# "Restart & Run All": with the default svd_solver="auto" scikit-learn may
# choose a randomized SVD for large inputs, which is otherwise unseeded.
pca = PCA(n_components=2, random_state=0)
features_pca = pca.fit_transform(X=features)

# Make plot

In [19]:
# Qualitative 20-colour palette used by the plotting cells, which index it
# with integer colour ids derived from the class id (even ids for class ids
# 0-6, the following odd id for class ids 7-13).
cmap = mpl.colormaps["tab20"]

In [20]:
# Maps the integer class id (index of the largest entry in a label row) to the
# text shown in the plot legends.
# Ids 0-6 are the "ON" classes and ids 7-13 the matching "OFF" classes in the
# same order, so `id % 7` identifies the shared prefix (E, F, FP, H, L, PS, R —
# presumably appliance-type abbreviations; confirm).
# NOTE(review): "F ON " and "F OFF " carry a trailing space, unlike the other
# entries — possibly unintended.
label_map = {
  0: "E ON",
  1: "F ON ",
  2: "FP ON",
  3: "H ON",
  4: "L ON",
  5: "PS ON",
  6: "R ON",
  7: "E OFF",
  8: "F OFF ",
  9: "FP OFF",
  10: "H OFF",
  11: "L OFF",
  12: "PS OFF",
  13: "R OFF"
}

In [25]:
# Scatter plot of the unscaled features in PCA space, one colour per class,
# saved as a PDF below OUTPUT_PATH.
fig = plt.Figure(figsize=(8, 4.5))
ax = fig.add_subplot()

# Draw one scatter artist per class rather than one per sample: every labelled
# artist becomes its own legend entry, so a per-sample loop would repeat each
# class label once per data point in the legend (and is far slower).
for label in np.unique(nice_labels):
    # ON classes (ids 0-6) take the even palette slots; the matching OFF
    # classes (ids 7-13) take the odd slot right after it, so both states of
    # one appliance share a colour pair.
    if label <= 6:
        color_id = label * 2
    else:
        color_id = (label % 7) * 2 + 1
    color = cmap(color_id)
    label_txt = label_map[label]

    # Rows of the PCA projection that belong to the current class.
    class_mask = nice_labels == label
    ax.scatter(features_pca[class_mask, 0],
               features_pca[class_mask, 1],
               color=color,
               label=label_txt,
               marker='.')

ax.set_xlabel("Principal component 1")
ax.set_ylabel("Principal component 2")
ax.grid(True)
ax.legend()
fig.savefig(f"{OUTPUT_PATH}/unscaled_features_pca_appliance_type.pdf")

In [27]:
# Rescale every feature with MinMaxScaler (default range) before the PCA so
# that no single feature dominates the projection merely through its scale.
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(features)
# random_state is pinned so that the projection is reproducible across
# "Restart & Run All": with the default svd_solver="auto" scikit-learn may
# choose a randomized SVD for large inputs, which is otherwise unseeded.
pca = PCA(n_components=2, random_state=0)
scaled_features_pca = pca.fit_transform(scaled_features)

# Scatter plot of the scaled features in PCA space, one artist (and therefore
# one legend entry) per class.
fig = plt.Figure(figsize=(16, 9), tight_layout=True)
ax = fig.add_subplot()

for label_id, label_txt in label_map.items():
    # ON classes (ids 0-6) take the even palette slots; the matching OFF
    # classes (ids 7-13) take the odd slot right after it, so both states of
    # one appliance share a colour pair.
    if label_id <= 6:
        color_id = label_id * 2
    else:
        color_id = (label_id % 7) * 2 + 1
    color = cmap(color_id)

    # Rows of the PCA projection that belong to the current class; computed
    # once and reused for both coordinates.
    class_mask = nice_labels == label_id
    ax.scatter(
        x=scaled_features_pca[class_mask, 0],
        y=scaled_features_pca[class_mask, 1],
        color=color,
        label=label_txt,
        marker='x'
    )

ax.set_xlabel("Principal component 1")
ax.set_ylabel("Principal component 2")
ax.grid(True)
# Anchor the legend just outside the right edge of the axes.
ax.legend(scatterpoints=1, bbox_to_anchor=[1.05, 1])
ax.set_title("PCA - Per Appliance Type")
fig.savefig(f"{OUTPUT_PATH}/scaled_features_pca_appliance_type.pdf")