In [125]:
from skmultilearn.adapt import MLkNN
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix
from scipy import sparse
from sklearn.metrics import f1_score, classification_report
from sklearn import metrics
import pandas as pd
import numpy as np

In [126]:
# Every channel of interest for house 1.
all_selected_channels_h1 = [
    'kitchen_outlets_7',
    'kitchen_outlets_8',
    'kitchen_outlets_15',
    'kitchen_outlets_16',
    'lighting_9',
    'lighting_17',
    'lighting_18',
    'electric_oven_3',
    'electric_oven_4',
    'microwave',
    'washer_dryer_10',
    'washer_dryer_20',
    'refrigerator',
    'bathroom_gfi',
]
# Reduced house-1 selection.
selected_channels_h1 = [
    'kitchen_outlets_7',
    'lighting_18',
    'washer_dryer_20',
    'electric_oven_3',
    'microwave',
    'refrigerator',
    'bathroom_gfi',
]

# House 3: the full and the reduced selections currently hold the same channels,
# but are kept as two separate lists.
all_selected_channels_h3 = [
    'kitchen_outlets_22',
    'furnace',
    'washer_dryer_13',
    'microwave',
    'bathroom_gfi',
]
selected_channels_h3 = [
    'kitchen_outlets_22',
    'furnace',
    'washer_dryer_13',
    'microwave',
    'bathroom_gfi',
]

In [127]:
# Read the CSV at the relative path below; its first column becomes the DataFrame index.
df = pd.read_csv('house3-nosampling/h3_5min.csv',index_col = 0)

In [128]:
# Segmentation / embedding settings used by the cells below.
window = 100          # samples per segment, passed to windowing()
dimension = 6         # embedding dimension, passed to reduce_dimensions()
delay_in_second = 32  # passed to reduce_dimensions() as delay_in_seconds
sample = 3            # passed to reduce_dimensions() as sample_period

# Targets are the selected house-3 channels; the model input is the 'main' column.
target_name = selected_channels_h3
X = df['main'].copy()
y = df[target_name].copy()

In [129]:
def windowing(narray, window):
    """
    Split an array into consecutive, non-overlapping windows along its first axis.

    Trailing items that do not fill a complete window are dropped. The resulting
    array is also printed before it is returned.

    Args:
        narray (ndarray): Array of shape (n_samples, ...). Any trailing axes are preserved.
        window (int): Number of consecutive samples per window; must be >= 1.
    Returns:
        ndarray of shape (n_samples // window, window, ...).
    Raises:
        ValueError: If window is smaller than 1.
    """
    if window < 1:
        raise ValueError(f'window must be a positive integer, got {window}')
    n_windows = len(narray) // window
    # Keep only the samples that fill whole windows, then fold them into a new
    # leading axis. Appending narray.shape[1:] makes this work for any rank,
    # not only for 1-D and 2-D input.
    trimmed = narray[:n_windows * window]
    seq_in_batches = np.reshape(trimmed, (n_windows, window) + tuple(narray.shape[1:]))
    print(seq_in_batches)
    return seq_in_batches

In [130]:
def reduce_dimensions(data_in_batches: np.ndarray, window: int, sample_period: int,
                      dimension: int, delay_in_seconds: int, should_fit: bool = True):
    """
    Build delay embeddings for every batch with `approximate` and flatten each one
    into a single feature vector.

    Args:
        data_in_batches (ndarray): The data of the time series separated in batches.
        window (int): The size of the sub-segments of the given time series; forwarded to `approximate`.
        sample_period (int): Forwarded to `approximate`, which divides delay_in_seconds by it
            to obtain the delay in number of samples.
        dimension (int): Embedding dimension, forwarded to `approximate`.
        delay_in_seconds (int): Embedding delay, forwarded to `approximate`.
        should_fit (bool): Forwarded to `approximate`.
    Returns:
        The array returned by `approximate`; a 3-D result is reshaped to 2-D by merging
        its last two axes.
    """
    # Forward the caller's should_fit instead of a hard-coded True.
    squeezed_seq = approximate(delay_in_seconds, dimension, sample_period=sample_period,
                               series_in_segments=data_in_batches, window=window, should_fit=should_fit)

    print('Shape of squeezed seq: {}'.format(squeezed_seq.shape))
    if len(squeezed_seq.shape) == 3:
        n_segments, n_rows, n_cols = squeezed_seq.shape
        # One flat row per segment: (segments, rows, cols) -> (segments, rows * cols).
        squeezed_seq = np.reshape(squeezed_seq, (n_segments, n_rows * n_cols))
    return squeezed_seq

In [131]:
def approximate(delay_in_seconds: int, dimension: int, sample_period: int, series_in_segments: np.ndarray, window: int = 1, should_fit: bool = True) -> np.ndarray:
    """
    Compute the delay embedding of every segment of a segmented time series.

    The delay is converted to a number of samples as int(delay_in_seconds / sample_period).
    The length of the first segment is used to check that delay * dimension fits.
    `window` and `should_fit` are accepted but not used by this function.

    Raises:
        Exception: If delay_items * dimension is larger than the length of the first segment.
    Returns:
        ndarray built from the `takens_embedding` result of each segment.
    """
    delay_items = int(delay_in_seconds / sample_period)
    window_size = delay_items * dimension
    segment_length = len(series_in_segments[0])

    if window_size > segment_length:
        raise Exception(
            f'Not enough data for the given delay ({delay_in_seconds} seconds) and dimension ({dimension}).'
            f'\ndelay_items * dimension > len(data): {window_size} > {segment_length}')

    # Past the guard above the embedding either matches the segment length or is shorter.
    if window_size == segment_length:
        print(f"TimeDelayEmbeddingAdapter is applied with delay embeddings equavalent to the length of each segment"
              f" {window_size} == {segment_length}")
    else:
        print(f"TimeDelayEmbeddingAdapter is applied with delay embeddings covering less than the length of each "
              f"segment. {window_size} < {segment_length}")

    return np.asarray([takens_embedding(segment, delay_items, dimension)
                       for segment in series_in_segments])

In [132]:
def takens_embedding(series: np.ndarray, delay, dimension) -> np.ndarray:
    """
    Return the delay embedding of `series` as an array with one row per embedding dimension.

    Row i holds series[i * delay : len(series) - delay * (dimension - i)].
    When delay * dimension exceeds len(series), a message is printed and the
    input series is returned unchanged.
    """
    n_points = len(series)
    span = delay * dimension
    if span > n_points:
        print(f'Not enough data for the given delay ({delay}) and dimension ({dimension}).'
              f'\ndelay * dimension > len(data): {span} > {n_points}')
        return series
    # Collect the shifted views first and stack them once at the end.
    rows = [series[0:n_points - span]]
    for shift in range(1, dimension):
        rows.append(series[shift * delay:n_points - delay * (dimension - shift)])
    return np.array(rows)

In [133]:
# Work on plain NumPy arrays from here on.
X, y = np.asarray(X), np.asarray(y)

In [134]:
# Cut both arrays into consecutive segments of `window` samples via windowing().
# NOTE(review): X and y are overwritten, so re-running this cell alone would window
# the already-windowed arrays again.
X = windowing(X, window)
y = windowing(y, window)
# Disabled alternative: merge the first two axes of y back into one.
#y = y.reshape(y.shape[0]*y.shape[1],y.shape[2])

[[181.34 180.74 182.   ... 187.97 182.53 181.96]
 [181.   180.86 180.35 ... 181.82 182.41 181.91]
 [194.41 199.73 193.31 ... 181.89 180.91 180.9 ]
 ...
 [ 15.45  15.44  15.48 ...  15.56  15.49  15.55]
 [ 15.43  15.49  15.46 ...  15.6   15.59  15.55]
 [ 15.48  15.48  15.49 ...  15.54  15.55  15.48]]
[[[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  ...
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  ...
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  ...
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 ...

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  ...
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  ...
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  ...
  [0. 0. 0. 0. 0.]
  

In [135]:
# Replace every segment of X with its flattened delay embedding.
X = reduce_dimensions(
    data_in_batches=X,
    window=window,
    sample_period=sample,
    dimension=dimension,
    delay_in_seconds=delay_in_second,
)

TimeDelayEmbeddingAdapter is applied with delay embeddings covering less than the length of each segment. 60 < 100
Shape of squeezed seq: (37601, 6, 40)


In [136]:
# Process y: if the appliance was on once or more in the window, the label of the
# whole window is 1, else 0.
# y is indexed as (window index, sample within window, channel); summing over axis 1
# gives one total per window and channel.
a = y.sum(axis=1).astype(float)
# A non-zero total marks the channel as active in that window.
Y = (a != 0).astype(int)

In [137]:
# Order-preserving split (shuffle=False): the last 33% of the rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
    shuffle=False,
)

In [138]:
# Fit a multi-label kNN classifier (k=3) and score the held-out rows.
classifier = MLkNN(k=3)
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)
# Per-label probabilities, as opposed to the hard label assignments in `predictions`.
predict_proba = classifier.predict_proba(X_test)

In [139]:
# Store the predictions as a SciPy CSR sparse matrix.
y_pred_csr = sparse.csr_matrix(predictions)

In [140]:
# Label-averaged F1 scores, followed by the per-label precision/recall/F1 report.
micro = f1_score(y_test, predictions, average='micro')
macro = f1_score(y_test, predictions, average='macro')
print(f'F1 macro {macro}')
print(f'F1 micro {micro}')
report = classification_report(
    y_test,
    predictions,
    target_names=target_name,
    output_dict=True,
)
print(report)

F1 macro 0.4485846435323131
F1 micro 0.6478646253021756
{'kitchen_outlets_22': {'precision': 0.14482758620689656, 'recall': 0.16216216216216217, 'f1-score': 0.15300546448087432, 'support': 259}, 'furnace': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, 'washer_dryer_13': {'precision': 0.989010989010989, 'recall': 0.9540636042402827, 'f1-score': 0.9712230215827339, 'support': 566}, 'microwave': {'precision': 0.4744186046511628, 'recall': 0.4657534246575342, 'f1-score': 0.4700460829493088, 'support': 219}, 'bathroom_gfi': {'precision': 0.5194805194805194, 'recall': 0.8633093525179856, 'f1-score': 0.6486486486486486, 'support': 139}, 'micro avg': {'precision': 0.6189376443418014, 'recall': 0.6796280642434489, 'f1-score': 0.6478646253021756, 'support': 1183}, 'macro avg': {'precision': 0.42554753986991367, 'recall': 0.4890577087155929, 'f1-score': 0.4485846435323131, 'support': 1183}, 'weighted avg': {'precision': 0.6537582681607801, 'recall': 0.6796280642434489, 'f1-sco

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [141]:
# Example-based multi-label metrics (average='samples' averages over rows, not labels).
print('Exact Match Ratio: {0}'.format(metrics.accuracy_score(y_test, predictions, normalize=True, sample_weight=None)))
print('Hamming loss: {0}'.format(metrics.hamming_loss(y_test, predictions)))
# Each label names the metric that is actually computed on that line.
print('Precision: {0}'.format(metrics.precision_score(y_true=y_test, y_pred=predictions, average='samples')))
print('Recall: {0}'.format(metrics.recall_score(y_true=y_test, y_pred=predictions, average='samples')))
print('F1 Measure: {0}'.format(metrics.f1_score(y_true=y_test, y_pred=predictions, average='samples')))


Exact Match Ratio: 0.9455234104279152
Hamming loss: 0.014086550084616005
Recall: 0.05796867864721841
Precision: 0.05790152308808123
F1 Measure: 0.05663899857630214


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
