# Discriminative Sparse Coding

### import libraries

In [1]:
from __future__ import division
import numpy as np
import pandas as pd
import time
import librosa
import pickle
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import SparseCoder,DictionaryLearning
from sklearn import cluster
from sklearn.preprocessing import normalize 
# from lightning.regression import CDRegressor
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import warnings
import psutil
warnings.filterwarnings("ignore")

In [2]:
class DDSC():
    def __init__(self, train_set, train_sum, alpha, 
                 epsilon, reg_lambda, steps, n, m, T, k):
        """
        Inputs:
            train_set: dict of X_i matrix with dim T*m for each individual appliance i 
            train_sum: dataframe of X_sum aggregated matrix T*m 
            alpha: gradiant rate for the convergence step for DD (4b).
            epsilon: gradient stepsize of the pre-training (2e) ||A_t+1 - A_t||< epsilon 
            reg_lambda: reguarization weight of penalty function
            steps: interations to be performed for the convergence part
            n: number of basis functions 
            m: number of features (households)
            T: number of samples (hours)
            k: number of applicances i (1, k)
        """
        self.train_set = train_set.values()
        self.train_sum = train_sum.values
        self.alpha = alpha 
        self.epsilon = epsilon
        self.reg_lambda = reg_lambda
        self.steps = steps
        self.n = n 
        self.m = m
        self.T = T
        self.k = k
        
        # ======= Instances that can be used for plotting =====
        self.acc_nnsc = None
        self.err_nnsc = None
        self.acc_ddsc = None
        self.err_ddsc = None
        
        self.a_nnsc = None
        self.b_nnsc = None
        self.a_ddsc = None
        self.b_ddsc = None

    def _initialization(self):
        '''
        DDSC step 1
        initiualize the matrices A,B with positive values
        scale columns of B s.t b(j) = 1
        '''
        A = np.random.random((self.n,self.m)) # A: n*m
        B = np.random.random((self.T,self.n)) # B: T*n

        # scale columns s.t. b_i^(j) = 1
        B /= sum(B) 
        
        return A, B
    
    @staticmethod
    def _pos_constraint(mat):
        '''
        nnsc step 2(b)
        using only the positive values of matrix  
        input: matrix n*m 
        '''     
        indices = np.where(mat < 0.0)
        mat[indices] = 0.0
        return mat   
    
    def nnsc(self):
        '''
        Method as in NNSC from nonnegative sparse coding finland.
        from P.Hoyer

        return:
            A_list, B_list: list of A and B for each appliance i 
        '''
        
        acc_nnsc = []
        err_nnsc = []
        a_nnsc = []
        b_nnsc = []
        
        # used for F
        X_train = self.train_set # dict_value 
        A_list = []
        B_list = []
        
        for X in X_train:
            # step 1 
            A0, B0 = self._initialization() # initialization 
            Ap, Bp = A0, B0 
            Ap1, Bp1 = Ap, Bp # record previous step Ap, Bp
            t = 0
            change_A = 1.0
            while t <= self.steps and change_A >= self.epsilon:            
                Bp = Bp - self.alpha * np.dot((np.dot(Bp, Ap) - X), Ap.T) # step 2a
                Bp = self._pos_constraint(Bp) # step 2b 
#                 Bp /= sum(Bp) # step 2c 
                Bp = normalize(Bp, norm='l2', axis=0)
                
                # step 2d
                dot_part2 = np.divide(np.dot(Bp.T, X), (np.dot(np.dot(Bp.T, Bp), Ap) + self.reg_lambda)) # element wise division 
                Ap = np.multiply(Ap, dot_part2)

                change_A = np.linalg.norm(Ap - Ap1)
                change_B = np.linalg.norm(Bp - Bp1)
                Ap1, Bp1 = Ap, Bp
                t += 1
                
                if t % 10 == 0:
                    print("iter {t}：A change = {a:8.4f}".format(t=t, a=change_A))
                
            print("Gone through one appliance.\n")
            A_list.append(Ap)
            B_list.append(Bp)


        # for thesis
        acc_iter = self.accuracy(X_train, self.train_sum, B_list, A_list)
        err_iter = self.error(X_train, self.train_sum, B_list, A_list)
        acc_nnsc.append(acc_iter)
        err_nnsc.append(err_iter)
        # append norm of matrices
        a_nnsc.append(np.linalg.norm(sum(A_list)))
        b_nnsc.append(np.linalg.norm(sum(B_list)))

        self.acc_nnsc = acc_nnsc
        self.err_nnsc = err_nnsc
        self.a_nnsc = a_nnsc
        self.b_nnsc = b_nnsc
        
        return A_list, B_list

    def accuracy(self, X_train, X_sum, B, A):
        '''
        inputs:
            X_train: dict_value of list 
        
        Everything needs to be in lists of ndarrays
        of the components
        '''
        B_cat = np.hstack(B)
        A_cat = np.vstack(A)

        A_prime = self.F(X_sum, B_cat, A=A_cat)
        A_last = np.split(A_prime, self.k, axis=0)
        X_predict = self.predict(A_last, B)
        
        
        X_train = list(X_train)
        

        acc_numerator = [np.sum(np.minimum((B[i].dot(A_last[i])).sum(axis=0), (sum(X_train[i].sum(axis=0)))))
                         for i in range(len(B))]
        
        
        acc_denominator = sum(X_predict).sum()
        acc = sum(acc_numerator) / acc_denominator
        
        acc_denominator = X_sum.sum()
        acc_star = sum(acc_numerator) / acc_denominator
        return acc, acc_star

    def get_accuracy_plot(self):
        return self.acc_nnsc, self.acc_ddsc

    def get_error_plot(self):
        return self.err_nnsc, self.err_ddsc

    def get_a(self):
        return self.a_nnsc, self.a_ddsc

    def get_b(self):
        return self.b_nnsc, self.b_ddsc

    def error(self,X, X_sum, B, A):
        '''
        Error for the whole disaggregation part within list, sum the list to get
        the resulting disaggregation
        Parameters : must have x_train as x
        '''
        B_cat = np.hstack(B)
        A_cat = np.vstack(A)
        
        
        error = [(1.0/2.0) * np.linalg.norm((list(X)[i] - B[i].dot(A[i]))**2) for i in range(self.k)]
        error = sum(error)
        
        A_last_error = self.F(X_sum, B_cat,A_cat)
        
        A_last_error_list = np.split(A_last_error,self.k,axis=0)
        error_star = [(1.0/2.0) * np.linalg.norm((list(X)[i] - B[i].dot(A_last_error_list[i]))**2) for i in range(self.k)]
        error_star = sum(error_star)
        return error, error_star
        
    
    def F(self, X_sum, B, A):
        '''
        input is lists of the elements
        output list of elements
        '''
        # 4a  
        B = np.asarray(B)
        A = np.asarray(A)
        
        coder = SparseCoder(dictionary=B.T, transform_alpha=self.reg_lambda, transform_algorithm='lasso_cd')    
        # B: basis function 
        # A: activation function   
        B_hat, A_hat = librosa.decompose.decompose(X_sum, transformer=coder) 
        A_hat = self._pos_constraint(A_hat)

        return A_hat

    def DD(self, B, A):
        '''
        Taking the parameters as x_train_use and discriminate over the
        entire region
        '''
        # step 3
        A_star = np.vstack(A)
        B_cat = np.hstack(B)
        
        # step 4 
        change_B = 1 
        t = 0
        
        acc_ddsc = []
        err_ddsc = []
        a_ddsc = []
        b_ddsc = []
        
        X_sum = self.train_sum # change df to list of list   
        X_train = self.train_set
        
        while t <= self.steps and self.epsilon <= change_B:
            B_cat_p = B_cat
            
            # step 4a
            A_hat = self.F(X_sum, B_cat, A_star)
            
            # step 4b
            B_cat = (B_cat - self.alpha * ((X_sum - B_cat.dot(A_hat)).dot(A_hat.T) - (X_sum - B_cat.dot(A_star)).dot(A_star.T)))
            
            # step 4c
            B_cat = self._pos_constraint(B_cat) # scale columns s.t. b_i^(j) = 1
#             B_cat /= sum(B_cat)
            
            B_cat = normalize(B_cat, norm='l2', axis=0)
            
            change_B = np.linalg.norm(B_cat - B_cat_p)
            t += 1
            
    
            print("step {t}: B change = {c:.4f}".format(t=t, c=change_B))

            # convergence check
            A_hat_split = np.split(A_hat, self.k, axis=0)
            B_split = np.split(B_cat,self.k,axis=1)
            
            acc_iter = self.accuracy(X_train, X_sum, B, A_hat_split)
            acc_iter = self.accuracy(X_train, X_sum, B_split, A)
            err_iter = self.error(X_train, X_sum, B, A_hat_split)

#             error, error_star = sc.error(list(x_train.values()),train_sum,B_list,A_list)

               
            acc_ddsc.append(acc_iter)
            err_ddsc.append(err_iter)
            a_ddsc.append(np.linalg.norm(A_hat))
            b_ddsc.append(np.linalg.norm(B_cat))

        self.acc_ddsc = acc_ddsc
        self.err_ddsc = err_ddsc
        self.a_ddsc = a_ddsc
        self.b_ddsc = b_ddsc
        return B_cat

    def predict(self, A, B):
        result = [x.dot(y) for (x, y) in zip(B, A)]
        return result 

In [199]:
from dataprocess import read_data, format_data, split, split2
df, houses = read_data()

d = format_data(df, houses)

timeframe = 336
# timeframe = 48
portion = 0.5

# x_train, x_test = split(d, portion, timeframe)

start_t = 4000 # summer
# start_t = 0 # winter 
x_train, x_test = split2(d, portion, timeframe, start_t)
x_train_sum = x_train.pop('use',None) # aggregated 
x_test_sum = x_test.pop('use',None) 
x_train_localhour = x_train.pop('localhour',None)
x_test_localhour = x_test.pop('localhour',None)

k = len(x_train.keys())
T, m = x_train[list(x_train.keys())[0]].shape
reg_par = 0.0005
epsilon = 0.001
alpha = 0.001
n = 250
steps = 600 # steps must be higher than k


sc = DDSC(x_train, x_train_sum, alpha, epsilon, reg_par, steps, n, m, T, k)


print('pre-training: ')
A_list,B_list = sc.nnsc()

# print('DD: ')
# # Discriminative Disaggregation training
# B_cat = sc.DD(B_list, A_list)


# # Given test examples x_test
# A_prime = sc.F(x_test_sum.values, B_cat, A=np.vstack(A_list))
# A_last = np.split(A_prime,k,axis=0)

x_predict = sc.predict(A_list,B_list)

range(4000, 4168)
range(4168, 4336)
pre-training: 
iter 10：A change =   0.2720
iter 20：A change =   0.4673
iter 30：A change =   0.8779
iter 40：A change =   1.2196
iter 50：A change =   0.8010
iter 60：A change =   0.4759
iter 70：A change =   0.3702
iter 80：A change =   0.3013
iter 90：A change =   0.2205
iter 100：A change =   0.1525
iter 110：A change =   0.1113
iter 120：A change =   0.0862
iter 130：A change =   0.0690
iter 140：A change =   0.0568
iter 150：A change =   0.0480
iter 160：A change =   0.0416
iter 170：A change =   0.0367
iter 180：A change =   0.0329
iter 190：A change =   0.0298
iter 200：A change =   0.0272
iter 210：A change =   0.0250
iter 220：A change =   0.0229
iter 230：A change =   0.0210
iter 240：A change =   0.0193
iter 250：A change =   0.0176
iter 260：A change =   0.0161
iter 270：A change =   0.0147
iter 280：A change =   0.0134
iter 290：A change =   0.0122
iter 300：A change =   0.0113
iter 310：A change =   0.0105
iter 320：A change =   0.0098
iter 330：A change =   0.0093
i

Gone through one appliance.



In [13]:
print("the shape of the first predicted appliances is :%s" %(np.asarray(x_predict[1]).shape,))
x_predict_sum = sum(x_predict)

# energy disaggregation accuracy
acc = sc.accuracy(x_train.values(), x_train_sum, B_list, A_last)
# energy disaggregation error
error, error_star = sc.error(x_train.values(), x_train_sum, B_list, A_list)

print("error: %s, error_star: %s" % (error, error_star))
acc_nnsc, acc_ddsc = sc.get_accuracy_plot()
err_nnsc, err_ddsc = sc.get_error_plot()
# plotting acc/err
a_nnsc, a_ddsc = sc.get_a()
b_nnsc, b_ddsc = sc.get_b()

the shape of the first predicted appliances is :(24, 21)


NameError: name 'A_last' is not defined

In [None]:
a_nnsc, a_ddsc = sc.get_a()
b_nnsc, b_ddsc = sc.get_b()
plt.plot(a_ddsc)
plt.ylabel('Activations')
plt.xlabel('Iterations')
plt.title('Activation Norm for DDSC algorithm')
plt.show()

plt.plot(b_ddsc)
plt.ylabel('Basis')
plt.xlabel('Iterations')
plt.title('Basis Norm for DDSC algorithm')
plt.show()

In [None]:
print("the shape of the first predicted appliances is :%s" %(np.asarray(list(x_predict)[1]).shape,))


In [None]:
err_nnddsc, err_ddddsc = sc.get_error_plot()
res_err = list(zip(*err_ddddsc)) 
plt.plot(res_err[0])
plt.title('error of DDSC algorithm')
plt.xlabel('Iterations')
plt.show()

acc_nnsc, acc_ddsc = sc.get_accuracy_plot()
res_acc = list(zip(*acc_ddsc)) 
plt.plot(res_acc[0])
plt.title('accuracy of DDSC algorithm')
plt.xlabel('Iterations')
plt.show()


In [None]:
x_predict = sc.predict(A_last,B_list)
print("the shape of the first predicted appliances is :%s" %(np.asarray(list(x_predict)[0]).shape,))
x_test.keys()

In [None]:
x_predict = sc.predict(A_list,B_list)
# pred_val = np.asarray(list(x_predict)[4])[:,6]
# pred_val 

In [None]:
pred_val = np.asarray(list(x_predict)[2])[:,6]
pred_val

In [5]:
x = range(x_train[list(x_train.keys())[0]].shape[0])
y_other_true = np.asarray(x_test[list(x_test.keys())[4]])[0:,house]
y_other_pred = np.asarray(list(x_predict)[4])[0:,house]

result_df = pd.DataFrame({'actuals':y_other_true, 'predicted':y_other_pred})
result_df.head()

NameError: name 'house' is not defined

In [192]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from datetime import datetime

house = 6

y_use_true = np.asarray(x_test_sum)[0:,house]
x_predict = sc.predict(A_list,B_list)

def plot_predict(x_test, x_predict, house, x_test_localhour):
#     fig = make_subplots(rows=5, cols=1, subplot_titles=('air', 'furnace', 'dishwasher', 'regrigerator', 'other'))
    y_axis = ['whole-home', 'air', 'furnace', 'dishwasher', 'refrigerator', 'other']
    fig = make_subplots(rows=6, cols=1)
    dates = x_test_localhour.iloc[:, house]
    
    
    y_use_true = np.asarray(x_test_sum)[0:,house]
    y_use_pred = [-1]*np.asarray(x_test_sum)[0:,house]
    
    true_pt = go.Scatter(name= 'Actual (kWh)',
                     x= dates,
                     y= y_use_true,
                    xaxis='x2', yaxis='y2',
                     mode='lines',
                     marker=dict(size=12,
                                 line=dict(width=1),
                                 color='blue'), showlegend=True)
    pred_pt = go.Scatter(name= 'Predicted (kWh)',
                     x= dates,
                     y= y_use_pred,
                    xaxis='x2', yaxis='y2',
                     mode='lines',
                     marker=dict(size=12,
                                 line=dict(width=1),
                                 color="red"), showlegend=True)
    
    fig.append_trace(true_pt, row=1, col=1)
    fig.append_trace(pred_pt, row=1, col=1)
    fig.update_yaxes(title_text=y_axis[0], row=1, col=1, range=[0, 5.5])

    for i in range(len(x_test)):
        flag = False 
#         if i == 1:
#             flag = True
#         else:
#             flag = False
        
        true_val = np.asarray(x_test[list(x_test.keys())[i]])[:,house]
#         x_predict = sc.predict(A_list,B_list)
        pred_val = np.asarray(list(x_predict)[i])[:,house]
        true_pt = go.Scatter(name= 'Actuals (kWh)',
                     x= dates,
                     y= true_val,
                    xaxis='x2', yaxis='y2',
                     mode='lines',
                     marker=dict(size=12,
                                 line=dict(width=1),
                                 color='blue'), showlegend=flag)
        
        pred_pt = go.Scatter(name= 'Predicted (kWh)',
                     x= dates,
                     y= pred_val,
                    xaxis='x2', yaxis='y2',
                     mode='lines',
                     marker=dict(size=12,
                                 line=dict(width=1),
                                 color="red"), showlegend=flag)
        fig.append_trace(true_pt, row=i+2, col=1)
        fig.append_trace(pred_pt, row=i+2, col=1)
        fig.update_yaxes(title_text=y_axis[i+1], row=i+2, col=1)
    
    layout=go.Layout(title='A week of daily consumption for a household', 
                     xaxis={'title':'Hour', 'tickformat': '%m/%d'}, yaxis={'title':'kW'}, template="plotly_white")

    # title_text="Test set prediction",
    fig.update_layout(showlegend=True, height=1000, width=800,  
                      template="plotly_white", font=dict(
        family="Times New Roman",
        size=19,
        color="black"                    
    ))
    fig.update_layout(legend=dict(x=0.71, y=1), margin=dict(l=0, r=0, t=0, b=0))
    fig.update_xaxes(showline=True, linewidth=1, linecolor='black', tickformat='%m/%d', mirror=True)
    fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)

    fig.write_image("figures/winter.eps")
    fig.show()
   
    

    
plot_predict(x_test, x_predict, house, x_test_localhour)


### pie chart

In [157]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def pie_plot(x_test, x_predict, house):
    
    labels = ['air', 'furnace', 'dishwasher', 'refrigerator', 'other']
    
    pie_chart_true = []
    pie_chart_pred = []
    for i in range(len(x_test)):
        true_val = np.asarray(x_test[list(x_test.keys())[i]])[:,house]
        pred_val = np.asarray(list(x_predict)[i])[:,house]
        pie_chart_true.append(true_val.sum())
        pie_chart_pred.append(pred_val.sum())
        
    # Create subplots: use 'domain' type for Pie subplot
    fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]], 
                        subplot_titles=['True usage', 'Predicted usage'])

    fig.add_trace(go.Pie(labels=labels, values=pie_chart_true, name="True usage"),
                  1, 1)
    fig.add_trace(go.Pie(labels=labels, values=pie_chart_pred, name="Predicted usage"),
                  1, 2)

    fig.update_traces(textposition='inside', textinfo='percent+label')
    fig.update_layout(font=dict(
        family="Arial",
        size=22
    ))
    fig.update_layout(legend=dict(x=0.4, y=1.16, bgcolor='rgba(255, 255, 255, 0)'), plot_bgcolor = 'rgba(0,0,0,0)')
    for i in fig['layout']['annotations']:
        i['font'] = dict(size=30)
    fig.show()

pie_plot(x_test, x_predict, house=10)

### bar chat

In [200]:
import plotly.graph_objects as go
labels = ['air', 'furnace', 'dishwasher', 'refrigerator', 'other']
bar_chart_true = []
bar_chart_pred = []
# y_use_true = np.asarray(x_test_sum)[0:,house].sum()

house = 6
for i in range(len(x_test)):
    true_val = np.asarray(x_test[list(x_test.keys())[i]])[:,house]
    pred_val = np.asarray(list(x_predict)[i])[:,house]
    bar_chart_true.append(true_val.sum())
    bar_chart_pred.append(pred_val.sum())

bar_chart_pred /= sum(bar_chart_pred)
bar_chart_true /= sum(bar_chart_true)

fig = go.Figure(data=[
    go.Bar(name='Actual', x=labels, y=bar_chart_true, marker_color='blue'),
    go.Bar(name='Predicted', x=labels, y=bar_chart_pred, marker_color='red')
])
fig.update_layout(showlegend=True, height=400, width=800,  
                      template="plotly_white", font=dict(
        family="Times New Roman",
        size=19,
        color="black"                    
    ))
fig.update_layout(legend=dict(x=0.6, y=1), margin=dict(l=0, r=0, t=0, b=0))
fig.update_xaxes(showline=True, linewidth=1, linecolor='black',  mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
# Change the bar mode
fig.update_layout(barmode='group')
fig.write_image("figures/summer_per.eps")
fig.show()

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def pie_plot(x_test, x_predict, house):
    
    labels = ['air', 'furnace', 'dishwasher', 'refrigerator', 'other']
    
    pie_chart_true = []
    pie_chart_pred = []
    for i in range(len(x_test)):
        true_val = np.asarray(x_test[list(x_test.keys())[i]])[:,house]
        pred_val = np.asarray(list(x_predict)[i])[:,house]
        pie_chart_true.append(true_val.sum())
        pie_chart_pred.append(pred_val.sum())
        
    # Create subplots: use 'domain' type for Pie subplot
    fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]], 
                        subplot_titles=['True usage', 'Predicted usage'])

    fig.add_trace(go.Pie(labels=labels, values=pie_chart_true, name="True usage"),
                  1, 1)
    fig.add_trace(go.Pie(labels=labels, values=pie_chart_pred, name="Predicted usage"),
                  1, 2)

    fig.update_traces(textposition='inside', textinfo='percent+label')
    fig.update_layout(font=dict(
        family="Arial",
        size=22
    ))
    fig.update_layout(legend=dict(x=0.4, y=1.16, bgcolor='rgba(255, 255, 255, 0)'), plot_bgcolor = 'rgba(0,0,0,0)')
    for i in fig['layout']['annotations']:
        i['font'] = dict(size=30)
    fig.show()

pie_plot(x_test, x_predict, house=10)

In [5]:
def generate_df_detect_anomalies(d, house, appliance, x_test, x_test_localhour, x_predict): 
    
    # {'air1': 0, 'furnace1': 1, 'dishwasher1': 2, 'regrigerator1': 3, 'other': 4}
    labels = dict(zip(list(x_test.keys()), range(len(x_test))))
    
    all_hours = d['localhour'].iloc[:, house]   
    all_true = d[appliance].iloc[:, house]
    true_df = pd.concat([all_hours, all_true], axis=1)   
    true_df.columns = ['date', 'actuals']
    
    test_hours = x_test_localhour.iloc[:, house]
    pred_val = pd.Series(x_predict[labels[appliance]][:,house])
    pred_val.index = test_hours.index 
    pred_df = pd.concat([test_hours, pred_val], axis=1)
    pred_df.columns = ['date', 'predicted']

    result_df = pd.merge(true_df, pred_df, on='date', how= 'outer')
    return result_df

result_df = generate_df_detect_anomalies(d, 6, 'other', x_test, x_test_localhour, x_predict)
result_df.head()

Unnamed: 0,date,actuals,predicted
0,2014-01-01 00:00:00,1.2,
1,2014-01-01 01:00:00,0.473,
2,2014-01-01 02:00:00,0.439,
3,2014-01-01 03:00:00,0.388,
4,2014-01-01 04:00:00,0.427,


In [6]:
def detect_classify_anomalies(df,window):
#     df.replace([np.inf, -np.inf], np.NaN, inplace=True)
#     df.fillna(0,inplace=True)
    df['error']=df['actuals']-df['predicted']
    df['percentage_change'] = ((df['actuals'] - df['predicted']) / df['actuals']) * 100
    df['meanval'] = df['error'].rolling(window=window).mean()
    df['deviation'] = df['error'].rolling(window=window).std()
    df = df.dropna(how='any')
    df = df.reset_index(drop=True)
    df['-3s'] = df['meanval'] - (2 * df['deviation'])
    df['3s'] = df['meanval'] + (2 * df['deviation'])
    df['-2s'] = df['meanval'] - (1.75 * df['deviation'])
    df['2s'] = df['meanval'] + (1.75 * df['deviation'])
    df['-1s'] = df['meanval'] - (1.5 * df['deviation'])
    df['1s'] = df['meanval'] + (1.5 * df['deviation'])
    cut_list = df[['error', '-3s', '-2s', '-1s', 'meanval', '1s', '2s', '3s']]
    cut_values = cut_list.values
    cut_sort = np.sort(cut_values)
    df['impact'] = [(lambda x: np.where(cut_sort == df['error'][x])[1][0])(x) for x in
                               range(len(df['error']))]
    severity = {0: 3, 1: 2, 2: 1, 3: 0, 4: 0, 5: 1, 6: 2, 7: 3}
    region = {0: "NEGATIVE", 1: "NEGATIVE", 2: "NEGATIVE", 3: "NEGATIVE", 4: "POSITIVE", 5: "POSITIVE", 6: "POSITIVE",
              7: "POSITIVE"}
    df['color'] =  df['impact'].map(severity)
    df['region'] = df['impact'].map(region)
    df['anomaly_points'] = np.where(df['color'] == 3, df['error'], np.nan)
    df['load_date'] = pd.date_range(start='6/1/2014', periods=len(df), freq='H')
#     df = df.sort_values(by='load_date', ascending=False)
#     df.load_date = pd.to_datetime(df['load_date'].astype(str), format="%Y-%m-%d")
    return df

df = detect_classify_anomalies(result_df, window=12)
df.head(10)


Unnamed: 0,date,actuals,predicted,error,percentage_change,meanval,deviation,-3s,3s,-2s,2s,-1s,1s,impact,color,region,anomaly_points,load_date
0,2014-04-01 19:00:00,0.348,1.681344,-1.333344,-383.144879,0.525816,2.145017,-3.764217,4.815849,-3.227963,4.279595,-2.691709,3.743341,3,0,NEGATIVE,,2014-06-01 00:00:00
1,2014-04-01 20:00:00,0.505,2.649418,-2.144418,-424.637161,0.701457,1.772094,-2.842731,4.245644,-2.399708,3.802621,-1.956684,3.359598,2,1,NEGATIVE,,2014-06-01 01:00:00
2,2014-04-01 21:00:00,0.533,7.638083,-7.105083,-1333.036167,-0.095754,2.775894,-5.647542,5.456034,-4.953568,4.762061,-4.259595,4.068087,0,3,NEGATIVE,-7.105083,2014-06-01 02:00:00
3,2014-04-01 22:00:00,0.556,0.781511,-0.225511,-40.559617,-0.150342,2.771061,-5.692465,5.391781,-4.9997,4.699015,-4.306934,4.00625,3,0,NEGATIVE,,2014-06-01 03:00:00
4,2014-04-01 23:00:00,0.511,0.516461,-0.005461,-1.068625,-0.215983,2.756252,-5.728488,5.296521,-5.039425,4.607458,-4.350361,3.918395,4,0,POSITIVE,,2014-06-01 04:00:00
5,2014-04-02 00:00:00,0.228,0.189582,0.038418,16.849844,-0.219531,2.755863,-5.731256,5.292195,-5.04229,4.603229,-4.353325,3.914263,4,0,POSITIVE,,2014-06-01 05:00:00
6,2014-04-02 01:00:00,0.179,0.183584,-0.004584,-2.560691,-0.325165,2.717901,-5.760968,5.110638,-5.081493,4.431162,-4.402017,3.751687,4,0,POSITIVE,,2014-06-01 06:00:00
7,2014-04-02 02:00:00,0.216,0.14662,0.06938,32.120445,-0.711753,2.221424,-5.154602,3.731095,-4.599246,3.175739,-4.04389,2.620383,4,0,POSITIVE,,2014-06-01 07:00:00
8,2014-04-02 03:00:00,0.172,0.195663,-0.023663,-13.757721,-0.867248,2.087675,-5.042598,3.308101,-4.520679,2.786182,-3.99876,2.264264,4,0,POSITIVE,,2014-06-01 08:00:00
9,2014-04-02 04:00:00,0.214,0.162421,0.051579,24.102332,-0.872975,2.084812,-5.042598,3.296648,-4.521396,2.775445,-4.000193,2.254242,4,0,POSITIVE,,2014-06-01 09:00:00


In [80]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from matplotlib import pyplot



def plot_anomaly(df,metric_name):
    #error = pd.DataFrame(Order_results.error.values)
    #df = df.sort_values(by='load_date', ascending=False)
    #df.load_date = pd.to_datetime(df['load_date'].astype(str), format="%Y%m%d")
    dates = df.date
    #meanval = error.rolling(window=window).mean()
    #deviation = error.rolling(window=window).std()
    #res = error
#upper_bond=meanval + (2 * deviation)
    #lower_bond=meanval - (2 * deviation)
#anomalies = pd.DataFrame(index=res.index, columns=res.columns)
    #anomalies[res < lower_bond] = res[res < lower_bond]
    #anomalies[res > upper_bond] = res[res > upper_bond]
    bool_array = (abs(df['anomaly_points']) > 0)
#And a subplot of the Actual Values.
    actuals = df["actuals"][-len(bool_array):]
    anomaly_points = bool_array * actuals
    anomaly_points[anomaly_points == 0] = np.nan
#Order_results['meanval']=meanval
    #Order_results['deviation']=deviation
#     color_map= {0: "palegreen", 1: "yellow", 2: "orange", 3: "red"}
#     table = go.Table(
#     domain=dict(x=[0, 1],
#                 y=[0, 0.3]),
#     columnwidth=[1, 2 ],
#     #columnorder=[0, 1, 2,],
#     header = dict(height = 20,
#                   values = [['<b>Date</b>'],['<b>Actual Values </b>'],
#                             ['<b>Predicted</b>'], ['<b>% Difference</b>'],['<b>Severity (0-3)</b>']],
#                  font = dict(color=['rgb(45, 45, 45)'] * 5, size=14),
#                   fill = dict(color='#d562be')),
#     cells = dict(values = [df.round(3)[k].tolist() for k in ['load_date', 'actuals', 'predicted',
#                                                                'percentage_change','color']],
#                  line = dict(color='#506784'),
#                  align = ['center'] * 5,
#                  font = dict(color=['rgb(40, 40, 40)'] * 5, size=12),
#                  #format = [None] + [",.4f"] + [',.4f'],
# #suffix=[None] * 4,
#                  suffix=[None] + [''] + [''] + ['%'] + [''],
#                  height = 27,
#                  #fill = dict(color=['rgb(235, 193, 238)', 'rgba(228, 222, 249, 0.65)']))
#                  fill=dict(color=  # ['rgb(245,245,245)',#unique color for the first column
#                       [df['color'].map(color_map)],
#                       )
#     ))
# df['ano'] = np.where(df['color']==3, df['error'], np.nan)
    
    upper_bound = go.Scatter(hoverinfo="skip",
                         x=dates,
                         showlegend =False,
                         xaxis='x1',
                         yaxis='y1',
                         y=df['3s'],
                         marker=dict(color="#444"),
                         line=dict(
                             color=('rgb(23, 96, 167)'),
                             width=2,
                             dash='dash'),
                         fillcolor='rgba(68, 68, 68, 0.3)',
                         fill='tonexty')
    lower_bound = go.Scatter(name='Confidence Interval',
                          x=dates,
                         xaxis='x1',
                         yaxis='y1',
                          y=df['-3s'],
                          marker=dict(color="#444"),
                          line=dict(
                              color=('rgb(23, 96, 167)'),
                              width=2,
                              dash='dash'),
                          fillcolor='rgba(68, 68, 68, 0.3)',
                          fill='tonexty')
    
    Actuals = go.Scatter(name= 'Actuals',
                     x= dates,
                     y= df['actuals'],
                    xaxis='x2', yaxis='y2',
                     mode='lines',
                     marker=dict(size=12,
                                 line=dict(width=1),
                                 color='blue'))
    
    Predicted = go.Scatter(name= 'Predicted',
                     x= dates,
                     y= df['predicted'],
                    xaxis='x2', yaxis='y2',
                     mode='lines',
                     marker=dict(size=12,
                                 line=dict(width=1),
                                 color="orange"))
    anomalies = go.Scatter(name="Anomaly",
                       x=dates,
                       xaxis='x1',
                       yaxis='y1',
                       y=df['anomaly_points'],
                       mode='markers',
                       marker = dict(color ='red',
                      size = 11,line = dict(
                                         color = 'red',
                                         width = 2)))
# create plot for error...
    Error = go.Scatter(name="Error",
                   x=dates, y=df['error'],
                   xaxis='x1',
                   yaxis='y1',
                   mode='lines',
                   marker=dict(size=12,
                               line=dict(width=1),
                               color="red"),
                   text="Error")
    anomalies_map = go.Scatter(name = "anomaly actual",
                                   showlegend=False,
                                   x=dates,
                                   y=anomaly_points,
                                   mode='markers',
                                   xaxis='x2',
                                   yaxis='y2',
                                    marker = dict(color ="red",
                                  size = 11,
                                 line = dict(
                                     color = "red",
                                     width = 2)))
    Mvingavrg = go.Scatter(name="Moving Average",
                           x=dates,
                           y=df['meanval'],
                           mode='lines',
                           xaxis='x1',
                           yaxis='y1',
                           marker=dict(size=12,
                                       line=dict(width=1),
                                       color="green"),
                           text="Moving average")
    axis=dict(
    showline=True,
    zeroline=False,
    showgrid=True,
    mirror=True,
    ticklen=4,
    tickfont=dict(size=18))
    
    layout = dict(
    width=1000,
    height=865,
    autosize=False,
    title= metric_name,
    margin = dict(t=75),
    showlegend=True,
    xaxis1=dict(axis, **dict(domain=[0, 1], anchor='y1', showticklabels=True)),
    xaxis2=dict(axis, **dict(domain=[0, 1], anchor='y2', showticklabels=True)),
    yaxis1=dict(axis, **dict(domain=[2 * 0.21 + 0.20 + 0.09, 1], anchor='x1', hoverformat='.2f')),
    yaxis2=dict(axis, **dict(domain=[0.21 + 0.12, 2 * 0.31 + 0.02], anchor='x2', hoverformat='.2f')),font=dict(
        family="Arial",
        size=60
    ))
    
    fig = go.Figure(data = [anomalies,anomalies_map,
                        upper_bound,lower_bound,Actuals,Predicted,
                        Mvingavrg,Error], layout = layout)
    fig.update_layout(template="plotly_white", xaxis=dict(showgrid=True),
        yaxis=dict(showgrid=True),font=dict(
        family="Arial",
        size=20
    ))
    fig.update_yaxes(title_text='other(kWh)')
    fig.update_layout(legend=dict(x=0.75, y=1.24, bgcolor='rgba(255, 255, 255, 0)'))
    fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
    fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
#     fig.update_layout(title={'text': metric_name,'y':0.95,'x':0.4,
#         'xanchor': 'center',
#         'yanchor': 'top'})
    
#     fig.update_layout(showlegend=True, height=1000, width=800, title_text="Test set prediction", 
#                       template="plotly_white", font=dict(
#         family="Arial",
#         size=18
#     ))
    iplot(fig)




In [81]:
# {'air1': 0, 'furnace1': 1, 'dishwasher1': 2, 'regrigerator1': 3, 'other': 4}

result_df = generate_df_detect_anomalies(d, 10, 'other', x_test, x_test_localhour, x_predict)

classify_df = detect_classify_anomalies(result_df,12)
classify_df.reset_index(inplace=True)
del classify_df['index']
plot_anomaly(classify_df,"Anomalous Detection with Prediction")

## Statistical Analysis 

## Plotting the B matrices (basis functions)

In [None]:
from matplotlib import cm

plt.figure(figsize=(16,12))

# row and column sharing
f, ((ax1, ax2, ax3)) = plt.subplots(3, 1, sharex='col', sharey='row', figsize=(16,12))
plt.rcParams.update({'font.size': 15})


B_list[0] = B_list[0]/np.sum(B_list[0],axis=1)[:,None]  # Normalize
ax1.pcolor(B_list[0], cmap = cm.Greys_r)
B_list[1] = B_list[1]/np.sum(B_list[1],axis=1)[:,None]  # Normalize
ax2.pcolor(B_list[1], cmap = cm.Greys_r)
B_list[2] = B_list[2]/np.sum(B_list[2],axis=1)[:,None]  # Normalize
ax3.pcolor(B_list[2], cmap = cm.Greys_r)

ax1.get_xaxis().set_visible(False)
ax1.get_yaxis().set_visible(False)
ax2.get_xaxis().set_visible(False)
ax2.get_yaxis().set_visible(False)
ax3.get_xaxis().set_visible(False)
ax3.get_yaxis().set_visible(False)

# plt.savefig(figure_directory+'basis')

In [None]:
import matplotlib.pyplot as plt
plt.style.use('classic')

# row and column sharing
f, ((ax1, ax2, ax3, ax4, ax5)) = plt.subplots(5, 1, sharex='col', sharey='row', figsize=(16,12))
plt.rcParams.update({'font.size': 15})


for base in range(7):
    ax1.plot(range(n),B_list[0][base,0:])
    ax2.plot(range(n),B_list[1][base,0:])
    ax3.plot(range(n),B_list[2][base,0:])
    ax4.plot(range(n),B_list[3][base,0:])
    ax5.plot(range(n),B_list[4][base,0:])

ax1.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom='off',      # ticks along the bottom edge are off
    top='off',         # ticks along the top edge are off
    labelbottom='off') # labels along the bottom edge are off
# ax1.set_ylim([0,0.04])
ax1.set_ylabel('Refrigerator')
ax2.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom='off',      # ticks along the bottom edge are off
    top='off',         # ticks along the top edge are off
    labelbottom='off') # labels along the bottom edge are off
# ax2.set_ylim([0,0.06])
ax2.set_ylabel('Dishwasher')
ax3.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom='off',      # ticks along the bottom edge are off
    top='off',         # ticks along the top edge are off
    labelbottom='off') # labels along the bottom edge are off
# ax3.set_ylim([0,0.02])
ax3.set_ylabel('Furnace')