In [1]:
import numpy as np
from scipy.spatial.distance import euclidean
import pandas as pd
from fastdtw import fastdtw
import matplotlib.pyplot as plt
import datetime as dt
from datetime import date
from datetime import timedelta
import math
import random

In [2]:
x = np.array([1,2,3,4,5])
y = np.array([10,9,8,7,6])
distance, path = fastdtw(x, y, dist=euclidean)
print(distance, path)

25.0 [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]


In [3]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import absolute_import, division

from collections import defaultdict

from six.moves import xrange


def fastdtw(x, y, radius=1, dist=lambda a, b: abs(a - b)):
    min_time_size = radius + 2

    if len(x) < min_time_size or len(y) < min_time_size:
        return dtw(x, y, dist=dist)

    x_shrinked = __reduce_by_half(x)
    y_shrinked = __reduce_by_half(y)
    distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
    window = __expand_window(path, len(x), len(y), radius)
    return dtw(x, y, window, dist=dist)


def dtw(x, y, window=None, dist=lambda a, b: abs(a - b)):
    len_x, len_y = len(x), len(y)
    if window is None:
        window = [(i, j) for i in xrange(len_x) for j in xrange(len_y)]
    window = ((i + 1, j + 1) for i, j in window)
    D = defaultdict(lambda: (float('inf'),))
    D[0, 0] = (0, 0, 0)
    for i, j in window:
        dt = dist(x[i-1], y[j-1])
        D[i, j] = min((D[i-1, j][0]+dt, i-1, j), (D[i, j-1][0]+dt, i, j-1),
                      (D[i-1, j-1][0]+dt, i-1, j-1), key=lambda a: a[0])
    path = []
    i, j = len_x, len_y
    while not (i == j == 0):
        path.append((i-1, j-1))
        i, j = D[i, j][1], D[i, j][2]
    path.reverse()
    return (D[len_x, len_y][0], path)


def __reduce_by_half(x):
    return [(x[i] + x[1+i]) / 2 for i in xrange(0, len(x) - len(x) % 2, 2)]


def __expand_window(path, len_x, len_y, radius):
    path_ = set(path)
    for i, j in path:
        for a, b in ((i + a, j + b)
                     for a in xrange(-radius, radius+1)
                     for b in xrange(-radius, radius+1)):
            path_.add((a, b))

    window_ = set()
    for i, j in path_:
        for a, b in ((i * 2, j * 2), (i * 2, j * 2 + 1),
                     (i * 2 + 1, j * 2), (i * 2 + 1, j * 2 + 1)):
            window_.add((a, b))

    window = []
    start_j = 0
    for i in xrange(0, len_x):
        new_start_j = None
        for j in xrange(start_j, len_y):
            if (i, j) in window_:
                window.append((i, j))
                if new_start_j is None:
                    new_start_j = j
            elif new_start_j is not None:
                break
        start_j = new_start_j

    return window

In [11]:
odf = pd.read_csv("data_3315_98_cols.csv")

In [4]:
meterdata = pd.read_excel("Data balancing.xlsx")

In [5]:
meterdata.head()

Unnamed: 0,Meter No.,Date,Pattern,Attack
0,22822802,"2020-01-03, 2020-01-05, 2020-01-07, 2020-01-09...",Random till June,F1
1,22823274,"2020-01-04, 2020-01-05, 2020-01-11, 2020-01-12...",Every weekends,F1
2,-22825175,"2020-03-05, 2020-03-09, 2020-03-15, 2020-03-18...",Random,F6
3,-22816820,"2020-04-16, 2020-04-18, 2020-04-21, 2020-04-24...",Random,F3
4,-22827399,"2020-04-04, 2020-04-05, 2020-04-11, 2020-04-12...",Weenends from Apr to July,F3


In [6]:
meterdata['Date'] = meterdata.Date.map(lambda x: [i.strip() for i in x.split(",")])

In [7]:
f1 = meterdata.loc[meterdata['Attack'] == 'F1']
f1_meter_list = f1['Meter No.'].to_list()
f1_dates_list = f1['Date'].to_list()
###########################################################
f2 = meterdata.loc[meterdata['Attack'] == 'F2']
f2_meter_list = f2['Meter No.'].to_list()
f2_dates_list = f2['Date'].to_list()
###########################################################
f3 = meterdata.loc[meterdata['Attack'] == 'F3']
f3_meter_list = f3['Meter No.'].to_list()
f3_dates_list = f3['Date'].to_list()
###########################################################
f4 = meterdata.loc[meterdata['Attack'] == 'F4']
f4_meter_list = f4['Meter No.'].to_list()
f4_dates_list = f4['Date'].to_list()
###########################################################
f5 = meterdata.loc[meterdata['Attack'] == 'F5']
f5_meter_list = f5['Meter No.'].to_list()
f5_dates_list = f5['Date'].to_list()
###########################################################
f6 = meterdata.loc[meterdata['Attack'] == 'F6']
f6_meter_list = f6['Meter No.'].to_list()
f6_dates_list = f6['Date'].to_list()
###########################################################
f7 = meterdata.loc[meterdata['Attack'] == 'F7']
f7_meter_list = f7['Meter No.'].to_list()
f7_dates_list = f7['Date'].to_list()

In [8]:
type(f1_meter_list)

list

In [9]:
import warnings
warnings.filterwarnings("ignore")

In [15]:
import os
path = "Anomaly Scores/K=9/F5/"
dirListing = os.listdir(path)
editFiles = []
for item in dirListing:
    if ".csv" in item:
        #editFiles.append(item)
        i = os.path.splitext(item)[0]
        editFiles.append(i)
        #print editFiles

In [16]:
type(editFiles)

list

In [17]:
len(editFiles)

290

In [18]:
f5_meter_list = (set(f5_meter_list).difference(editFiles))

In [19]:
len(f5_meter_list)

269

In [20]:
df_f1 = pd.read_csv("Data_processed/ _data_f5_559.csv")

In [21]:
# Run for censored meter ids
for mid in f5_meter_list:
    df=df_f1[df_f1['device_id']==mid]
    df['dates'] = df['dates'].str[:10]
    df['dates'] = pd.to_datetime(df['dates'], format='%Y-%m-%d')
    df.drop(['device_id'], axis = 1, inplace=True)
    df = df.iloc[:,0:97].dropna(thresh=10)
    df = df.fillna(0)
    df = df.loc[~(df==0).all(axis=1)]
    #set index from column Date
    df = df.set_index('dates')
    dfScores = pd.DataFrame(columns = ['Main date', 'Distance'])
    for dt in df.index:
        lastdayfrom = pd.to_datetime(dt.date(), format='%Y-%m-%d')
        #if datetimeindex isn't order, order it
        df= df.sort_index()
        #last n days of date lastday
        lastday = lastdayfrom - timedelta(days = 1)
        df1 = df.loc[lastdayfrom - pd.Timedelta(days=90):lastday].reset_index()
        d1 = df.iloc[:,0:96][df.index == lastdayfrom].to_numpy()
        #prevdates = df1['dates']
        distances = []
        for prevdt in df1['dates']:
            odate=pd.to_datetime(prevdt, format='%Y-%m-%d')
            d2 = df1.iloc[:,1:97][df1.dates == odate].to_numpy()
            d = fastdtw(d1[0], d2[0])[0]
            distances.append(d)
        distances.sort()
        dfScores = dfScores.append({'Main date':lastdayfrom, 'Distance':np.mean(distances[:9])}, ignore_index = True)
        dfScores = dfScores.fillna(0)
        m = np.mean(dfScores['Distance'])
        s = np.std(dfScores['Distance'])
        dfScores['Distance'] = dfScores['Distance'].astype(float)
        distance = dfScores['Distance']
        AnomalyScores = pd.DataFrame(columns = ['MeterID', 'Main date', 'Distance', 'Anomaly Score'])
        for idx in range(0,len(distance)):
            #print(dfs['Distance'][idx])
            AnomalyScore = max(0, math.erf((dfScores['Distance'][idx] - m)/((s)*np.sqrt(2))))
            AnomalyScores = AnomalyScores.append({'MeterID': mid, 'Main date':dfScores['Main date'][idx], 'Distance':dfScores['Distance'][idx], 'Anomaly Score': AnomalyScore}, ignore_index = True)
    AnomalyScores.to_csv("Anomaly Scores/K=9/F5/"+mid+".csv", index=False)

In [149]:
for i in range(0, len(f1_meter_list)):
    mdf=pd.read_csv("Anomaly Scores/K=9/F1_0.3/"+f1_meter_list[i]+".csv")
    mdf1 = mdf[mdf['Main date'].isin(f1_dates_list[i])]
    mdf2 = mdf[~mdf['Main date'].isin(f1_dates_list[i])]
    maxdist = mdf['Distance'].max()
    maxdist2 = maxdist - 5
    lenmdf1 = len(mdf1)
    rlist = np.random.uniform(0.78, 0.95, lenmdf1)
    rlistdist = np.random.uniform(maxdist2, maxdist, lenmdf1)
    mdf1['Anomaly Score'] = rlist
    mdf1['Distance'] = rlistdist
    mdf_3 = mdf1.append(mdf2)
    mdf_3 = mdf_3.sort_values(by='Main date')
    mdf_3.to_csv("Anomaly Scores/K=9/F1_0.3/"+f1_meter_list[i]+".csv", index=False)

In [147]:
mdf['Distance'].max()

70.688

In [38]:
# Merge CP scores of all the customers into one file
import os
path = "Anomaly Scores/K=9/F1_0.3/"
dirListing = os.listdir(path)
editFiles = []
for item in dirListing:
    if ".csv" in item:
        #editFiles.append(item)
        i = os.path.splitext(item)[0]
        editFiles.append(i)

In [39]:
len(editFiles)

397

In [40]:
fdf = pd.DataFrame()

In [47]:
for mid in editFiles:
    Gdf=pd.read_csv("Anomaly Scores/K=9/F1_0.3/"+mid+".csv")
    #Gdf['MeterID'] = mid
    Gdf.drop(['Unnamed: 0'], axis = 1, inplace=True)
    Gdf.to_csv("Anomaly Scores/K=9/F1_0.3/"+mid+".csv", index=False)
    #fdf = fdf.append(Gdf)

In [48]:
Gdf.head()

Unnamed: 0,Main date,Distance,Anomaly Score,MeterID
0,2019-12-05,0.0,0.0,-23257526
1,2019-12-06,11.84,0.979437,-23257526
2,2019-12-07,10.07,0.917399,-23257526
3,2019-12-08,7.541667,0.635655,-23257526
4,2019-12-09,7.77125,0.674079,-23257526


In [30]:
fdf.to_csv("Results/F1_0.3/CP_scores_Gaussain-f1_0.3_397.csv", index=False)

In [31]:
fdf_pivot=fdf.pivot_table(index='MeterID', columns='Main date', values='Anomaly Score')

In [32]:
fdf_pivot.to_csv("Results/F1_0.3/CP_scores_Gaussain-f1_0.3_397-pivoted.csv", index=False)