# Importing Libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
from sklearn.cluster import KMeans
from collections import Counter
from scipy.spatial import ConvexHull, convex_hull_plot_2d
import matplotlib.pyplot as plt
from sklearn.metrics import davies_bouldin_score
from sklearn import metrics
from z3 import *
warnings.filterwarnings("ignore")

# Constants

In [2]:
NUM_ZONES = 5
NUM_TIMESLOTS = 1440

# Reading Cleaned Datasets

In [3]:
cleaned_dataframe_house_A_occ_1 = pd.read_csv("../../data/cleaned/Cleaned-Dataframe_House-A_Occupant-1.csv")
cleaned_dataframe_house_A_occ_1
cleaned_dataframe_house_A_occ_2 = pd.read_csv("../../data/cleaned/Cleaned-Dataframe_House-A_Occupant-2.csv")
cleaned_dataframe_house_A_occ_2
cleaned_dataframe_house_B_occ_1 = pd.read_csv("../../data/cleaned/Cleaned-Dataframe_House-B_Occupant-1.csv")
cleaned_dataframe_house_B_occ_1
cleaned_dataframe_house_B_occ_2 = pd.read_csv("../../data/cleaned/Cleaned-Dataframe_House-B_Occupant-2.csv")
cleaned_dataframe_house_B_occ_2

Unnamed: 0,Day,Occupant's Activity,Occupant's Zone,Zone Arrival Time (Minute),Zone Leaving Time (Minute),Stay Duration (Minute)
0,1,11,1,0,619,619
1,1,15,4,620,628,8
2,1,18,2,629,671,42
3,1,4,3,672,688,16
4,1,18,2,689,699,10
...,...,...,...,...,...,...
312,29,12,2,1240,1335,95
313,29,15,4,1336,1337,1
314,29,12,2,1338,1351,13
315,29,15,4,1352,1365,13


# Clustering

In [4]:
# returns kmeans clusters
def kmeans(X, number_clusters):
    kmeans = KMeans(n_clusters = number_clusters)
    cluster = kmeans.fit(X)
    return cluster

# Convex Hull

In [5]:
# returns convex hull of points associated with zones
def convex_hull(zone, points):
        '''
        Parameters
        ----------
        points : Array
            A set of points.
            
        Returns
        -------
        vertices : Points
            Arranges convex hull vertices in counter-clockwise orientation.
        '''    
        
        hull = ConvexHull(points)
            
        simplices = hull.simplices
        
        vertices = []
        for index in hull.vertices:
            vertices.append((points[index][0], zone, points[index][1]))
        vertices.append((points[hull.vertices[0]][0], zone, points[hull.vertices[0]][1]))
        
        return vertices

# Cluster Boundary Acquisition

In [6]:
def get_cluster(dataframe, number_clusters):
    list_cluster = []
        
    count = 0
    for zone in range(NUM_ZONES):
        mod_dataframe = dataframe[['Occupant\'s Zone','Zone Arrival Time (Minute)', 'Stay Duration (Minute)']].values
        features = mod_dataframe[mod_dataframe[:,0] == zone][:,1:]
        
        cluster_model = kmeans(features, number_clusters)
        labels = cluster_model.labels_
                
        n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
        for cluster in range(n_clusters):
            points = []
            vertices = []
            
            for k in range(len(labels)):
                if labels[k] == cluster:
                    points.append(features[k])
            
            if len(points) >= 3:
                try:
                    vertices = convex_hull(zone, points)       
                except:
                    pass
            list_cluster.append({"zone_id": zone, "cluster_id": cluster, "points": vertices})
            
    return list_cluster

# Range Claculation Function

In [7]:
def is_left( x, y, init_x, init_y, final_x, final_y):
     return ((final_x - init_x)*(y - init_y) - (final_y - init_y)*(x - init_x)) >= 0

def range_calculation(list_cluster):
    
    list_time_min = [[[] for j in range(NUM_TIMESLOTS)] for i in range(NUM_ZONES + 1)]
    list_time_max = [[[] for j in range(NUM_TIMESLOTS)] for i in range(NUM_ZONES + 1)]

    for i in range(len(list_cluster)):
        zone_id = list_cluster[i]["zone_id"]
        min_x_range = 1440
        max_x_range = 0

        ##################################################################
        ##################### Zone Constraints ###########################
        ##################################################################
        x = Int('x')
        y = Int('y')

        points = list_cluster[i]["points"]
        for i in range(len(points)):
            for j in range(len(points[i])):
                points[i] = list(points[i])
                points[i][j] = int(points[i][j])
        zone_constraints = []

        and_constraints = []
        for j in range(len(points) - 1):
            and_constraints.append(is_left(x, y, points[j][0], points[j][2], points[j + 1][0], points[j + 1][2]))

        zone_constraints.append(And(and_constraints))

        #print(zone_constraints)
        ####### Minimum value of X range #######
        o = Optimize()
        o.add(zone_constraints)
        o.minimize(x)
        o.check()
        
        min_x_range = int(str(o.model()[x]))

        ####### Maximum value of X range #######
        o = Optimize()
        o.add(zone_constraints)
        o.maximize(x)
        o.check()
        #print(o.model()[x])

        max_x_range = int(str(o.model()[x]))
        
        
        for j in range(min_x_range, max_x_range):
            ####### Minimum value of Y range #######
            o = Optimize()
            o.add(zone_constraints)
            o.add(x == j)
            o.minimize(y)
            o.check()

            min_y_range = o.model()[y]
            if min_y_range == None:
                min_y_range = 0

            ####### Maximum value of Y range #######
            o = Optimize()
            o.add(zone_constraints)
            o.add(x == j)
            o.maximize(y)
            o.check()

            max_y_range = o.model()[y]
            if max_y_range == None:
                max_y_range = 0
                
            list_time_min[zone_id][j].append(int(str(min_y_range)))
            list_time_max[zone_id][j].append(int(str(max_y_range)))
#         for i in range(len(zone_time_val)):
#             zone = zone_time_val[i][0]
#             time = zone_time_val[i][1]
#             val = zone_time_val[i][2]
#             list_time_min[zone][time].append(val)
#             list_time_max[zone][time].append(val)
            
    return list_time_min, list_time_max

# BIoTA Attack Samples Processing

## Purpose: Testing with SHATTER ADM (Evaluating ADM Performance)

In [8]:
def biota_attack_sample_processing(dataset):
    occupant_1_schedule = []
    occupant_2_schedule = []
    
    for i in range(len(dataset)):
        occupants = dataset.iloc[i,:].values
        #print(occupants)    
        if occupants.tolist() == [0, 0, 0, 2, 0]:
            occupant_1_schedule.append(1)
            occupant_2_schedule.append(1)
        elif occupants.tolist() == [1, 0, 0, 1, 0]:
            occupant_1_schedule.append(0)
            occupant_2_schedule.append(1)
        elif occupants.tolist() == [2, 0, 0, 0, 0]:
            occupant_1_schedule.append(0)
            occupant_2_schedule.append(0)
        else:
            print(occupants.tolist())
            
    occupant_1_events = []
    occupant_2_events = []
    
    for i in range(0, len(dataset), NUM_TIMESLOTS):
        occupant_1_start_time = 0
        occupant_2_start_time = 0
        occupant_1_duration = 0
        occupant_2_duration = 0
        for j in range(NUM_TIMESLOTS):
            if occupant_1_schedule[i + j] == 0 or j == NUM_TIMESLOTS - 1:
                if occupant_1_duration != 0:
                    occupant_1_events.append([occupant_1_start_time, occupant_1_duration])
                    occupant_1_duration = 0
                else:
                    continue
            else:
                if occupant_1_duration == 0:
                    occupant_1_start_time = j
                    occupant_1_duration = 1
                else:
                    occupant_1_duration += 1
                    
            if occupant_2_schedule[i + j] == 0 or j == NUM_TIMESLOTS - 1:
                if occupant_2_duration != 0:
                    occupant_2_events.append([occupant_2_start_time, occupant_2_duration])
                    occupant_2_duration = 0
                else:
                    continue
            else:
                if occupant_2_duration == 0:
                    occupant_2_start_time = j
                    occupant_2_duration = 1
                else:
                    occupant_2_duration += 1
            #print(j, occupant_1_schedule[i + j], occupant_1_start_time, occupant_1_duration)
    return occupant_1_events, occupant_2_events

# Arrival Exit Events from BIoTA Attack Samples

In [9]:
attack_dataset_house_A = pd.read_csv('../../data/biota/BIoTA-Attack-Dataframe_House-A.csv').iloc[:,7:12]
house_A_occupant_1_events, house_A_occupant_2_events = biota_attack_sample_processing(attack_dataset_house_A)
attack_dataset_house_B = pd.read_csv('../../data/biota/BIoTA-Attack-Dataframe_House-B.csv').iloc[:,7:12]
house_B_occupant_1_events, house_B_occupant_2_events = biota_attack_sample_processing(attack_dataset_house_B)
len(house_A_occupant_1_events), len(house_A_occupant_2_events), len(house_B_occupant_1_events), len(house_B_occupant_2_events)

(143, 21, 113, 28)

# Anomaly Detection Model Scoring Function

## Based on Different Hyper-parameters

In [10]:
def adm_score(dataframe, events, number_clusters):
    dataframe = cleaned_dataframe_house_A_occ_1
    zone = 3 
    mod_dataframe = dataframe[['Occupant\'s Zone','Zone Arrival Time (Minute)', 'Stay Duration (Minute)']].values
    features = mod_dataframe[mod_dataframe[:,0] == zone][:,1:]

    cluster_model = kmeans(features, number_clusters)
    labels = cluster_model.labels_

    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)

    list_cluster = []

    for cluster in range(n_clusters):
        points = []
        for k in range(len(labels)):
            if labels[k] == cluster:
                points.append(features[k])

        if len(points) >= 3:
            try:
                vertices = convex_hull(zone, points)       
            except:
                pass
        list_cluster.append({"zone_id": zone, "cluster_id": cluster, "points": vertices})

    list_time_min, list_time_max = range_calculation(list_cluster)

    benign = 0
    anomaly = 0

    for i in range(len(events)):
        arrival_time = events[i][0]
        duration = events[i][1]
        flag = False
        for j in range(len(list_time_min[zone][arrival_time])):
            if duration >= list_time_min[zone][arrival_time][j] and duration <= list_time_max[zone][arrival_time][j]:
                flag = True
        if flag == True:
            benign +=1
        if flag == False:
            anomaly += 1
   
    

    ratio = benign / (benign + anomaly)
    
    predictions = KMeans(n_clusters = number_clusters).fit_predict(features)
    
    prediction_dataframe = pd.DataFrame(predictions)
    prediction_dataframe.columns = ['Type']
 
    num_noise = sum(prediction_dataframe['Type'] == -1)
    max_label = max(prediction_dataframe["Type"])

    return [ratio * 100, num_noise/len(dataframe), davies_bouldin_score(features, predictions), metrics.silhouette_score(features, predictions, metric='euclidean'), metrics.calinski_harabasz_score(features, predictions)]

# Hyper-Parameter Tuning for House A Occupant 1

In [11]:
adm_scores = []
for i in range(2, 50):
    print(i)
    try:
        adm_scores.append(adm_score(cleaned_dataframe_house_A_occ_1, house_A_occupant_1_events, i))
    except:
        adm_scores.append(adm_scores[len(adm_scores) - 1])
        pass

adm_score_dataframe = pd.DataFrame(adm_scores, columns = ["Anomaly Detection Rate", "Noise Ratio", "Davies Bouldin Score", "Silhouette Score", "Calinski-Harabasz Index"])
#print(adm_score_dataframe)

# plt.plot(adm_score_dataframe["Silhouette Score"])
# plt.plot(adm_score_dataframe["Davies Bouldin Score"])
# plt.plot(adm_score_dataframe["Calinski-Harabasz Index"]/1000)
adm_score_dataframe.to_csv('../../data/shatter/KMeans_HyperParameter_House-A_Occ-1.csv', index = False)
adm_score_dataframe

2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


Unnamed: 0,Anomaly Detection Rate,Noise Ratio,Davies Bouldin Score,Silhouette Score,Calinski-Harabasz Index
0,41.958042,0.0,0.582778,0.597874,277.510205
1,26.573427,0.0,0.431476,0.616048,433.801966
2,26.573427,0.0,0.375887,0.661517,914.911107
3,28.671329,0.0,0.451938,0.623318,1099.598008
4,27.272727,0.0,0.538229,0.564662,1134.381204
5,24.475524,0.0,0.599826,0.516071,1253.711202
6,24.475524,0.0,0.614745,0.50622,1338.208597
7,25.874126,0.0,0.614484,0.494793,1336.213506
8,20.27972,0.0,0.641166,0.465359,1343.769986
9,20.27972,0.0,0.670228,0.465726,1414.36319


# Hyper-Parameter Tuning for House A Occupant 2

In [12]:
adm_scores = []
for i in range(2, 50):
    print(i)
    try:
        adm_scores.append(adm_score(cleaned_dataframe_house_A_occ_2, house_A_occupant_2_events, i))
    except:
        adm_scores.append(adm_scores[len(adm_scores) - 1])
        pass

adm_score_dataframe = pd.DataFrame(adm_scores, columns = ["Anomaly Detection Rate", "Noise Ratio", "Davies Bouldin Score", "Silhouette Score", "Calinski-Harabasz Index"])
#print(adm_score_dataframe)

#plt.plot(adm_score_dataframe["Silhouette Score"])
#plt.plot(adm_score_dataframe["Davies Bouldin Score"])
#plt.plot(adm_score_dataframe["Calinski-Harabasz Index"]/1000)
adm_score_dataframe.to_csv('../../data/shatter/KMeans_HyperParameter_House-A_Occ-2.csv', index = False)
adm_score_dataframe

2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


Unnamed: 0,Anomaly Detection Rate,Noise Ratio,Davies Bouldin Score,Silhouette Score,Calinski-Harabasz Index
0,0.0,0.0,0.582778,0.597874,277.510205
1,0.0,0.0,0.431476,0.616048,433.801966
2,0.0,0.0,0.375887,0.661517,914.911107
3,0.0,0.0,0.451938,0.623318,1099.598008
4,0.0,0.0,0.530764,0.565472,1142.326806
5,0.0,0.0,0.596352,0.516772,1253.646243
6,0.0,0.0,0.601319,0.511539,1348.12656
7,0.0,0.0,0.61506,0.505188,1353.942413
8,0.0,0.0,0.629427,0.474106,1361.707992
9,0.0,0.0,0.642255,0.46255,1390.28046


# Hyper-Parameter Tuning for House B Occupant 1

In [13]:
adm_scores = []
for i in range(2, 50):
    print(i)
    try:
        adm_scores.append(adm_score(cleaned_dataframe_house_B_occ_1, house_B_occupant_1_events, i))
    except:
        adm_scores.append(adm_scores[len(adm_scores) - 1])
        pass

adm_score_dataframe = pd.DataFrame(adm_scores, columns = ["Anomaly Detection Rate", "Noise Ratio", "Davies Bouldin Score", "Silhouette Score", "Calinski-Harabasz Index"])
#print(adm_score_dataframe)

# plt.plot(adm_score_dataframe["Silhouette Score"])
# plt.plot(adm_score_dataframe["Davies Bouldin Score"])
# plt.plot(adm_score_dataframe["Calinski-Harabasz Index"]/1000)
adm_score_dataframe.to_csv('../../data/shatter/KMeans_HyperParameter_House-B_Occ-1.csv', index = False)
adm_score_dataframe

2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


Unnamed: 0,Anomaly Detection Rate,Noise Ratio,Davies Bouldin Score,Silhouette Score,Calinski-Harabasz Index
0,40.707965,0.0,0.580092,0.596944,277.640547
1,28.318584,0.0,0.431476,0.616048,433.801966
2,26.548673,0.0,0.375887,0.661517,914.911107
3,21.238938,0.0,0.451938,0.623318,1099.598008
4,23.00885,0.0,0.530764,0.565472,1142.326806
5,22.123894,0.0,0.599826,0.516071,1253.711202
6,16.814159,0.0,0.601319,0.511539,1348.12656
7,17.699115,0.0,0.613923,0.505693,1352.969565
8,15.929204,0.0,0.627113,0.488242,1380.911412
9,15.929204,0.0,0.649343,0.461341,1395.682313


# Hyper-Parameter Tuning for House B Occupant 2

In [14]:
adm_scores = []
for i in range(2, 50):
    print(i)
    try:
        adm_scores.append(adm_score(cleaned_dataframe_house_B_occ_2, house_B_occupant_2_events, i))
    except:
        adm_scores.append(adm_scores[len(adm_scores) - 1])
        pass

adm_score_dataframe = pd.DataFrame(adm_scores, columns = ["Anomaly Detection Rate", "Noise Ratio", "Davies Bouldin Score", "Silhouette Score", "Calinski-Harabasz Index"])
#print(adm_score_dataframe)

# plt.plot(adm_score_dataframe["Silhouette Score"])
# plt.plot(adm_score_dataframe["Davies Bouldin Score"])
# plt.plot(adm_score_dataframe["Calinski-Harabasz Index"]/1000)
adm_score_dataframe.to_csv('../../data/shatter/KMeans_HyperParameter_House-B_Occ-2.csv', index = False)
adm_score_dataframe

2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


Unnamed: 0,Anomaly Detection Rate,Noise Ratio,Davies Bouldin Score,Silhouette Score,Calinski-Harabasz Index
0,7.142857,0.0,0.582778,0.597874,277.510205
1,3.571429,0.0,0.431476,0.616048,433.801966
2,3.571429,0.0,0.375887,0.661517,914.911107
3,3.571429,0.0,0.45239,0.621837,1099.592334
4,3.571429,0.0,0.526711,0.566333,1142.281638
5,3.571429,0.0,0.596352,0.516772,1253.646243
6,3.571429,0.0,0.613724,0.506583,1345.390914
7,3.571429,0.0,0.613923,0.505693,1352.969565
8,3.571429,0.0,0.667246,0.469283,1320.501361
9,3.571429,0.0,0.647501,0.466838,1412.720528


# Anomaly Detection Model Testing

In [15]:
record = []
def adm(dataframe, attack_dataframe, number_clusters, occupant):
    records = []
    for day in ([10, 15, 20, 25]):
        print(day)
        train_dataframe = dataframe[dataframe['Day'] <= day]
        test_dataframe = dataframe.iloc[len(train_dataframe):, :]
        mod_dataframe = pd.DataFrame()

        train_values = train_dataframe[['Occupant\'s Zone','Zone Arrival Time (Minute)', 'Stay Duration (Minute)']].values
        test_values = test_dataframe[['Occupant\'s Zone','Zone Arrival Time (Minute)', 'Stay Duration (Minute)']].values

        ############################################################################
        ################################## Training ################################
        ############################################################################
        #train_pos = 0 
        #train_anomaly = 0
        
        train_true_benign = 0
        train_false_anomaly = 0
        true_anomaly = 0
        false_benign = 0
        
        list_cluster = get_cluster(train_dataframe, number_clusters)
        list_time_min, list_time_max = range_calculation(list_cluster)
        
        

        for i in range(len(train_values)):
            zone = int(train_values[i][0])
            entrance = int(train_values[i][1])
            duration = int(train_values[i][2])
            flag = False
            for j in range(len(list_time_min[zone][entrance])):
                if duration >= list_time_min[zone][entrance][j] and duration <= list_time_max[zone][entrance][j]:
                    flag = True
            if flag == True:
                train_true_benign +=1
            else:
                train_false_anomaly += 1
        processed_attack_dataframe = attack_dataframe[attack_dataframe['Day'] < day].iloc[:,7:12]
        
        if occupant == 1:
            events , _ = biota_attack_sample_processing(processed_attack_dataframe)
        else:
            _ , events = biota_attack_sample_processing(processed_attack_dataframe)
            
        for i in range(len(events)):
            arrival_time = events[i][0]
            duration = events[i][1]
            flag = False
            for j in range(len(list_time_min[zone][arrival_time])):
                if duration >= list_time_min[zone][arrival_time][j] and duration <= list_time_max[zone][arrival_time][j]:
                    flag = True
            if flag == True:
                false_benign +=1
            else:
                true_anomaly += 1
            #print(events[i], benign, negative)

#         train_true_benign = int((train_true_benign/(train_true_benign + train_false_anomaly)) * 100)
#         train_false_anomaly = 100 - train_true_benign

#         true_anomaly = int((true_anomaly/(true_anomaly + false_benign)) * 100)
#         false_benign = 100 - true_anomaly


        train_accuracy = (train_true_benign + true_anomaly) / (train_true_benign + true_anomaly + false_benign + train_false_anomaly)
        train_precision = true_anomaly / (true_anomaly + train_false_anomaly)
        train_recall = (true_anomaly) / (true_anomaly + false_benign)
        train_f1_score = (2 * train_precision * train_recall) / (train_precision + train_recall)
        
        ############################################################################
        ################################### Testing ################################
        ############################################################################
        test_true_benign = 0
        test_false_anomaly = 0

        

        for i in range(len(test_values)):
            zone = int(test_values[i][0])
            entrance = int(test_values[i][1])
            duration = int(test_values[i][2])
            flag = False
            for j in range(len(list_time_min[zone][entrance])):
                if duration >= list_time_min[zone][entrance][j] and duration <= list_time_max[zone][entrance][j]:
                    flag = True
            if flag == False:
                test_true_benign +=1
            else:
                test_false_anomaly += 1
                
        test_accuracy = (test_true_benign + true_anomaly) / (test_true_benign + true_anomaly + false_benign + test_false_anomaly)
        test_precision = true_anomaly / (true_anomaly + test_false_anomaly)
        test_recall = (true_anomaly) / (true_anomaly + false_benign)
        test_f1_score = (2 * test_precision * test_recall) / (test_precision + test_recall)
        
        # test_accuracy = (test_pos/(test_pos + test_anomaly)) * 100
        records.append([true_anomaly, false_benign, train_true_benign, train_false_anomaly, test_true_benign, test_false_anomaly, train_accuracy, train_precision, train_recall, train_f1_score, test_accuracy, test_precision, test_recall, test_f1_score ])
        #records.append([train_pos, train_anomaly, train_accuracy, test_pos, test_anomaly, test_accuracy])
        #print(records)
    return records

# ADM Testing for House A Occupant 1

In [16]:
attack_dataset = pd.read_csv('../../data/biota/BIoTA-Attack-Dataframe_House-A.csv')

records = adm(cleaned_dataframe_house_A_occ_1, attack_dataset, 29, 1)

adm_accuracy_dataframe_house_A_occ_1 = pd.DataFrame(records, columns = ['True Anomaly', 'False Benign', 'Train True Benign', 'Train False Anomaly', 'Test True Benign', 'Test False Anomaly', 'Train Accuracy', 'Train Precision', 'Train Recall', 'Train F1-Score', 'Test Accuracy', 'Test Precision', 'Test Recall', 'Test F1-score' ])
adm_accuracy_dataframe_house_A_occ_1["Training Days"] = [10, 15, 20, 25]
adm_accuracy_dataframe_house_A_occ_1.to_csv('../../data/shatter/KMeans_ADM_Accuracy_House-A_Occ-1.csv', index = False)
adm_accuracy_dataframe_house_A_occ_1

10
15
20
25


Unnamed: 0,True Anomaly,False Benign,Train True Benign,Train False Anomaly,Test True Benign,Test False Anomaly,Train Accuracy,Train Precision,Train Recall,Train F1-Score,Test Accuracy,Test Precision,Test Recall,Test F1-score,Training Days
0,100,0,46,54,579,34,0.73,0.649351,1.0,0.787402,0.952314,0.746269,1.0,0.854701,10
1,100,0,65,35,382,56,0.825,0.740741,1.0,0.851064,0.895911,0.641026,1.0,0.78125,15
2,81,19,74,26,234,49,0.775,0.757009,0.81,0.782609,0.822454,0.623077,0.81,0.704348,20
3,76,24,79,21,95,30,0.775,0.783505,0.76,0.771574,0.76,0.716981,0.76,0.737864,25


# ADM Testing for House A Occupant 2

In [17]:
attack_dataset = pd.read_csv('../../data/biota/BIoTA-Attack-Dataframe_House-A.csv')

records = adm(cleaned_dataframe_house_A_occ_2, attack_dataset, 15, 2)

adm_accuracy_dataframe_house_A_occ_2 = pd.DataFrame(records, columns = ['True Anomaly', 'False Benign', 'Train True Benign', 'Train False Anomaly', 'Test True Benign', 'Test False Anomaly', 'Train Accuracy', 'Train Precision', 'Train Recall', 'Train F1-Score', 'Test Accuracy', 'Test Precision', 'Test Recall', 'Test F1-score' ])
adm_accuracy_dataframe_house_A_occ_2["Training Days"] = [10, 15, 20, 25]
adm_accuracy_dataframe_house_A_occ_2.to_csv('../../data/shatter/KMeans_ADM_Accuracy_House-A_Occ-2.csv', index = False)
adm_accuracy_dataframe_house_A_occ_2

10
15
20
25


Unnamed: 0,True Anomaly,False Benign,Train True Benign,Train False Anomaly,Test True Benign,Test False Anomaly,Train Accuracy,Train Precision,Train Recall,Train F1-Score,Test Accuracy,Test Precision,Test Recall,Test F1-score,Training Days
0,100,0,49,51,342,40,0.745,0.662252,1.0,0.796813,0.917012,0.714286,1.0,0.833333,10
1,100,0,66,34,242,56,0.83,0.746269,1.0,0.854701,0.859296,0.641026,1.0,0.78125,15
2,100,0,76,24,151,49,0.88,0.806452,1.0,0.892857,0.836667,0.671141,1.0,0.803213,20
3,94,6,82,18,36,28,0.88,0.839286,0.94,0.886792,0.792683,0.770492,0.94,0.846847,25


# ADM Testing for House B Occupant 1

In [18]:
attack_dataset = pd.read_csv('../../data/biota/BIoTA-Attack-Dataframe_House-B.csv')

records = adm(cleaned_dataframe_house_B_occ_1, attack_dataset, 14, 2)

adm_accuracy_dataframe_house_B_occ_1 = pd.DataFrame(records, columns = ['True Anomaly', 'False Benign', 'Train True Benign', 'Train False Anomaly', 'Test True Benign', 'Test False Anomaly', 'Train Accuracy', 'Train Precision', 'Train Recall', 'Train F1-Score', 'Test Accuracy', 'Test Precision', 'Test Recall', 'Test F1-score' ])
adm_accuracy_dataframe_house_B_occ_1["Training Days"] = [10, 15, 20, 25]
adm_accuracy_dataframe_house_B_occ_1.to_csv('../../data/shatter/KMeans_ADM_Accuracy_House-B_Occ-1.csv', index = False)
adm_accuracy_dataframe_house_B_occ_1

10
15
20
25


Unnamed: 0,True Anomaly,False Benign,Train True Benign,Train False Anomaly,Test True Benign,Test False Anomaly,Train Accuracy,Train Precision,Train Recall,Train F1-Score,Test Accuracy,Test Precision,Test Recall,Test F1-score,Training Days
0,72,28,41,59,307,19,0.565,0.549618,0.72,0.623377,0.889671,0.791209,0.72,0.753927,10
1,93,7,58,42,227,31,0.755,0.688889,0.93,0.791489,0.893855,0.75,0.93,0.830357,15
2,80,20,73,27,134,32,0.765,0.747664,0.8,0.772947,0.804511,0.714286,0.8,0.754717,20
3,82,18,79,21,44,21,0.805,0.796117,0.82,0.807882,0.763636,0.796117,0.82,0.807882,25


# ADM Testing for House B Occupant 2

In [19]:
attack_dataset = pd.read_csv('../../data/biota/BIoTA-Attack-Dataframe_House-B.csv')

records = adm(cleaned_dataframe_house_B_occ_2, attack_dataset, 10, 2)

adm_accuracy_dataframe_house_B_occ_2 = pd.DataFrame(records, columns = ['True Anomaly', 'False Benign', 'Train True Benign', 'Train False Anomaly', 'Test True Benign', 'Test False Anomaly', 'Train Accuracy', 'Train Precision', 'Train Recall', 'Train F1-Score', 'Test Accuracy', 'Test Precision', 'Test Recall', 'Test F1-score' ])
adm_accuracy_dataframe_house_B_occ_2["Training Days"] = [10, 15, 20, 25]
adm_accuracy_dataframe_house_B_occ_2.to_csv('../../data/shatter/KMeans_ADM_Accuracy_House-B_Occ-2.csv', index = False)
adm_accuracy_dataframe_house_B_occ_2

10
15
20
25


Unnamed: 0,True Anomaly,False Benign,Train True Benign,Train False Anomaly,Test True Benign,Test False Anomaly,Train Accuracy,Train Precision,Train Recall,Train F1-Score,Test Accuracy,Test Precision,Test Recall,Test F1-score,Training Days
0,72,28,53,47,174,15,0.625,0.605042,0.72,0.657534,0.851211,0.827586,0.72,0.770053,10
1,100,0,65,35,105,19,0.825,0.740741,1.0,0.851064,0.915179,0.840336,1.0,0.913242,15
2,95,5,74,26,59,16,0.845,0.785124,0.95,0.859729,0.88,0.855856,0.95,0.900474,20
3,82,18,76,24,31,12,0.79,0.773585,0.82,0.796117,0.79021,0.87234,0.82,0.845361,25


# Final ADM Model Performance Function

In [21]:
def adm_performance(dataframe, events, number_clusters):
    list_cluster = get_cluster(dataframe, number_clusters)
    list_time_min, list_time_max = range_calculation(list_cluster)
    true_benign = 0
    false_anomaly = 0
    true_anomaly = 0
    false_benign = 0
    
    dataframe_values = dataframe[['Occupant\'s Zone','Zone Arrival Time (Minute)', 'Stay Duration (Minute)']].values
    
    for i in range(len(dataframe_values)):
        zone = int(dataframe_values[i][0])
        entrance = int(dataframe_values[i][1])
        duration = int(dataframe_values[i][2])
        flag = False
        for j in range(len(list_time_min[zone][entrance])):
            if duration >= list_time_min[zone][entrance][j] and duration <= list_time_max[zone][entrance][j]:
                flag = True
            
        if flag == True:
            true_benign +=1
        else:
            false_anomaly += 1
        
    for i in range(len(events)):
        arrival_time = events[i][0]
        duration = events[i][1]
        flag = False
        for j in range(len(list_time_min[zone][arrival_time])):
            if duration >= list_time_min[zone][arrival_time][j] and duration <= list_time_max[zone][arrival_time][j]:
                flag = True
        if flag == True:
            false_benign += 1
        else:
            true_anomaly += 1
            
        true_benign = int((true_benign/(true_benign + false_anomaly)) * 100)
        false_anomaly = 100 - true_benign

        true_anomaly = int((true_anomaly/(true_anomaly + false_benign)) * 100)
        false_benign = 100 - true_anomaly


        accuracy = (true_benign + true_anomaly) / (true_benign + true_anomaly + false_benign + false_anomaly)
        precision = true_anomaly / (true_anomaly + false_anomaly)
        recall = (true_anomaly) / (true_anomaly + false_benign)
        f1_score = (2 * precision * recall) / (precision + recall)
        
        
    return [[true_anomaly, false_benign, true_benign, false_anomaly, accuracy, precision, recall, f1_score ]]

# Final ADM Testing for House A Occupant 1

In [22]:
records = adm_performance(cleaned_dataframe_house_A_occ_1, house_A_occupant_1_events, 29)

adm_accuracy_dataframe_house_A_occ_1 = pd.DataFrame(records, columns = ['True Anomaly', 'False Benign', 'True Benign', 'False Anomaly', 'Accuracy', 'Precision', 'Recall', 'F1-Score'])
adm_accuracy_dataframe_house_A_occ_1.to_csv('../../data/shatter/KMeans_Final_ADM_Accuracy_House-A_Occ-1.csv', index = False)
adm_accuracy_dataframe_house_A_occ_1

Unnamed: 0,True Anomaly,False Benign,True Benign,False Anomaly,Accuracy,Precision,Recall,F1-Score
0,59,41,81,19,0.7,0.75641,0.59,0.662921


# Final ADM Testing for House A Occupant 2

In [23]:
records = adm_performance(cleaned_dataframe_house_A_occ_2, house_A_occupant_2_events, 15)
adm_accuracy_dataframe_house_A_occ_2 = pd.DataFrame(records, columns = ['True Anomaly', 'False Benign', 'True Benign', 'False Anomaly', 'Accuracy', 'Precision', 'Recall', 'F1-Score'])
adm_accuracy_dataframe_house_A_occ_2.to_csv('../../data/shatter/KMeans_Final_ADM_Accuracy_House-A_Occ-2.csv', index = False)
adm_accuracy_dataframe_house_A_occ_2

Unnamed: 0,True Anomaly,False Benign,True Benign,False Anomaly,Accuracy,Precision,Recall,F1-Score
0,100,0,85,15,0.925,0.869565,1.0,0.930233


# Final ADM Testing for House B Occupant 1

In [24]:
records = adm_performance(cleaned_dataframe_house_B_occ_1, house_B_occupant_1_events, 14)
adm_accuracy_dataframe_house_B_occ_1 = pd.DataFrame(records, columns = ['True Anomaly', 'False Benign', 'True Benign', 'False Anomaly', 'Accuracy', 'Precision', 'Recall', 'F1-Score'])
adm_accuracy_dataframe_house_B_occ_1.to_csv('../../data/shatter/KMeans_Final_ADM_Accuracy_House-B_Occ-1.csv', index = False)
adm_accuracy_dataframe_house_B_occ_1

Unnamed: 0,True Anomaly,False Benign,True Benign,False Anomaly,Accuracy,Precision,Recall,F1-Score
0,71,29,83,17,0.77,0.806818,0.71,0.755319


# Final ADM Testing for House B Occupant 2

In [25]:
records = adm_performance(cleaned_dataframe_house_B_occ_2, house_B_occupant_2_events, 10)
adm_accuracy_dataframe_house_B_occ_2 = pd.DataFrame(records, columns = ['True Anomaly', 'False Benign', 'True Benign', 'False Anomaly', 'Accuracy', 'Precision', 'Recall', 'F1-Score'])
adm_accuracy_dataframe_house_B_occ_2.to_csv('../../data/shatter/KMeans_Final_ADM_Accuracy_House-B_Occ-2.csv', index = False)
adm_accuracy_dataframe_house_B_occ_2

Unnamed: 0,True Anomaly,False Benign,True Benign,False Anomaly,Accuracy,Precision,Recall,F1-Score
0,99,1,78,22,0.885,0.818182,0.99,0.895928


# Partial ADM Model Performance

In [32]:
def part_adm_performance(dataframe, events, number_clusters):
    
    train_dataframe = dataframe[dataframe['Day'] <= 15]
                         
    list_cluster = get_cluster(train_dataframe, number_clusters)
    list_time_min, list_time_max = range_calculation(list_cluster)

    true_benign = 0
    false_anomaly = 0
    true_anomaly = 0
    false_benign = 0
    
    dataframe_values = dataframe[['Occupant\'s Zone','Zone Arrival Time (Minute)', 'Stay Duration (Minute)']].values
    
    for i in range(len(dataframe_values)):
        zone = int(dataframe_values[i][0])
        entrance = int(dataframe_values[i][1])
        duration = int(dataframe_values[i][2])
        flag = False
        for j in range(len(list_time_min[zone][entrance])):
            if duration >= list_time_min[zone][entrance][j] and duration <= list_time_max[zone][entrance][j]:
                flag = True
                true_benign +=1
        if flag == False:
            false_anomaly += 1
        
    for i in range(len(events)):
        arrival_time = events[i][0]
        duration = events[i][1]
        flag = False
        for j in range(len(list_time_min[zone][arrival_time])):
            if duration >= list_time_min[zone][arrival_time][j] and duration <= list_time_max[zone][arrival_time][j]:
                flag = True
        if flag == True:
            false_benign += 1
        else:
            true_anomaly += 1
        #print(events[i], benign, negative)

    true_benign = int((true_benign/(true_benign + false_anomaly)) * 100)
    false_anomaly = 100 - true_benign

    true_anomaly = int((true_anomaly/(true_anomaly + false_benign)) * 100)
    false_benign = 100 - true_anomaly



    accuracy = (true_benign + true_anomaly) / (true_benign + true_anomaly + false_benign + false_anomaly)
    precision = true_anomaly / (true_anomaly + false_anomaly)
    recall = (true_anomaly) / (true_anomaly + false_benign)
    f1_score = (2 * precision * recall) / (precision + recall)

        
    return [[true_anomaly, false_benign, true_benign, false_anomaly, accuracy, precision, recall, f1_score ]]

# Partial ADM Testing for House A Occupant 1

In [33]:
records = part_adm_performance(cleaned_dataframe_house_A_occ_1, house_A_occupant_1_events, 29)
adm_accuracy_dataframe_house_A_occ_1 = pd.DataFrame(records, columns = ['True Anomaly', 'False Benign', 'True Benign', 'False Anomaly', 'Accuracy', 'Precision', 'Recall', 'F1-Score'])
adm_accuracy_dataframe_house_A_occ_1.to_csv('../../data/shatter/KMeans_Partial_ADM_Accuracy_House-A_Occ-1.csv', index = False)
adm_accuracy_dataframe_house_A_occ_1

Unnamed: 0,True Anomaly,False Benign,True Benign,False Anomaly,Accuracy,Precision,Recall,F1-Score
0,87,13,41,59,0.64,0.59589,0.87,0.707317


# Partial ADM Testing for House A Occupant 2

In [34]:
records = part_adm_performance(cleaned_dataframe_house_A_occ_2, house_A_occupant_2_events, 15)
adm_accuracy_dataframe_house_A_occ_2 = pd.DataFrame(records, columns = ['True Anomaly', 'False Benign', 'True Benign', 'False Anomaly', 'Accuracy', 'Precision', 'Recall', 'F1-Score'])
adm_accuracy_dataframe_house_A_occ_2.to_csv('../../data/shatter/KMeans_Partial_ADM_Accuracy_House-A_Occ-2.csv', index = False)
adm_accuracy_dataframe_house_A_occ_2

Unnamed: 0,True Anomaly,False Benign,True Benign,False Anomaly,Accuracy,Precision,Recall,F1-Score
0,100,0,40,60,0.7,0.625,1.0,0.769231


# Partial ADM Testing for House B Occupant 1

In [35]:
records = part_adm_performance(cleaned_dataframe_house_B_occ_1, house_B_occupant_1_events, 14)
adm_accuracy_dataframe_house_B_occ_1 = pd.DataFrame(records, columns = ['True Anomaly', 'False Benign', 'True Benign', 'False Anomaly', 'Accuracy', 'Precision', 'Recall', 'F1-Score'])
adm_accuracy_dataframe_house_B_occ_1.to_csv('../../data/shatter/KMeans_Partial_ADM_Accuracy_House-B_Occ-1.csv', index = False)
adm_accuracy_dataframe_house_B_occ_1

Unnamed: 0,True Anomaly,False Benign,True Benign,False Anomaly,Accuracy,Precision,Recall,F1-Score
0,90,10,32,68,0.61,0.56962,0.9,0.697674


# Partial ADM Testing for House B Occupant 2

In [36]:
records = part_adm_performance(cleaned_dataframe_house_B_occ_2, house_B_occupant_2_events, 10)
adm_accuracy_dataframe_house_B_occ_2 = pd.DataFrame(records, columns = ['True Anomaly', 'False Benign', 'True Benign', 'False Anomaly', 'Accuracy', 'Precision', 'Recall', 'F1-Score'])
adm_accuracy_dataframe_house_B_occ_2.to_csv('../../data/shatter/KMeans_Partial_ADM_Accuracy_House-B_Occ-2.csv', index = False)
adm_accuracy_dataframe_house_B_occ_2

Unnamed: 0,True Anomaly,False Benign,True Benign,False Anomaly,Accuracy,Precision,Recall,F1-Score
0,89,11,46,54,0.675,0.622378,0.89,0.73251
