In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from itertools import product
from pathlib import Path
from sklearn.model_selection import StratifiedShuffleSplit

KeyboardInterrupt: 

### Auxiliar functions

In [19]:
###
class AccessLog:
    """ All operation in AccessLog (AL) """
    def __init__(self, df_users, df_rscs, df_env, df_op) -> None:
        """ Initialize AL attributes """
        self.df_users = df_users
        self.df_rscs = df_rscs
        self.df_env = df_env
        self.df_op = df_op
        self.user_attrs = df_users.columns
        self.rsrc_attrs = df_rscs.columns
        self.universal_access_log = []
    
    def compute_universal_al(self) -> None:
        """ Compute the universal Access Log (UAL). |UAL| = |U| x |R|"""
        access_log = []
        for user in self.df_users.values:
            for res in self.df_rscs.values:
                for env in self.df_env.values:   
                    for op in self.df_op.values:
                        access_log.append(list(user) + list(res)+ list(env) + list(op))
        
        access_log = pd.DataFrame(access_log)
        access_log.columns = list(self.df_users.columns) + list(self.df_rscs.columns) + list(self.df_env.columns) + list(self.df_op.columns)
        self.universal_access_log = access_log

    def tuple_rule_evaluation(self, item_rule, acc_req):
        if item_rule[0] == 0:
            if acc_req[item_rule[1]] != item_rule[2]:
                return False                
        if item_rule[0] == 1: # => attr1.val = attr2.val
            if acc_req[item_rule[1]] != acc_req[item_rule[2]]:
                return False
        if item_rule[0] == 2: # => attr in values.list
            if not acc_req[item_rule[1]] in item_rule[2]:
                return False
        if item_rule[0] == 3: # => attr < values.list
            if not acc_req[item_rule[1]] > item_rule[2]:
                return False
        if item_rule[0] == 4: # => attr != values.list
            if not acc_req[item_rule[1]] == item_rule[2]:
                return False
        return True

    def policy_evaluation(self, policy) -> None:
        """ Compare each access record with a policy set."""
        false_negs = [] # False negatives
        true_pos = [] # True positives
        len_policy = len(policy)
        counter = 0

        for acc_req in self.universal_access_log.iloc:
            denies_count = 0
            for rule in policy:
                res = True
                for item_rule in rule:
                    res = self.tuple_rule_evaluation(item_rule, acc_req)                    
                if res == False:
                    denies_count += 1
            if denies_count == len_policy:
                false_negs.append(acc_req)
            else:
                true_pos.append(acc_req)

            counter += 1
            if counter % 500000 == 0:
                print("Counter:", counter)
        
        false_negs = pd.DataFrame(false_negs)
        true_pos = pd.DataFrame(true_pos)

        print("FN;", len(false_negs), "\t\tTP:", len(true_pos))

        # Add "ACTION" column
        false_negs["ACTION"] = 0
        true_pos["ACTION"] = 1

        # Access log with "ACTION" column
        self.universal_access_log = pd.concat([true_pos,false_negs])

    def get_sparse_al(self, fraction, random_state=None, previous_al=None):        
        """ Compute the sparse access log (SAL) version. """
        acc_log = self.universal_access_log
        if isinstance(previous_al, pd.DataFrame):
            acc_log = previous_al
        sparse_positive_al = None   
        pos_access_req = acc_log[acc_log["ACTION"]==1]
        if random_state != None:
            sparse_positive_al = pos_access_req.sample(frac=fraction,
                                                       random_state=random_state)
        else:
            sparse_positive_al = pos_access_req.sample(frac=fraction)

        return pd.concat([sparse_positive_al, acc_log[acc_log["ACTION"]==0]])

    def get_noisy_al(self, fraction, random_state=None, previous_al=None):
        """ Compute the noisy access log (NAL) version. """
        acc_log = self.universal_access_log
        if isinstance(previous_al, pd.DataFrame):
            acc_log = previous_al

        noysi_al = None
        if random_state != None:
            noisy_separation = StratifiedShuffleSplit(n_splits = 1,
                                                      train_size = fraction,
                                                      random_state = random_state)
        else:
            noisy_separation = StratifiedShuffleSplit(n_splits = 1,
                                                      train_size = fraction)
        
        noisy_acc_log_selection = noisy_separation.split(acc_log, acc_log.ACTION)
                        
        noisy_acc_req = None
        normal_acc_req = None
        for noisy_idx, normal_idx, in noisy_acc_log_selection:
            noisy_acc_req = acc_log.iloc[noisy_idx]
            normal_acc_req = acc_log.iloc[normal_idx]

        # Change the resolution in the noisy_selection dataset
        noisy_acc_req["ACTION"] = np.where(noisy_acc_req["ACTION"] == 0, 1, 0)

        noysi_al = pd.concat([noisy_acc_req, normal_acc_req])

        return noysi_al

    def get_balanced_al(self, positive_frac, random_state = None, previous_al = None):
        acc_log = self.universal_access_log
        
        if isinstance(previous_al, pd.DataFrame):
            acc_log = previous_al        

        # Compute the original balanced
        pos_acc_log = acc_log[acc_log["ACTION"]==1]
        neg_acc_log = acc_log[acc_log["ACTION"]==0]

        len_al = len(acc_log)
        len_pos_ar = len(pos_acc_log)

        limit_positive_frac = ( len_pos_ar * 100 ) / len_al

        if positive_frac < limit_positive_frac and positive_frac > 1:
            print("Is not possible")
            return
        
        len_neg_ar = int(( (1-positive_frac) * 100 * len_pos_ar ) / (positive_frac*100))        

        neg_acc_req = []
        if random_state != None:
            neg_acc_req = neg_acc_log.sample(n=len_neg_ar,
                                             random_state=random_state)
        else:
            neg_acc_req = neg_acc_log.sample(n=len_neg_ar)

        return pd.concat([pos_acc_log, neg_acc_req])


### Main

In [20]:
### Policy
# 0 = attribute = value
# 1 = attribute = attribute => attribute.value = attribute.value
# 2 = atributo = {valor, valor2, valor3, ...} 
# 3 = atributo < valor
# 4 = atributo != valor
policy = [
    [
        [0, "role", "homeowner"],
        [2, "timeday", ["day","midday"]],
        [2, "op", ["access","control"]]
    ],
    [
        [0, "role", "guest"],
        [0, "timeday", "day"],
        [0,"type", "smartlocks"],
        [0,"area", "livingroom"],
        [0, "op", "access"]
    ],
    [
        [3, "age", 18],
        [0, "type", "securitycameras"],
        [2, "op", ["access", "control"]]
    ],
    [
        [0, "role", "child"],
        [0, "location", "livingroom"],
        [2, "type", ["TV", "lights"]],
        [0, "op", "control"]
    ],
    [
        [0, "mode", "night"],
        [4, "role", "homeowner"],
        [0, "area", "livingroom"],
        [0, "type", "lights"],
        [0, "op", "control"]
    ],
    [
        [4, "temperature", 15],
        [1, "location", "area"],
        [0, "type", "thermostat"],
        [0, "op", "control"]
    ],
    [
        [0, "lockstatus", 0], # 0 Unlocked y 1 locked
        [0, "type", "smartlocks"],
        [0, "area", "livingroom"],
        [0, "timeday", "day"],
        [0, "op", "control"]
    ],
    [
        [0, "health", "emergency"],
        [0, "type", "smartlocks"],
        [0, "op", "control"]
    ],
    [        
        [4, "role", "child"],
        [0, "type", "lights"],
        [1, "area", "location"],
        [0, "op", "control"]
    ],
    [
        [0, "role", "homeowner"],
        [0, "mode", "arm"],
        [0, "type", "securitycameras"],
        [0, "op", "disarm"]
    ],
    [
        [0, "role", "homeowner"],
        [0, "mode", "disarm"],
        [0, "type", "securitycameras"],
        [0, "op", "arm"]
    ]
]

# Policy summary
counter_len = 0
for rule in policy:
    counter_len += len(rule)
print("|R|", len(policy))
print("avg(R)", counter_len/len(policy))
print("WSC", counter_len)


|R| 11
avg(R) 4.0
WSC 44


### Data

In [21]:
### Load data

# Load users
filepath = "../data/IoT-users.csv"
df_users = pd.read_csv(filepath)
df_users = df_users.drop_duplicates() # Remove deplicated users
df_users["uname"] = df_users.index
print("###*** DF - USERS - DONE!***###")
print(df_users.info())

# Load resources
filepath = "../data/IoT-res.csv"
df_res = pd.read_csv(filepath)
#df_res = df_res[df_res.columns[1:]] # Remove the ID
df_res = df_res.drop_duplicates() # Remove deplicated resources
print("###*** DF - RESOURCES - DONE!***###")
print(df_res.info())

###*** DF - USERS - DONE!***###
<class 'pandas.core.frame.DataFrame'>
Int64Index: 120 entries, 0 to 122
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   role    120 non-null    object
 1   age     120 non-null    int64 
 2   health  120 non-null    object
 3   uname   120 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 4.7+ KB
None
###*** DF - RESOURCES - DONE!***###
<class 'pandas.core.frame.DataFrame'>
Int64Index: 11 entries, 0 to 10
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   type         11 non-null     object
 1   area         11 non-null     object
 2   mode         11 non-null     object
 3   temperature  11 non-null     int64 
 4   lockstatus   11 non-null     object
dtypes: int64(1), object(4)
memory usage: 528.0+ bytes
None


In [22]:
# Load enviroment
env_data = {"location": ["livingroom", "bedroom", "backyard", "backyard"], "timeday": ["day", "midday", "night", "midnight"]}
df_env = pd.DataFrame.from_dict(env_data)
print("###*** DF - ENV - DONE! ***###")
print(df_env.info())

###*** DF - ENV - DONE! ***###
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   location  4 non-null      object
 1   timeday   4 non-null      object
dtypes: object(2)
memory usage: 192.0+ bytes
None


In [23]:
# Load enviroment
op_data = {"op": ["access", "control", "arm", "emergency"]}
df_op = pd.DataFrame.from_dict(op_data)
print("###*** DF - ENV - DONE! ***###")
print(df_op.info())

###*** DF - ENV - DONE! ***###
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   op      4 non-null      object
dtypes: object(1)
memory usage: 160.0+ bytes
None


In [24]:
### Preprocessing in resources 

# All possible combination
uniques = [df_env[i].unique().tolist() for i in df_env.columns]
df_env = pd.DataFrame(product(*uniques), columns = df_env.columns)
print("Done")
print(df_env)

Done
      location   timeday
0   livingroom       day
1   livingroom    midday
2   livingroom     night
3   livingroom  midnight
4      bedroom       day
5      bedroom    midday
6      bedroom     night
7      bedroom  midnight
8     backyard       day
9     backyard    midday
10    backyard     night
11    backyard  midnight


In [25]:
### Preprocessing in resources 

# All possible combination
uniques = [df_res[i].unique().tolist() for i in df_res.columns]
df_res = pd.DataFrame(product(*uniques), columns = df_res.columns)
df_res["rname"] = df_res.index
print("Done")
print(df_res)

Done
            type        area   mode  temperature lockstatus  rname
0    Smart locks  livingroom  armed          -10     locked      0
1    Smart locks  livingroom  armed          -10   unlocked      1
2    Smart locks  livingroom  armed           -5     locked      2
3    Smart locks  livingroom  armed           -5   unlocked      3
4    Smart locks  livingroom  armed            0     locked      4
..           ...         ...    ...          ...        ...    ...
985           TV    backyard  night           30   unlocked    985
986           TV    backyard  night           35     locked    986
987           TV    backyard  night           35   unlocked    987
988           TV    backyard  night           40     locked    988
989           TV    backyard  night           40   unlocked    989

[990 rows x 6 columns]


In [26]:
#### Access Log object

acc_log = AccessLog(df_users=df_users, df_rscs=df_res, df_env=df_env, df_op=df_op)

# Compute access log universal
acc_log.compute_universal_al()

In [27]:
# Compute the decision for each access requests
acc_log.universal_access_log

Unnamed: 0,role,age,health,uname,type,area,mode,temperature,lockstatus,rname,location,timeday,op
0,child,3,health,0,Smart locks,livingroom,armed,-10,locked,0,livingroom,day,access
1,child,3,health,0,Smart locks,livingroom,armed,-10,locked,0,livingroom,day,control
2,child,3,health,0,Smart locks,livingroom,armed,-10,locked,0,livingroom,day,arm
3,child,3,health,0,Smart locks,livingroom,armed,-10,locked,0,livingroom,day,emergency
4,child,3,health,0,Smart locks,livingroom,armed,-10,locked,0,livingroom,midday,access
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5702395,guest,15,emergency,122,TV,backyard,night,40,unlocked,989,backyard,night,emergency
5702396,guest,15,emergency,122,TV,backyard,night,40,unlocked,989,backyard,midnight,access
5702397,guest,15,emergency,122,TV,backyard,night,40,unlocked,989,backyard,midnight,control
5702398,guest,15,emergency,122,TV,backyard,night,40,unlocked,989,backyard,midnight,arm


In [28]:
acc_log.policy_evaluation(policy)

Counter: 500000
Counter: 1000000
Counter: 1500000
Counter: 2000000
Counter: 2500000
Counter: 3000000


In [16]:
def tuple_rule_evaluation_test(item_rule, acc_req):
        if item_rule[0] == 0:
            if acc_req[item_rule[1]] != item_rule[2]:
                return False                
        if item_rule[0] == 1: # => attr1.val = attr2.val
            if acc_req[item_rule[1]] != acc_req[item_rule[2]]:
                return False
        if item_rule[0] == 2: # => attr in values.list
            if not acc_req[item_rule[1]] in item_rule[2]:
                return False
        if item_rule[0] == 3: # => attr < values.list
            if not acc_req[item_rule[1]] > item_rule[2]:
                return False
        if item_rule[0] == 4: # => attr != values.list
            if not acc_req[item_rule[1]] == item_rule[2]:
                return False
        return True

def policy_evaluation_test(policy, access_log):
    """ Compare each access record with a policy set."""
    false_negs = [] # False negatives
    true_pos = [] # True positives
    len_policy = len(policy)
    counter = 0;

    for acc_req in access_log.iloc:
        denies_count = 0
        for rule in policy:
            res = True
            for item_rule in rule:
                res = tuple_rule_evaluation_test(item_rule, acc_req)                    
            if res == False:
                denies_count += 1
        if denies_count == len_policy:
            false_negs.append(acc_req)
        else:
            true_pos.append(acc_req)    
        
        counter += 1
        if counter % 1000000 == 0:
            break
    
    false_negs = pd.DataFrame(false_negs)
    true_pos = pd.DataFrame(true_pos)

    print("FN;", len(false_negs), "\t\tTP:", len(true_pos))

    # Add "ACTION" column
    false_negs["ACTION"] = 0
    true_pos["ACTION"] = 1

    # Access log with "ACTION" column
    access_log = pd.concat([true_pos,false_negs])
    return access_log

In [17]:
primer_acc_log_eva = policy_evaluation_test(policy, primer_set)
primer_acc_log_eva

FN; 2500 		TP: 7500


Unnamed: 0,role,age,health,uname,type,area,mode,temperature,lockstatus,rname,location,timeday,op,ACTION
0,child,3,health,0,Smart locks,livingroom,armed,-10,locked,0,livingroom,day,access,1
1,child,3,health,0,Smart locks,livingroom,armed,-10,locked,0,livingroom,day,control,1
2,child,3,health,0,Smart locks,livingroom,armed,-10,locked,0,livingroom,day,arm,1
4,child,3,health,0,Smart locks,livingroom,armed,-10,locked,0,livingroom,midday,access,1
5,child,3,health,0,Smart locks,livingroom,armed,-10,locked,0,livingroom,midday,control,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9983,child,3,health,0,Thermostats,livingroom,armed,10,unlocked,207,backyard,midnight,emergency,0
9987,child,3,health,0,Thermostats,livingroom,armed,15,locked,208,livingroom,day,emergency,0
9991,child,3,health,0,Thermostats,livingroom,armed,15,locked,208,livingroom,midday,emergency,0
9995,child,3,health,0,Thermostats,livingroom,armed,15,locked,208,livingroom,night,emergency,0
