# Long-run identification

<a id='tc'></a>
## Table of contents

**PART I** - [Setup](#setup)
1. [imports](#imports)
2. [helpers](#helpers)
3. [evaluate function](#evaluate)
4. [build per action capture model](#build-per-action-capture-model)

2. [sliding time window](#stw)
    1. [implementation](#implementation_stw)
    2. [Training](#evaluation_stw)
        - No training, output simply majority voting
        - Train on per-action captures: perform data augmentation (cut/crop the data), labeled as 1, etc., and use NoApp/NoAction captures labeled as 0 (maybe use the same model as …)
        - Train on longrun capture with probabilities output


<a id='setup'></a>
## PART I - Setup
[table of content](#tc)
<a id='imports'></a>

In [1]:
# imports
import pandas as pd
import numpy as np
import sys
import random
from build_datasets import *
import csv
import os
import pickle
import copy
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from scipy.stats import kurtosis
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
from sklearn.model_selection import train_test_split, ShuffleSplit, cross_val_score
from sklearn.utils.multiclass import unique_labels
from sklearn.utils import shuffle

<a id='helpers'></a>
## Helpers
[Table of Content](#tc)

In [2]:
### Capture time serie

all_columns = set()
def extract_columns(dataset_file):
    """
    Return the set of column names found in the header row of a .csv file.

    Only the first row of the file is read.

    Parameters:
        dataset_file (str): path of the .csv file

    Returns:
        set of str: the header fields; an empty set when the file is empty
        (the previous implementation returned None in that case)
    """
    with open(dataset_file, "r") as file:
        csv_reader = csv.reader(file, delimiter=',', quotechar='"')
        # next() with a default avoids StopIteration on an empty file
        header = next(csv_reader, [])
    return set(header)


def new_packet(columns):
    """Build a blank packet: every given column name mapped to an empty string."""
    return dict.fromkeys(columns, "")


def read_file(dataset_file, columns):
    """
    Parse a .csv capture and group its rows by packet ID.

    The first row is taken as the header. Every following row becomes a dict
    (one key per entry of `columns`, "" when the row has no value for it).
    The process exits with status 1 if the header names a column that is not
    part of `columns`.

    Returns:
        dict[int] -> list of row dicts sharing that "Packet #" value
    """
    ID_COLUMN = "Packet #"
    rows_by_packet_id = dict() # packetID => packets
    headers = []
    with open(dataset_file, "r") as file:
        rows = csv.reader(file, delimiter=',', quotechar='"')

        for row_number, row in enumerate(rows):
            if row_number == 0:
                headers = row
                continue

            packet = new_packet(columns)

            for position, value in enumerate(row):
                key = headers[position]
                if key not in packet:
                    print(packet)
                    print("Fatal: column '{}' not found in packet; all_columns is {}".format(key, columns))
                    sys.exit(1)
                packet[key] = value

            # the ID may be wrapped in single quotes in the export
            packet_id = toInt(packet[ID_COLUMN].replace('\'', ''))
            rows_by_packet_id.setdefault(packet_id, []).append(packet)

    return rows_by_packet_id

def extract_payload_length(payload_string, default=0):
    """
    Extract the payload length from a payload description string.

    Returns `default` when the stripped string is empty or contains "No data";
    otherwise the first space-separated token converted with toInt.
    """
    stripped = payload_string.strip()
    has_no_payload = stripped == "" or "No data" in stripped
    if has_no_payload:
        return default
    first_token = stripped.split(' ')[0]
    return toInt(first_token)

def packet_store_cleanup(packets): #dataset_packet_store is packetID => packets
    """
    Convert the raw string fields of every layer to typed values, in place.

    For each layer: the ID column becomes an int, "Time" and "Time delta"
    become floats (-1 when not convertible), and two fields derived from
    "Payload" are added: "PayloadLength" and "PayloadRaw".

    Returns:
        the same `packets` dict that was passed in
    """
    for layers in packets.values():
        for layer in layers:
            layer[ID_COLUMN] = toInt(layer[ID_COLUMN])
            for time_field in ("Time", "Time delta"):
                layer[time_field] = toFloat(layer[time_field], default=-1)
            layer["PayloadLength"] = extract_payload_length(layer["Payload"], default=0)
            layer["PayloadRaw"] = extract_payload_bytes(layer["Payload"])
    return packets


def packets_to_timesize_tuples(packets):
    """
    Flatten a packet store into a time serie of signed payload lengths.

    Packets are visited by increasing packet ID. The sign of each length
    encodes the direction: positive when the transmitter is the master and
    the master is one of POSSIBLE_MASTERS, with the sign flipped when either
    condition is inverted, so the direction stays the same even if the watch
    becomes master.

    Side effect: the module-level `master` is overwritten for every layer.

    Returns:
        dict with 'xs' (times as float) and 'ys' (signed payload lengths as int)
    """
    global master
    times, sizes = [], []

    for packet_id in sorted(packets.keys()):
        for layer in packets[packet_id]:
            master = extract_master(layer["Communication"])
            if master not in POSSIBLE_MASTERS:
                print("WARNING master not in Possible masters: '" + master + "'")
                print(layer["Communication"])
                direction = -1
            else:
                direction = 1

            if "master" not in layer['Transmitter'].lower():
                direction = -direction
            times.append(float(layer['Time']))
            sizes.append(direction * int(layer['PayloadLength']))
    return dict(xs=times, ys=sizes)



def merge_actions_in_app(events, to_merge):
    """
    Apply merge_action_in_app once per group of labels listed in `to_merge`,
    feeding the result of each merge into the next one.
    """
    merged = events
    for label_group in to_merge:
        merged = merge_action_in_app(merged, label_group)
    return merged


def merge_action_in_app(events, to_merge, separate_watch=True, maximum_level=None): # TODO separate watch
    """
    Merge several actions of one app into a single combined action.

    Parameters:
        events (dict[watch][app][action] -> list of events): dataset, left unmodified
        to_merge (list of str): labels of the form "app_action", all belonging
            to the same app (cannot merge across apps); the app is read from
            the first label
        separate_watch, maximum_level: currently unused

    Returns:
        a deep copy of `events` where, on every watch, the listed actions of
        the app are replaced by one action. Its name is the concatenation of
        the merged action names (in the dict's iteration order) and its
        samples are the concatenation of theirs. A watch where none of the
        listed actions exists is returned unchanged (previously an empty
        action named "" was added to it).
    """
    events_out = copy.deepcopy(events)
    if not to_merge:
        return events_out

    # split on the first underscore only, so action names may contain "_"
    app = to_merge[0].split("_", 1)[0]
    to_merge_action_set = {f.split("_", 1)[1] for f in to_merge}
    print("to_merge_action_set = ", to_merge_action_set)

    for w in events:
        if app not in events[w]:
            continue
        merged_actions = []
        labeled_actions = ""
        for action in events[w][app]:
            if action in to_merge_action_set:
                labeled_actions += action
                # take the samples from the copy so the result never aliases the input
                merged_actions += events_out[w][app].pop(action)
        if labeled_actions:
            events_out[w][app][labeled_actions] = merged_actions
    return events_out


def discard_actions(source_files, to_discard):
    """Return the source files whose path contains none of the `to_discard` substrings."""
    return [path for path in source_files
            if not any(pattern in path for pattern in to_discard)]


def build_features_labels_dataset(events, adversary_capture_duration=-1, unique_from=46, unique_to=1006):
    """
    Turn the nested events dataset into parallel feature/label lists.

    Parameters:
        events (dict[device][app][action] -> list of events): dataset
        adversary_capture_duration, unique_from, unique_to: forwarded to extract_features

    Returns:
        (data, labels): one feature vector (values of the extract_features
        dict) and one "app_action" label per event
    """
    data, labels = [], []
    for apps in events.values():
        for app, actions in apps.items():
            for action, samples in actions.items():
                label = app + "_" + action
                for sample in samples:
                    feature_map = extract_features(sample, adversary_capture_duration, unique_from, unique_to)
                    data.append(list(feature_map.values()))
                    labels.append(label)

    return data, labels



def delete_different_action_across_watch(events):
    """
    Keep only the "app_action" labels present on every watch.

    The dataset is modified in place: any action whose label is missing on at
    least one watch is deleted (apps left without actions are kept as empty
    dicts).

    Returns:
        the same `events` object
    """
    # Labels common to all watches
    common_labels = set()
    for position, watch in enumerate(events):
        watch_labels = {app + "_" + action
                        for app in events[watch]
                        for action in events[watch][app]}
        if position == 0:
            common_labels = watch_labels
        else:
            common_labels = common_labels & watch_labels

    # Drop everything that is not common; iterate over a snapshot of the keys
    # because the dict is edited while walking it
    for apps in events.values():
        for app, actions in apps.items():
            for action in list(actions):
                if app + "_" + action not in common_labels:
                    del actions[action]
    return events


def equilibrate_events_across_apps_and_watch(events):
    """
    Equilibrate the events by identifying the minimum number of samples per class
    and randomly discarding the extra samples present in the other classes.
    If there are multiple watches, only the actions common to all of them are kept
    (this step edits `events` in place).

    Parameters:
        events (dict[watch][app][action] -> list of events): dataset

    Returns:
        (events, nb_samples_per_cat):
            events (dict[watch][app][action] -> list of events): equilibrated dataset
            nb_samples_per_cat (int): number of samples kept per class; 0 when
            the dataset contains no action at all. That case previously
            returned the bare dict, which broke callers unpacking two values.
    """
    events = delete_different_action_across_watch(events)

    # Size of the smallest class across all watches
    class_sizes = [len(samples)
                   for apps in events.values()
                   for actions in apps.values()
                   for samples in actions.values()]
    if not class_sizes:
        return events, 0

    nb_samples_per_cat = min(class_sizes)

    # Randomly keep exactly nb_samples_per_cat samples of every class
    events_out = dict()
    for device, apps in events.items():
        for app, actions in apps.items():
            for action, samples in actions.items():
                events_out.setdefault(device, dict()).setdefault(app, dict())[action] = \
                    random.sample(samples, k=nb_samples_per_cat)

    return events_out, nb_samples_per_cat



def extract_features(xy, capture_duration_does_nothing=0, unique_from=46, unique_to=1006): # dataset is {'xs': [packet1, packet2,...], 'ys': [packet1, packet2,...]} where x is time and y is size
    """
    Compute the feature dict of one event.

    NOTE: this definition is overridden by an identical `extract_features`
    defined further down in the same cell.

    Features, in insertion order: min/mean/max/count/std/kurtosis of the
    non-null, outgoing and incoming sizes (whole event and first 30 packets),
    of the inter-packet times (whole event and first 30), of the histogram of
    packet lengths in [unique_from, unique_to), followed by the histogram
    itself. An empty group is replaced by the placeholder [-1].
    """
    times = xy['xs']
    sizes = xy['ys']
    features = dict()

    def head(seq, n=30):
        return seq[:30] if len(seq) > n else seq

    def add_stats(name, values):
        if len(values) == 0:
            values = [-1]
        features.update({
            'min_' + name: np.min(values),
            'mean_' + name: np.mean(values),
            'max_' + name: np.max(values),
            'count_' + name: len(values),
            'std_' + name: np.std(values),
            'kurtosis_' + name: kurtosis(values),
        })

    # general statistics
    first_sizes = head(sizes)
    add_stats("non_null", [abs(size) for size in sizes if size != 0])
    add_stats("outgoing", [abs(size) for size in sizes if size > 0])
    add_stats("incoming", [abs(size) for size in sizes if size < 0])
    add_stats("outgoing_30", [abs(size) for size in first_sizes if size > 0])
    add_stats("incoming_30", [abs(size) for size in first_sizes if size < 0])

    # statistics about timings
    gaps = [later - earlier for earlier, later in zip(times, times[1:])]
    add_stats("x_deltas", gaps)
    add_stats("x_deltas_30", head(gaps))

    # unique packet lengths [Liberatore and Levine; Herrmann et al.]
    histogram = {str(length): 0 for length in range(unique_from, unique_to)}
    for size in sizes:
        bucket = str(abs(size))
        if bucket in histogram:
            histogram[bucket] += 1

    add_stats("unique_lengths", list(histogram.values()))
    for bucket, occurrences in histogram.items():
        features['unique_lengths_' + str(bucket)] = occurrences

    return features


def filter_by_length(events, minimum_payload=200, ratio_app_not_satisfing_minimum_payload_length=0.25, printInfo = False):
    """
    Remove the actions that do not generate enough traffic.

    A sample is "below the minimum" when the sum of the absolute payload
    lengths of its packets is strictly lower than `minimum_payload`. An action
    is removed when the ratio of such samples is strictly greater than
    `ratio_app_not_satisfing_minimum_payload_length`. An action without any
    sample is removed as well (it used to raise ZeroDivisionError). Apps left
    without any action are removed.

    Parameters:
        events (dict[watch][app][action] -> list of {'xs': [...], 'ys': [...]}): dataset, left unmodified
        minimum_payload (int): minimum total payload, in bytes, expected per sample
        ratio_app_not_satisfing_minimum_payload_length (float): tolerated ratio of samples below the minimum
        printInfo (bool): print the details of every removed action

    Returns:
        a filtered deep copy of `events`
    """
    results = copy.deepcopy(events)
    for watch in events:
        for app in events[watch]:
            for action, samples in events[watch][app].items():
                total_event = len(samples)
                if total_event == 0:
                    # no sample at all: nothing proves this action sends traffic
                    if printInfo:
                        print(app + "_" + action + " removed")
                    del results[watch][app][action]
                    continue

                bellow_minimum_payload = sum(
                    1 for sample in samples
                    if sum(abs(s) for s in sample["ys"]) < minimum_payload
                )

                ratio_bellow = bellow_minimum_payload / total_event
                if ratio_bellow > ratio_app_not_satisfing_minimum_payload_length:
                    if printInfo:
                        print("total_event: ", total_event, " - bellow threshold: ", bellow_minimum_payload)
                        print(app + "_" + action + " removed")
                        print(" ratio_below = ", ratio_bellow)
                    del results[watch][app][action]

            if len(results[watch][app]) == 0:
                del results[watch][app]
    return results



def count_print(events):
    """Print one "<device>: <app>_<action> - <number of events>" line per action."""
    for device, apps in events.items():
        for app, actions in apps.items():
            for action, samples in actions.items():
                print("{}: {}_{} - {}".format(device, app, action, len(samples)))

                
# Compute the mean over realisations
def plot_acc_and_conf(n_samples, accuracies, repeat, title, xlabel, ylabel, fname, y_lim=None, RETURN_ACC=False, dpi=500):
    """
    Average the accuracies over `repeat` consecutive realisations and plot them.

    `n_samples` and `accuracies` are flat sequences in which every group of
    `repeat` consecutive entries belongs to the same setting. The plot shows
    the mean accuracy per setting and the mean +/- 2 standard deviations
    (drawn with the label '95% confidence interval'), and is saved to
    "./" + fname.

    Returns:
        (n_samples per setting, mean accuracy per setting) when RETURN_ACC is
        True, in which case nothing is plotted; None otherwise
    """
    samples_grid = np.array(n_samples).reshape((-1, repeat))
    accuracy_grid = np.array(accuracies).reshape((-1, repeat))

    n_samples_repr = samples_grid[:, 0]
    accuracies_avg = accuracy_grid.mean(axis=1)
    if RETURN_ACC:
        return n_samples_repr, accuracies_avg

    spread = accuracy_grid.std(axis=1) * 2

    figure, axes = plt.subplots()
    axes.plot(n_samples_repr, accuracies_avg, '-b', label='averaged accuracy')
    axes.plot(n_samples_repr, accuracies_avg + spread, '--r', label='95% confidence interval')
    axes.plot(n_samples_repr, accuracies_avg - spread, '--r')
    plt.title(title)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    if y_lim is not None:
        plt.ylim(y_lim[0], y_lim[1])
    axes.legend()
    plt.savefig("./"+fname, dpi=dpi)

    
    
def extract_features(xy, capture_duration_does_nothing=0, unique_from=46, unique_to=1006): # dataset is {'xs': [packet1, packet2,...], 'ys': [packet1, packet2,...]} where x is time and y is size
    """
    Compute the feature dict of one event.

    Parameters:
        xy (dict): {'xs': [time of each packet], 'ys': [signed size of each packet]};
            positive sizes are counted as outgoing, negative ones as incoming
        capture_duration_does_nothing: unused
        unique_from, unique_to (int): packet lengths in [unique_from, unique_to)
            get one counter feature each

    Returns:
        dict: feature name -> value. The insertion order is stable, so the
        values can be used as a feature vector: min/mean/max/count/std/kurtosis
        for each group of measurements, followed by one 'unique_lengths_<n>'
        counter per length.

    NOTE: an empty group is replaced by the placeholder [-1], hence its
    'count_' feature is 1 and not 0.
    """
    xs = xy['xs']
    ys = xy['ys']
    f = dict()

    def take(arr, n=30):
        # first n elements (the slice used to be hard-coded to 30, ignoring n)
        if len(arr) > n:
            return arr[:n]
        return arr

    def stats(key, data):
        if len(data) == 0:
            data = [-1]
        f['min_'+key] = np.min(data)
        f['mean_'+key] = np.mean(data)
        f['max_'+key] = np.max(data)
        f['count_'+key] = len(data)
        f['std_'+key] = np.std(data)
        f['kurtosis_'+key] = kurtosis(data)

    # general statistics
    stats("non_null", [abs(y) for y in ys if y != 0])
    stats("outgoing", [abs(y) for y in ys if y > 0])
    stats("incoming", [abs(y) for y in ys if y < 0])
    stats("outgoing_30", [abs(y) for y in take(ys) if y > 0])
    stats("incoming_30", [abs(y) for y in take(ys) if y < 0])

    # statistics about timings: time elapsed between consecutive packets
    x_deltas = [later - earlier for earlier, later in zip(xs, xs[1:])]

    stats("x_deltas", x_deltas)
    stats("x_deltas_30", take(x_deltas))

    # unique packet lengths [Liberatore and Levine; Herrmann et al.]
    lengths = {str(i): 0 for i in range(unique_from, unique_to)}
    for y in ys:
        length_key = str(abs(y))
        if length_key in lengths:
            lengths[length_key] += 1

    stats("unique_lengths", list(lengths.values()))
    for length_key, count in lengths.items():
        f['unique_lengths_' + length_key] = count

    return f

def get_all_actions(events):
    """Return the set of "app_action" labels found on any device of the dataset."""
    return {app + "_" + act
            for apps in events.values()
            for app, actions in apps.items()
            for act in actions}

<a id="evaluate"></a>
### Evaluate
[Table of Content](#tc)


In [3]:
def evaluate(DATA_PATH, DISCARDED_ACTION=[], TO_MERGE=[], EQUALIZATION=True,
             REBUILD=False, TEST_PERCENTAGE=0.25, MINIMUM_PAYLOAD=200, SHUFFLE=False,
             RATIO=0.25, N_SPLITS=50, N_ESTIMATOR=100, SEPARATE_WATCH=False,
             PRINT_COUNT=False, PLOT_DIR="./plots/", RETURN_PRED=False, RETURN_FILTIRED=False,
             RETURN_EQUILIBRATE_EVENTS=False, RETURN_FEATURES_AND_LABELS=False):
    """
    Run the per-action capture pipeline: import, filter, equalize, extract
    features, cross-validate a random forest and plot its confusion matrix.

    Parameters:
        DATA_PATH (list of str): folders holding the captures
        DISCARDED_ACTION (list of str): source files containing one of these substrings are ignored
        TO_MERGE (list of list of str): groups of "app_action" labels to merge (see merge_actions_in_app)
        EQUALIZATION (bool): keep the same number of samples for every class
        REBUILD (bool): rebuild the datasets from the source files first
        TEST_PERCENTAGE (float): share of the data held out for testing, used by
            both the cross-validation and the confusion-matrix split (it was
            previously only reported in the plot title while 0.25 was hard-coded)
        MINIMUM_PAYLOAD, RATIO: forwarded to filter_by_length
        SHUFFLE: unused
        N_SPLITS (int): number of random splits of the cross-validation
        N_ESTIMATOR (int): number of trees of the random forest
        SEPARATE_WATCH (bool): apply separate_watch to the events before building the features
        PRINT_COUNT (bool): print the number of events per class
        PLOT_DIR (str): forwarded to plot_confusion_matrix

    Returns, depending on the first flag that applies (checked in this order):
        RETURN_FILTIRED: the filtered events, before equalization
        RETURN_EQUILIBRATE_EVENTS: the equalized events (honoured only when EQUALIZATION is True)
        RETURN_FEATURES_AND_LABELS: (X, y)
        RETURN_PRED: (y_test, y_pred)
        otherwise None, after saving the confusion matrix plot
    """
    print("\nimporting data...")
    sources_files = find_sources(DATA_PATH)

    if len(DISCARDED_ACTION) != 0:
        print("withdraw action to be discarded")
        sources_files = discard_actions(sources_files, DISCARDED_ACTION)

    if REBUILD:
        print("rebuilding dataset")
        rebuild_all_datasets(sources_files)

    events, counts = cut_all_datasets_in_events(sources_files)

    if len(TO_MERGE) != 0:
        print("merging events")
        events = merge_actions_in_app(events, TO_MERGE)

    print("filtering app that does not send traffic by their length")
    filtered_events = filter_by_length(events, minimum_payload=MINIMUM_PAYLOAD, ratio_app_not_satisfing_minimum_payload_length=RATIO)

    if RETURN_FILTIRED:
        return filtered_events

    if PRINT_COUNT:
        print("\nclass event count")
        count_print(filtered_events)
        print()

    nb_samples_per_cat = "not uniform"
    if EQUALIZATION:
        print("dataset equalization per class")
        filtered_events, nb_samples_per_cat = equilibrate_events_across_apps_and_watch(filtered_events)

        if PRINT_COUNT:
            print("\nclass event count after equalization")
            count_print(filtered_events)
            print()
        if RETURN_EQUILIBRATE_EVENTS:
            return filtered_events

    if SEPARATE_WATCH:
        print("separate watch action")
        filtered_events = separate_watch(filtered_events)
        if PRINT_COUNT:
            print()
            count_print(filtered_events)

    print("building features and labels")
    X, y = build_features_labels_dataset(filtered_events)
    if RETURN_FEATURES_AND_LABELS:
        return X, y

    # ## Accuracy with cross validation

    print("building and training the model for cross validation ")
    clf = RandomForestClassifier(n_estimators=N_ESTIMATOR, random_state=None)
    shuf_split = ShuffleSplit(n_splits=N_SPLITS, test_size=TEST_PERCENTAGE, random_state=None)
    scores_shuffle = cross_val_score(clf, X, y, cv=shuf_split)
    print("Random split cross-validation: Accuracy=%0.3f (+/- %0.2f). " % (scores_shuffle.mean(), scores_shuffle.std() * 2))
    eval_metric = "cross-val radomSplit={} accRs={:.1f} +-{:.1f}% 95% conf interval".format(N_SPLITS, scores_shuffle.mean() *100, scores_shuffle.std() * 2 * 100)

    # ### Plotting the confusion matrix

    print("building and training a model for confusion matrix")
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=TEST_PERCENTAGE, random_state=None)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    if RETURN_PRED:
        return y_test, y_pred
    accuracy = metrics.accuracy_score(y_test, y_pred)
    print("accuracy = ", accuracy)

    title = "Confusion matrix for {}acc={:0.2f} ".format("and ".join([f.replace("/", "_") for f in DATA_PATH]), accuracy * 100)
    title += eval_metric
    title += "test={}% minimum_payload={}B nb_samples={}".format(int(TEST_PERCENTAGE * 100), MINIMUM_PAYLOAD, nb_samples_per_cat)
    # the title doubles as file name once dots and spaces are replaced
    saved_title = title.replace(".", "_").replace(" ", "_")
    cm, _, _ = plot_confusion_matrix(y_test, y_pred, title= title, save = saved_title, PLOT_DIR=PLOT_DIR)

    print("done")

<a id='build-per-action-capture-model'></a>
## Preparation

### Build per action capture model
[Table of Content](#tc)

In [4]:
# Choose the data to be included in the model (per-action capture folders)
DATA_PATH = ["data/huawei/Endomondo-1/", "data/huawei/force-stop/", "data/huawei/DiabetesM-2/",
             "data/huawei/DiabetesM-3/", "data/huawei/DiabetesM-4/",
             "data/huawei/FoursquareCityGuide-1/", "data/huawei/HealthyRecipes-1/",
             "data/huawei/Lifesum-1/", "data/huawei/Playstore-1/", "data/huawei/open-6/"]

# Extract features: evaluate() stops right after building X and y
# (default settings, i.e. filtered and equalized dataset)
X, y = evaluate(DATA_PATH, RETURN_FEATURES_AND_LABELS=True)

# Create and train the model on the whole feature set (no held-out split here)
clf=RandomForestClassifier(n_estimators=200, random_state=None)
clf.fit(X, y)

# Get the set of all classes the classifier knows about.
# This second evaluate() call re-imports the data and returns the filtered
# events as they are before equalization.
events = evaluate(DATA_PATH, RETURN_FILTIRED=True)
all_actions = get_all_actions(events)


importing data...
filtering app that does not send traffic by their length
dataset equalization per class
building features and labels

importing data...
filtering app that does not send traffic by their length


<a id='convert-longrun'></a>
### Import data, ground-truth and checked ground-truth
[Table of Content](#tc)


- ***Data*** refers to the actual longrun captures 

- ***Ground-truth*** refers to the metadata that comes automatically with the longrun capture. It is a log file and contains information about which and when an action is performed since the capture's launch

- ***Checked ground-truth*** refers to the manually checked ground-truth. It is used to measure the discrepancy between the automated and the manually checked ground-truth. This discrepancy is due to the delay between sending the command to perform an action and the actual traffic generated by this action.


In [511]:
def filter_out_path(fs, filter_out_device = False):
    """
    Strip the directories from a path, keeping what follows the last '/'.

    With filter_out_device=True, only what follows the last '_' of that name
    is kept.
    """
    name = fs.rsplit('/', 1)[-1]
    if filter_out_device:
        name = name.rsplit('_', 1)[-1]
    return name

# Finds datasets recursively
def find_sources(folders='./'):
    """
    List the .csv files of the given folder(s), sorted by path.

    `folders` is a folder prefix or a list of them. Each prefix is globbed
    with '<prefix>*.csv'; files whose path contains one of the
    SOURCES_FILE_IGNORE patterns are skipped and every './' is removed from
    the returned paths.
    NOTE(review): the pattern contains no '**', so recursive=True presumably
    has no effect - confirm whether sub-folders are expected to be scanned.
    """
    if type(folders) is not list:
        folders = [folders]

    sources_files = []
    for folder in folders:
        for file in glob.glob(folder + '*.csv', recursive=True):
            if any(ignore_pattern in file for ignore_pattern in SOURCES_FILE_IGNORE):
                continue
            sources_files.append(file.replace('./', ''))

    sources_files.sort()
    return sources_files

def extract_fname(f):
    """
    Return the file name of path `f` without directories, cut at its last '.'.

    When `f` contains no '.', its last character is dropped instead
    (rfind returns -1).
    """
    last_dot = f.rfind(".")
    without_extension = f[:last_dot]
    return filter_out_path(without_extension)

def intersection_fname(files1, files2, print_missing=True):
    """
    Return the sorted list of file names (without path and extension) that
    appear in both files1 and files2.

    When print_missing is True, a warning is printed for every name found in
    only one of the two lists, naming the extension of the missing companion
    (extensions are read from the first file of each list). If either list is
    empty, a warning is printed and [] is returned.
    """
    if 0 in (len(files1), len(files2)):
        print("WARNING: one or both dir. are empty")
        return []

    first1, first2 = files1[0], files2[0]
    extension1 = first1[first1.rfind("."):]
    extension2 = first2[first2.rfind("."):]

    names1 = {extract_fname(path) for path in files1}
    names2 = {extract_fname(path) for path in files2}

    complete_files = names1 & names2

    # print Missing files
    if print_missing:
        for orphan in sorted((names1 | names2) - complete_files):
            if orphan in names1:
                print("WARNING: {} - {} companion missing".format(orphan, extension2))
            if orphan in names2:
                print("WARNING: {} - {} companion missing".format(orphan, extension1))
    return sorted(complete_files)

# Locations of the long-run captures (.csv), of their automatically generated
# ground-truth logs and of the manually checked ground-truth logs
DATA_PATH = "./data/huawei/longrun/deterministic-15min/"
GROUND_TRUTH_PATH = "./data/huawei/longrun/deterministic-15min/ground-truth/"
CHECKED_GT = "./data/huawei/longrun/deterministic-15min/checked-gt/"

# Sorted content of each directory
data_path_content = sorted(glob.glob(DATA_PATH + '*.csv', recursive=True))
gt_path_content = sorted(glob.glob(GROUND_TRUTH_PATH + '*.log', recursive=True))
checked_gt_path_content = sorted(glob.glob(CHECKED_GT + '*.log', recursive=True))


# All data that can be used must have their .log companion (in ground-truth);
# a warning is printed for every capture or log without companion
ready_dataset = intersection_fname(data_path_content, gt_path_content)
# Among the usable captures, the ones that also have a manually checked log
# (in checked-gt); missing companions are not reported here
checked_files = intersection_fname([f + ".log" for f in ready_dataset], checked_gt_path_content, print_missing=False)

# All data that can be used to evaluate the model (capture path and ground-truth path, same order)
path_files_data = [DATA_PATH + f + ".csv" for f in ready_dataset]
path_files_ground_truth = [GROUND_TRUTH_PATH + f + ".log" for f in ready_dataset]

### Build ground_truth
#### Estimating delay between groundtruth and csv file (boundaries)

In [327]:
def read_longrun_log_file(longrunLogFile, all_action=None):
    """
    Args: 
        longrunLogFile : File log name (including path)
        all_action : actions used to train the model (needed to filter out actions not part of the training set);
                     None keeps every action
    
    Return
        [[time, action],] : filtered and clean version of the content of a logfile.
        The first line (header) is skipped. Blank lines and lines with fewer than
        two fields are ignored, and an empty file yields [] (both cases used to raise).
    """
    out = []
    with open(longrunLogFile, "r") as file:
        csv_reader = csv.reader(file, delimiter=',', skipinitialspace=True)
        next(csv_reader, None)  # skip the header; tolerate an empty file
        for line in csv_reader:
            if len(line) < 2:
                # blank or truncated line: there is no action to read
                continue
            if all_action is None or line[1] in all_action:
                out.append(line)
    return out
            
def read_longrun_log_files(longrunLogFiles, all_action=None):
    """
    Read several long-run log files at once.

    Args: 
        longrunLogFiles : a list of files log names (including path)
        all_action : actions used to train the model (needed to filter out actions not part of the training set)
    
    Return
        dict[filename] = [[time, action],] : content of each log file as returned by
        read_longrun_log_file, keyed by the file name without path and extension
    """
    return {
        extract_fname(log_path): read_longrun_log_file(log_path, all_action)
        for log_path in longrunLogFiles
    }

In [328]:

# Paths restricted to the captures that have a manually checked ground-truth
# NOTE(review): the data path is built with a ".log" extension although the
# captures are .csv files; the variable is not used in the visible code - confirm
path_files_data_boundaries = [DATA_PATH + f + ".log" for f in checked_files]
path_files_ground_truth_boundaries = [GROUND_TRUTH_PATH + f + ".log" for f in checked_files]
path_files_checked_boundaries = [CHECKED_GT + f + ".log" for f in checked_files]

# Log contents keyed by capture name, restricted to the actions known by the model
content_checked_boundaries = read_longrun_log_files(path_files_checked_boundaries, all_actions)
content_ground_truth_boundaries = read_longrun_log_files(path_files_ground_truth_boundaries, all_actions)

In [329]:
def record_diff_checked_gt(checkeds, gts, all_actions):
    """
    Aggregate and filter the recordings timing difference between checked and ground-truth accross all files 

    Args:
        checkeds : [[time, action],] rows of a manually checked log; the time is "None"
                   (or empty) when no checked time is available
        gts : [[time, action],] rows of the matching ground-truth log, paired with
              `checkeds` by position
        all_actions : actions to take into account

    Return
        [float,] : checked time minus ground-truth time, for every pair whose
        checked time is available and whose ground-truth action is in all_actions
    """
    recordings = []
    for checked, gt in zip(checkeds, gts):
        action = gt[1]
        if action is None:
            print("ERROR: Parsing failure")
            break
        # The checked time comes from a log file: compare its text instead of
        # eval()-ing it, so that the file content is never executed.
        checked_time = str(checked[0]).strip()
        if checked_time in ("", "None") or action not in all_actions:
            continue

        recordings.append(float(checked_time) - float(gt[0]))  # delay between the logged and the observed launch
    return recordings


# Collect, over all manually checked captures, the delay between the time
# logged automatically and the manually checked time of each action
recordings = []

for capt in content_checked_boundaries:
    checkeds = content_checked_boundaries[capt]
    gts = content_ground_truth_boundaries[capt]
    # rows of both logs are paired by position
    # NOTE(review): both lists were filtered by action independently - presumably
    # they stay aligned; confirm
    recordings += record_diff_checked_gt(checkeds, gts, all_actions)
rec = np.array(recordings)

print("Discrepency between automate and manually checked gournd-truth for action launch time differece:\n")
print("   mean_diff = {}\n   max_diff={}\n   min_diff={}\n".format(np.mean(rec), np.max(rec), np.min(rec)))
# The extreme observed delays, widened by an optional margin, become the bounds
# added to every ground-truth time
margin_timing_difference = 0
upper = np.max(np.array(recordings)) + margin_timing_difference
lower =  np.min(np.array(recordings)) - margin_timing_difference
print(" adapting correct new bound with lower = {}, upper = {}".format(lower, upper))

Discrepency between automate and manually checked gournd-truth for action launch time differece:

   mean_diff = 5.078723404255315
   max_diff=16.5
   min_diff=0.39999999999997726

 adapting correct new bound with lower = 0.39999999999997726, upper = 16.5


In [330]:

def make_bound_for_timing_in_action(content_ground_truth, lower, upper):
    """
    Replace every logged action time by a time interval.

    Args:
        content_ground_truth : dict[filename] = [[time, action],]
        lower, upper : offsets added to the logged time to get both ends of the interval

    Return
        dict[filename] = [(time + lower, time + upper, action),]
    """
    return {
        capture: [(float(logged_time) + lower, float(logged_time) + upper, action)
                  for logged_time, action in entries]
        for capture, entries in content_ground_truth.items()
    }
# Bounded ground truth of the captures that have a manually checked log
bounded_gt = make_bound_for_timing_in_action(content_ground_truth_boundaries, lower, upper)

In [331]:
def find_critical_point(time_serie):
    """
    Find the critical points of a capture: the instants from which a
    noticeable amount of payload is exchanged.

    Args:
        time_serie : {'xs': [time of each packet, in seconds], 'ys': [signed payload length of each packet]}

    Return
        numpy array of float (seconds): for every 5-second bin, the time of the
        first packet that is followed (itself included) by strictly more than
        200 bytes of payload within 20 seconds. Bins without such a packet are
        skipped; the array is empty when there is none at all.

    Fixes over the previous version: the result is now returned (the function
    used to return None), and times are converted with total_seconds()
    (the microseconds used to be appended without zero padding, turning
    1.005 s into 1.5 s).
    """
    ts = pd.Series(data = time_serie['ys'], index = pd.to_timedelta(time_serie["xs"], 's'))

    # the direction is irrelevant here; drop the packets without payload
    ts = ts.map(abs)[ts != 0] #[ts > 26]
    if ts.empty:
        return np.array([])

    # Forward-looking sum: rolling over the reversed serie makes the 20 s
    # window cover the packets that FOLLOW each packet
    stw = ts[::-1].rolling("20s").sum()[::-1]
    stw = stw[stw > 200]  # filter out minimum 200B payload (banned app)
    if stw.empty:
        return np.array([])

    # 5 seconds jump: keep the first remaining timestamp of every 5 s bin
    first_in_bin = stw.index.to_series().resample('5s').first().dropna()
    return first_in_bin.dt.total_seconds().values

In [332]:
# Ground truth of every usable capture, restricted to the actions known by the
# model, then turned into (lower bound, upper bound, action) tuples
content_ground_truth = read_longrun_log_files(path_files_ground_truth, all_actions)
bounded_gt = make_bound_for_timing_in_action(content_ground_truth, lower, upper)

<a id='stw'></a>
## Sliding Time Window
[table of content](#tc)

Advantage: does not need supervision for the data

Idea to improve: take the NoApp/NoAction captures > 200KB and train a model to predict noise

Preprocessing: Remove
for each 5


### Helpers

## Data importation
<a id='stw'></a>

In [500]:
# longrunCaptFile = "./data/huawei/longrun/deterministic-15min/longrun_deterministic_20-04-03_02-42-49.csv"
# gt_f = "./data/huawei/longrun/deterministic-15min/ground-truth/longrun_deterministic_20-04-03_02-42-49.log"

# Manual switch between two captures (True selects the 02-42-49 capture, False the 02-59-25 one)
correct = False

if not correct:

    longrunCaptFile = "./data/huawei/longrun/deterministic-15min/longrun_deterministic_20-04-03_02-59-25.csv"
    gt_f = "./data/huawei/longrun/deterministic-15min/ground-truth/longrun_deterministic_20-04-03_02-59-25.log"
else:
    longrunCaptFile = "./data/huawei/longrun/deterministic-15min/longrun_deterministic_20-04-03_02-42-49.csv"
    gt_f = "./data/huawei/longrun/deterministic-15min/ground-truth/longrun_deterministic_20-04-03_02-42-49.log"



# Load the ground truth and turn the capture into a time serie of signed payload lengths
gt = read_longrun_log_file(gt_f, all_actions)
columns = extract_columns(longrunCaptFile)
packets = read_file(longrunCaptFile, columns)
packets = packet_store_cleanup(packets)
time_serie = packets_to_timesize_tuples(packets)


# Find critical points: packets followed by more than 200 bytes of payload within 20 seconds
ts = pd.Series(data = time_serie['ys'], index = pd.to_timedelta(time_serie["xs"], 'sec'))

# Use absolute lengths and filter out the packets with no payload
# (the stricter "> 26" filter is left disabled)
ts = ts.map(abs)[ts != 0] #[ts > 26]


def extract_indexes_in_groups(x):
    """Return the index labels of the group `x` as a plain list."""
    labels = x.index
    return labels.to_list()

def time_delta_to_float(td):
    """
    Convert the first time delta of `td` to a number of seconds.

    Args:
        td : sequence of time deltas (objects exposing total_seconds())

    Return
        float : total duration of td[0] in seconds, or None when `td` is empty

    The value used to be rebuilt from str(seconds) + "." + str(microseconds),
    which dropped the leading zeros of the microseconds (1.005 s became 1.5 s)
    and ignored whole days.
    """
    if len(td) == 0:
        return None
    return td[0].total_seconds()

# Compute, for every packet, the payload sum over the 20 seconds ahead of it:
# rolling over the reversed serie makes the window look forward in time

stw = ts[::-1].rolling("20s").sum()[::-1]
stw = stw[stw > 200]  # filter out minimum 200B payload (banned app)


# Group the remaining timestamps into 5-second bins
stw = stw.resample('5s').apply(extract_indexes_in_groups)  # 5 seconds jump


# Keep the first timestamp of every non-empty bin, converted to float
critical_points = stw.map(time_delta_to_float).dropna().values

In [501]:
# Display the critical points computed for the selected capture
critical_points

array([ 22.714694,  59.465249,  62.720255,  67.715268,  72.965279,
        80.465277,  83.104035,  87.96466 ,  97.714643, 108.214624,
       122.464598, 142.715186, 149.464558, 153.964565, 157.717074,
       163.714574, 167.768335, 174.214577, 177.917078, 184.71457 ,
       188.464562, 195.964549, 204.964533, 213.964516, 224.464496,
       229.715111, 232.974494, 239.464481, 243.214476, 249.215089,
       306.214995, 307.902501, 330.964968, 333.34974 , 387.964867,
       393.964247, 401.465484, 405.214226, 408.214845, 413.46422 ,
       418.572346, 423.964223, 446.464179, 450.214797, 470.464133,
       472.714765, 477.964128, 483.214118, 491.464726, 493.47231 ,
       497.717244, 515.467228, 517.715985, 574.717125, 579.214001,
       584.464618, 588.158379, 595.713989, 610.713958, 656.464493,
       658.566999, 672.963849, 682.71383 , 687.963818, 709.7144  ,
       713.463776, 740.464347, 743.715606, 762.214321, 765.21369 ,
       778.713662, 789.21364 , 799.713618, 826.713566, 828.232

In [510]:
# NOTE: longrunCaptFile is overwritten by the loop below and gt_f is not used in this cell
longrunCaptFile = "./data/huawei/longrun/longrun_deterministic_20-04-03_04-23-24.csv"
gt_f = "./data/huawei/longrun/ground-truth/longrun_deterministic_20-04-03_04-23-24.log"


# Time serie of every long-run capture, keyed by capture name (file name without path and extension)
captures = dict()
for longrunCaptFile in data_path_content:
    
    columns = extract_columns(longrunCaptFile)
    packets = read_file(longrunCaptFile, columns)
    packets = packet_store_cleanup(packets)
    time_serie = packets_to_timesize_tuples(packets)
    captures[extract_fname(longrunCaptFile)] = time_serie

<a id='implementation_stw'></a>
## Implementation
[Table of Content](#tc)

In [535]:
# Run the prediction on every capture; results are keyed by capture name.
# NOTE: predict() is defined in a cell located further down in this notebook,
# which must have been executed before this one.
predicted = dict()
for capture in captures:
    print(capture)
    predicted[capture] = predict(captures[capture])


longrun_deterministic_20-04-03_02-42-49
longrun_deterministic_20-04-03_02-59-25
longrun_deterministic_20-04-03_03-16-03
longrun_deterministic_20-04-03_03-32-47
longrun_deterministic_20-04-03_03-49-15
longrun_deterministic_20-04-03_04-06-24
longrun_deterministic_20-04-03_04-23-24
longrun_deterministic_20-04-03_04-39-20
longrun_deterministic_20-04-03_04-55-47
longrun_deterministic_20-04-03_05-12-52
longrun_deterministic_20-04-03_05-29-28
longrun_deterministic_20-04-03_05-46-07
longrun_deterministic_20-04-03_06-03-24
longrun_deterministic_20-04-03_06-20-26
longrun_deterministic_20-04-03_06-37-03
longrun_deterministic_20-04-03_06-53-52
longrun_deterministic_20-04-03_07-10-45
longrun_deterministic_20-04-03_07-27-29
longrun_deterministic_20-04-03_07-44-02
longrun_deterministic_20-04-03_08-00-50
longrun_deterministic_20-04-03_10-59-20
longrun_deterministic_20-04-03_11-16-04
longrun_deterministic_20-04-03_11-33-07
longrun_deterministic_20-04-03_11-49-30
longrun_deterministic_20-04-03_12-06-04


In [532]:
def find_critical_point(time_serie, window="20s", min_payload=200, step="5s"):
    """
    Find the critical points of a capture: the times at which enough traffic
    follows to be worth classifying.

    time_serie  : dict with "xs" (packet times, in seconds) and "ys" (packet
                  lengths; the sign is discarded, zero-length packets are ignored)
    window      : length of the time window over which payload is summed
    min_payload : a point is kept only if its window sum is strictly above this value
    step        : width of the time bins; at most one critical point is kept per bin

    return a numpy array of times (float seconds), in increasing order
    """
    ts = pd.Series(
        data=time_serie["ys"],
        index=pd.to_timedelta(time_serie["xs"], unit="s"),
    )

    # Keep only packets that carry a payload, using the magnitude of the length.
    ts = ts.map(abs)[ts != 0]

    # Sum of the payload over `window`. The series is reversed before rolling and
    # restored afterwards so that the window is meant to look ahead of each packet.
    stw = ts[::-1].rolling(window).sum()[::-1]
    stw = stw[stw > min_payload]
    if stw.empty:
        return np.array([], dtype=float)

    # Keep the first qualifying timestamp of every `step`-wide bin; empty bins
    # produce NaT and are dropped.
    first_in_bin = stw.index.to_series().resample(step).first().dropna()

    # total_seconds() keeps the full sub-second precision. Gluing the seconds and
    # microseconds fields together as text loses the leading zeros of the
    # fraction (1.05 s would be read back as 1.5).
    return first_in_bin.dt.total_seconds().to_numpy()



def find_action_end(xs_capt, ys_capt, FILTER_LENGTH_LIMIT=46 ,INTER_TIMER_EVENT_CUTOFF=5):
    """
    return the potential end time of the action that begins at the first
    element of xs_capt

    xs_capt : time elements
    ys_capt : length elements (same order as xs_capt)
    FILTER_LENGTH_LIMIT : packets whose absolute length is <= this value are ignored
    INTER_TIMER_EVENT_CUTOFF : a gap strictly larger than this between two
        consecutive kept packets marks the end of the action

    return the time (an element of xs_capt) of the last kept packet before the
    first such gap. If there is no such gap, including when no packet passes
    the length filter, the last time of xs_capt is returned. Returns None only
    when xs_capt is empty.
    """
    if len(xs_capt) == 0:
        return None

    significant = [x for x, y in zip(xs_capt, ys_capt) if abs(y) > FILTER_LENGTH_LIMIT]

    for current, following in zip(significant, significant[1:]):
        if following - current > INTER_TIMER_EVENT_CUTOFF:
            return current

    # No gap found (possibly no significant packet at all): the action is
    # considered to last until the end of the capture.
    return xs_capt[-1]
        
def find_x_indices(xs_capt, j, xs_end):
    """
    return the indice, counted from j, of the first element of xs_capt[j:]
    that is >= xs_end (None when there is no such element)
    """
    tail = xs_capt[j:]
    return next(
        (j + offset for offset, stamp in enumerate(tail) if stamp >= xs_end),
        None,
    )



def predict(time_serie):
    """
    Segment a long-run capture into actions and classify each of them.

    The capture is scanned in time order. When a packet lies after the current
    critical point and after the end of the previously detected action, an
    action is delimited with find_action_end(), its features are extracted
    with extract_features() and the label is predicted with the module-level
    classifier `clf`.

    time_serie : dict with "xs" (packet times) and "ys" (packet lengths)

    return a list of tuples (xs_start, xs_end, label), in time order
    """
    critical_points = find_critical_point(time_serie)
    xs_capt = time_serie["xs"]
    ys_capt = time_serie["ys"]

    predicted = []  # list of (xs_start, xs_end, label)
    n_critical = len(critical_points)
    if n_critical == 0 or len(xs_capt) == 0:
        # nothing in the capture is worth classifying
        return predicted

    critical_points_i = 0
    xs_end = -1

    for i, current_xs in enumerate(xs_capt):
        critical_point = critical_points[critical_points_i]
        if current_xs > critical_point and current_xs > xs_end:

            # Take the previous packet too since we are one step further; never
            # go below 0, a negative indice would slice from the end of the capture.
            j = max(i - 1, 0)

            # NOTE(review): the reported start is the time of packet i while the
            # classified window starts at packet j - confirm this is intended.
            xs_start = current_xs
            xs_end = find_action_end(xs_capt[i:], ys_capt[i:])
            if xs_end is None:
                # no usable end was found: the action lasts until the end of the capture
                xs_end = xs_capt[-1]
            end_indice = find_x_indices(xs_capt, j, xs_end)

            xy = {
                "xs": xs_capt[j:end_indice + 1],
                "ys": ys_capt[j:end_indice + 1],
            }
            features = list(extract_features(xy).values())
            y = clf.predict(np.array(features).reshape(1, -1))
            predicted.append((xs_start, xs_end, y[0]))

            # Skip the critical points already covered by this action.
            while critical_points_i < n_critical and critical_points[critical_points_i] < xs_end:
                critical_points_i += 1

        # Stop when every critical point is consumed or the capture end is reached.
        if critical_points_i == n_critical or xs_end == xs_capt[-1]:
            break
    return predicted


In [534]:
predict(captures["longrun_deterministic_20-04-03_11-16-04"])

[(42.611644875, 56.7222565, 'Outlook_open'),
 (58.536010375, 65.8197485, 'Translate_open'),
 (67.709129375, 144.62143025, 'SalatTime_open'),
 (148.3614295, 167.175752, 'Citymapper_open'),
 (167.857003375, 179.1219935, 'Telegram_force-stop'),
 (182.86074325, 251.90429075, 'SalatTime_open'),
 (252.856170125, 258.806162875, 'Shazam_open'),
 (296.111065625, 310.3641705, 'WashPost_force-stop'),
 (312.611674, 347.16036525, 'Meduza_open'),
 (349.116618875, 357.247222875, 'Spotify_force-stop'),
 (358.25909875, 417.887052875, 'FoursquareCityGuide_force-stop'),
 (429.611402375, 437.706411875, 'FoursquareCityGuide_food'),
 (437.70953825, 444.61452975, 'WashPost_force-stop'),
 (448.360778625, 450.82452425, 'FoursquareCityGuide_force-stop'),
 (452.850151125, 510.0643555, 'MapMyRun_open'),
 (515.109974125, 528.648683875, 'Citymapper_open'),
 (533.109933125, 547.36364225, 'Spotify_force-stop'),
 (548.4686445, 590.194188125, 'FoursquareCityGuide_open'),
 (593.109813375, 610.28479375, 'FoursquareCityGu

In [288]:
def overlaps(a, b):
    """
    Tell whether the intervals a = (start, stop) and b = (start, stop) overlap.
    Intervals that only share an end point are considered overlapping.
    """
    if b[0] > a[1]:
        # b starts after a is over
        return False
    if a[0] > b[1]:
        # a starts after b is over
        return False
    return True

def overlap_length(a,b):
    """
    Length of the part shared by the intervals a = (start, stop) and
    b = (start, stop); 0 when they do not overlap.
    """
    if overlaps(a, b):
        shared_start = max(a[0], b[0])
        shared_stop = min(a[1], b[1])
        return shared_stop - shared_start
    return 0

def compute_eval_metric_v1(prediciton, ground_truth, print_details=True):
    """
    Compare predictions and ground truth by walking both lists in parallel.

    At each step the current prediction is compared with the current ground
    truth entry:
      - same action and overlapping time boundaries: correct prediction,
        both lists advance;
      - otherwise, if the prediction stops after the ground truth entry:
        the ground truth entry is missed and the ground truth list advances;
      - otherwise the prediction is wrong and the prediction list advances.
    When one list is exhausted, the remaining predictions are wrong and the
    remaining ground truth entries are missed.

    Args:
        prediction : [(start, stop, action),] list of predicition with time boundaries
        ground_truth : [(start, stop, action),] list of ground_turh associated with the prediction
        print_details : print every comparison and its outcome

    Return (correct_pred, wrong_pred, missed_pred): three lists of action names
    (true positives, false positives and false negatives).
    """
    correct_pred, wrong_pred, missed_pred = [], [], []
    i_pred, i_gt = 0, 0

    def add_wrong(action):
        wrong_pred.append(action)
        if print_details:
            print(action + " wrong prediction (fp + 1= ", len(wrong_pred), ")")

    def add_missed(action):
        missed_pred.append(action)
        if print_details:
            print(action, " missed prediction (fn + 1= ", len(missed_pred), ")")

    # Walk both lists while each still has an entry to compare. Bounding the
    # indices here (instead of clamping them) guarantees that the last entries
    # are compared too and that empty inputs are handled.
    while i_pred < len(prediciton) and i_gt < len(ground_truth):
        pred, gt = prediciton[i_pred], ground_truth[i_gt]
        start_pred, stop_pred, action_pred = pred[0], pred[1], pred[2]
        start_gt, stop_gt, action_gt = gt[0], gt[1], gt[2]

        if print_details:
            print("\nChecking : \npred: ", action_pred, "\n gt : ", action_gt)

        if overlaps((start_pred, stop_pred), (start_gt, stop_gt)) and action_gt == action_pred:
            i_pred += 1
            i_gt += 1
            correct_pred.append(action_gt)
            if print_details:
                print(action_gt + " correct (tp + 1 = ", len(correct_pred), ")")
        elif stop_pred > stop_gt:
            # the ground truth entry ends first without being matched
            i_gt += 1
            add_missed(action_gt)
        else:
            # the prediction ends first (or at the same time) without matching
            i_pred += 1
            add_wrong(action_pred)

    # Whatever is left on either side has no counterpart.
    for pred in prediciton[i_pred:]:
        add_wrong(pred[2])
    for gt in ground_truth[i_gt:]:
        add_missed(gt[2])

    return correct_pred, wrong_pred, missed_pred


def compute_eval_metric_v2(prediciton, ground_truth, print_details=True):
    """
    Match every ground truth entry with the best available prediction.

    A prediction is a candidate for a ground truth entry when both carry the
    same action and their time boundaries overlap (touching boundaries count).
    Candidates are tried from the largest overlap to the smallest, and the
    first one not already credited to another ground truth entry is counted as
    correct. A ground truth entry without any free candidate is missed.
    Predictions never credited are wrong.

    Args:
        prediction : [(start, stop, action),] list of predicition with time boundaries
        ground_truth : [(start, stop, action),] list of ground_turh associated with the prediction
        print_details : print the outcome of every entry

    Return (correct_pred, wrong_pred, missed_gt):
        correct_pred : predictions credited as true positives
        wrong_pred   : predictions counted as false positives
        missed_gt    : ground truth entries counted as false negatives
    """
    correct_pred, wrong_pred, missed_gt = [], [], []
    used = set()  # indices of the predictions already credited

    for gt in ground_truth:
        start_gt, stop_gt, action_gt = gt

        candidates = []  # (overlap length, indice in prediciton)
        for idx, pred in enumerate(prediciton):
            start_pred, stop_pred, action_pred = pred
            if action_pred != action_gt:
                continue
            # Negative: disjoint intervals. Zero: they only touch, which
            # still counts as an overlap (same rule as overlaps()).
            shared = min(stop_pred, stop_gt) - max(start_pred, start_gt)
            if shared >= 0:
                candidates.append((shared, idx))

        # Largest overlap first; the sort is stable, so ties keep prediction order.
        candidates.sort(key=lambda item: -item[0])
        chosen = next((idx for _, idx in candidates if idx not in used), None)

        if chosen is None:
            missed_gt.append(gt)
            if print_details:
                print(gt[2], " missed prediction (fn + 1= ", len(missed_gt), ")")
            continue

        used.add(chosen)
        correct_pred.append(prediciton[chosen])
        if print_details:
            print(prediciton[chosen][2] + " correct pref (tp + 1 = ", len(correct_pred), ")")

    # compute wrong pred: every prediction that was never credited
    for idx, pred in enumerate(prediciton):
        if idx not in used:
            wrong_pred.append(pred)
            if print_details:
                print(pred[2] + " wrong prediction (fp + 1= ", len(wrong_pred), ")")

    return correct_pred, wrong_pred, missed_gt


In [295]:

    
# Detailed evaluation of a single capture. `predicted` is keyed by capture name,
# like `bounded_gt`, so the same key selects both sides of the comparison.
capture_name = 'longrun_deterministic_20-04-03_02-42-49'
(correct_pred, wrong_pred, missed_gt) = compute_eval_metric_v2(predicted[capture_name], bounded_gt[capture_name], print_details=True)

# fn is taken from the `missed_gt` list returned by the call above.
tp, fp, fn = len(correct_pred), len(wrong_pred), len(missed_gt)

# nan when the ratio is undefined (no prediction / no ground truth)
precision = tp / (tp + fp) if tp + fp else float("nan")
recall = tp / (tp + fn) if tp + fn else float("nan")
print("precision = {}, recall = {}".format(precision, recall))

Telegram_open correct pref (tp + 1 =  1 )
Telegram_force-stop  missed prediction (fn + 1=  1 )
FitWorkout_open  missed prediction (fn + 1=  2 )
SalatTime_open correct pref (tp + 1 =  2 )
MapMyRun_open correct pref (tp + 1 =  3 )
SalatTime_open correct pref (tp + 1 =  4 )
Shazam_open correct pref (tp + 1 =  5 )
Outlook_open correct pref (tp + 1 =  6 )
Translate_open correct pref (tp + 1 =  7 )
Translate_force-stop  missed prediction (fn + 1=  3 )
MapMyRun_open correct pref (tp + 1 =  8 )
Running_open correct pref (tp + 1 =  9 )
Running_force-stop  missed prediction (fn + 1=  4 )
Lifesum_open correct pref (tp + 1 =  10 )
Lifesum_addWater  missed prediction (fn + 1=  5 )
Lifesum_force-stop  missed prediction (fn + 1=  6 )
Weather_open wrong prediction (fp + 1=  1 )
FindMyPhone_force-stop wrong prediction (fp + 1=  2 )
SalatTime_open wrong prediction (fp + 1=  3 )
Maps_force-stop wrong prediction (fp + 1=  4 )
precision = 0.7142857142857143, recall = 0.625


In [560]:
from time import sleep  # not used in this cell; kept in case later cells rely on it

# Results accumulated over all captures, used afterwards for the global metrics.
all_wrong_pred = []
all_correct_pred = []
all_missed_gt = []
for capture in captures:
    print(capture)
    (correct_pred, wrong_pred, missed_gt) = compute_eval_metric_v2(predicted[capture] , bounded_gt[capture], print_details=False)
    all_wrong_pred += wrong_pred
    all_correct_pred += correct_pred
    all_missed_gt += missed_gt

    tp, fp, fn = len(correct_pred), len(wrong_pred), len(missed_gt)

    # A capture without any prediction (or without ground truth) must not abort
    # the whole evaluation: the undefined ratio is reported as nan.
    precision = tp / (tp + fp) if tp + fp else float("nan")
    recall = tp / (tp + fn) if tp + fn else float("nan")
    print("precision = {}, recall = {}".format(precision, recall))
    print()

    


longrun_deterministic_20-04-03_02-42-49
precision = 0.7142857142857143, recall = 0.625

longrun_deterministic_20-04-03_02-59-25
precision = 0.42105263157894735, recall = 0.47058823529411764

longrun_deterministic_20-04-03_03-16-03
precision = 0.6111111111111112, recall = 0.4583333333333333

longrun_deterministic_20-04-03_03-32-47
precision = 0.47619047619047616, recall = 0.5882352941176471

longrun_deterministic_20-04-03_03-49-15
precision = 0.4782608695652174, recall = 0.4782608695652174

longrun_deterministic_20-04-03_04-06-24
precision = 0.4090909090909091, recall = 0.4090909090909091

longrun_deterministic_20-04-03_04-23-24
precision = 0.030303030303030304, recall = 0.125

longrun_deterministic_20-04-03_04-39-20
precision = 0.35, recall = 0.3888888888888889

longrun_deterministic_20-04-03_04-55-47
precision = 0.43478260869565216, recall = 0.5

longrun_deterministic_20-04-03_05-12-52
precision = 0.5263157894736842, recall = 0.45454545454545453

longrun_deterministic_20-04-03_05-29-2



In [561]:

# Global precision / recall computed over the results accumulated for all captures.
tp, fp, fn = len(all_correct_pred), len(all_wrong_pred), len(all_missed_gt)

# nan when the ratio is undefined (nothing predicted / no ground truth at all)
precision = tp / (tp + fp) if tp + fp else float("nan")
recall = tp / (tp + fn) if tp + fn else float("nan")
print("precision = {}, recall = {}".format(precision, recall))
print()

precision = 0.4223057644110276, recall = 0.4821173104434907



In [562]:
predicted["longrun_deterministic_20-04-03_06-20-26"]

[(58.9801365, 61.483892875, 'Krone_open'),
 (64.2288965, 84.88763575, 'Mobilis_open'),
 (92.72887925, 144.35251175, 'Krone_open'),
 (150.47875825, 228.634216375, 'Outlook_open'),
 (230.641095, 312.2802905, 'DiabetesM_open'),
 (313.980918, 398.90512225, 'FitBreathe_open'),
 (400.26387575, 486.676846625, 'PlayStore_open'),
 (490.228723375, 519.59054425, 'PlayStore_deterministicBrowse'),
 (540.478639, 589.251690875, 'Spotify_open'),
 (598.977931125, 626.732250875, 'Spotify_force-stop'),
 (628.9803805, 669.962190875, 'PlayStore_open'),
 (687.479033125, 745.414532625, 'Outlook_open'),
 (829.2287225, 865.433154, 'Endomondo_open'),
 (874.2281455, 953.72797525, 'Fit_open')]

In [563]:
bounded_gt["longrun_deterministic_20-04-03_06-20-26"]

[(54.199999999999974, 70.3, 'Krone_open'),
 (82.79999999999998, 98.9, 'Krone_force-stop'),
 (139.29999999999998, 155.4, 'Krone_open'),
 (167.99999999999997, 184.1, 'Krone_force-stop'),
 (223.29999999999998, 239.4, 'Outlook_open'),
 (307.09999999999997, 323.2, 'DiabetesM_open'),
 (390.59999999999997, 406.7, 'FitWorkout_open'),
 (473.7, 489.8, 'PlayStore_open'),
 (480.2, 496.3, 'PlayStore_deterministicBrowse'),
 (574.5, 590.6, 'Spotify_open'),
 (603.6, 619.7, 'Spotify_force-stop'),
 (657.9, 674.0, 'PlayStore_open'),
 (740.1, 756.2, 'Outlook_open'),
 (822.5, 838.6, 'Endomondo_open'),
 (829.0, 845.1, 'Endomondo_browseMap'),
 (863.1, 879.2, 'Endomondo_force-stop'),
 (918.9, 935.0, 'Fit_open')]

#### Evaluation metrics: 

To evaluate the system, we first define the following quantities:

- TP: stands for True Positive. In our case, TP is the number of correct predictions, i.e. predictions that match a ground-truth action both in time and in label.
- FP: stands for False Positive. The number of wrong predictions (either because of the timing or because of the label).
- FN: stands for False Negative. The number of ground-truth labels that were missed.

***Precision*** :
   The Precision can be computed by the following formula:
   $$\frac{TP}{TP + FP}$$
   
   This quantity represents the fraction of predictions that are correct. A prediction counts as wrong either when there is no associated action at that time or when the action performed is not the one predicted.


 
***Recall***:
    The recall can be computed by the following formula:
    $$\frac{TP}{TP + FN}$$
    
   This quantity represents the fraction of actions performed on the smartwatch that are correctly classified (i.e. for which we have a correct match both in time and in label).
    

In [409]:
tp + fp 

14

In [410]:
tp + fn

15

In [406]:
fp

5

In [405]:
tp

10

In [404]:
fn

4

In [346]:
precision

0.6666666666666666