In [1]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn import neighbors, model_selection, metrics
from sklearn.inspection import DecisionBoundaryDisplay

In [83]:
# Folder holding the trace CSVs; read_data() globs the "*.csv" files in it.
path = "./data/traces/"
# Device lookup table; later cells select its 'device_name' and 'eth_src'
# columns to map MAC addresses to device names.
devices = pd.read_csv("./data/list_of_devices.csv")

def read_data(traces_dir=None):
    """Load every trace CSV in a folder into one combined dataframe.

    Args:
        traces_dir: Folder to scan for ``*.csv`` files. When omitted, the
            notebook-level ``path`` setting is used.

    Returns:
        One dataframe containing the rows of all files, stacked in file-name
        order, with a fresh 0..n-1 index.

    Raises:
        ValueError: If the folder contains no CSV files.
    """
    if traces_dir is None:
        traces_dir = path

    # glob returns matches in arbitrary order; sorting makes the row order of
    # the combined frame reproducible across runs and machines.
    csv_files = sorted(glob.glob(os.path.join(traces_dir, "*.csv")))

    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate".
    if not csv_files:
        raise ValueError(f"No CSV files found in {traces_dir!r}")

    # Read each file once and stack them in a single concat call
    frames = [pd.read_csv(csv_file) for csv_file in csv_files]
    return pd.concat(frames, ignore_index=True)

# Combine all trace CSVs into one frame.
# NOTE(review): the saved output for this cell is a KeyboardInterrupt --
# confirm the load finishes before running the cells that use `data`.
data =  read_data()

KeyboardInterrupt: 

In [75]:
def attach_window_id(timestamped_frame: pd.DataFrame) -> pd.DataFrame:
    """Index the traffic by timestamp and label each row with its 1-second window.

    The input frame is left untouched, so the cell can be re-run safely.

    Args:
        timestamped_frame: Traffic rows with a 'timestamp' column that
            ``pd.to_datetime`` can parse.
            NOTE(review): integer timestamps are interpreted as nanoseconds by
            default -- confirm that matches the unit used in the traces.

    Returns:
        A new dataframe indexed by the parsed 'timestamp', with an extra
        integer 'window_id' column: the number of whole seconds elapsed since
        the earliest timestamp in the frame.
    """
    # Build a new frame instead of mutating the caller's: the original in-place
    # set_index removed the 'timestamp' column, so a second call raised KeyError.
    indexed = timestamped_frame.assign(
        timestamp=pd.to_datetime(timestamped_frame['timestamp'])
    ).set_index('timestamp')

    # Measure from the earliest timestamp rather than the first row. Rows come
    # from concatenated files and need not be chronological; a later first row
    # would give negative offsets that truncate toward zero and merge two
    # seconds into window 0.
    elapsed = indexed.index - indexed.index.min()
    indexed['window_id'] = elapsed.total_seconds().astype(int)

    return indexed

# Index the combined traces by timestamp and tag each row with its window_id.
windowed_frame = attach_window_id(data)

Slicing done!
CPU times: total: 3.34 s
Wall time: 3.63 s


In [82]:
def mode_mean_med(traffic_frame: pd.DataFrame, output_path='stats.csv'):
    """Compute mean, median and mode of packet size per window and MAC address.

    Args:
        traffic_frame: Traffic rows with 'window_id', 'eth_src' and
            'packet_size' columns.
        output_path: Where to save the result as CSV. Pass ``None`` to skip
            writing. The default previously used the misspelled extension
            '.cvs'.

    Returns:
        A dataframe with one row per (window_id, eth_src) pair. The columns
        are two-level: the group keys first, then the mean, median and mode
        under 'packet_size'.
    """
    # Keep only the grouping keys and the measured value
    sizes = traffic_frame.loc[:, ['window_id', 'eth_src', 'packet_size']]

    # Series.mode() can return several values; .iat[0] keeps the first one
    # (presumably the smallest, since pandas sorts the modes -- confirm).
    grouped = (
        sizes.groupby(['window_id', 'eth_src'])
        .agg({'packet_size': ['mean', 'median', lambda x: x.mode().iat[0]]})
        .reset_index()  # turn window_id / eth_src back into ordinary columns
    )

    # Save the results
    if output_path is not None:
        grouped.to_csv(output_path, index=False)

    return grouped

# Packet-size mean/median/mode for every (window_id, eth_src) group.
# NOTE(review): the saved output for this cell is a KeyboardInterrupt; the
# per-group mode lambda is presumably the slow part -- confirm before re-running.
result = mode_mean_med(windowed_frame)


KeyboardInterrupt: 

In [None]:
def mean_sd_sum(traffic_frame: pd.DataFrame, output_path='train_stats.csv'):
    """Compute mean, standard deviation and sum of packet size per window and MAC.

    Args:
        traffic_frame: Traffic rows with 'window_id', 'eth_src' and
            'packet_size' columns.
        output_path: Where to save the result as CSV. Pass ``None`` to skip
            writing. The default previously used the misspelled extension
            '.cvs'.

    Returns:
        A dataframe with one row per (window_id, eth_src) pair. The columns
        are two-level: the group keys first, then 'mean', 'std' and 'sum'
        under 'packet_size'.
    """
    # Keep only the grouping keys and the measured value
    sizes = traffic_frame.loc[:, ['window_id', 'eth_src', 'packet_size']]

    # 'std' is the sample standard deviation, so a group with a single packet
    # gets NaN. NOTE(review): decide how to fill those before training.
    grouped = (
        sizes.groupby(['window_id', 'eth_src'])
        .agg({'packet_size': ['mean', 'std', 'sum']})
        .reset_index()  # turn window_id / eth_src back into ordinary columns
    )

    # Save the results
    if output_path is not None:
        grouped.to_csv(output_path, index=False)

    return grouped

In [81]:
%%time

def mode_mean_med_without_window(df: pd.DataFrame, devices: pd.DataFrame,
                                 output_path='stats_no_window.csv'):
    """Compute mean, median and mode of packet size per device over all traffic.

    Unlike the windowed statistics, every packet of a device falls into one
    group, regardless of when it was captured.

    Args:
        df: Traffic rows with 'eth_src' and 'packet_size' columns.
        devices: Device list with 'device_name' and 'eth_src' columns.
        output_path: Where to save the result as CSV. Pass ``None`` to skip
            writing. The default previously used the misspelled extension
            '.cvs'.

    Returns:
        A dataframe indexed by 'device_name' with two-level columns holding
        the mean, median and mode under 'packet_size'.
    """
    # Keep only the device name and mac address
    device_names = devices.loc[:, ['device_name', 'eth_src']]

    # Keep only the mac address and packet size
    packet_sizes = df.loc[:, ['eth_src', 'packet_size']]

    # Inner merge: packets whose MAC address is not in the device list are
    # dropped from the statistics.
    labelled = device_names.merge(packet_sizes, on='eth_src')

    # Series.mode() can return several values; .iat[0] keeps the first one.
    stats = labelled.groupby(['device_name']).agg(
        {'packet_size': ['mean', 'median', lambda x: x.mode().iat[0]]}
    )

    # Save the results. The device names live in the index, so the index must
    # be written; index=False produced a file with no device labels.
    if output_path is not None:
        stats.to_csv(output_path)

    print(stats)

    return stats

# Load the device list in this cell as well, so it does not rely on `devices`
# still being defined by the data-loading cell.
devices = pd.read_csv("./data/list_of_devices.csv")
# Per-device packet-size statistics over the whole capture (no windowing).
result_no_window = mode_mean_med_without_window(data, devices)


                                    packet_size                   
                                           mean  median <lambda_0>
device_name                                                       
Amazon Echo                          118.734701    75.0         66
Android Phone                        167.706915    66.0         66
Belkin Wemo switch                   415.264227   118.0         66
Belkin wemo motion sensor            113.394691    66.0         66
Blipcare Blood Pressure meter        108.393701    59.0         54
Dropcam                              206.533368   156.0        156
HP Printer                           168.309073    60.0        140
IPhone                               109.954947    66.0         54
Insteon Camera                       104.441343    90.0        102
Laptop                               111.325552    54.0         54
Light Bulbs LiFX Smart Bulb           96.446889    92.0        123
MacBook                              128.950355    66.0       

In [78]:
# n_neighbors = 5
#
# for metric in mmm_frame.columns[2:5]:
#     X = mmm_frame[metric].values
#     y = mmm_frame.iloc[:, -1].values
#     X = X.reshape(-1, 1)
#
#     X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.25)
#
#     for weights in ["uniform", "distance"]:
#         # we create an instance of Neighbours Classifier and fit the data.
#         clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
#         clf.fit(X_train, y_train)
#
#         accuracy = metrics.accuracy_score(y_test, clf.predict(X_test))
#         print("Accuracy of {} with {}: {}".format(metric, weights, accuracy))