In [1]:
# Install the two third-party packages imported by the next cell
# (psycopg2 and python-dotenv) into the environment of the running kernel.
%pip install psycopg2
%pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [14]:
import psycopg2
import os
from dotenv import load_dotenv
import numpy as np
import pandas as pd

%matplotlib inline

# Lift pandas' display limits so displayed frames are never truncated.
# NOTE(review): with both limits off, displaying a full frame prints every
# row; the cells below only display `.head()`.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# Directory that receives the CSV exports written by the cells below.
data_dir_path = "data"
# exist_ok=True makes this idempotent on re-runs and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(data_dir_path, exist_ok=True)

# Connect to DB

In [3]:
# Load settings via python-dotenv so the DB_* variables below can be read
# from the process environment.
load_dotenv()

# Every connection parameter comes from the environment; a variable that is
# not set is handed to psycopg2 as None.
conn = psycopg2.connect(
    host=os.environ.get("DB_HOST"),
    port=os.environ.get("DB_PORT"),
    database=os.environ.get("DB_NAME"),
    user=os.environ.get("DB_USER"),
    password=os.environ.get("DB_PASSWORD"),
)

# Single cursor shared by all query cells that follow.
cur = conn.cursor()

# Wifi Stats Data Preprocessing

In [4]:
# Fetch every row of public.wifistats through the shared cursor.
# `SELECT *` returns columns in table order; the next cell names them
# positionally, so it depends on that order.
query = "SELECT * FROM public.wifistats"
cur.execute(query)
# `data` is consumed by the following cell to build the DataFrame.
data = cur.fetchall()

In [5]:
# Column names are applied positionally to the rows fetched by the preceding
# `SELECT *` cell, so this list must follow the table's column order.
wifi_df = pd.DataFrame(
    data,
    columns=[
        "label",
        "time",
        "ping_str",
        "signal (dBm)",
        "tx_bitrate_str",
        "rx_bitrate_str",
        "connected_time_str",
        "packet_loss",
    ],
)

# Each of these text columns is split on a space and only the first token is
# kept, parsed as a float (source column -> numeric column it produces).
numeric_from_str = {
    "connected_time_str": "connected_time (s)",
    "tx_bitrate_str": "tx_bitrate (MBit/s)",
    "rx_bitrate_str": "rx_bitrate (MBit/s)",
}
for str_col, numeric_col in numeric_from_str.items():
    wifi_df[numeric_col] = (
        wifi_df[str_col].str.split(" ", expand=True)[0].astype(float)
    )

# Capture the four numbers of the "min/avg/max/mdev = a/b/c/d ms" summary in
# ping_str; rows that do not match yield NaN in all four columns.
pattern = r"min/avg/max/mdev = ([\d\.]+)/([\d\.]+)/([\d\.]+)/([\d\.]+) ms"
extracted_values = wifi_df["ping_str"].str.extract(pattern).astype(float)
extracted_values.columns = [
    "min_ping_rtt (ms)",
    "avg_ping_rtt (ms)",
    "max_ping_rtt (ms)",
    "mdev_ping_rtt (ms)",
]

# Append the parsed ping columns, then drop the raw text columns they and the
# numeric columns above were derived from.
wifi_df = pd.concat([wifi_df, extracted_values], axis=1).drop(
    columns=["connected_time_str", "tx_bitrate_str", "rx_bitrate_str", "ping_str"]
)

# Export into the directory created in the setup cell instead of a second,
# hard-coded copy of its path, so the two cannot drift apart.
wifi_df.to_csv(os.path.join(data_dir_path, "wifistats.csv"), index=False)

display(wifi_df.head())

Unnamed: 0,label,time,signal (dBm),packet_loss,connected_time (s),tx_bitrate (MBit/s),rx_bitrate (MBit/s),min_ping_rtt (ms),avg_ping_rtt (ms),max_ping_rtt (ms),mdev_ping_rtt (ms)
0,raspi-e4:5f:01:72:a4:93,2023-10-27 17:30:59.941150,-61.0,0.0,1483.0,180.0,180.0,3.151,4.908,5.733,0.97
1,raspi-e4:5f:01:ac:ed:5b,2023-10-27 17:30:59.941707,-59.0,0.0,19319.0,65.0,65.0,3.43,4.66,5.737,0.942
2,raspi-e4:5f:01:a0:21:49,2023-10-27 17:30:59.942179,-46.0,0.0,66621.0,72.2,72.2,3.602,3.948,4.238,0.224
3,raspi-e4:5f:01:a0:4b:01,2023-10-27 17:30:59.942646,-53.0,0.0,19409.0,72.2,58.5,3.984,6.634,15.219,4.311
4,raspi-e4:5f:01:75:54:ec,2023-10-27 17:30:59.943151,-54.0,0.0,19473.0,72.2,58.5,3.081,3.801,6.149,1.178


# Device Data Preprocessing

In [6]:
# Fetch every row of public.devices through the shared cursor.
# `SELECT *` returns columns in table order; the next cell names them
# positionally, so it depends on that order.
query = "SELECT * FROM public.devices"
cur.execute(query)
# Overwrites the `data` fetched for the previous table.
data = cur.fetchall()

In [None]:
# Column names are applied positionally to the rows fetched by the preceding
# `SELECT *` cell, so this list must follow the table's column order.
devices_df = pd.DataFrame(
    data,
    columns=[
        "label",
        "ethernet_mac",
        "wlan_mac",
        "location",
        "tags",
        "comment",
        "uptime",
        "wireless_bytes",
        "wired_bytes",
        "last_seen",
    ],
)

# The device table is exported unmodified. The target path is built from
# `data_dir_path` (the directory created in the setup cell) instead of a
# second, hard-coded copy of it, so the two cannot drift apart.
devices_df.to_csv(os.path.join(data_dir_path, "devices.csv"), index=False)

display(devices_df.head())

# Extra Info Data Preprocessing

In [8]:
# Fetch every row of public.extra_info through the shared cursor.
# `SELECT *` returns columns in table order; the next cell names them
# positionally, so it depends on that order.
query = "SELECT * FROM public.extra_info"
cur.execute(query)
# Overwrites the `data` fetched for the previous table.
data = cur.fetchall()

In [9]:
# Column names are applied positionally to the rows fetched by the preceding
# `SELECT *` cell, so this list must follow the table's column order.
extra_info_df = pd.DataFrame(
    data,
    columns=[
        "label",
        "comment",
        "contact_person",
        "latitude",
        "longitude",
    ],
)

coordinate_columns = ["latitude", "longitude"]

# Turn the literal string "None" into a real missing value. The result is
# assigned back to the frame: calling `.replace(..., inplace=True)` on
# `df[col]` is a chained in-place update, which warns on recent pandas and
# does not modify the frame at all under Copy-on-Write.
extra_info_df[coordinate_columns] = extra_info_df[coordinate_columns].replace(
    "None", np.nan
)

# Keep only rows that have both coordinates, and only the columns needed
# for the export.
extra_info_df = (
    extra_info_df.dropna(subset=coordinate_columns)
    .reset_index(drop=True)
    .drop(columns=["comment", "contact_person"])
)

# Export into the directory created in the setup cell instead of a second,
# hard-coded copy of its path, so the two cannot drift apart.
extra_info_df.to_csv(os.path.join(data_dir_path, "extra_info.csv"), index=False)

display(extra_info_df.head())

Unnamed: 0,label,latitude,longitude
0,raspi-e4:5f:01:75:6b:2c,34.41773457866324,-119.8546902810392
1,raspi-e4:5f:01:8e:27:aa,34.40957409582008,-119.85180308648168
2,raspi-e4:5f:01:a7:b2:7e,34.409548306773345,-119.85153880818854
3,raspi-e4:5f:01:75:54:04,34.418302607166105,-119.85669070847874
4,raspi-e4:5f:01:9b:84:c4,34.408679667265304,-119.85184124971644


# Close DB connection

In [10]:
# Release the cursor first, then the connection it was created from.
# After this cell the query cells above cannot be re-run without reconnecting.
cur.close()
conn.close()