In [None]:
import pandas as pd
import numpy as np
import seaborn as sb

from matplotlib import pyplot as plt

import distfit
import scipy

# Apply the seaborn theme with the "rocket" colour palette; set once here so the
# plotting cells below do not have to configure it themselves.
sb.set_theme(palette="rocket")

In [None]:
# Load the raw CSV into a DataFrame and print its column / dtype / non-null summary.
# NOTE(review): the path spells "DETERMINSTIC" (sic); confirm the spelling of the
# file on disk before "correcting" it here.
raw_df = pd.read_csv("PCS_TEST_DETERMINSTIC.csv")
raw_df.info()

## Simple inference
In this section, we will determine the distribution of each column individually.


In [None]:
def plot_distfit_result(
    fitter: distfit.distfit,
    title: str = "",
    num_top: int = 3,
    line_colour: str = "#BBBBBB",
    pdf_linewidth: int = 2,
    pdf_bar_multiplier: float = 0.5,
) -> plt.Figure:
    '''
    Draw the "pdf" chart of a distfit object and return the resulting figure.

    Parameters
    ----------
    fitter : object whose ``plot`` method is called; it is expected to have
        been fitted already (the cells in this notebook call ``fit_transform``
        before plotting).
    title : forwarded to ``fitter.plot`` as ``title``.
    num_top : forwarded to ``fitter.plot`` as ``n_top``.
    line_colour : colour of the bars, as a hash-prefixed hex RGB string such as
        "#BBBBBB". Other colour spellings (names, "0x..." literals) are not
        supported because the string is parsed by ``manip_rgb``.
    pdf_linewidth : line width passed in the PDF properties.
    pdf_bar_multiplier : factor handed to ``manip_rgb`` together with
        ``line_colour`` to derive the PDF curve colour from the bar colour.

    Returns
    -------
    The figure returned by ``fitter.plot``; the axes object is discarded.
    '''

    bar_props = {
        "color": line_colour
    }

    # The PDF curve reuses the bar colour, scaled per channel by the multiplier.
    pdf_props = {
        "color": manip_rgb(line_colour, pdf_bar_multiplier),
        "linewidth": pdf_linewidth,
    }

    # fitter.plot returns a (figure, axes) pair; only the figure is needed here.
    fig, _ = fitter.plot(
        chart="pdf",
        title=title,
        n_top=num_top,
        pdf_properties=pdf_props,
        bar_properties=bar_props,
        figsize=(12, 8),
    )

    return fig


def manip_rgb(input: str, amount: float) -> str:
    '''
    Scale every channel of a hash-prefixed RGB string by ``amount``.

    Parameters
    ----------
    input : hash-prefixed hex RGB string, e.g. "#FFFFFF". The first character
        is dropped unconditionally; if fewer than six hex digits follow, the
        string is right-padded with "0" to six digits.
    amount : multiplier applied to each of the three channels.

    Returns
    -------
    A lowercase "#rrggbb" string. Each scaled channel is truncated to an
    integer and clamped to the range 0..255, then written as exactly two hex
    digits (zero-padded on the left).

    Raises
    ------
    ValueError : if a channel substring is not valid hexadecimal.
    '''

    hex_digits = input[1:].ljust(6, "0")

    def scale(channel_hex: str) -> int:
        # Clamp instead of wrapping: a modulo would turn 255 into 0 and make
        # over-bright results wrap around to dark colours.
        scaled = int(int(channel_hex, 16) * amount)
        return max(0, min(255, scaled))

    r, g, b = (scale(hex_digits[start:start + 2]) for start in (0, 2, 4))

    # "02x" left-pads single-digit values with a zero (5 -> "05", not "50").
    return f"#{r:02x}{g:02x}{b:02x}"

In [None]:
# inter-arrival times
# np.diff gives the difference between consecutive entries of the column; that is
# the inter-arrival gap only if the rows are in arrival order -- TODO confirm.
deltas = np.diff(raw_df["Arrival time (sec)"])

# Fit with distfit configured with distr="expon", then plot and save the figure
# as an SVG in the working directory.
fitter = distfit.distfit(distr="expon")
fit_results = fitter.fit_transform(deltas, verbose=False)
f = plot_distfit_result(fitter, "inter-arrival time")
f.savefig("inter-arrival-time.svg")
# Last expression of the cell: the "model" entry of the fit results is displayed.
fit_results["model"]


In [None]:
# base stations used (should be uniform)
# Explicit figure/axes pair so the histogram and the tick setup target the same axes.
# Note: the column header really does end with a space ("Base station ").
fig, ax = plt.subplots()
sb.histplot(raw_df["Base station "], discrete=True, kde=False, ax=ax)
# Integer ticks 1..20 on the x-axis (assumes station ids run from 1 to 20 -- TODO confirm).
ax.set_xticks(list(range(1, 21)))
plt.show()

# Distinct station ids present in the data. Printed explicitly: a bare expression
# in the middle of a cell is evaluated but never displayed.
base_station_nums = set(raw_df["Base station "])
print(sorted(base_station_nums))

# Number of rows per station id, drawn as a bar chart.
station_dist = raw_df["Base station "].value_counts()
sb.barplot(station_dist)


In [None]:
# base station distribution

# The column header ends with a trailing space; the key must match it exactly.
starting_stations = raw_df["Base station "]

# Fit with distfit configured with distr="uniform" and bins=20.
fitter = distfit.distfit(distr="uniform", bins=20)
fit_results = fitter.fit_transform(starting_stations, verbose=False)

# Plot the fit and save the figure as an SVG in the working directory.
f = plot_distfit_result(fitter, "base station")
f.savefig("base-station.svg")
# fig, ax = fitter.plot(chart="pdf", n_top=5)

# Last expression of the cell: the "model" entry of the fit results is displayed.
fit_results["model"]


In [None]:
# call duration dist

call_dur = raw_df["Call duration (sec)"]

# Fit with distfit configured with distr="expon", then plot and save the figure
# as an SVG in the working directory.
fitter = distfit.distfit(distr="expon")
fit_results = fitter.fit_transform(call_dur, verbose=False)
f = plot_distfit_result(fitter, "call duration")
f.savefig("call-duration.svg")
# Last expression of the cell: the "model" entry of the fit results is displayed.
fit_results["model"]



In [None]:
# checking minimum call duration
# The column is converted to a plain Python list and reduced with the builtin
# min(); the result is the cell's displayed output.
# NOTE(review): a later cell rebinds `call_durations` to a column of the simulated
# data, so this list is only valid until that cell runs.
call_durations = raw_df["Call duration (sec)"].tolist()
min(call_durations)

In [None]:
# vehicle velocity distribution (column header states km/h)
veh_velocities = raw_df["velocity (km/h)"]

# Fit with distfit configured with distr="norm".
fitter = distfit.distfit(distr="norm")
fit_results = fitter.fit_transform(veh_velocities, verbose=False)

# Plot the fit and save the figure as an SVG in the working directory.
f = plot_distfit_result(fitter, "vehicle velocity")
f.savefig("vehicle-velocity.svg")
# Last expression of the cell: the "model" entry of the fit results is displayed.
fit_results["model"]



# Comparing measurements to simulated call initiation events


In [None]:
# Load the simulated call-initiation events and print the column / dtype /
# non-null summary of the resulting DataFrame.
sim_df = pd.read_csv("call_init_gen.csv")
sim_df.info()

In [None]:
# call arrival intervals

# Differences between consecutive entries of the "time" column; these are
# inter-arrival gaps only if the rows are in time order -- TODO confirm.
call_init_times = sim_df["time"].tolist()
call_intervals = np.diff(call_init_times)

# Fit with distfit configured with distr="expon" and bins=200, then plot and save
# the figure as an SVG in the working directory.
fitter = distfit.distfit(distr="expon", bins=200)
fit_results = fitter.fit_transform(call_intervals, verbose=False)
f = plot_distfit_result(fitter, "simulated inter-arrival time")
f.savefig("simulated-inter-arrival-time.svg")
# Last expression of the cell: the "model" entry of the fit results is displayed.
fit_results["model"]

In [None]:
# base station
base_stations = sim_df["station"]
# NOTE(review): the slice below is evaluated but neither assigned nor displayed
# (it is not the last expression of the cell), so it currently has no effect.
base_stations[:100]
# Fit with distfit configured with distr="uniform" and bins=20, then plot and save
# the figure as an SVG in the working directory.
fitter = distfit.distfit(distr="uniform", bins=20)
fit_results = fitter.fit_transform(base_stations, verbose=False)
f = plot_distfit_result(fitter, "simulated base station")
f.savefig("simulated-base-station.svg")
# Last expression of the cell: the "model" entry of the fit results is displayed.
fit_results["model"]


In [None]:
# call durations
# NOTE(review): this rebinds `call_durations`, which an earlier cell bound to a
# list built from raw_df; from here on it is the "remaining_time" column of sim_df.
call_durations = sim_df["remaining_time"]

# Fit with distfit configured with distr="expon", then plot and save the figure
# as an SVG in the working directory.
fitter = distfit.distfit(distr="expon")
fit_results = fitter.fit_transform(call_durations, verbose=False)
f = plot_distfit_result(fitter, "simulated call duration")
f.savefig("simulated-call-duration.svg")
# The "model" entry of the fit results is the last live expression, so it is displayed.
fit_results["model"]


# print(f"min call duration: {min(call_durations)}")
# sb.histplot(call_durations)

In [None]:
# vehicle velocity

veh_speeds = sim_df["velocity"]
# Fit with distfit configured with distr="norm", then plot and save the figure
# as an SVG in the working directory.
fitter = distfit.distfit(distr="norm")
fit_results = fitter.fit_transform(veh_speeds, verbose=False)
f = plot_distfit_result(fitter, "simulated vehicle velocity")
f.savefig("simulated-vehicle-velocity.svg")
# The "model" entry of the fit results is the last live expression, so it is displayed.
fit_results["model"]

# veh_speeds = sim_df["velocity"]
# sb.histplot(veh_speeds)

In [None]:
# Histogram of the simulated "position" column. The seaborn/pyplot calls below
# act on the current figure, so plt.figure() must come first and plt.savefig()
# must run before plt.show().
plt.figure(figsize=(9,6))
veh_pos = sim_df["position"]
sb.histplot(veh_pos, color="black")
plt.title("simulated relative vehicle position")
plt.savefig("simulated-vehicle-position.svg")
plt.show()

# Bar chart of how many rows fall into each "direction" value.
plt.figure(figsize=(9,6))
veh_dir = sim_df["direction"].value_counts()
sb.barplot(veh_dir, color="gray")
plt.title("simulated vehicle direction")
plt.savefig("simulated-vehicle-direction.svg")
plt.show()

# Bar chart of rows per "station" value.
# NOTE(review): unlike the two plots above, this one sets no figure size or title
# and is not saved to disk -- confirm whether that is intentional.
cell_tower = sim_df["station"].value_counts()
sb.barplot(cell_tower, color="gray")
plt.show()