In [None]:
import pandas as pd
import numpy as np
import seaborn as sb

from matplotlib import pyplot as plt

import distfit
import scipy

# Apply seaborn's default theme with the "rocket" colour palette to all subsequent plots.
sb.set_theme(palette="rocket")

In [None]:
# Load the raw dataset from CSV (path is relative to the notebook's working directory)
# and print a summary of its columns as a quick sanity check.
raw_df = pd.read_csv("PCS_TEST_DETERMINSTIC.csv")
raw_df.info()

## Simple inference
In this section, we will determine the distributions of each column individually.


In [None]:
def plot_distfit_result(
    fitter: distfit.distfit,
    num_top: int = 3,
    pdf_colour: str = "#999999",
    pdf_linewidth: int = 2,
    pdf_bar_multiplier: float = 1.0,

):
    '''
    Draw the "pdf" chart of a distfit object with a consistent colour scheme.

    Parameters
    ----------
    fitter
        distfit object whose ``plot`` method is called; it is expected to
        have been fitted already (e.g. via ``fit_transform``).
    num_top
        Forwarded to ``fitter.plot`` as ``n_top``.
    pdf_colour
        Colour of the PDF line as a hash-prefixed RGB hex string, e.g.
        "#FFFFFF". Other colour formats are not supported because the bar
        colour is derived from it with ``manip_rgb``.
    pdf_linewidth
        Line width of the PDF line.
    pdf_bar_multiplier
        Factor applied to each RGB channel of ``pdf_colour`` to obtain the
        histogram bar colour (1.0 keeps the same colour).

    Returns
    -------
    tuple
        The ``(fig, ax)`` pair returned by ``fitter.plot`` so that callers
        can customise the figure further (titles, labels, saving).
    '''

    pdf_props = {
        "color": pdf_colour,
        "linewidth": pdf_linewidth,
    }

    # Bars use a scaled variant of the line colour so both stay visually related.
    bar_props = {
        "color": manip_rgb(pdf_colour, pdf_bar_multiplier)
    }

    fig, ax = fitter.plot(
        chart="pdf",
        n_top=num_top,
        pdf_properties=pdf_props,
        bar_properties=bar_props,
    )

    # Previously the handles were discarded; returning them is harmless for
    # callers that ignore the result.
    return fig, ax


def manip_rgb(input: str, amount: float) -> str:
    '''Scale every channel of a hash-prefixed RGB hex string by ``amount``.

    Parameters
    ----------
    input
        Colour as "#RRGGBB". The first character is dropped unconditionally;
        strings with fewer than six hex digits are right-padded with "0",
        and any characters beyond the sixth digit are ignored.
    amount
        Multiplier applied to each channel (1.0 leaves the colour unchanged).

    Returns
    -------
    str
        Lower-case "#rrggbb" string. Each scaled channel is truncated to an
        integer and clamped to 0..255, so brightening saturates at white
        instead of wrapping around.

    Raises
    ------
    ValueError
        If a channel is not valid hexadecimal.
    '''

    hex_digits = input[1:].ljust(6, "0")

    def _scale(channel_hex: str) -> int:
        # Clamp rather than wrap: a modulo here would turn 0xFF * 1.0 into 0.
        scaled = int(int(channel_hex, 16) * amount)
        return max(0, min(255, scaled))

    r, g, b = (_scale(hex_digits[i:i + 2]) for i in (0, 2, 4))

    # ":02x" zero-pads on the left, so 5 becomes "05" and not "50".
    return f"#{r:02x}{g:02x}{b:02x}"

In [None]:
# Inter-arrival times: differences between consecutive values of the
# "Arrival time (sec)" column (np.diff yields one element fewer than the input).
deltas = np.diff(raw_df["Arrival time (sec)"])

# Histogram of the inter-arrival times with a kernel density estimate on top.
sb.histplot(deltas, kde=True)

# Fit only the exponential distribution to the inter-arrival times.
fitter = distfit.distfit(distr="expon")
fit_results = fitter.fit_transform(deltas, verbose=False)

# Plot the fitted PDF over the data with hand-picked colours.
num_top = 3
fig, ax = fitter.plot(
    chart="pdf",
    n_top=num_top,
    pdf_properties={"color": "#BBBBBB", "linewidth": 5},
    bar_properties={"color": "#a9c1e8"}
)

fitter.plot_summary()


# Disabled alternative: the same plot through the plot_distfit_result helper.
# plot_distfit_result(
#     fitter,
#     3,
#     "#140369",
#     3,
#     5
# )

# Disabled alternative: CDF chart instead of PDF.
# fig, ax = fitter.plot(chart="cdf", n_top=num_top)

In [None]:
# Base stations used (should be uniform).
# NOTE: the column name in the CSV carries a trailing space ("Base station ").
fig, ax = plt.subplots()
sb.histplot(raw_df["Base station "], discrete=True, kde=False, ax=ax)
# Force integer ticks 1..20 so every bar is labelled.
ax.set_xticks(list(range(1, 21)))
plt.show()

# Distinct station values that actually occur in the data.
base_station_nums = set(raw_df["Base station "])

# Number of rows per station value, ordered by station value.
station_dist = raw_df["Base station "].value_counts().sort_index().to_list()

# A bare expression is only echoed when it is the last statement of a cell,
# so the set is displayed here rather than in the middle of the cell.
base_station_nums


In [None]:
# Fit distfit's default set of candidate distributions to the base-station column.
starting_stations = raw_df["Base station "]

fitter = distfit.distfit()
fit_results = fitter.fit_transform(starting_stations, verbose=False)

# PDF chart through the shared helper (default colours, n_top=3).
plot_distfit_result(fitter)

# Second PDF chart with distfit's default styling and n_top=5.
fig, ax = fitter.plot(chart="pdf", n_top=5)

# Last expression of the cell, so the "model" entry of the fit result is echoed.
fit_results["model"]


In [None]:
# call duration

call_dur = raw_df["Call duration (sec)"]

# Histogram of the call durations with a kernel density estimate on top.
sb.histplot(call_dur, kde=True)
plt.show()

# NOTE(review): this fitter is created but never fitted (the fit call below is
# disabled), and it overwrites the global `fitter` left by earlier cells.
fitter = distfit.distfit(distr="popular")
# fit_results = fitter.fit_transform(call_dur, verbose=False)

# print(fitter.model)

# scipy.stats.t.fit(call_dur)

# NOTE(review): num_top is only referenced by the disabled plot calls below.
num_top = 2
# fig, ax = fitter.plot(chart="pdf", n_top=num_top)
# fit_results["model"]
# fig, ax = fitter.plot(chart="cdf", n_top=num_top)

# Plot an exponential PDF with scale 99.9. In scipy's parameterisation the
# scale is the mean (1 / lambda), so the rate is lambda = 1 / 99.9, not 99.9.
# The curve is drawn in its own figure because plt.show() above already
# finished the histogram figure.
x = np.linspace(0, 1200, 1000)
y = scipy.stats.expon.pdf(x, scale=99.9)
plt.plot(x, y)
plt.show()



In [None]:
# vehicle velocities
# Overlay a normal PDF (mean 120.07, std 9.01) on the velocity histogram.
x = np.linspace(90, 150, 1000)
y = scipy.stats.norm.pdf(x, 120.07, 9.01)

# stat="density" normalises the histogram to unit area so it shares the
# y-scale of the PDF. With the default stat="count" the bars are raw counts
# and the density curve is squashed flat against the x-axis.
sb.histplot(raw_df["velocity (km/h)"], stat="density")
# Drawn on the same (current) axes as the histogram.
sb.lineplot(x=x, y=y, color="red")

In [None]:
# Fit only the normal distribution to the velocity column.
veh_velocities = raw_df["velocity (km/h)"]

fitter = distfit.distfit(distr="norm")
fit_results = fitter.fit_transform(veh_velocities, verbose=False)

# PDF chart through the shared helper (default colours, n_top=3).
plot_distfit_result(fitter)

# Last executable expression of the cell, so the "model" entry is echoed.
fit_results["model"]

# Disabled alternative: direct plot call with hand-picked colours.
# num_top = 3
# fig, ax = fitter.plot(
#     chart="pdf",
#     n_top=num_top,
#     pdf_properties={"color": "#BBBBBB", "linewidth": 5},
#     bar_properties={"color": "#a9c1e8"}
# )

# fitter.plot_summary()


In [None]:
# call duration
# NOTE(review): repeats the call-duration histogram from the earlier
# "call duration" cell; call_dur is simply re-read from the same column.
call_dur = raw_df["Call duration (sec)"]

sb.histplot(call_dur, kde=True)
plt.show()



# Comparing measurements to simulated call initiation events


In [None]:
# Load the simulated call-initiation events from CSV (path is relative to the
# notebook's working directory) and print a summary of its columns.
sim_df = pd.read_csv("call_init_gen.csv")
sim_df.info()

In [None]:
# call arrival intervals
# Differences between consecutive values of the "time" column.
# NOTE(review): assumes the rows are already ordered by time — confirm.

call_init_times = sim_df["time"].tolist()
call_intervals = np.diff(call_init_times)

sb.histplot(call_intervals)

In [None]:
# call durations
# Histogram of the "remaining_time" column of the simulated events.
# NOTE(review): presumably the remaining time at initiation equals the full
# call duration — confirm against the simulator.
call_durations = sim_df["remaining_time"].tolist()
sb.histplot(call_durations)

In [None]:
# Histogram of the "velocity" column of the simulated events.
veh_speeds = sim_df["velocity"]
sb.histplot(veh_speeds)

In [None]:
# Histogram of the "position" column of the simulated events.
veh_pos = sim_df["position"]
sb.histplot(veh_pos)
plt.show()

# Number of simulated events per distinct "direction" value.
veh_dir = sim_df["direction"].value_counts()
# Pass x and y explicitly: how a Series given as the first positional argument
# is interpreted differs between seaborn versions, whereas this form always
# draws one bar per direction with the count as its height.
sb.barplot(x=veh_dir.index, y=veh_dir.values)