In [None]:
import os
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import numpy as np
import seaborn as sns

import network_maps
import preprocess_mikrotik_starlink

# Pandas display: never truncate rows or columns when a frame is rendered
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# Matplotlib defaults shared by every figure in this notebook
plt.rcParams.update({
    "figure.dpi": 200,
    # Font type 42 embeds TrueType fonts in PDF/PS output
    "pdf.fonttype": 42,
    "ps.fonttype": 42,
    # Make Matplotlib's automatic date tick formatting show the time of day
    "date.autoformatter.hour": "%H:%M:%S",
    "date.autoformatter.minute": "%H:%M:%S",
    "date.autoformatter.second": "%H:%M:%S",
    "date.autoformatter.microsecond": "%H:%M:%S.%f",  # optional
})

# Default size for the wide time-series figures
fig_size = (32, 8)

In [None]:
data_dir = 'data/2024-11-07-mikrotik/'

list_of_files = sorted(os.listdir(data_dir))
num_files = len(list_of_files)
print('Number of files: {}'.format(num_files))

# Load every parquet shard; anything that does not end in ".gzip" is skipped
df_list = []
for file in list_of_files:
    if not file.endswith(".gzip"):
        continue
    temp_df = pd.read_parquet(os.path.join(data_dir, file))
    df_list.append(temp_df)
df = pd.concat(df_list)

# Infer objects, then convert dtypes
df = df.infer_objects().convert_dtypes()

# Best-effort numeric conversion: columns that do not parse are left as they are
for column in df.columns:
    try:
        df[column] = pd.to_numeric(df[column])
    except (ValueError, TypeError):
        pass  # Skip columns that cannot be converted

# Rename timestamp from Python and keep it for future use
# It is unreliable if a lot of messages come at the same time due to congestion
#df['timestamp_python'] = df.pop('@timestamp')

# It is better to rely on timestamps from the router rather than ElasticSearch
df['timestamp_router'] = pd.to_datetime(df['date'] + ' ' +  df['time'])
df.drop(columns=['date', 'time'], inplace=True)
df = df.sort_values(by=['timestamp_router'])
df = df.set_index('timestamp_router', drop=False)
#df = df.reset_index()

# Convert Data Class into integer mapping
dataclass_mapping = {'': 0, 'LTE': 1, '5G NSA': 2, '5G SA': 3}
df['lDataClassInt'] = df['lDataClass'].map(dataclass_mapping)

# Convert modulation into fixed bits per hz mapping
modulation_mapping = {'': 0, 'qpsk': 2, '16qam': 4, '64qam': 6, '256qam': 8}
df['lDlModulationInt'] = df['lDlModulation'].map(modulation_mapping)
df['lNrDlModulationInt'] = df['lNrDlModulation'].map(modulation_mapping)

# Fix wrong scaling on Rsrq and NrRsrq
# If the value is -12dB, it is shown as -120
df['lRsrq'] = df['lRsrq'] / 10
df['lNrRsrq'] = df['lNrRsrq'] / 10

# Create handover events: 1.0 where a router's cell id differs from that router's
# previous sample, 0.0 otherwise.
# The frame holds several routers ('identity') interleaved by time, so the diff must be
# taken per router; a plain diff would compare samples of different routers.
# A sample without a usable predecessor (first sample, or missing cell id) is not
# counted as a handover.
cell_delta = df.groupby('identity')['lCurrentCellid'].diff()
# to_numpy() assigns positionally, so the duplicated timestamp index is not re-aligned
df['handover_CurrentCellid'] = cell_delta.fillna(0).ne(0).astype(float).to_numpy()

print('df before')
df.info(verbose=True, show_counts=True, memory_usage='deep')

# Compress float64/Float64 to float32 and int64/Int64 to int32
dtype_mapping = {col: 'float32' for col in df.select_dtypes(include=['float64', 'Float64']).columns}
dtype_mapping.update({col: 'int32' for col in df.select_dtypes(include=['int64', 'Int64']).columns})

df = df.astype(dtype_mapping)

print('df after')
df.info(verbose=True, show_counts=True, memory_usage='deep')

#df.info()

In [None]:
def get_PrimaryBandMHzNumber_v3(df):
    """Split the 'lPrimaryBand' column into band name and bandwidth columns.

    Values are matched against the pattern ``<name>@<digits>Mhz``. The text
    before the ``@`` goes to 'PrimaryBandName' and the digits go to
    'PrimaryBandMHz' as nullable ``Int64``. Rows that do not match get missing
    values in both columns.

    The frame is modified in place: the two columns are added and
    'lPrimaryBand' is removed. The same frame object is returned.
    """
    band_parts = df['lPrimaryBand'].str.extract(r'(\S+)@(\d+)Mhz')

    # First capture group: band name
    df['PrimaryBandName'] = band_parts[0]
    # Second capture group: bandwidth digits; nullable integer keeps missing rows
    df['PrimaryBandMHz'] = band_parts[1].astype('Int64')

    # The raw column is no longer needed once it has been split
    del df['lPrimaryBand']

    return df


In [None]:
df = get_PrimaryBandMHzNumber_v3(df)

In [None]:
# Keep only the leading number of the speed text (first whitespace-separated token).
# Casting to string first makes this cell safe to re-run: once 'speed' is numeric the
# .str accessor would otherwise raise.
df['speed'] = pd.to_numeric(df['speed'].astype('string').str.split().str[0])

In [None]:
# maybe plot
#df['speed'].describe()

In [None]:
# Keep only rows whose index time of day lies between 11:30 and 17:00
df = df.between_time(start_time='11:30', end_time='17:00')
# Alternative kept for reference: select an absolute date range instead
# start_date = '2025-01-22 09:00:00'
# end_date = '2025-01-22 11:30:00'
# mask = (df['timestamp_router'] > start_date) & (df['timestamp_router'] <= end_date)
# df = df.loc[mask]

In [None]:
# One frame per router, selected by its 'identity' value.
# Each is an explicit copy so that columns added later go to an independent frame
# rather than to a slice of df (which triggers SettingWithCopyWarning).
df_D2 = df.query("identity == '5G-D2-WAVELAB'").copy()
df_DTAG = df.query("identity == '5G-DTAG-WAVELAB'").copy()
df_8388 = df.query("identity == 'CAU-8388'").copy()
df_4329 = df.query("identity == 'CAU-4329'").copy()
df_0C = df.query("identity == 'CAU-0C'").copy()

In [None]:
# Distinct cell ids of the D2 router, in order of first appearance
unique_cells = df_D2['lCurrentCellid'].unique()

# Number them 1..N so the large raw ids become small, plottable integers
cell_mapping = dict(zip(unique_cells, range(1, len(unique_cells) + 1)))

# Store the compact number next to the raw id
df_D2['cell_id_num'] = df_D2['lCurrentCellid'].map(cell_mapping)

In [None]:
len(df_D2['lCurrentCellid'].unique())

In [None]:
df_D2['lCurrentCellid'].unique()

In [None]:
df_D2['lCurrentCellid'].value_counts()

In [None]:
df_D2['cell_id_num'].plot(figsize=fig_size)

In [None]:
fig, ax = plt.subplots(figsize=fig_size)

# Timestamps of the rows flagged as handover (flag value 1)
handover_times = df_D2.index[df_D2['handover_CurrentCellid'] == 1]

# One full-height vertical line per handover
ax.vlines(handover_times, 0, 1, linewidth=1, alpha=0.8)

# The y-axis carries no information (event / no event), so hide its ticks
ax.set(ylim=(0, 1), yticks=[], title='Handovers over time', xlabel='Time')
ax.grid(True, axis='x', linestyle='--', alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
df_D2['PrimaryBandMHz'].plot(figsize=fig_size)

In [None]:
fig, ax = plt.subplots(figsize=(12, 3))

# Integer-coded data class of the D2 router over time
data_class_series = df_D2['lDataClassInt'].rename('Vodafone-DataClassInt')
data_class_series.plot(ax=ax)

ax.set(title='DataClassInt over time', ylabel='DataClassInt')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
df_D2[['lCqi', 'lSinr', 'lRsrp', 'lRsrq']].describe()

In [None]:
fig, ax = plt.subplots(figsize=(16, 4))

# Radio quality metrics of the D2 router, one line per metric
for metric_column, legend_name in (
    ('lCqi', 'D2-CQI'),
    ('lSinr', 'D2-SINR'),
    ('lRsrp', 'D2-RSRP'),
    ('lRsrq', 'D2-RSRQ'),
):
    df_D2[metric_column].rename(legend_name).plot(ax=ax)

# Tx rate divided by 1024*1024 so it fits on the same axis
tx_scaled = df_D2['ltxbitspersecond'].rename('D2-TX-Rate') / (1024 * 1024)
tx_scaled.plot(ax=ax)

ax.set_title('Metrics over time')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(40, 10))

# GPS speed reported by the two routers, one line each
for router_frame, legend_name in (
    (df_D2, 'D2-GPS-speed'),
    (df_DTAG, 'DTAG-GPS-speed'),
):
    router_frame['speed'].rename(legend_name).plot(ax=ax)

ax.set(title='GPS Speed over time', ylabel='GPS Speed (km/h)')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()


In [None]:
fig, ax = plt.subplots(figsize=(40, 10))

# Tx rate of each router, divided by 1024*1024
for router_frame, legend_name in (
    (df_D2, 'D2-Tx'),
    (df_DTAG, 'DTAG-Tx'),
):
    scaled_rate = router_frame['ltxbitspersecond'] / (1024 * 1024)
    scaled_rate.rename(legend_name).plot(ax=ax)

ax.set(title='Tx Rate over time', ylabel='Tx Rate [Mbps]')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()


In [None]:
FIG_SIZE=(16, 4)

In [None]:
# Starlink WebRTC statistics: parse the timestamp and index by it (column kept as well)
df_starlink = (
    pd.read_csv('data/2024-11-07-webrtc/starlink_interpolated.csv')
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
    .set_index('timestamp', drop=False)
)
df_starlink.info()

In [None]:
# Vodafone WebRTC statistics: parse the timestamp and index by it (column kept as well)
df_vodafone = (
    pd.read_csv('data/2024-11-07-webrtc/vodafone_interpolated.csv')
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
    .set_index('timestamp', drop=False)
)
df_vodafone.info()

In [None]:
df_starlink[['rtt_ms', 'jitter_ms']].describe()

In [None]:
df_vodafone[['rtt_ms', 'jitter_ms']].describe()

In [None]:
fig, ax = plt.subplots(figsize=FIG_SIZE)

# RTT of both links on one axis, clipped to 0-1000 ms
for link_frame, legend_name in (
    (df_starlink, 'Starlink RTT'),
    (df_vodafone, 'Cellular RTT'),
):
    link_frame['rtt_ms'].rename(legend_name).plot(ax=ax, ylim=(0, 1000))

ax.set(title='RTT over time', ylabel='RTT [ms]')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend()

# Optional explicit tick format (the rcParams date autoformatters already show HH:MM:SS):
# ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
# fig.autofmt_xdate()

plt.tight_layout()
plt.show()

In [None]:
# RTT samples per link (NaNs removed) and their union, used for a common binning
x1 = df_starlink['rtt_ms'].dropna().to_numpy()
x2 = df_vodafone['rtt_ms'].dropna().to_numpy()
x_all = np.concatenate([x1, x2])

# Shared 1 ms bins so both histograms are directly comparable
bin_width = 1.0
bins = np.arange(x_all.min(), x_all.max() + bin_width, bin_width)

fig, ax = plt.subplots(figsize=(4,3), dpi=200)

# hist returns (counts, bin_edges, patches); only the patches are needed for the colors
_, _, patches1 = ax.hist(x1, bins=bins, alpha=0.5, label='Starlink', density=False)
_, _, patches2 = ax.hist(x2, bins=bins, alpha=0.5, label='Cellular', density=False)

# use the same colors as the bars
c1 = patches1[0].get_facecolor()  # RGBA
c2 = patches2[0].get_facecolor()

m1 = np.median(x1)
m2 = np.median(x2)

ax.axvline(m1, color=c1, alpha=1.0, linestyle='--', linewidth=2, label='Starlink median')
ax.axvline(m2, color=c2, alpha=1.0, linestyle='--', linewidth=2, label='Cellular median')

# 95th percentiles, kept in their own names instead of overwriting the patch containers
p95_1 = np.percentile(x1, 95)
p95_2 = np.percentile(x2, 95)

ax.axvline(p95_1, color=c1, alpha=1.0, linestyle=':', linewidth=2, label='Starlink p95')
ax.axvline(p95_2, color=c2, alpha=1.0, linestyle=':', linewidth=2, label='Cellular p95')

# Only zoom the view; the bins still cover the full data range
ax.set_xlim(50, 150)
ax.set_xlabel('RTT (ms)')
ax.set_ylabel('Count')
ax.grid(True, linestyle='--', alpha=0.3)
ax.legend(loc='upper right')

plt.tight_layout()
# This is the first cell that writes into plots/, so make sure the directory exists
os.makedirs("plots", exist_ok=True)
plt.savefig("plots/WebRTC-RTT-histogram-comparison.pdf", bbox_inches="tight")
plt.savefig("plots/WebRTC-RTT-histogram-comparison.png", bbox_inches="tight")
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(4, 3), dpi=200)

# RTT samples per link, NaNs removed
starlink = df_starlink['rtt_ms'].dropna().values
vodafone = df_vodafone['rtt_ms'].dropna().values

# Empirical CDF: sorted samples on x, cumulative fraction (rank / N) on y
starlink_sorted, vodafone_sorted = np.sort(starlink), np.sort(vodafone)
starlink_ecdf = np.arange(1, len(starlink_sorted) + 1) / len(starlink_sorted)
vodafone_ecdf = np.arange(1, len(vodafone_sorted) + 1) / len(vodafone_sorted)

# Draw the step curves and keep the line handles to reuse their colors
l1 = ax.step(starlink_sorted, starlink_ecdf, where='post', label='Starlink')[0]
l2 = ax.step(vodafone_sorted, vodafone_ecdf, where='post', label='Cellular')[0]
c1 = l1.get_color()
c2 = l2.get_color()

# --- summary statistics (mean, median, p90, p95) ---
mean_starlink = np.mean(starlink)
mean_vodafone = np.mean(vodafone)
print("Starlink RTT mean:", mean_starlink)
print("Cellular RTT mean:", mean_vodafone)

s_med, s_p90 = np.percentile(starlink, [50, 90])
v_med, v_p90 = np.percentile(vodafone, [50, 90])
print("Starlink RTT median:", s_med)
print("Cellular RTT median:", v_med)
print("Starlink RTT p90:", s_p90)
print("Cellular RTT p90:", v_p90)
print("Ratio of p90 (Cellular/Starlink):", v_p90 / s_p90)

s_p95 = np.percentile(starlink, 95)
v_p95 = np.percentile(vodafone, 95)
print("Starlink RTT p95:", s_p95)
print("Cellular RTT p95:", v_p95)
print("Ratio of p95 (Cellular/Starlink):", v_p95 / s_p95)

# Vertical markers: dashed = median, dotted = p90, colored like the matching curve
for marker_x, marker_color, marker_style, marker_label in (
    (s_med, c1, '--', 'median'),
    (v_med, c2, '--', 'median'),
    (s_p90, c1, ':', 'p90'),
    (v_p90, c2, ':', 'p90'),
):
    ax.axvline(marker_x, color=marker_color, linestyle=marker_style,
               linewidth=1.6, alpha=0.9, label=marker_label)
# ----------------------------

# Zoom to the 0.5th-95th percentile of the pooled samples, with 2% padding
x_all = np.concatenate([starlink, vodafone])
lo, hi = np.percentile(x_all, [0.5, 95])   # try [0, 99] or [1, 99] too
pad = 0.02 * (hi - lo)
ax.set_xlim(max(0, lo - pad), hi + pad)      # clamp at 0 for RTT/jitter
#ax.set_xlim(50, 250)

ax.set_xlabel('RTT [ms]')
ax.set_ylabel('Cumulative probability')
ax.grid(True, linestyle='--', alpha=0.3)
ax.legend(loc='best')
plt.tight_layout()
plt.savefig("plots/WebRTC-RTT-ecdf-comparison.pdf", bbox_inches="tight")
plt.savefig("plots/WebRTC-RTT-ecdf-comparison.png", bbox_inches="tight")
plt.show()


In [None]:
import matplotlib.pyplot as plt

# RTT samples per link, NaNs removed
starlink_tx = df_starlink['rtt_ms'].dropna()
vodafone_tx = df_vodafone['rtt_ms'].dropna()
data = [starlink_tx, vodafone_tx]

fig, ax = plt.subplots()

# One violin per link at x = 1 and x = 2; mark the mean and the extrema, not the median
parts = ax.violinplot(data, positions=[1, 2],
                      showmeans=True, showmedians=False, showextrema=True)

# Give each violin body its own fill color
colors = ['tab:blue', 'tab:orange']
for violin_body, fill_color in zip(parts['bodies'], colors):
    violin_body.set_facecolor(fill_color)
    violin_body.set_edgecolor('black')
    violin_body.set_alpha(0.7)

ax.set_ylim(0, 1000)

# Name the two violins on the x-axis
ax.set_xticks([1, 2])
ax.set_xticklabels(['Starlink RTT', 'Cellular RTT'])

ax.set_ylabel('RTT [ms]')
ax.grid(True, linestyle='--', alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
fig, ax = plt.subplots(figsize=FIG_SIZE)

# Jitter of both links on one axis, clipped to 0-50 ms
for link_frame, legend_name in (
    (df_starlink, 'Starlink Jitter'),
    (df_vodafone, 'Cellular Jitter'),
):
    link_frame['jitter_ms'].rename(legend_name).plot(ax=ax, ylim=(0, 50))

ax.set_ylabel('Jitter [ms]')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Jitter samples per link (NaNs removed) and their union for a common binning
x1 = df_starlink['jitter_ms'].dropna().to_numpy()
x2 = df_vodafone['jitter_ms'].dropna().to_numpy()
x_all = np.concatenate([x1, x2])

bin_width = 1.0  # ms
bins = np.arange(x_all.min(), x_all.max() + bin_width, bin_width)

fig, ax = plt.subplots(figsize=(4,3), dpi=200)

# ax.hist returns (counts, bin_edges, patches); index 2 is the patch container
bars1 = ax.hist(x1, bins=bins, alpha=0.5, label='Starlink', density=False)[2]
bars2 = ax.hist(x2, bins=bins, alpha=0.5, label='Cellular', density=False)[2]

# Reuse the bar colors (RGBA) for the statistic markers
c1 = bars1[0].get_facecolor()
c2 = bars2[0].get_facecolor()

# --- mean ---
me1 = np.mean(x1)
me2 = np.mean(x2)
print("Starlink jitter mean:", me1)
print("Cellular jitter mean:", me2)
print("Ratio of mean (Cellular/Starlink):", me2 / me1)

# --- median ---
m1 = np.median(x1)
m2 = np.median(x2)
print("Starlink jitter median:", m1)
print("Cellular jitter median:", m2)

# --- 90th and 95th percentiles ---
p90_1, s_p95 = np.percentile(x1, [90, 95])
p90_2, v_p95 = np.percentile(x2, [90, 95])
print("Starlink jitter p90:", p90_1)
print("Cellular jitter p90:", p90_2)
print("Ratio of p90 (Cellular/Starlink):", p90_2 / p90_1)
print("Starlink jitter p95:", s_p95)
print("Cellular jitter p95:", v_p95)
print("Ratio of p95 (Cellular/Starlink):", v_p95 / s_p95)

# Vertical markers: dashed = median, dotted = p90, colored like the matching bars
for marker_x, marker_color, marker_style, marker_label in (
    (m1, c1, '--', 'median'),
    (m2, c2, '--', 'median'),
    (p90_1, c1, ':', 'p90'),
    (p90_2, c2, ':', 'p90'),
):
    ax.axvline(marker_x, color=marker_color, alpha=1.0, linestyle=marker_style,
               linewidth=2, label=marker_label)

# Only zoom the view (does NOT change binning or the histogram computation)
ax.set_xlim(0, 55)
ax.set_xlabel('Jitter (ms)')
ax.set_ylabel('Count')
ax.grid(True, linestyle='--', alpha=0.3)
ax.legend(loc='upper right')
plt.tight_layout()
plt.savefig("plots/WebRTC-Jitter-histogram-comparison.pdf", bbox_inches="tight")
plt.savefig("plots/WebRTC-Jitter-histogram-comparison.png", bbox_inches="tight")
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(4, 3), dpi=200)

# Jitter samples per link, NaNs removed
starlink = df_starlink['jitter_ms'].dropna().values
vodafone = df_vodafone['jitter_ms'].dropna().values

# Sort values
starlink_sorted = np.sort(starlink)
vodafone_sorted = np.sort(vodafone)

# ECDF values: cumulative fraction (rank / N) for each sorted sample
starlink_ecdf = np.arange(1, len(starlink_sorted) + 1) / len(starlink_sorted)
vodafone_ecdf = np.arange(1, len(vodafone_sorted) + 1) / len(vodafone_sorted)

# Plot ECDFs (capture lines to reuse colors)
(l1,) = ax.step(starlink_sorted, starlink_ecdf, where='post', label='Starlink')
(l2,) = ax.step(vodafone_sorted, vodafone_ecdf, where='post', label='Cellular')

c1, c2 = l1.get_color(), l2.get_color()

# --- stats (median + p90) ---
# These are 90th percentiles, so they are named p90 to match the plotted labels
s_med, s_p90 = np.percentile(starlink, [50, 90])
v_med, v_p90 = np.percentile(vodafone, [50, 90])

ax.axvline(s_med, color=c1, linestyle='--', linewidth=1.6, alpha=0.9,
           label='median')
ax.axvline(v_med, color=c2, linestyle='--', linewidth=1.6, alpha=0.9,
           label='median')

ax.axvline(s_p90, color=c1, linestyle=':', linewidth=1.6, alpha=0.9,
           label='p90')
ax.axvline(v_p90, color=c2, linestyle=':', linewidth=1.6, alpha=0.9,
           label='p90')
# ----------------------------

# Fixed view window, the same as the jitter histogram
ax.set_xlim(0, 55)
ax.set_xlabel('Jitter [ms]')
ax.set_ylabel('Cumulative probability')
ax.grid(True, linestyle='--', alpha=0.3)
ax.legend(loc='best')
plt.tight_layout()
plt.savefig("plots/WebRTC-Jitter-ecdf-comparison.pdf", bbox_inches="tight")
plt.savefig("plots/WebRTC-Jitter-ecdf-comparison.png", bbox_inches="tight")
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Jitter samples per link, NaNs removed
starlink_tx = df_starlink['jitter_ms'].dropna()
vodafone_tx = df_vodafone['jitter_ms'].dropna()
data = [starlink_tx, vodafone_tx]

fig, ax = plt.subplots()

# One violin per link at x = 1 and x = 2; mark the mean and the extrema, not the median
parts = ax.violinplot(data, positions=[1, 2],
                      showmeans=True, showmedians=False, showextrema=True)

# Give each violin body its own fill color
colors = ['tab:blue', 'tab:orange']
for violin_body, fill_color in zip(parts['bodies'], colors):
    violin_body.set_facecolor(fill_color)
    violin_body.set_edgecolor('black')
    violin_body.set_alpha(0.7)

ax.set_ylim(0, 60)

# Name the two violins on the x-axis
ax.set_xticks([1, 2])
ax.set_xticklabels(['Starlink Jitter', 'Cellular Jitter'])

ax.set_ylabel('Jitter [ms]')
ax.grid(True, linestyle='--', alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Jitter column of each link with the timestamp index dropped
df_starlink_cleaned = df_starlink[['jitter_ms']].reset_index(drop=True)
df_vodafone_cleaned = df_vodafone[['jitter_ms']].reset_index(drop=True)

# Long-format frame: one row per sample, tagged with the link it came from
labelled_frames = [
    frame.assign(Category=link_name)
    for frame, link_name in (
        (df_starlink_cleaned, 'Starlink'),
        (df_vodafone_cleaned, 'Cellular'),
    )
]
data_combined = pd.concat(labelled_frames, ignore_index=True)

# Violin per link, jitter on the y-axis limited to 0-60 ms
sinr_violin = plt.figure(figsize=(4, 4), dpi=200)
sns.violinplot(data=data_combined, x='Category', y='jitter_ms')
plt.xlabel('Router')
plt.ylabel('Jitter (ms)')
plt.ylim(0, 60)
#plt.savefig(f'plots/routers-violinplot-sinr-lte.pdf', bbox_inches='tight')
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=FIG_SIZE)

# Burst-lost packets of both links, y-axis clipped to 0-100
for link_frame, legend_name in (
    (df_starlink, 'Starlink Burst Lost Packets'),
    (df_vodafone, 'Cellular Burst Lost Packets'),
):
    link_frame['burst_lost_packets'].rename(legend_name).plot(ax=ax, ylim=(0, 100))

ax.set_ylabel('Burst Lost Packets')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=FIG_SIZE)

# Fraction loss rate of both links over time
for link_frame, legend_name in (
    (df_starlink, 'Starlink-fraction_loss_rate'),
    (df_vodafone, 'Vodafone-fraction_loss_rate'),
):
    link_frame['fraction_loss_rate'].rename(legend_name).plot(ax=ax)

ax.set(title='Fraction loss rate over time', ylabel='Fraction loss rate')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=FIG_SIZE)

# Plot each link and remember the color of its trace
df_starlink['rx_rate_mbits'].rename('Starlink-Rx-Rate').plot(ax=ax, ylim=(0, 10))
c_star = ax.lines[-1].get_color()
df_vodafone['rx_rate_mbits'].rename('Vodafone-Rx-Rate').plot(ax=ax, ylim=(0, 10))
c_cell = ax.lines[-1].get_color()

# Mean lines in the color of the matching trace; without an explicit color both
# axhline calls use the same default color and cannot be attributed to a link
ax.axhline(df_starlink['rx_rate_mbits'].mean(), color=c_star,
           linestyle='--', alpha=0.7, label='Starlink Mean')
ax.axhline(df_vodafone['rx_rate_mbits'].mean(), color=c_cell,
           linestyle=':', alpha=0.7, label='Vodafone Mean')

ax.set_title('Rx rate over time')
ax.set_ylabel('Rate [Mbits]')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)

# Plot each link and remember the color of its trace
df_starlink['tx_rate_mbits'].rename('Starlink').plot(ax=ax, ylim=(0, 7))
c_star = ax.lines[-1].get_color()
df_vodafone['tx_rate_mbits'].rename('Cellular').plot(ax=ax, ylim=(0, 7))
c_cell = ax.lines[-1].get_color()

# Mean lines in the color of the matching trace; without an explicit color both
# axhline calls use the same default color and cannot be attributed to a link
ax.axhline(df_starlink['tx_rate_mbits'].mean(), color=c_star,
           linestyle='--', alpha=0.7, label='Starlink Mean')
ax.axhline(df_vodafone['tx_rate_mbits'].mean(), color=c_cell,
           linestyle=':', alpha=0.7, label='Cellular Mean')

ax.set_ylabel('Bitrate [Mbits]')
ax.set_xlabel('Time (HH:MM:SS)')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend(loc='best')
plt.tight_layout()
#plt.savefig("plots/WebRTC-Tx-rate-time-series-comparison.pdf", bbox_inches="tight")
#plt.savefig("plots/WebRTC-Tx-rate-time-series-comparison.png", bbox_inches="tight")
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)

# Starlink trace; the line just added is the last entry of ax.lines
df_starlink['tx_rate_mbits'].rename('Starlink').plot(ax=ax, alpha=0.8, ylim=(0, 7))
star_line = ax.lines[-1]
c_star = star_line.get_color()

# Cellular trace, same approach
df_vodafone['tx_rate_mbits'].rename('Cellular').plot(ax=ax, alpha=0.8, ylim=(0, 7))
cell_line = ax.lines[-1]
c_cell = cell_line.get_color()

# Dash-dotted mean of each link, drawn in the color of its trace
for link_frame, trace_color in ((df_starlink, c_star), (df_vodafone, c_cell)):
    ax.axhline(link_frame['tx_rate_mbits'].mean(),
               color=trace_color, linestyle='-.', alpha=1, linewidth=2, label='Mean')

ax.set(ylabel='Bitrate [Mbits]', xlabel='Time (HH:MM:SS)')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend(loc='best')

plt.tight_layout()
plt.savefig("plots/WebRTC-Tx-rate-time-series-comparison.pdf", bbox_inches="tight")
plt.savefig("plots/WebRTC-Tx-rate-time-series-comparison.png", bbox_inches="tight")
plt.show()

In [None]:
# df_cqi: CQI samples of the D2 router together with their router timestamp
# df_lat: RTT samples from the Vodafone WebRTC trace together with their timestamp
df_cqi = df_D2.loc[:, ['lCqi', 'timestamp_router']].copy()
df_lat = df_vodafone.loc[:, ['rtt_ms', 'timestamp']].copy()

In [None]:
# Common UTC-aware join key "ts" on both frames
df_cqi = df_cqi.assign(ts=pd.to_datetime(df_cqi["timestamp_router"], utc=True))
df_lat = df_lat.assign(ts=pd.to_datetime(df_lat["timestamp"], utc=True))

In [None]:
# merge_asof needs both inputs ordered by the join key
df_cqi = df_cqi.sort_values(by="ts", ascending=True)
df_lat = df_lat.sort_values(by="ts", ascending=True)

In [None]:
# Match each latency sample to the nearest CQI sample (within a tolerance)
df_cqi_vs_latency = pd.merge_asof(
    df_lat, df_cqi,
    on="ts",
    direction="nearest",
    tolerance=pd.Timedelta("250ms")  # adjust to your sampling rates
)

df_cqi_vs_latency = df_cqi_vs_latency.dropna(subset=["lCqi", "rtt_ms"])
df_cqi_vs_latency["lCqi"] = df_cqi_vs_latency["lCqi"].astype(int)
df_cqi_vs_latency = df_cqi_vs_latency[df_cqi_vs_latency["lCqi"].between(1, 15)]

In [None]:
def q(x, p):
    """Return the p-quantile of x (p given as a fraction between 0 and 1)."""
    return x.quantile(p)


# Named aggregations computed for the RTT samples of each CQI value
rtt_aggregations = dict(
    n="size",
    mean="mean",
    median="median",
    p10=lambda samples: q(samples, 0.10),
    p90=lambda samples: q(samples, 0.90),
)

# reindex gives every CQI from 1 to 15 a row, even if it was never observed
rtt_by_cqi = df_cqi_vs_latency.groupby("lCqi")["rtt_ms"]
stats_cq_vs_latency = rtt_by_cqi.agg(**rtt_aggregations).reindex(range(1, 16))

stats_cq_vs_latency

In [None]:
# Per-CQI RTT statistics as plain arrays
x = stats_cq_vs_latency.index.values
mean, p10, p90 = (stats_cq_vs_latency[name].values for name in ("mean", "p10", "p90"))

fig, ax = plt.subplots(figsize=(4, 3), dpi=200)

# Mean RTT per CQI, with the 10th-90th percentile range as a shaded band
ax.plot(x, mean, marker="o", label="Mean RTT")
ax.fill_between(x, p10, p90, alpha=0.2, label="10-90th percentile")

# Logarithmic RTT axis from 10 ms to 10 s, one tick per CQI value
ax.set_yscale("log")
ax.set_ylim(10, 10000)
ax.set_xticks(range(1, 16))
ax.set(xlabel="CQI", ylabel="RTT (ms)")
ax.grid(True, which="both", linestyle="--", alpha=0.3)
ax.legend()
plt.tight_layout()
plt.savefig("plots/WebRTC-CQI-vs-mean-RTT.pdf", bbox_inches="tight")
plt.savefig("plots/WebRTC-CQI-vs-mean-RTT.png", bbox_inches="tight")
plt.show()

In [None]:
# df_cqi: CQI samples of the D2 router together with their router timestamp
# df_bitrate: Tx bitrate samples from the Vodafone WebRTC trace with their timestamp
df_cqi = df_D2.loc[:, ['lCqi', 'timestamp_router']].copy()
df_bitrate = df_vodafone.loc[:, ['tx_rate_mbits', 'timestamp']].copy()

In [None]:
# Common UTC-aware join key "ts" on both frames
df_cqi = df_cqi.assign(ts=pd.to_datetime(df_cqi["timestamp_router"], utc=True))
df_bitrate = df_bitrate.assign(ts=pd.to_datetime(df_bitrate["timestamp"], utc=True))

In [None]:
# merge_asof needs both inputs ordered by the join key
df_cqi = df_cqi.sort_values(by="ts", ascending=True)
df_bitrate = df_bitrate.sort_values(by="ts", ascending=True)

In [None]:
# Match each latency sample to the nearest CQI sample (within a tolerance)
df_cqi_vs_bitrate = pd.merge_asof(
    df_bitrate, df_cqi,
    on="ts",
    direction="nearest",
    tolerance=pd.Timedelta("250ms")  # adjust to your sampling rates
)

df_cqi_vs_bitrate = df_cqi_vs_bitrate.dropna(subset=["lCqi", "tx_rate_mbits"])
df_cqi_vs_bitrate["lCqi"] = df_cqi_vs_bitrate["lCqi"].astype(int)
df_cqi_vs_bitrate = df_cqi_vs_bitrate[df_cqi_vs_bitrate["lCqi"].between(1, 15)]

In [None]:
# Per-CQI summary of the Tx bitrate: sample count, mean, median and the 10th/90th
# percentiles. The quantiles are computed directly with Series.quantile, so this cell
# no longer redefines the helper `q` that an earlier cell already defines.
stats_cqi_vs_bitrate = (
    df_cqi_vs_bitrate.groupby("lCqi")["tx_rate_mbits"]
      .agg(
          n="size",
          mean="mean",
          median="median",
          p10=lambda s: s.quantile(0.10),
          p90=lambda s: s.quantile(0.90),
      )
      # One row for every CQI from 1 to 15, even if it was never observed
      .reindex(range(1, 16))
)

stats_cqi_vs_bitrate

In [None]:
# Per-CQI bitrate statistics as plain arrays
x = stats_cqi_vs_bitrate.index.values
mean, p10, p90 = (stats_cqi_vs_bitrate[name].values for name in ("mean", "p10", "p90"))

fig, ax = plt.subplots(figsize=(4, 3), dpi=200)

# Mean Tx bitrate per CQI, with the 10th-90th percentile range as a shaded band
ax.plot(x, mean, marker="o", label="Tx Bitrate (Mbps)")
ax.fill_between(x, p10, p90, alpha=0.2, label="10-90th percentile")

# Linear y-axis here (the log scale of the RTT plot is deliberately not used)
#ax.set_yscale("log")
#ax.set_ylim(10, 10000)
ax.set_xticks(range(1, 16))
ax.set(xlabel="CQI", ylabel="Tx Bitrate (Mbps)")
ax.grid(True, which="both", linestyle="--", alpha=0.3)
ax.legend()
plt.tight_layout()
plt.savefig("plots/WebRTC-CQI-vs-mean-Tx-Bitrate.pdf", bbox_inches="tight")
plt.savefig("plots/WebRTC-CQI-vs-mean-Tx-Bitrate.png", bbox_inches="tight")
plt.show()

In [None]:
# Per-CQI statistics computed in the earlier cells:
#   stats_rtt - mean/p10/p90 of RTT per CQI
#   stats_tx  - mean/p10/p90 of Tx bitrate per CQI
stats_rtt = stats_cq_vs_latency
stats_tx  = stats_cqi_vs_bitrate

# Shared x values (CQI) and the three curves for each y-axis
x = stats_rtt.index.values
mean_rtt, p10_rtt, p90_rtt = (stats_rtt[name].values for name in ("mean", "p10", "p90"))
mean_tx, p10_tx, p90_tx = (stats_tx[name].values for name in ("mean", "p10", "p90"))

os.makedirs("plots", exist_ok=True)

fig, ax1 = plt.subplots(figsize=(4.5, 3), dpi=200)

# Left y-axis: RTT in red on a log scale (10 ms to 10 s)
ax1.plot(x, mean_rtt, marker="o", color='red', label="RTT")
ax1.fill_between(x, p10_rtt, p90_rtt, color='red', alpha=0.2, label="10-90th")
ax1.set_yscale("log")
ax1.set_ylim(10, 10000)
ax1.set(xlabel="CQI", ylabel="RTT (ms)")
ax1.set_xticks(range(1, 16))
ax1.grid(True, which="both", linestyle="--", alpha=0.3)

# Right y-axis: Tx bitrate on a twin axis sharing the CQI x-axis
ax2 = ax1.twinx()
ax2.plot(x, mean_tx, marker="o", label="Bitrate")
ax2.fill_between(x, p10_tx, p90_tx, alpha=0.2, label="10-90th")
ax2.set_ylim(0,5)
ax2.set_ylabel("Bitrate (Mbps)")

# Single legend built from the handles of both axes
h1, lab1 = ax1.get_legend_handles_labels()
h2, lab2 = ax2.get_legend_handles_labels()
ax1.legend(h1 + h2, lab1 + lab2, loc="lower center", ncols=2)

plt.tight_layout()
plt.savefig("plots/WebRTC-CQI-vs-RTT-and-TxBitrate.pdf", bbox_inches="tight")
plt.savefig("plots/WebRTC-CQI-vs-RTT-and-TxBitrate.png", bbox_inches="tight")
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# Two stacked panels sharing the time axis: a thin categorical strip (radio access
# technology of the D2 router) above the Tx bitrate of both WebRTC links.
fig, (ax_cat, ax) = plt.subplots(
    nrows=2, ncols=1,
    figsize=(10, 4), dpi=200,
    sharex=True,
    gridspec_kw={"height_ratios": [2, 6]}
)

# -------------------------
# Bottom axis: Throughput
# -------------------------
# Remember how many lines exist before plotting so the new line (and its color)
# can be looked up by position afterwards.
n0 = len(ax.lines)
df_starlink['tx_rate_mbits'].rename('Starlink').plot(ax=ax, alpha=0.8, ylim=(0, 8))
c_star = ax.lines[n0].get_color()

n1 = len(ax.lines)
df_vodafone['tx_rate_mbits'].rename('Cellular').plot(ax=ax, alpha=0.8, ylim=(0, 8))
c_cell = ax.lines[n1].get_color()

# Report the mean Tx rate of each link and their ratio
mean_starlink = df_starlink['tx_rate_mbits'].mean()
print("Starlink mean tx rate:", mean_starlink)
mean_vodafone = df_vodafone['tx_rate_mbits'].mean()
print("Vodafone mean tx rate:", mean_vodafone)
print("Starlink higher than Vodafone by factor:", mean_starlink / mean_vodafone)

# Mean lines drawn in the color of the matching trace
ax.axhline(df_starlink['tx_rate_mbits'].mean(),
           color=c_star, linestyle='-.', linewidth=2, alpha=1, label='Mean')
ax.axhline(df_vodafone['tx_rate_mbits'].mean(),
           color=c_cell, linestyle='-.', linewidth=2, alpha=1, label='Mean')

ax.set_ylabel('Bitrate (Mbps)')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend(loc='upper left', ncols=2)

# -------------------------
# Top axis: Radio access (categorical segments, no connectors)
# -------------------------

# Common timeline for alignment: union of the timestamps that have a Tx rate
idx = df_starlink['tx_rate_mbits'].dropna().index.union(
      df_vodafone['tx_rate_mbits'].dropna().index)

# Clean + sort lDataClass, deduplicate timestamps
# (empty strings become missing; for duplicated timestamps the last value wins)
s = (df_D2['lDataClass']
     .astype("string")
     .str.strip()
     .replace("", pd.NA)
     .sort_index())
s = s[~s.index.duplicated(keep="last")]

# Forward-fill onto common timeline
# NOTE(review): this assumes the router index and the WebRTC index are comparable
# (same timezone handling) - confirm.
cls = s.reindex(idx, method="ffill")

# Keep only valid categories (others become missing -> gaps)
valid = ["LTE", "5G NSA", "5G SA"]
cls = cls.where(cls.isin(valid))

# Category colors
color_map = {
    "5G SA": "purple",
    "5G NSA": "red",
    "LTE": "blue",
}

# Map categories to y positions (the order of `valid` sets the row order, bottom to top)
y_map = {name: i for i, name in enumerate(valid)}

# --- contiguous run detection (pandas-safe with NA) ---
# A run starts wherever the value differs from the previous one; comparisons that
# involve NA are treated as a change.
change = cls.ne(cls.shift()).fillna(True)         # boolean Series, no <NA>
starts = np.flatnonzero(change.to_numpy())        # indices where a new run starts
ends = np.r_[starts[1:] - 1, len(cls) - 1]        # run ends

times = cls.index

# Draw each run as a colored horizontal segment
for s_i, e_i in zip(starts, ends):
    v = cls.iloc[s_i]
    if pd.isna(v):
        # Missing category: leave a gap
        continue

    t0 = times[s_i]
    # Extend the segment to the first timestamp of the next run so adjacent runs
    # touch; the last run ends at its own last timestamp.
    t1 = times[e_i + 1] if (e_i + 1) < len(times) else times[e_i]

    ax_cat.hlines(
        y=y_map[v],
        xmin=t0,
        xmax=t1,
        colors=color_map[v],
        linewidth=6,
        alpha=0.85
    )

# One labelled row per category, with half a row of margin above and below
ax_cat.set_ylim(-0.5, len(valid) - 0.5)
ax_cat.set_yticks(range(len(valid)))
ax_cat.set_yticklabels(valid)
#ax_cat.set_ylabel("Network")
ax_cat.grid(True, which='both', linestyle='--', alpha=0.2)
ax_cat.tick_params(axis='x', labelbottom=False)

# Legend for radio access colors (top axis)
# The handles are built, but the legend call below is currently disabled.
handles = [Line2D([0], [0], color=color_map[v], lw=6, label=v) for v in valid]
#ax_cat.legend(handles=handles, loc="lower center", frameon=True, fontsize=8)

# Shared x label only at the bottom
ax.set_xlabel('Time (HH:MM:SS)')

plt.tight_layout()
plt.savefig("plots/WebRTC-Tx-rate-with-radio-access.pdf", bbox_inches="tight")
plt.savefig("plots/WebRTC-Tx-rate-with-radio-access.png", bbox_inches="tight")
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# -------------------------
# Create stacked axes (share x)
#   ax_cat  (top):    radio access class drawn as coloured horizontal segments
#   ax_sinr (middle): SINR from df_D2['lSinr'], plotted with its gaps intact
#   ax_tx   (bottom): tx_rate_mbits for Starlink and cellular
# -------------------------
fig, (ax_cat, ax_sinr, ax_tx) = plt.subplots(
    nrows=3, ncols=1,
    figsize=(40, 10), dpi=200,
    sharex=True,
    gridspec_kw={"height_ratios": [2, 3, 6]}
)

# Common timeline for x-limits (union of throughput indices)
idx = df_starlink['tx_rate_mbits'].dropna().index.union(
      df_vodafone['tx_rate_mbits'].dropna().index)

# -------------------------
# Bottom axis: Throughput
# -------------------------
# len(ax.lines) taken before plotting is the position of the line pandas is
# about to add, so the colour assigned to that line can be read back and
# reused for the matching mean line.
n0 = len(ax_tx.lines)
df_starlink['tx_rate_mbits'].rename('Starlink').plot(ax=ax_tx, alpha=0.8, ylim=(0, 7))
c_star = ax_tx.lines[n0].get_color()

n1 = len(ax_tx.lines)
df_vodafone['tx_rate_mbits'].rename('Cellular').plot(ax=ax_tx, alpha=0.8, ylim=(0, 7))
c_cell = ax_tx.lines[n1].get_color()

ax_tx.axhline(df_starlink['tx_rate_mbits'].mean(),
              color=c_star, linestyle='-.', linewidth=2, alpha=1, label='Starlink mean')
ax_tx.axhline(df_vodafone['tx_rate_mbits'].mean(),
              color=c_cell, linestyle='-.', linewidth=2, alpha=1, label='Cellular mean')

ax_tx.set_ylabel('Bitrate (Mbps)')
ax_tx.grid(True, which='both', linestyle='--', alpha=0.3)
ax_tx.legend(loc='best')

# -------------------------
# Middle axis: SINR (NO forward-fill, keep gaps)
# -------------------------
# FIX: this panel is labelled SINR everywhere (series name, legend, y-label),
# so it must read the 'lSinr' column. It previously plotted
# 'handover_CurrentCellid' under the SINR labels.
sinr = pd.to_numeric(df_D2['lSinr'], errors='coerce').sort_index()
# Keep only the last sample for any repeated timestamp
sinr = sinr[~sinr.index.duplicated(keep="last")]

n_s = len(ax_sinr.lines)
sinr.rename('SINR').plot(ax=ax_sinr, alpha=0.85)
c_sinr = ax_sinr.lines[n_s].get_color()

ax_sinr.axhline(sinr.mean(),
                color=c_sinr, linestyle='-.', linewidth=2, alpha=1, label='SINR mean')

ax_sinr.set_ylabel('SINR (dB)')
ax_sinr.grid(True, which='both', linestyle='--', alpha=0.3)
ax_sinr.legend(loc='best')

# Ensure the SINR panel spans the same time window as throughput
ax_sinr.set_xlim(idx.min(), idx.max())

# -------------------------
# Top axis: Radio access (categorical segments, no connectors)
# -------------------------
# Normalise the labels: strip whitespace and treat empty strings as missing.
s = (df_D2['lDataClass']
     .astype("string")
     .str.strip()
     .replace("", pd.NA)
     .sort_index())
s = s[~s.index.duplicated(keep="last")]

# Forward-fill ONLY for the categorical state timeline (typical for state)
# If you also want gaps here, change method=None and remove ffill.
cls = s.reindex(idx, method="ffill")

# Anything outside the known classes becomes missing and is drawn as a gap
valid = ["LTE", "5G NSA", "5G SA"]
cls = cls.where(cls.isin(valid))

color_map = {"5G SA": "purple", "5G NSA": "red", "LTE": "blue"}
# y position of each class = its position in `valid`
y_map = {name: i for i, name in enumerate(valid)}

# Contiguous-run detection: a run starts wherever the value differs from the
# previous sample; comparisons involving <NA> are filled with True.
change = cls.ne(cls.shift()).fillna(True)
starts = np.flatnonzero(change.to_numpy())
ends = np.r_[starts[1:] - 1, len(cls) - 1]
times = cls.index

for s_i, e_i in zip(starts, ends):
    v = cls.iloc[s_i]
    if pd.isna(v):
        continue  # missing / unknown class: leave a gap

    t0 = times[s_i]
    # Extend the segment to the first timestamp of the next run so adjacent
    # runs touch; the final run stops at its own last timestamp.
    t1 = times[e_i + 1] if (e_i + 1) < len(times) else times[e_i]

    ax_cat.hlines(
        y=y_map[v],
        xmin=t0,
        xmax=t1,
        colors=color_map[v],
        linewidth=6,
        alpha=0.85
    )

ax_cat.set_ylim(-0.5, len(valid) - 0.5)
ax_cat.set_yticks(range(len(valid)))
ax_cat.set_yticklabels(valid)
ax_cat.grid(True, which='both', linestyle='--', alpha=0.2)
ax_cat.tick_params(axis='x', labelbottom=False)

# Proxy artists: hlines carry no labels, so the legend is built by hand
handles = [Line2D([0], [0], color=color_map[v], lw=6, label=v) for v in valid]
ax_cat.legend(handles=handles, loc="upper right", frameon=True, fontsize=8)

# Shared x label only at the bottom
ax_tx.set_xlabel('Time (HH:MM:SS)')

plt.tight_layout()
# plt.savefig("plots/WebRTC-Tx-rate-with-radio-access-and-sinr.pdf", bbox_inches="tight")
# plt.savefig("plots/WebRTC-Tx-rate-with-radio-access-and-sinr.png", bbox_inches="tight")
plt.show()


In [None]:
# Number of df_D2 rows for each distinct lDataClass value.
df_D2[['lDataClass']].value_counts()

In [None]:
# Quick-look strips, one figure per metric. Only the first figure (`ay`,
# the integer radio-access class) gets class names as y tick labels.
mapping = {1: "LTE", 2: "5G NSA", 3: "5G SA"}
strip_size = (40, 2)

ay = df_D2[['lDataClassInt']].plot(figsize=strip_size, style="|", linewidth=0)

# Put ticks only where the classes are, in ascending code order
ticks = sorted(mapping)
ay.set_yticks(ticks)
ay.set_yticklabels([mapping[code] for code in ticks])
ay.grid(True, axis="y", linestyle="--", alpha=0.3)

# Remaining strips; `ax` always refers to the most recently created one
ax = df_D2[['lDataClassInt']].plot(figsize=strip_size, style="|")
ax = df_vodafone[['tx_rate_mbits']].plot(figsize=strip_size, ylim=(0, 7))
for col in ('handover_CurrentCellid', 'lCqi', 'lSinr'):
    ax = df_D2[[col]].plot(figsize=strip_size)

# NOTE: pyplot-level tight_layout acts on the current figure, i.e. the last
# strip created above.
plt.tight_layout()
plt.show()

In [None]:
# Overlaid Tx-bitrate histograms (Starlink vs cellular) on shared bins,
# with median (dashed) and 10th-percentile (dotted) markers in bar colours.
x1 = df_starlink['tx_rate_mbits'].dropna().to_numpy()
x2 = df_vodafone['tx_rate_mbits'].dropna().to_numpy()
x_all = np.concatenate([x1, x2])

# Bin width is in the data's unit (Mbps, matching the x-axis label below)
bin_width = 0.1
bins = np.arange(x_all.min(), x_all.max() + bin_width, bin_width)

fig, ax = plt.subplots(figsize=(4,3), dpi=200)

# ax.hist returns (counts, bin_edges, patches); only the patches are needed
bars1 = ax.hist(x1, bins=bins, alpha=0.5, label='Starlink', density=False)[2]
bars2 = ax.hist(x2, bins=bins, alpha=0.5, label='Cellular', density=False)[2]

# Reuse the bar face colours (RGBA) for the marker lines
c1 = bars1[0].get_facecolor()
c2 = bars2[0].get_facecolor()

# (x position, colour, line style, legend label), in legend order
markers = [
    (np.median(x1), c1, '--', 'Starlink median'),
    (np.median(x2), c2, '--', 'Cellular median'),
    (np.percentile(x1, 10), c1, ':', 'Starlink p10'),
    (np.percentile(x2, 10), c2, ':', 'Cellular p10'),
]
for x_pos, colour, dash, text in markers:
    ax.axvline(x_pos, color=colour, alpha=1.0, linestyle=dash, linewidth=2, label=text)

# Only zoom the view (does NOT change binning or the histogram computation)
ax.set_xlim(0, 6.5)
ax.set_xlabel('Transmission Bitrate (Mbps)')
ax.set_ylabel('Count')
ax.grid(True, linestyle='--', alpha=0.3)
ax.legend(loc='upper left')
plt.tight_layout()
for out_file in ("plots/WebRTC-Tx-Rate-histogram-comparison.pdf",
                 "plots/WebRTC-Tx-Rate-histogram-comparison.png"):
    plt.savefig(out_file, bbox_inches="tight")
plt.show()

In [None]:
# Empirical CDF of Tx bitrate, one step curve per link.
fig, ax = plt.subplots(figsize=(4, 3), dpi=200)

for curve_label, frame in (('Starlink', df_starlink), ('Cellular', df_vodafone)):
    # Sorted non-missing rates are the x values of the ECDF ...
    rates_sorted = np.sort(frame['tx_rate_mbits'].dropna().values)
    # ... and rank / sample size gives the cumulative probability
    ecdf = np.arange(1, len(rates_sorted) + 1) / len(rates_sorted)
    ax.step(rates_sorted, ecdf, where='post', label=curve_label)

ax.set_xlim(0, 6.5)

ax.set_xlabel('Transmission Bitrate (Mbps)')
ax.set_ylabel('Cumulative Probability')
ax.grid(True, linestyle='--', alpha=0.3)
ax.legend()
plt.tight_layout()
for out_file in ("plots/WebRTC-Tx-Rate-ecdf-comparison.pdf",
                 "plots/WebRTC-Tx-Rate-ecdf-comparison.png"):
    plt.savefig(out_file, bbox_inches="tight")
plt.show()

In [None]:
# Summary statistics of the PrimaryBandMHz column.
df_D2['PrimaryBandMHz'].describe()

In [None]:
# How often each distinct PrimaryBandMHz value occurs.
df_D2['PrimaryBandMHz'].value_counts()

In [None]:
# Vodafone Tx rate (left axis) overlaid with the integer radio-access class
# (right axis) on a shared time axis.
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)

# Left axis: Tx rate
tx_rate = df_vodafone['tx_rate_mbits'].rename('Vodafone-Tx-Rate')
tx_rate.plot(ax=ax, ylim=(0, 6), color='tab:red')

ax.set_title('Tx rate over time')
ax.set_ylabel('Rate [Mbits]')
ax.grid(True, which='both', linestyle='--', alpha=0.3)

# Right axis: one df_D2 metric at a time. Alternatives tried here earlier:
# cell_id_num, PrimaryBandMHz (ylim 0-100), speed, lSinr, lCqi (ylim 0-16).
ax2 = ax.twinx()
data_class = df_D2['lDataClassInt'].rename('Vodafone-DataClassInt')
data_class.plot(ax=ax2, ylim=(0, 4), alpha=0.7, color='tab:orange')
ax2.set_ylabel('Vodafone DataClassInt')

# A single legend on the left axis listing the artists of both axes
all_handles, all_labels = [], []
for axis in (ax, ax2):
    axis_handles, axis_labels = axis.get_legend_handles_labels()
    all_handles = all_handles + axis_handles
    all_labels = all_labels + axis_labels
ax.legend(all_handles, all_labels, loc='upper left')

plt.tight_layout()
plt.show()


In [None]:
def plot_series_with_mean(ax, s, label, ylim=None, linestyle_mean='--', alpha=0.9):
    """Plot a pandas Series and a mean line in the SAME color as the series.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    s : pandas.Series
        Series to plot; its mean gives the height of the horizontal line.
    label : str
        Legend label of the series. The mean line is labelled '<label> mean'.
    ylim : tuple or None
        Optional (low, high) y-limits applied to ``ax``.
    linestyle_mean : str
        Line style of the mean line.
    alpha : float
        Opacity of the plotted series. The mean line always uses 0.7.

    Returns
    -------
    The color of the line that was added for the series.
    """
    n0 = len(ax.lines)                 # how many lines before plotting
    # FIX: `alpha` used to be accepted but silently ignored; forward it so
    # the argument actually controls the series opacity.
    s.rename(label).plot(ax=ax, alpha=alpha)
    line = ax.lines[n0]                # the newly-added line
    color = line.get_color()

    if ylim is not None:
        ax.set_ylim(*ylim)

    ax.axhline(s.mean(), color=color, linestyle=linestyle_mean, alpha=0.7,
               label=f'{label} mean')
    ax.legend(loc='upper left')
    ax.grid(True, which='both', linestyle='--', alpha=0.3)
    return color

# ---- stacked figure ----
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(10, 7), dpi=200, sharex=True)

# One entry per panel, top to bottom:
# (series, legend label, y-limits, mean-line style, y-axis label)
panels = [
    (df_D2['lDataClassInt'], 'Vodafone-DataClassInt', (0, 4), ':', 'DataClassInt'),
    (df_vodafone['tx_rate_mbits'], 'Vodafone-Tx-Rate', (0, 6), '--', 'Rate [Mbits]'),
    # NOTE(review): this slot can hold CQI, SINR, speed, band, cell-id, ...;
    # (0, 16) is the y-range used for lCqi elsewhere in this notebook -
    # confirm it is also the intended range for 'speed'.
    (df_D2['speed'], 'Vodafone-Speed', (0, 16), '-.', 'Speed'),
]

for panel_ax, (series, series_label, y_range, mean_style, y_label) in zip(axes, panels):
    plot_series_with_mean(
        panel_ax,
        series,
        label=series_label,
        ylim=y_range,
        linestyle_mean=mean_style,
    )
    panel_ax.set_ylabel(y_label)

# The panels share the x axis, so only the bottom one is labelled
axes[2].set_xlabel('Time (HH:MM:SS)')

plt.tight_layout()
#plt.savefig("plots/vodafone_stacked_timeseries.pdf", bbox_inches="tight")
#plt.savefig("plots/vodafone_stacked_timeseries.png", bbox_inches="tight")
plt.show()

In [None]:
# How often each distinct lDataClass label occurs.
df_D2['lDataClass'].value_counts()

In [None]:
# Dtype, non-null count and memory usage of the lDataClass column.
df_D2['lDataClass'].info()

In [None]:
# Summary statistics of the lDataClass column.
df_D2['lDataClass'].describe()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Tx rate drawn as a line, with the background shaded by the radio-access
# class (lDataClassInt) that was active over each contiguous run of samples.
s = df_D2['lDataClassInt'].dropna()   # categorical over time (index = datetime)
# reindex() without a fill method matches labels exactly: any timestamp of `s`
# that is not present in df_vodafone's index yields NaN in `y`.
y = df_vodafone['tx_rate_mbits'].reindex(s.index)  # numeric to overlay (optional)

fig, ax = plt.subplots(figsize=(10,3), dpi=200)

# Plot numeric series (optional)
ax.plot(y.index, y.values, label='Tx rate [Mbit/s]', alpha=0.9)

# Build contiguous category segments
cat = s.astype('category')
codes = cat.cat.codes.to_numpy()
t = cat.index

# Find segment boundaries (where category changes).
# `change` has len(codes) + 1 entries: True at position 0, True wherever a
# code differs from its predecessor, plus a trailing True sentinel, so each
# consecutive pair in `idx` brackets one run [idx[i], idx[i+1] - 1].
# NOTE(review): `idx` and `color_map` are also assigned, with a different
# meaning, by the radio-access panel cells earlier in this notebook.
change = np.r_[True, codes[1:] != codes[:-1], True]
idx = np.flatnonzero(change)

# Choose colors for categories (tab10 colours, cycling after ten categories)
cats = list(cat.cat.categories)
cmap = plt.get_cmap('tab10')
color_map = {c: cmap(i % 10) for i, c in enumerate(cats)}

# Draw colored spans for each segment.
# A span runs from the first to the LAST sample of its run, so the interval
# between the last sample of one run and the first of the next is unshaded.
for i in range(len(idx)-1):
    start, end = t[idx[i]], t[idx[i+1]-1]
    c = cat.iloc[idx[i]]
    ax.axvspan(start, end, color=color_map[c], alpha=0.15, linewidth=0)

# Legend for categories (optional).
# Explicit handles are passed, so only the category proxies appear; the
# labelled Tx-rate line above is not listed in this legend.
handles = [plt.Line2D([0],[0], color=color_map[c], lw=6, alpha=0.5) for c in cats]
ax.legend(handles, [f'DataClass={c}' for c in cats], loc='upper right', fontsize=8)

ax.set_ylabel('Tx rate')
ax.grid(True, linestyle='--', alpha=0.3)
plt.tight_layout()
plt.show()


In [None]:
# CQI and modem Tx rate from df_D2 on one axis.
fig, ax = plt.subplots(figsize=(16, 4))

# Other df_D2 signals that can be swapped in: lSinr ('D2-SINR'),
# lRsrp ('D2-RSRP'), lRsrq ('D2-RSRQ').
cqi = df_D2['lCqi'].rename('D2-CQI')
# bits per second divided by 1024*1024
tx_rate = df_D2['ltxbitspersecond'].rename('D2-TX-Rate').div(1024*1024)
for series in (cqi, tx_rate):
    series.plot(ax=ax)

ax.set_ylim(0, 25)
ax.set_title('Metrics over time')
ax.grid(True, which='both', linestyle='--', alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Histogram for Tx rate distributions (Starlink vs Vodafone, shared bins)
# NOTE(review): the config cell at the top of the notebook defines
# `fig_size`; confirm that `FIG_SIZE` is defined before this cell runs.
fig, ax = plt.subplots(figsize=FIG_SIZE)

# Common bins: 41 edges over 0-10 Mbit/s, i.e. 40 bins of 0.25 Mbit/s
bins = np.linspace(0, 10, 41)

starlink_tx_rate = df_starlink['tx_rate_mbits']
vodafone_tx_rate = df_vodafone['tx_rate_mbits']

for rates, hist_label in ((starlink_tx_rate, 'Starlink-Tx-Rate'),
                          (vodafone_tx_rate, 'Vodafone-Tx-Rate')):
    ax.hist(rates.dropna(), bins=bins, alpha=0.5, label=hist_label)

# Mean markers; the two lines are told apart by line style
for rates, dash, mean_label in ((starlink_tx_rate, '--', 'Starlink Mean'),
                                (vodafone_tx_rate, ':', 'Vodafone Mean')):
    ax.axvline(rates.mean(), linestyle=dash, alpha=0.7, label=mean_label)

ax.set_title('Histogram of Tx Rate')
ax.set_xlabel('Rate [Mbits]')
ax.set_ylabel('Count')
ax.grid(True, linestyle='--', alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Empirical CDF of Tx rate for both links.
# NOTE(review): the config cell at the top of the notebook defines
# `fig_size`; confirm that `FIG_SIZE` is defined before this cell runs.
fig, ax = plt.subplots(figsize=FIG_SIZE)

for frame, curve_label in ((df_starlink, 'Starlink-Tx-Rate'),
                           (df_vodafone, 'Vodafone-Tx-Rate')):
    # x: sorted non-missing rates; y: rank divided by sample size
    rates_sorted = np.sort(frame['tx_rate_mbits'].dropna().values)
    ecdf = np.arange(1, len(rates_sorted) + 1) / len(rates_sorted)
    ax.step(rates_sorted, ecdf, where='post', label=curve_label)

ax.set_xlim(0, 10)

ax.set_title('eCDF of Tx Rate')
ax.set_xlabel('Tx Rate [Mbits]')
ax.set_ylabel('Empirical CDF')
ax.grid(True, linestyle='--', alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Violin plot of the Tx-rate distribution per link (missing values dropped)
starlink_tx = df_starlink['tx_rate_mbits'].dropna()
vodafone_tx = df_vodafone['tx_rate_mbits'].dropna()

fig, ax = plt.subplots()

positions = [1, 2]
parts = ax.violinplot(
    [starlink_tx, vodafone_tx],
    positions=positions,
    showmeans=True,
    showmedians=False,
    showextrema=True,
)

# One fill colour per violin, in the same order as the data
for violin, fill in zip(parts['bodies'], ('tab:blue', 'tab:orange')):
    violin.set_facecolor(fill)
    violin.set_edgecolor('black')
    violin.set_alpha(0.7)

ax.set_ylim(0, 10)
# Name each violin position on the x axis
ax.set_xticks(positions)
ax.set_xticklabels(['Starlink-Tx-Rate', 'Vodafone-Tx-Rate'])

ax.set_title('Violin Plot of Tx Rate')
ax.set_ylabel('Tx Rate [Mbits]')
ax.grid(True, linestyle='--', alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Summary statistics of the Starlink Tx and Rx rate columns.
df_starlink[['tx_rate_mbits', 'rx_rate_mbits']].describe()

In [None]:
# Summary statistics of the Vodafone Tx and Rx rate columns.
df_vodafone[['tx_rate_mbits', 'rx_rate_mbits']].describe()

In [None]:
# Columns, dtypes and non-null counts of df_starlink.
df_starlink.info()

In [None]:
# Columns, dtypes and non-null counts of df_vodafone.
df_vodafone.info()

In [None]:
# Drop the leading "gps." from column names of both frames
# (so e.g. "gps.lat" becomes "lat").
for frame in (df_starlink, df_vodafone):
    frame.columns = frame.columns.str.removeprefix("gps.")

In [None]:
# df_starlink = df_starlink.between_time('13:00', '15:00')
# df_vodafone = df_vodafone.between_time('13:00', '15:00')

In [None]:
# Two independent figures comparing Starlink and Vodafone over time:
# Tx rate (fig / ax) and round-trip time (fig2 / ax2).
fig, ax = plt.subplots(figsize=(16,4), dpi=200)

# Figure 1: tx rates
df_starlink['tx_rate_mbits'].rename('Starlink-Tx-Rate').plot(ax=ax, ylim=(0, 7), color='tab:blue')
df_vodafone['tx_rate_mbits'].rename('Vodafone-Tx-Rate').plot(ax=ax, ylim=(0, 7), color='tab:red')

# # Means on the tx-rate axis
# ax.axhline(df_starlink['tx_rate_mbits'].mean(),
#            linestyle='--', alpha=0.7, label='Starlink Mean')
# ax.axhline(df_vodafone['tx_rate_mbits'].mean(),
#            linestyle=':', alpha=0.7, label='Vodafone Mean')

ax.set_title('Tx rate over time')
ax.set_ylabel('Rate [Mbps]')
ax.grid(True, which='both', linestyle='--', alpha=0.3)

# Figure 2: round-trip times
fig2, ax2 = plt.subplots(figsize=(16,4), dpi=200)

df_starlink['rtt_ms'].rename('Starlink-RTT').plot(ax=ax2, ylim=(50, 250), alpha=0.7, color='tab:blue')
df_vodafone['rtt_ms'].rename('Vodafone-RTT').plot(ax=ax2, ylim=(50, 250), alpha=0.7, color='tab:red')
ax2.set_ylabel('RTT [ms]')

# One legend per figure, each built from that figure's own artists
handles1, labels1 = ax.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
ax.legend(handles1, labels1, loc='upper left')
ax2.legend(handles2, labels2, loc='upper left')

# FIX: plt.tight_layout() only adjusts the current figure (fig2 at this
# point), which left the tx-rate figure without layout adjustment. Lay out
# both figures explicitly.
fig.tight_layout()
fig2.tight_layout()
plt.show()

In [None]:
# Re-inspect df_starlink (columns, dtypes, non-null counts) at this point.
df_starlink.info()

In [None]:
# Re-inspect df_vodafone (columns, dtypes, non-null counts) at this point.
df_vodafone.info()

In [None]:
# Remove, in place, every row that has a missing value in ANY column.
# This permanently shrinks both frames for all cells that run afterwards.
for frame in (df_starlink, df_vodafone):
    frame.dropna(inplace=True)

In [None]:
# df_starlink.drop(df_starlink[df_starlink['lat'] == 'none'].index, inplace = True)
# df_starlink.drop(df_starlink[df_starlink['lon'] == 'none'].index, inplace = True)

# df_vodafone.drop(df_vodafone[df_vodafone['lat'] == 'none'].index, inplace = True)
# df_vodafone.drop(df_vodafone[df_vodafone['lon'] == 'none'].index, inplace = True)

In [None]:
# Dtypes and non-null counts of the Starlink coordinate columns.
df_starlink[['lat', 'lon']].info()

In [None]:
# Dtypes and non-null counts of the Vodafone coordinate columns.
df_vodafone[['lat', 'lon']].info()

In [None]:
# Bounding box of the Starlink track: min then max, longitude first
for coord in ('lon', 'lat'):
    values = df_starlink[coord]
    print(values.min())
    print(values.max())

In [None]:
# network_maps.create_coverage_squares_with_metric(df_starlink, 'WebRTC-Starlink', 'auto', '2024-11-07', 'rtt_ms', lon_step = 0.0002, lat_step = 0.0001)
# network_maps.create_coverage_squares_with_metric(df_starlink, 'WebRTC-Starlink', 'auto', '2024-11-07', 'jitter_ms', lon_step = 0.0002, lat_step = 0.0001)
# network_maps.create_coverage_squares_with_metric(df_starlink, 'WebRTC-Starlink', 'auto', '2024-11-07', 'fraction_loss_rate', lon_step = 0.0002, lat_step = 0.0001)

In [None]:
# network_maps.create_coverage_squares_with_metric(df_vodafone, 'WebRTC-Vodafone', 'auto', '2024-11-07', 'rtt_ms', lon_step = 0.0002, lat_step = 0.0001)
# network_maps.create_coverage_squares_with_metric(df_vodafone, 'WebRTC-Vodafone', 'auto', '2024-11-07', 'jitter_ms', lon_step = 0.0002, lat_step = 0.0001)
# network_maps.create_coverage_squares_with_metric(df_vodafone, 'WebRTC-Vodafone', 'auto', '2024-11-07', 'fraction_loss_rate', lon_step = 0.0002, lat_step = 0.0001)

In [None]:
import folium
import pandas as pd

import matplotlib as mpl
from matplotlib import colors as mcolors

def get_color_based_on_score_adaptive(score, min_val, max_val, cmap_name="RdYlGn_r"):
    """
    Get a color based on the normalized score using a colormap.

    The score is scaled linearly from [min_val, max_val] to [0, 1], clipped
    to that range, and looked up in the named matplotlib colormap.

    Parameters:
        score (float): The metric value. Missing values (NaN/None/NA) map to gray.
        min_val (float): Minimum value of the metric (maps to 0.0).
        max_val (float): Maximum value of the metric (maps to 1.0).
        cmap_name (str): Name of the colormap (default: 'RdYlGn_r').

    Returns:
        str: Hex color code.
    """
    if pd.isna(score):
        return "#808080"  # neutral gray for NaNs

    if max_val == min_val:  # Avoid division by zero
        normalized_score = 0.5  # Default to middle color
    else:
        normalized_score = (score - min_val) / (max_val - min_val)

    # Clip to [0, 1] so out-of-range scores take the colormap's end colors
    normalized_score = max(0.0, min(1.0, normalized_score))

    # Colormap registry lookup; calling the colormap returns an RGBA tuple
    colormap = mpl.colormaps[cmap_name]
    rgba_color = colormap(normalized_score)
    return mcolors.to_hex(rgba_color)


def create_metric_map(
    df,
    lat_col="lat",
    lon_col="lon",
    value_col="metric",
    vmin=None,
    vmax=None,
    cmap_name="RdYlGn_r",
    map_file="metric_map.html",
    zoom_start=12,
):
    """
    Create an HTML map with one circle marker per row, colored by a numeric
    metric using a matplotlib colormap.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain latitude, longitude, and metric columns.
    lat_col : str
        Name of latitude column in df.
    lon_col : str
        Name of longitude column in df.
    value_col : str
        Name of metric/value column in df (e.g. 'latency_ms', 'throughput').
    vmin : float or None
        Lower bound of the color scale. If None, taken from df[value_col].min().
    vmax : float or None
        Upper bound of the color scale. If None, taken from df[value_col].max().
    cmap_name : str
        Matplotlib colormap name (e.g. 'viridis', 'plasma', 'RdYlGn', 'RdYlGn_r').
    map_file : str
        Output HTML filename. The map is written to ./HTML-Maps/<map_file>;
        the directory is created if it does not exist.
    zoom_start : int
        Initial zoom level for the map.

    Raises
    ------
    ValueError
        If no row has latitude, longitude and value all present.
    """

    # Drop rows with missing coordinates or values
    df_plot = df.dropna(subset=[lat_col, lon_col, value_col])
    if df_plot.empty:
        raise ValueError("No valid rows to plot (check NaNs in lat/lon/value).")

    # Center the map on the mean position of the plotted points
    center_lat = df_plot[lat_col].mean()
    center_lon = df_plot[lon_col].mean()

    # Base map
    m = folium.Map(location=[center_lat, center_lon],
                   zoom_start=zoom_start,
                   tiles="OpenStreetMap")

    # Color-scale range defaults to the data range
    if vmin is None:
        vmin = df_plot[value_col].min()
    if vmax is None:
        vmax = df_plot[value_col].max()

    # Add points; iterating the three columns directly avoids building a
    # Series object per row as iterrows() does
    for lat, lon, val in zip(df_plot[lat_col], df_plot[lon_col], df_plot[value_col]):
        color = get_color_based_on_score_adaptive(val, vmin, vmax, cmap_name=cmap_name)

        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.8,
            popup=f"{value_col}: {val:.2f}",
        ).add_to(m)

    # FIX: create the output directory first (saving used to fail when
    # ./HTML-Maps was missing) and report the path that is actually written.
    out_path = f'./HTML-Maps/{map_file}'
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    m.save(out_path)  # Save the map to an HTML file
    print(f"Map saved to {out_path}")

In [None]:
# Throughput
# "RdYlGn" → low = red, high = green
# Latency
# "RdYlGn_r" → low = green, high = red

In [None]:
# create_metric_map(
#     df_starlink,
#     lat_col="lat",
#     lon_col="lon",
#     value_col="rtt_ms",
#     vmin=0,
#     vmax=200,
#     cmap_name="RdYlGn_r",
#     map_file="starlink_latency_0_200_map.html",
#     zoom_start=12,
# )

In [None]:
# create_metric_map(
#     df_vodafone,
#     lat_col="lat",
#     lon_col="lon",
#     value_col="rtt_ms",
#     vmin=0,
#     vmax=200,
#     cmap_name="RdYlGn_r",
#     map_file="vodafone_latency_0_200_map.html",
#     zoom_start=12,
# )

In [None]:
# create_metric_map(
#     df_starlink,
#     lat_col="lat",
#     lon_col="lon",
#     value_col="tx_rate_mbits",
#     vmin=0,
#     vmax=6,
#     cmap_name="RdYlGn",
#     map_file="starlink_tx_rate_0_6_map.html",
#     zoom_start=12,
# )

In [None]:
# create_metric_map(
#     df_vodafone,
#     lat_col="lat",
#     lon_col="lon",
#     value_col="tx_rate_mbits",
#     vmin=0,
#     vmax=6,
#     cmap_name="RdYlGn",
#     map_file="vodafone_tx_rate_0_6_map.html",
#     zoom_start=12,
# )