In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

import network_maps
import preprocess_mikrotik_starlink

# Disable pandas' display truncation: None removes the column and row limits,
# so displaying a large frame prints it in full.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
# Figure resolution for every matplotlib figure in this notebook.
plt.rcParams["figure.dpi"] = 200
# Shared figsize passed explicitly to the pandas plot calls below.
fig_size = (8, 4)

In [None]:
# Load every '.gzip' parquet file found in the listed directories into one DataFrame.

# List of directories to iterate through
data_dirs = [
    'data/2025-05-06-starlink/',
]

# Columns of interest. Not applied by default (all columns are read); pass
# `columns=starlink_selected_columns` to pd.read_parquet below to load only these.
starlink_selected_columns = [
    '@timestamp',
    'dish_status.state',
    'dish_status.pop_ping_drop_rate',

    'dish_status.downlink_throughput_bps',
    'dish_status.uplink_throughput_bps',
    'dish_status.pop_ping_latency_ms',

    'dish_status.fraction_obstructed',
    'dish_status.currently_obstructed',
    'dish_status.is_snr_above_noise_floor',

    'dish_status.latitude',
    'dish_status.longitude',
    'dish_status.altitude',

    'dish_usage.download_usage',
    'dish_usage.upload_usage',
    ]

df_list = []

for data_dir in data_dirs:
    print(f"Processing directory: {data_dir}")
    # Sorted so the files are concatenated in file-name order.
    list_of_files = sorted(os.listdir(data_dir))
    num_files = len(list_of_files)  # counts every directory entry, not only '.gzip' files
    print(f'Number of files in {data_dir}: {num_files}')

    for file in list_of_files:
        # Skipping files that do not end with ".gzip"
        if not file.endswith(".gzip"):
            continue
        #temp_df = pd.read_parquet(os.path.join(data_dir, file), columns=starlink_selected_columns)
        temp_df = pd.read_parquet(os.path.join(data_dir, file)) # alternatively look at all columns
        df_list.append(temp_df)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with the same exception type but say which directories were searched.
if not df_list:
    raise ValueError(f"No '.gzip' parquet files found in: {data_dirs}")

# Concatenate all DataFrames (each file's original index is kept)
#df = pd.concat(df_list, ignore_index=True)
df = pd.concat(df_list)

print("Final DataFrame shape:", df.shape)
df.info()
df.head()

In [None]:
# Run the project's Starlink MQTT preprocessing and rebind `df` to its result.
# NOTE(review): the helper's effect is not visible in this notebook; later cells
# filter on a 'timestamp' column, so presumably it is created here — confirm.
df = preprocess_mikrotik_starlink.preprocess_data_starlink_mqtt(df)

In [None]:
# Pass the frame through the project's compress_data helper and rebind `df`.
# NOTE(review): presumably downcasts dtypes to save memory — confirm in the module.
df = preprocess_mikrotik_starlink.compress_data(df)

In [None]:
# Preview the first rows after preprocessing and compression.
df.head()

In [None]:
# List the column labels available after preprocessing.
df.columns

In [None]:
# Summary statistics for the PoP ping latency column.
df[['dish_status.pop_ping_latency_ms']].describe(include='all')

In [None]:
# Line plot of the latency column against the frame's index.
df['dish_status.pop_ping_latency_ms'].plot(figsize=fig_size, style='.-', title='Starlink Latency (ms)')

In [None]:
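# Optional (disabled): restrict the analysis to a short time window.
# Note: uncommenting these lines overwrites `df` with only the rows in the window.
# start_date = '2025-02-13 11:09:30'
# end_date = '2025-02-13 11:10:00'
# mask = (df['timestamp'] > start_date) & (df['timestamp'] <= end_date)
# df = df.loc[mask]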

In [None]:
# Same latency plot as before; it only differs if the optional time-window
# filter in the preceding cell has been enabled.
df['dish_status.pop_ping_latency_ms'].plot(figsize=fig_size, style='.-', title='Starlink Latency (ms)')

In [None]:
# Distribution of the latency values (pandas' default histogram binning).
df['dish_status.pop_ping_latency_ms'].plot(kind='hist', title="Starlink Latency")

In [None]:
# Summary of the SNR-above-noise-floor flag.
df['dish_status.is_snr_above_noise_floor'].describe()

In [None]:
# Plot the SNR-above-noise-floor flag over the index.
# Uses title= (as the other plots do): a Series plot draws no legend by default,
# so a caption passed as label= is never shown.
df['dish_status.is_snr_above_noise_floor'].plot(figsize=fig_size, style='.-', title='Starlink is_snr_above_noise_floor')

In [None]:
# Summary of the SNR-above-noise-floor flag (same call as the earlier cell).
df['dish_status.is_snr_above_noise_floor'].describe()

In [None]:
# Plot the SNR flag from a one-column DataFrame, so the legend shows the column name.
# The caption goes in title=: for DataFrame plots the legend labels come from
# the column names, so a label= keyword has no visible effect.
df[['dish_status.is_snr_above_noise_floor']].plot(figsize=fig_size, style='.-', title='Starlink is_snr_above_noise_floor')

In [None]:
# Downlink and uplink throughput, scaled to Mbps to match the plot title.
# The columns are named *_bps (presumably bits per second). Mbps is an SI unit,
# 10**6 bit/s, so divide by 1_000_000; dividing by 1024*1024 would give Mibit/s
# and under-report the values by about 4.6 %.
df[['dish_status.downlink_throughput_bps', 'dish_status.uplink_throughput_bps']].div(1_000_000).plot(figsize=fig_size, style='.-', title='Starlink DL and UL Throughput Mbps')

In [None]:
# Download and upload usage counters, divided by 1024*1024.
# NOTE(review): the title says "Throughput MBps" but the columns are usage
# counters whose units are not visible here (presumably bytes, giving MiB) —
# confirm the units and adjust the title.
df[['dish_usage.download_usage', 'dish_usage.upload_usage']].div(1024*1024).plot(figsize=fig_size, style='.-', title='Starlink DL and UL Throughput MBps')

In [None]:
# Plot the fraction_obstructed column over the index.
df['dish_status.fraction_obstructed'].plot(figsize=fig_size, style='.-', title='Starlink fraction_obstructed')

In [None]:
# Plot the currently_obstructed flag over the index.
df['dish_status.currently_obstructed'].plot(figsize=fig_size, style='.-', title='Starlink currently_obstructed')

In [None]:
# Copy the dish coordinates into short 'lat' / 'lon' columns.
# NOTE(review): presumably the names network_maps expects — confirm.
df['lat'], df['lon'] = df['dish_status.latitude'], df['dish_status.longitude']

In [None]:
# Rows where the dish reported itself as currently obstructed.
df_obstructed = df.loc[df['dish_status.currently_obstructed'] == 1]

In [None]:
# Row count and dtypes of the obstructed subset.
df_obstructed.info()

In [None]:
# First rows of the obstructed subset.
df_obstructed.head()

In [None]:
# Two one-minute windows inspected individually below:
#   11:17:00 - 11:18:00
#   14:51:00 - 14:52:00
# Each window excludes its start and includes its end (start < timestamp <= end).
# Note: the rows are taken from the full `df`, not from `df_obstructed`.

start_date, end_date = '2025-05-06 11:17:00', '2025-05-06 11:18:00'
mask = df['timestamp'].between(start_date, end_date, inclusive='right')
df_obstructed_part_1 = df[mask]

start_date, end_date = '2025-05-06 14:51:00', '2025-05-06 14:52:00'
mask = df['timestamp'].between(start_date, end_date, inclusive='right')
df_obstructed_part_2 = df[mask]

In [None]:
# Latency and SNR flag for the first time window.
df_obstructed_part_1['dish_status.pop_ping_latency_ms'].plot(figsize=fig_size, style='.-', title='Starlink Latency (ms)')
# title= rather than label=: DataFrame plots take their legend labels from the
# column names, so a caption passed as label= is never displayed.
df_obstructed_part_1[['dish_status.is_snr_above_noise_floor']].plot(figsize=fig_size, style='.-', title='Starlink is_snr_above_noise_floor')

In [None]:
# Latency and SNR flag for the second time window.
df_obstructed_part_2['dish_status.pop_ping_latency_ms'].plot(figsize=fig_size, style='.-', title='Starlink Latency (ms)')
# title= rather than label=: DataFrame plots take their legend labels from the
# column names, so a caption passed as label= is never displayed.
df_obstructed_part_2[['dish_status.is_snr_above_noise_floor']].plot(figsize=fig_size, style='.-', title='Starlink is_snr_above_noise_floor')

In [None]:
# Rows where the SNR-above-noise-floor flag equals 1, followed by a summary.
df_high_snr = df.loc[df['dish_status.is_snr_above_noise_floor'] == 1]
df_high_snr.info()

In [None]:
# Rows where the SNR-above-noise-floor flag equals 0.
df_low_snr = df.loc[df['dish_status.is_snr_above_noise_floor'] == 0]

In [None]:
# Row count and dtypes of the low-SNR subset.
df_low_snr.info()

In [None]:
# First rows of the low-SNR subset.
df_low_snr.head()

In [None]:
# Build one map for the obstructed rows and one for the low-SNR rows using the
# project's network_maps helper.
# NOTE(review): the helper is not visible here — presumably the second argument
# is the map/output name and the third is the column used to style the markers;
# it likely reads the 'lat'/'lon' columns added earlier. Confirm in network_maps.
network_maps.get_map_CircleMarker(df_obstructed, 'Starlink-obstructed-2025-05-06', 'dish_status.pop_ping_latency_ms')
network_maps.get_map_CircleMarker(df_low_snr, 'Starlink-low-snr-2025-05-06', 'dish_status.pop_ping_latency_ms')

In [None]:
# Frame summary before the latency filter in the next cell.
df.info()

In [None]:
# Keep only the rows whose pop_ping_latency_ms value is greater than 50.
# Rebinds `df` instead of using inplace=True: the resulting frame has the same
# contents, and the filter becomes an ordinary expression that can be chained.
# From this cell on, `df` no longer contains the lower-latency rows.
df = df.query("`dish_status.pop_ping_latency_ms` > 50")

In [None]:
# Frame summary after the latency filter, for comparison with the summary above.
df.info()