In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import sys
import gc
import shutil
from tqdm.auto import tqdm

# Locate the directory two levels above the current working directory and
# expose its "src" sub-folder to the import system.
cwd = os.getcwd()
parent_dir = os.path.dirname(cwd)
base_dir = os.path.dirname(parent_dir)
src_dir = f"{base_dir}/src"

# Prepend (rather than append) so modules in src_dir take precedence on import.
sys.path.insert(0, src_dir)

from data_analysis import data_processing, load_data, convert_frames, correct_sensor_bias, load_octomag_format, load_navion_format, load_raw_set_of_pkls, plot_positions, apply_navion_transform

In [None]:
# We run this analysis solely on the Navion dataset.
# NOTE: despite its name, data_dir holds the path of the training pickle *file*
# (it is passed to pd.read_pickle and split with os.path.splitext further down).
data_dir = base_dir + "/data/navion_data/split_dataset/training_data.pkl"

In [None]:
# Load the pickled training set and run it through apply_navion_transform.
# Unpickling can execute arbitrary code, so only load files from a trusted source.
# NOTE(review): the transform presumably adds the x_trans / y_trans / z_trans
# columns used below - confirm in data_analysis.
data = apply_navion_transform(pd.read_pickle(data_dir))

plot_positions(
    data,
    x="x_trans",
    y="y_trans",
    z="z_trans",
)

## Divide based on cylindrical distances

In [None]:
# Work on a copy so the loaded frame stays untouched, then add the distance of
# every sample from the z-axis: sqrt(x_trans^2 + y_trans^2).
data_cyl = data.copy()
squared_radius = data_cyl['x_trans']**2 + data_cyl['y_trans']**2
data_cyl['cyl_dist'] = np.sqrt(squared_radius)

In [None]:
# Number of radial segments to split the samples into
num_segments = 3

# Interior quantile levels only (0% and 100% are dropped);
# for num_segments = 3 this is [1/3, 2/3].
qs = np.linspace(0, 1, num_segments + 1)[1:-1]

# Radii at those quantile levels: the boundaries between neighbouring segments
radii = data_cyl['cyl_dist']
edges = np.quantile(radii, qs, method="linear")

# Segment index in 0..num_segments-1:
#   0    -> r < edges[0]
#   1    -> edges[0] <= r < edges[1]
#   ...
#   last -> r >= edges[-1]
data_cyl["segment_idx"] = np.digitize(radii, edges, right=False)

# Number of samples per segment, ordered by segment index
segment_counts = data_cyl["segment_idx"].value_counts().sort_index()
print(segment_counts)

In [None]:
# Plot the transformed positions, passing segment_idx as column_colored
# (presumably colours each point by its cylindrical segment).
plot_positions(
    data_cyl,
    x="x_trans",
    y="y_trans",
    z="z_trans",
    column_colored="segment_idx",
)

## Divide based on y distance

In [None]:
# Independent deep copy of the loaded frame for the y-based segmentation
data_lin = data.copy(deep=True)

In [None]:
# Number of segments along y
num_segments = 4

# Interior quantile levels only (0% and 100% are dropped);
# for num_segments = 4 this is [1/4, 2/4, 3/4].
qs = np.linspace(0, 1, num_segments + 1)[1:-1]

# NOTE(review): the split uses the 'y' column, whereas the plots and the
# cylindrical split use 'y_trans' - confirm 'y' is the intended coordinate.
y_values = data_lin['y']

# y values at those quantile levels: the boundaries between neighbouring segments
edges = np.quantile(y_values, qs, method="linear")

# Segment index in 0..num_segments-1:
#   0    -> y < edges[0]
#   1    -> edges[0] <= y < edges[1]
#   ...
#   last -> y >= edges[-1]
data_lin["segment_idx"] = np.digitize(y_values, edges, right=False)

# Number of samples per segment, ordered by segment index
segment_counts = data_lin["segment_idx"].value_counts().sort_index()
print(segment_counts)

In [None]:
# Plot the transformed positions, passing segment_idx as column_colored
# (presumably colours each point by its y-based segment).
plot_positions(
    data_lin,
    x="x_trans",
    y="y_trans",
    z="z_trans",
    column_colored="segment_idx",
)

## Store

In [None]:
# Source pickle path without its extension; used as the prefix of every
# per-segment output file.
file_name = os.path.splitext(data_dir)[0]

def store_segmented_data(data, segment_type, file_name, columns=None):
    """Write every segment of ``data`` to its own pickle file.

    Rows are grouped by the ``segment_idx`` column of ``data``. Each group is
    saved, with a fresh 0..n-1 index, to
    ``{file_name}_{segment_type}_{segment}.pkl``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to split; must contain a ``segment_idx`` column.
    segment_type : str
        Tag inserted into the output file names (e.g. ``"cyl"`` or ``"lin"``).
    file_name : str
        Output path prefix, without extension.
    columns : sequence of str, optional
        Columns to keep in the stored files. By default every column of
        ``data`` is stored, including ``segment_idx``.

    Raises
    ------
    KeyError
        If ``data`` has no ``segment_idx`` column, or a requested column is
        missing.
    """
    if "segment_idx" not in data.columns:
        raise KeyError("data must contain a 'segment_idx' column")

    # Always read from the frame that was passed in. Relying on a notebook-level
    # variable here would store the same segments regardless of the argument.
    for segment in data["segment_idx"].unique():
        df_segment = data[data["segment_idx"] == segment]
        if columns is not None:
            df_segment = df_segment[list(columns)]
        df_segment = df_segment.reset_index(drop=True)

        segment_file_path = f"{file_name}_{segment_type}_{segment}.pkl"

        df_segment.to_pickle(segment_file_path)
        print(f"Segment {segment} data stored at: {segment_file_path}")

In [None]:
# Cylindrical segmentation: pass data_cyl with the "cyl" file tag
store_segmented_data(
    data=data_cyl,
    segment_type="cyl",
    file_name=file_name,
)

In [None]:
# Linear (y-based) segmentation: pass data_lin with the "lin" file tag
store_segmented_data(
    data=data_lin,
    segment_type="lin",
    file_name=file_name,
)