# Geophysical Navigation version 0.2.0

## Data Cleanup

This version switches over to a magnetic- and gravity-anomaly-based navigation scheme. I have several datasets recorded on a cell phone, stored under `data/raw` (one sub-directory per recording). This notebook cleans up and time-synchronizes that data, writing the results to `data/cleaned`.

In [2]:
import pandas as pd
import os
from matplotlib import pyplot as plt
from cartopy import crs as ccrs
from cartopy.io import img_tiles as cimgt


# Directory scanned for raw recordings; each sub-directory is treated as one dataset.
base_dir = os.path.join(*("data", "raw"))

In [3]:
def _load_sensor_1hz(dataset_path: str, filename: str, axis_prefix: str = "") -> pd.DataFrame:
    """
    Load one sensor CSV and average it into 1-second bins.

    Parameters
    ----------
    dataset_path : str
        Directory that contains the recording's CSV files.
    filename : str
        Name of the CSV file to read; its first column becomes the time index.
    axis_prefix : str, optional
        If non-empty, the ``x``/``y``/``z`` columns are renamed to
        ``<axis_prefix>_x`` etc. so they stay distinguishable after merging.

    Returns
    -------
    pd.DataFrame
        The sensor's columns (without ``seconds_elapsed``) averaged per
        second and indexed by UTC timestamps.
    """
    frame = pd.read_csv(os.path.join(dataset_path, filename), index_col=0)
    frame.index = pd.to_datetime(frame.index, utc=True)
    frame = frame.drop(columns=["seconds_elapsed"])
    if axis_prefix:
        frame = frame.rename(columns={axis: f"{axis_prefix}_{axis}" for axis in ("x", "y", "z")})
    return frame.resample("1s").mean()


def clean_phone_data(dataset_path: str) -> pd.DataFrame:
    """
    Clean the sensor logger app data found in the given dataset directory.

    Every sensor file is read, indexed by UTC time, averaged to 1 Hz and then
    merged into a single table. Seconds for which any column has no value are
    dropped.

    Parameters
    ----------
    dataset_path : str
        Path to the directory holding the recording's CSV files
        (``Accelerometer.csv``, ``Gyroscope.csv``, ``Magnetometer.csv``,
        ``Barometer.csv``, ``Gravity.csv``, ``Orientation.csv`` and either
        ``LocationGps.csv`` or ``Location.csv``).

    Returns
    -------
    pd.DataFrame
        One row per second with the location, orientation, accelerometer
        (``acc_*``), gyroscope (``gyro_*``), magnetometer (``mag_*``),
        barometer and gravity (``grav_*``) columns, in that order.

    Raises
    ------
    AssertionError
        If ``dataset_path`` or one of the required CSV files does not exist.
    """
    assert os.path.exists(dataset_path), f"File {dataset_path} does not exist."

    # Prefer the GPS-only location log and fall back to the generic one.
    location_file = "LocationGps.csv"
    if not os.path.exists(os.path.join(dataset_path, location_file)):
        location_file = "Location.csv"

    # (file name, prefix for the x/y/z columns) in the order the columns
    # should appear in the merged table.
    sensors = [
        (location_file, ""),
        ("Orientation.csv", ""),
        ("Accelerometer.csv", "acc"),
        ("Gyroscope.csv", "gyro"),
        ("Magnetometer.csv", "mag"),
        ("Barometer.csv", ""),
        ("Gravity.csv", "grav"),
    ]

    # Assert the needed .csv files exist before doing any expensive reads.
    for filename, _ in sensors:
        assert os.path.exists(os.path.join(dataset_path, filename)), f"{filename} does not exist."

    # Each sensor is averaged to 1 Hz *before* merging. An outer join at the
    # native sample rates creates a row for every distinct timestamp of every
    # sensor, and duplicate timestamps would multiply rows and skew the
    # per-second means; joining the 1 Hz tables avoids both.
    data = None
    for filename, axis_prefix in sensors:
        sensor_1hz = _load_sensor_1hz(dataset_path, filename, axis_prefix)
        if data is None:
            data = sensor_1hz
        else:
            data = data.merge(sensor_1hz, left_index=True, right_index=True, how="outer")

    # Keep only the seconds for which every sensor delivered a value.
    return data.dropna()

In [7]:
# Clean every recording under base_dir, save it as CSV and render its route map.
# Sorted so the processing order (and the log below) is reproducible.
datasets = sorted(os.listdir(base_dir))
cleaned_dir = os.path.join("data", "cleaned")
os.makedirs(cleaned_dir, exist_ok=True)

# The tile source does not depend on the dataset, so build it once.
request = cimgt.GoogleTiles()
map_padding_deg = 0.1  # margin added on every side of the route, in degrees
tile_zoom = 10         # zoom level requested from the tile source

for dataset in datasets:
    dataset_path = os.path.join(base_dir, dataset)
    if not os.path.isdir(dataset_path):
        print(f"Skipping {dataset}, not a directory.")
        # Nothing was cleaned for this entry, so there is no route to plot.
        continue

    cleaned_data = clean_phone_data(dataset_path)
    cleaned_data.to_csv(os.path.join(cleaned_dir, f"{dataset}.csv"))
    print(f"Cleaned data for {dataset} saved.")

    if cleaned_data.empty:
        # An empty frame has no bounds to build a map extent from.
        print(f"No rows left after cleaning {dataset}, skipping route map.")
        continue

    # Plot the route on a map
    # Get the route's bounds and pad them by a fixed margin (in degrees)
    max_lat = cleaned_data["latitude"].max() + map_padding_deg
    min_lat = cleaned_data["latitude"].min() - map_padding_deg
    max_lon = cleaned_data["longitude"].max() + map_padding_deg
    min_lon = cleaned_data["longitude"].min() - map_padding_deg
    extent = [min_lon, max_lon, min_lat, max_lat]

    fig, ax = plt.subplots(subplot_kw={"projection": request.crs})
    ax.set_extent(extent)
    ax.add_image(request, tile_zoom)
    ax.scatter(
        cleaned_data["longitude"],
        cleaned_data["latitude"],
        s=0.5,
        color="red",
        transform=ccrs.PlateCarree(),
    )
    #ax.set_title(f"Route for {dataset}")
    fig.savefig(os.path.join(cleaned_dir, f"{dataset}_route.png"), dpi=500)
    # Close explicitly so figures do not pile up in memory across datasets.
    plt.close(fig)

Cleaned data for 2023-08-09_163741 saved.
Cleaned data for 2023-08-04_214758 saved.
Cleaned data for 2025-03-01_150426 saved.
Cleaned data for 2024-06-20_165550 saved.
Cleaned data for 2025-03-01_164639 saved.
Cleaned data for 2023-08-06_144805 saved.
Cleaned data for 2023-08-09_124742 saved.
