# Deep Water Bathymetric Particle Filter Experiment

This notebook runs an experiment that tests the ability of the particle filter to navigate using deep-water bathymetry over long durations.

## Data set preparation

First, we need to process the .m77t files in `source_data` into our database format.

In [1]:
from src.geophysical import m77t_toolbox as tbx
from src.geophysical import db_tools as db
import os
import pandas as pd

In [None]:
# Run the MGD77 processing helper on the ./test directory; its two return values are bound to d and n.
# NOTE(review): the text above talks about `source_data`, but this call points at ./test — confirm the intended path.
d, n = tbx.process_mgd77('./test')

In [None]:
# Preview the first rows of the first element of d (presumably a DataFrame — confirm).
d[0].head()

In [None]:
# Split and validate the first processed dataset, requesting the single data-type group ["DG"].
splits = tbx.split_and_validate_dataset(d[0], data_types=[["DG"]])

In [None]:
# Display the value returned by split_and_validate_dataset.
splits

### Preprocess tracklines into sql/df format

In [None]:
# Make sure the directory that holds the database files is present.
if not os.path.exists(".db"):
    os.mkdir(".db")

# Names of the tables already stored in the trackline database
# (nothing has been stored yet when the database file is absent).
tables = db.get_tables(".db/tracklines.db") if os.path.exists(".db/tracklines.db") else []

source_data_location = "./source_data/"
# Visit every .m77t file below the source directory and store the ones whose
# base name is not already a table in the database.
for dirpath, _subdirs, filenames in os.walk(source_data_location):
    for fname in filenames:
        if not fname.endswith(".m77t"):
            continue
        table_name = os.path.splitext(fname)[0]
        if table_name in tables:
            print("Skipping file: " + fname + " (already processed)")
            continue
        print("Processing file: " + fname)
        raw = pd.read_csv(os.path.join(dirpath, fname), delimiter="\t", header=0)
        tbx.save_dataset(
            [tbx.m77t_to_df(raw)],
            [table_name],
            output_location=".db",
            output_format="db",
            dataset_name="tracklines",
        )

### Parse the raw data into tracklines of continuous data collections

In [None]:
# `pdset` is only imported further down in this notebook; import it here so
# this cell also works on a fresh kernel (Restart Kernel -> Run All).
import src.process_dataset as pdset

# Segmentation thresholds.
# NOTE(review): these values are not passed to the parser — the matching
# arguments are commented out in the call below — so they currently have no
# effect. Confirm whether they should be re-enabled.
max_time = 10  # minutes
max_delta_t = 2  # minutes between points
min_duration = 60  # minutes, minimum duration for a useful trackline

# Parse the trackline database for each of the listed data-type combinations.
data, names = pdset.parse_tracklines_from_db(
    ".db/tracklines.db",
    # max_time,
    # max_delta_t,
    # min_duration,
    data_types=[
        "bathy",
        "mag",
        "grav",
        ["bathy", "mag"],
        ["bathy", "grav"],
        ["grav", "mag"],
        ["bathy", "grav", "mag"],
    ],
)
# Save the parsed data to the database
pdset.save_dataset(data, names, output_location=".db", output_format="db", dataset_name="parsed")
# summary = pdset.get_parsed_data_summary(data, names)

## Simulation parameters verification

First we need to tune the particle filter propagation noise to be similar to that of a marine-grade inertial navigation system. A low-end marine-grade INS should have a drift of 1 nautical mile (1852 m) per 24 hours.

In [None]:
from src.particle_filter import rmse, propagate
import numpy as np

time = 24 * 60  # minutes
noise = np.array([0, 2.6, 0])
bound = 1852  # meters

# Number of trials per speed.
# NOTE(review): every trial now really simulates three 24 h legs (see the fix
# below), so the full run is expensive: 25 speeds x N_TRIALS x 3 legs x 1440
# steps. Lower N_TRIALS for a quick check.
N_TRIALS = 50000

# The noise matrices never change between steps, so build them once.
noisy_cov = np.diag(noise)
zero_cov = np.diag([0, 0, 0])


def _one_day_drift(u, v):
    """Return the RMSE between a noisy and a noise-free run over `time` steps.

    Both states start from [[0, 0, 0, 0, v, 0]] and are advanced with the same
    control vector `u`; only the first one is propagated with noise.

    Previously the step counter `t` was initialised once per speed and never
    reset, so only the very first eastward leg was simulated and its RMSE was
    appended again for every remaining leg and trial. Starting each leg from a
    fresh state and a fresh step count makes every appended value an
    independent 24 h run.
    NOTE(review): the same start state is used for all three headings, as in
    the original cell — confirm that this is intended.
    """
    P = np.asarray([[0, 0, 0, 0, v, 0]])
    T = P.copy()
    for step in range(time):
        P = propagate(P, u, noise=noisy_cov, noise_calibration_mode=True)
        T = propagate(T, u, noise=zero_cov, noise_calibration_mode=False)
    return rmse(P, T[0, :2])


errors = []
for v in range(1, 26):
    unit_ne = np.array([1, 1, 0]) / np.linalg.norm([1, 1, 0])
    legs = (
        [0, v, 0],  # Eastward
        [v, 0, 0],  # Northward
        unit_ne * v,  # Northeastward
    )
    for trial in range(N_TRIALS):
        for u in legs:
            errors.append(_one_day_drift(u, v))

print(f"RMSE: {np.mean(errors)}")

In [None]:
from matplotlib import pyplot as plt

# Output directory for the figures produced by this notebook.
plots_dir = ".db/plots"
if not os.path.exists(plots_dir):
    os.makedirs(plots_dir)

# Normalised histogram of the RMSE values collected in `errors`.
fig, ax = plt.subplots()
ax.hist(errors, bins=15, density=True)
ax.set_xlabel("RMSE (m)")
ax.set_ylabel("Frequency")
ax.set_title("RMSE of Particle Filter")
fig.savefig(".db/plots/propagation_tuning.png")
plt.show()

In [None]:
# Start the configuration with the tuned noise value: it fills the first two
# entries of "velocity_noise", and the third entry stays 0.
velocity_sigma = noise[1]
config = {"velocity_noise": [velocity_sigma, velocity_sigma, 0]}

## Develop measurement model

Next we need to estimate the standard deviation of the measurement error. We'll start with a general examination of the data, namely comparing the sensor measurements against the map values to see if we can build a reasonable sensor model.

In [None]:
from src.gmt_tool import inflate_bounds, get_map_section, get_map_point
# `pdset` is only imported further down in this notebook; import it here so
# this cell also works on a fresh kernel.
import src.process_dataset as pdset
import numpy as np
import json

# Prefer the saved configuration when one exists; otherwise keep the in-memory
# `config`. The context manager closes the file handle.
if os.path.exists("config.json"):
    with open("config.json", "r") as f:
        config = json.load(f)

tables = pdset.get_tables(".db/parsed.db")
bathy_tables = [table for table in tables if "_D_" in table]

# Collect the per-trackline differences and stack them once at the end instead
# of re-allocating the growing array on every iteration.
bathy_diffs = [np.array([])]

for table in bathy_tables:
    data = pdset.table_to_df(".db/parsed.db", table)
    min_lon = data.LON.min()
    max_lon = data.LON.max()
    min_lat = data.LAT.min()
    max_lat = data.LAT.max()
    # Pad the trackline's bounding box before fetching the map section.
    min_lon, min_lat, max_lon, max_lat = inflate_bounds(min_lon, min_lat, max_lon, max_lat, 0.25)
    bathy_map = get_map_section(min_lon, max_lon, min_lat, max_lat, "relief", "15s", "temp")
    # Measured depth minus the negated map value at each trackline position.
    bathy_diffs.append(data["DEPTH"] - (-get_map_point(bathy_map, data.LON, data.LAT)))

d_bathy = np.hstack(bathy_diffs)

# Statistics over the non-NaN differences only.
valid = ~np.isnan(d_bathy)
config["bathy_mean_d"] = np.mean(d_bathy, where=valid)
config["bathy_std"] = np.std(d_bathy, where=valid)

# Opening with mode "w" truncates an existing file, so no prior delete is needed.
with open("config.json", "w") as f:
    json.dump(config, f)

In [None]:
# Normalised histogram of the depth differences in d_bathy, x-axis limited to +/-300.
fig, ax = plt.subplots()
ax.hist(d_bathy, bins=200, density=True)
ax.set_xlim([-300, 300])
ax.set_xlabel("Depth Difference (m)")
ax.set_ylabel("Frequency")
ax.set_title("Bathymetry Difference")
fig.savefig(".db/plots/bathy_diff.png")
plt.show()

In [None]:
# Display the current configuration dictionary.
config

In [None]:
import json

# Persist the current configuration, replacing the contents of config.json.
with open("config.json", "w") as config_file:
    config_file.write(json.dumps(config))

### Populate with velocities

In [None]:
from src.particle_filter import (
    process_particle_filter,
    populate_velocities,
    plot_error,
    plot_estimate,
    summarize_results,
)
import json
import src.process_dataset as pdset
from tqdm import tqdm
import pandas as pd
import os
import matplotlib.pyplot as plt

In [None]:
tables = pdset.get_tables(".db/parsed.db")
bathy_tables = [table for table in tables if "_D_" in table]

# Load the saved configuration; the context manager closes the file handle
# (the previous bare open() call left it open).
with open("config.json", "r") as f:
    config = json.load(f)

config["n"] = 1000  # presumably the particle count — confirm against process_particle_filter
# Six-entry "cov" list: two entries of 1/60, a zero, the first two saved
# velocity-noise values, and a final zero.
config["cov"] = [
    1 / 60,
    1 / 60,
    0,
    config["velocity_noise"][0],
    config["velocity_noise"][1],
    0,
]

with open("config.json", "w") as f:
    json.dump(config, f)

In [None]:
# Display the configuration after adding "n" and "cov".
config

In [None]:
# Load a single trackline for the test run: the table at index 3 of bathy_tables.
df = pdset.table_to_df(".db/parsed.db", bathy_tables[3])

In [None]:
# Number of rows in the selected trackline.
len(df)

In [None]:
# Make sure both plot output directories exist.
for plot_dir in (".db/plots2/estimate/", ".db/plots2/errors/"):
    os.makedirs(plot_dir, exist_ok=True)

summary = None
# Reference values handed to plot_error in the next cell.
annotations = {"recovery": 1852, "res": 1852 / 4}

# Single-trackline test run; the loop over all bathymetry tables is currently disabled.
# The result is bound to a new name instead of overwriting `df`, so re-running
# this cell does not rebind `df` to the output of the previous run.
df_with_velocities = populate_velocities(df)
results, geo_map = process_particle_filter(df_with_velocities, config)
print("Run complete! Saving results...")
pdset.save_dataset(
    [results],
    ["test"],
    output_location=".db",
    output_format="db",
    dataset_name="results",
)

In [None]:
print("Results saved! Plotting...")

# Estimate plot built from the map and the filter results.
estimate_path = ".db/plots2/estimate/test_estimate.png"
fig, ax = plot_estimate(geo_map, results)
fig.savefig(estimate_path)
plt.close(fig)

# Error plot, annotated with the reference values defined alongside the run.
error_path = ".db/plots2/errors/test_error.png"
fig, ax = plot_error(results, annotations=annotations)
fig.savefig(error_path)
plt.close(fig)

In [None]:
# Summarize every results table against the 1852 threshold and write one CSV.
# NOTE(review): summarize_results is called here as (df, threshold), while the
# post-processing section calls it as (table, table_df, threshold) — confirm
# which signature is current.
results_tables = pdset.get_tables(".db/results.db")

runs = []
for table in results_tables:
    # Separate name so the trackline held in `df` is not overwritten.
    results_df = pdset.table_to_df(".db/results.db", table)
    run = summarize_results(results_df, 1852)
    run["Name"] = table
    runs.append(run)

# Concatenate once and write once, instead of growing the frame and rewriting
# the CSV on every iteration. The index is renumbered even for a single table.
summary = pd.concat(runs, ignore_index=True) if runs else None
if summary is not None:
    summary.to_csv(".db/summary_recovery.csv")

In [None]:
# Summarize every results table against the 452 threshold and write one CSV.
# NOTE(review): the plot annotations use 1852 / 4 (= 463) for "res", while this
# cell uses 452 — confirm which value is intended.
# NOTE(review): summarize_results is called here as (df, threshold), while the
# post-processing section calls it as (table, table_df, threshold) — confirm
# which signature is current.
results_tables = pdset.get_tables(".db/results.db")

runs = []
for table in results_tables:
    # Separate name so the trackline held in `df` is not overwritten.
    results_df = pdset.table_to_df(".db/results.db", table)
    run = summarize_results(results_df, 452)
    run["Name"] = table
    runs.append(run)

# Concatenate once and write once, instead of growing the frame and rewriting
# the CSV on every iteration. The index is renumbered even for a single table.
summary = pd.concat(runs, ignore_index=True) if runs else None
if summary is not None:
    summary.to_csv(".db/summary_resolution.csv")

# Data Post Processing

Use this section to load and post-process the results data set.

In [None]:
import src.process_dataset as pdset
import pandas as pd
import matplotlib.pyplot as plt
from src.particle_filter import summarize_results
import os
from datetime import timedelta

In [None]:
# List the tables stored in the results database and show how many there are.
results_tables = pdset.get_tables(".db/results.db")
len(results_tables)

In [None]:
summary_path = ".db/plots/summary.csv"
# The plots directory is created elsewhere in the notebook; make sure it exists
# when this section is run on its own.
os.makedirs(".db/plots", exist_ok=True)

# NOTE(review): summarize_results is called here as (table, table_df, threshold),
# while earlier cells call it as (df, threshold) — confirm which signature is current.
table_summaries = []
for table in results_tables:
    table_df = pdset.table_to_df(".db/results.db", table)
    table_summaries.append(summarize_results(table, table_df, 1852))

# Write the file in one go. The previous append mode added a second copy of
# every row each time the cell was re-run. Each table keeps its own index, so
# the rows match what a first run of the append version produced.
if table_summaries:
    pd.concat(table_summaries).to_csv(summary_path)

In [None]:
# NOTE(review): this mapping names "", "stop" and "min_error", whereas the code
# here and in later cells reads "Unnamed: 0", "end" and "min error" — confirm the
# real CSV header and align these keys.
column_types = {
    "": int,
    "name": str,
    "start": str,
    "stop": str,
    "duration": str,
    "average_error": float,
    "max_error": float,
    "min_error": float,
}
summary_raw = pd.read_csv(".db/plots/summary.csv", header=0, dtype=column_types)

# Keep the saved index as "num" (appended as the last column) and drop the
# auto-named index column.
summary = summary_raw.assign(num=summary_raw["Unnamed: 0"]).drop(columns=["Unnamed: 0"])

# The three time columns are stored as text; convert them to timedeltas.
for time_col in ("start", "end", "duration"):
    summary[time_col] = pd.to_timedelta(summary[time_col])

len(summary)

In [None]:
# Preview the first rows of the loaded summary.
summary.head()

In [None]:
# Rows whose "min error" is above the 452 threshold, and how many there are.
above_threshold = summary["min error"].gt(452)
recovery = summary[above_threshold]
len(recovery)

In [None]:
# check to see if all the tables in results_tables are present in summary["name"] and if not capture the missing tables
missing = []
for table in results_tables:
    if table not in summary["name"].values:
        missing.append(table)

total = len(results_tables)
num_recoveries = total - len(missing)
print(
    f"There are {total} total trajectories. We were able to recover at least one position fix below drift error in {num_recoveries} ({num_recoveries / total :0.4f}) trajectories."
)

In [None]:
# Summary rows whose "min error" is at or below the 452 threshold.
pixel = summary.loc[summary["min error"] <= 452]

# Computed here so the cell does not depend on `total` left behind by an earlier cell.
total = len(results_tables)

# Results tables with no row in `pixel`; the rest have at least one such row.
pixel_names = pixel["name"].values
missing = [table for table in results_tables if table not in pixel_names]
below_pixel_fixes = total - len(missing)

# The second sentence used to say "below drift error" (copied from the recovery
# cell) although it counts trajectories with a fix below pixel resolution.
print(
    f"There are {len(pixel)} total below pixel resolution fixes. We were able to achieve at least one position estimate below pixel resolution in {below_pixel_fixes} ({below_pixel_fixes/total :0.4f}) trajectories."
)
print(f"mean duration: {pixel['duration'].mean()} and median duration: {pixel['duration'].median()}")
print(f"mean error: {pixel['min error'].mean()} and median error: {pixel['min error'].median()}")
print(f"minimum duration: {pixel['duration'].min()} and maximum duration: {pixel['duration'].max()}")
print(f"minimum error: {pixel['min error'].min()} and maximum error: {pixel['min error'].max()}")
print(f"mean start: {pixel['start'].mean()} and median start: {pixel['start'].median()}")

In [None]:
# The three rows with the smallest "min error".
summary.sort_values(by="min error").head(3)

In [None]:
# The three rows with the smallest "start" value.
summary.sort_values(by="start").head(3)

In [None]:
# The three rows with the largest "duration" (tail of the ascending sort).
summary.sort_values(by="duration").tail(3)

In [None]:
# The three rows with the smallest "average_error".
summary.sort_values(by="average_error").head(3)

In [None]:
# Select the rows of summary whose duration is within one minute of the median duration
summary.loc[abs(summary["duration"] - summary["duration"].median()) <= timedelta(minutes=1)]

In [None]:
# Median of the "duration" column.
summary["duration"].median()

In [None]:
# Report mean/median and min/max of "duration" and "average_error" over all summary rows.
print(
    f"On average we were able to recover a position fix with an mean duration of {summary['duration'].mean()}, median duration of {summary['duration'].median()} and a mean error of {summary['average_error'].mean()} and median error {summary['average_error'].median()}."
)

print(f"Minimum duration {summary['duration'].min()} and maximum duration {summary['duration'].max()}.")
print(f"Minimum error {summary['average_error'].min()} and maximum error {summary['average_error'].max()}.")

In [None]:
# Rows whose "num" (the index saved in the CSV) is 0 — presumably the first
# entry of each trajectory; confirm against how the CSV was written.
first = summary.loc[summary["num"] == 0]
# first.head()

# Report "start", "duration" and "average_error" statistics for those rows only.
print(
    f"The first position recover occurs with a mean of {first['start'].mean()} and median {first['start'].median()} after the start of the trajectory."
)
print(
    f"with an mean duration of {first['duration'].mean()}, median duration of {first['duration'].median()} and a mean error of {first['average_error'].mean()} and median error {first['average_error'].median()}."
)

print(f"Minimum duration {first['duration'].min()} and maximum duration {first['duration'].max()}.")
print(f"Minimum error {first['average_error'].min()} and maximum error {first['average_error'].max()}.")

# Gravity

Recreate the above simulation and measurement model development, this time with gravity.

In [None]:
from src.gmt_tool import inflate_bounds, get_map_section, get_map_point
import src.process_dataset as pdset
import numpy as np
import json
from matplotlib import pyplot as plt
import os

# Load the saved configuration; the context manager closes the file handle
# (the previous bare open() call left it open).
with open("config.json", "r") as f:
    config = json.load(f)
tables = pdset.get_tables(".db/parsed.db")
# Parsed tables whose name contains the "_G_" marker.
gravity_tables = [table for table in tables if "_G_" in table]

In [None]:
# Collect the per-trackline differences and stack them once at the end instead
# of re-allocating the growing array on every iteration.
gravity_diffs = [np.array([])]

for table in gravity_tables:
    data = pdset.table_to_df(".db/parsed.db", table)
    min_lon = data.LON.min()
    max_lon = data.LON.max()
    min_lat = data.LAT.min()
    max_lat = data.LAT.max()
    # Pad the trackline's bounding box before fetching the map section.
    min_lon, min_lat, max_lon, max_lat = inflate_bounds(min_lon, min_lat, max_lon, max_lat, 0.25)
    gravity_map = get_map_section(min_lon, max_lon, min_lat, max_lat, "gravity", "01m", "temp")
    # Measured anomaly minus the map value at each trackline position.
    gravity_diffs.append(data["GRAV_ANOM"] - get_map_point(gravity_map, data.LON, data.LAT))

d_gravity = np.hstack(gravity_diffs)

# Statistics over the non-NaN differences only.
valid = ~np.isnan(d_gravity)
config["gravity_mean_d"] = np.mean(d_gravity, where=valid)
config["gravity_std"] = np.std(d_gravity, where=valid)

# Opening with mode "w" truncates an existing file, so no prior delete is needed.
with open("config.json", "w") as f:
    json.dump(config, f)

# Normalised histogram of the differences, x-axis limited to [-50, 75].
plt.hist(d_gravity, bins=100, density=True)
plt.xlim([-50, 75])
plt.xlabel("Gravity Difference (mGal)")
plt.ylabel("Frequency")
plt.title("Gravity Anomaly Difference")
plt.savefig(".db/plots/gravity_diff.png")
plt.show()

# Magnetics

Recreation with magnetics

In [None]:
from src.gmt_tool import inflate_bounds, get_map_section, get_map_point
import numpy as np
import json

# Load the saved configuration; the context manager closes the file handle
# (the previous bare open() call left it open).
with open("config.json", "r") as f:
    config = json.load(f)
tables = pdset.get_tables(".db/parsed.db")
# Parsed tables whose name contains the "_M_" marker.
mag_tables = [table for table in tables if "_M_" in table]

# Preview the first magnetic table.
df = pdset.table_to_df(".db/parsed.db", mag_tables[0])
df.head()

In [None]:
# Collect the per-trackline differences and stack them once at the end instead
# of re-allocating the growing array on every iteration.
magnetic_diffs = [np.array([])]

for table in mag_tables:
    data = pdset.table_to_df(".db/parsed.db", table)
    min_lon = data.LON.min()
    max_lon = data.LON.max()
    min_lat = data.LAT.min()
    max_lat = data.LAT.max()
    # Pad the trackline's bounding box before fetching the map section.
    min_lon, min_lat, max_lon, max_lat = inflate_bounds(min_lon, min_lat, max_lon, max_lat, 0.25)
    mag_map = get_map_section(min_lon, max_lon, min_lat, max_lat, "magnetic", "02m", "temp")
    # Measured residual minus the map value at each trackline position.
    magnetic_diffs.append(data["MAG_RES"] - get_map_point(mag_map, data.LON, data.LAT))

d_magnetics = np.hstack(magnetic_diffs)

# Statistics over the non-NaN differences only.
valid = ~np.isnan(d_magnetics)
config["magnetic_mean_d"] = np.mean(d_magnetics, where=valid)
config["magnetic_std"] = np.std(d_magnetics, where=valid)

# Opening with mode "w" truncates an existing file, so no prior delete is needed.
with open("config.json", "w") as f:
    json.dump(config, f)

In [None]:
# Normalised histogram of the magnetic differences, x-axis limited to +/-500.
fig, ax = plt.subplots()
ax.hist(d_magnetics, bins=100, density=True)
ax.set_xlim([-500, 500])
ax.set_xlabel("Magnetic Difference (nT)")
ax.set_ylabel("Frequency")
ax.set_title("Magnetic Residual Difference")
fig.savefig(".db/plots/mag_diff.png")
plt.show()

In [None]:
# Scratch check: an empty string is falsy, so this prints "EMPTY".
if not "":
    print("EMPTY")

In [None]:
# Scratch: the digits 0-9 as one-character strings.
st = list(map(str, range(10)))


In [None]:
# Scratch: concatenate the strings in st with no separator.
ST = "".join(st)

In [None]:
# Display the joined string.
ST

In [None]:
# Scratch check: substring membership test on a string literal.
"A" in "AaBbCc"

In [2]:
# Parse the test CSV with the toolbox helper; its two return values are bound to trx and names.
trx, names = tbx.parse_trackline_from_file("./test/test_data.csv")

In [3]:
# Display the names returned by the parser (recorded output: 'test_data_0', 'test_data_1', ...).
names

['test_data_0',
 'test_data_1',
 'test_data_2',
 'test_data_3',
 'test_data_4',
 'test_data_5',
 'test_data_6',
 'test_data_7',
 'test_data_8',
 'test_data_9',
 'test_data_10',
 'test_data_11',
 'test_data_12',
 'test_data_13',
 'test_data_14',
 'test_data_15',
 'test_data_16',
 'test_data_17',
 'test_data_18',
 'test_data_19',
 'test_data_20',
 'test_data_21',
 'test_data_22',
 'test_data_23',
 'test_data_24',
 'test_data_25',
 'test_data_26',
 'test_data_27',
 'test_data_28',
 'test_data_29',
 'test_data_30',
 'test_data_31',
 'test_data_32',
 'test_data_33',
 'test_data_34',
 'test_data_35',
 'test_data_36',
 'test_data_37',
 'test_data_38',
 'test_data_39',
 'test_data_40',
 'test_data_41',
 'test_data_42',
 'test_data_43',
 'test_data_44',
 'test_data_45',
 'test_data_46',
 'test_data_47',
 'test_data_48',
 'test_data_49',
 'test_data_50',
 'test_data_51',
 'test_data_52',
 'test_data_53',
 'test_data_54',
 'test_data_55',
 'test_data_56',
 'test_data_57',
 'test_data_58',
 'test_

In [4]:
# Scratch check: an empty list is falsy, so this prints "EMPTY".
if not []:
    print("EMPTY")

EMPTY


In [6]:
# Scratch check: substring membership test; evaluates to True.
"G" in "DMG"

True

In [1]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Scratch: load one survey CSV from the working directory and display it.
# NOTE(review): the recorded output shows "Unnamed: 0.1" and "Unnamed: 0" columns,
# which suggests the file was saved with its index more than once — confirm.
pd.read_csv("78123006.csv")

Unnamed: 0.1,Unnamed: 0,SURVEY_ID,LAT,LON,BAT_TTIME,CORR_DEPTH,BAT_CPCO,BAT_TYPCO,MAG_TOT,MAG_RES,FREEAIR
0,1979-05-19 02:30:00+00:00,78123006,16.22972,-99.81965,7.117,5354.0,43.0,1,41081.0,69.0,-128.3
1,1979-05-19 02:45:00+00:00,78123006,16.18094,-99.81402,7.044,5298.0,43.0,1,40930.0,-50.0,-123.3
2,1979-05-19 03:00:00+00:00,78123006,16.13214,-99.80842,6.460,4851.0,43.0,1,40871.0,-77.0,-114.3
3,1979-05-19 03:30:00+00:00,78123006,16.09372,-99.72320,6.227,4673.0,43.0,1,40874.0,-57.0,-103.7
4,1979-05-19 11:30:00+00:00,78123006,15.49778,-98.23145,6.157,4619.0,43.0,1,40776.0,85.0,-97.4
...,...,...,...,...,...,...,...,...,...,...,...
169,1979-06-14 09:30:00+00:00,78123006,0.37312,-90.02423,2.047,1527.0,41.0,1,32170.0,-249.0,-2.7
170,1979-06-14 10:00:00+00:00,78123006,0.29111,-90.02782,0.982,736.0,41.0,1,31783.0,-599.0,37.9
171,1979-06-14 11:00:00+00:00,78123006,0.12539,-90.03213,2.069,1543.0,41.0,1,32310.0,1.0,-21.8
172,1979-06-14 12:00:00+00:00,78123006,-0.05231,-90.03878,1.099,823.0,41.0,1,32236.0,5.0,11.5


In [3]:
import sqlalchemy as sa

In [6]:
# Connect to azurite database
# NOTE(review): the recorded output for this cell is an ArgumentError — SQLAlchemy
# could not parse this string as a database URL. Confirm whether SQLAlchemy is the
# right client for this endpoint, and if so supply a URL in the form it expects.
engine = sa.create_engine("sql:http://127.0.0.1:10002/devstoreaccount1")

ArgumentError: Could not parse SQLAlchemy URL from string 'sql:http://127.0.0.1:10002/devstoreaccount1'