# Deep Water Bathymetric Particle Filter Experiment

This notebook runs the experiment testing the capabilities of the particle filter to conduct navigation using deep water bathymetry and for long duration.

## Data set preparation

First need to process the .m77t files in `source_data` into our database format.

In [None]:
from src import process_dataset as pdset
import os
import pandas as pd

### Preprocess tracklines into sql/df format

In [None]:
# Check and see if the .db directory exists
if not os.path.exists(".db"):
    os.mkdir(".db")
    
# Check to see if the database exists
if not os.path.exists('.db/tracklines.db'):
    tables = []
else:
    tables = pdset.get_tables('.db/tracklines.db')

source_data_location = "./source_data/"
# walk through the .m77t files in the source_data directory
for root, dirs, files in os.walk(source_data_location):
    for file in files:
        if file.endswith(".m77t"):
            # check to see if the file has already been processed
            filename = os.path.splitext(file)[0]
            if filename not in tables:
                print("Processing file: " + file)
                data = pd.read_csv(os.path.join(root, file), delimiter="\t", header=0)
                data = pdset.m77t_to_df(data)
                # data.to_sql(
                #    filename, sqlite3.connect(".db/tracklines.db"), if_exists="replace"
                # )
                pdset.save_dataset(
                    [data],
                    [filename],
                    output_location=".db",
                    output_format="db",
                    dataset_name="tracklines",
                )
            else:
                print("Skipping file: " + file + " (already processed)")

### Parse the raw data into tracklines of continuous data collections

In [None]:
max_time = 10 # minutes
max_delta_t = 2 # minutes between points
min_duration = 60 # minutes, minimum duration for a useful trackline

data, names = pdset.parse_tracklines_from_db(
    ".db/tracklines.db",
    #max_time,
    #max_delta_t,
    #min_duration,
    data_types=[
        "bathy",
        "mag",
        "grav",
        ["bathy", "mag"],
        ["bathy", "grav"],
        ["grav", "mag"],
        ["bathy", "grav", "mag"],
    ],
)
# Save the parsed data to the database
pdset.save_dataset(
    data, names, output_location=".db", output_format="db", dataset_name="parsed"
)
#summary = pdset.get_parsed_data_summary(data, names)

## Simulation parameters verification

First we need to tune the particle filter propagation noise to be similar to that of a marine-grade inertial navigation system. A low-end marine-grade INS should have a drift of 1 nm per 24 hours.

In [None]:
from src.particle_filter import rmse, propagate
import numpy as np

time = 24*60 # minutes
noise = np.array([0, 2.6, 0])
bound = 1852 # meters

errors = []
for v in range(1, 26):
    P = np.asarray([[0,0,0,0,v,0]])
    T = P.copy()
    t = 0
    for i in range(50000):
        # Eastward
        u = [0,v,0]
        while t < time:    
            P = propagate(P, u, noise=np.diag(noise), noise_calibration_mode=True)
            T = propagate(T, u, noise=np.diag([0,0,0]), noise_calibration_mode=False)
            t+=1
        errors.append(rmse(P, T[0, :2]))
        # Northward
        u = [v,0,0]
        while t < time:    
            P = propagate(P, u, noise=np.diag(noise), noise_calibration_mode=True)
            T = propagate(T, u, noise=np.diag([0,0,0]), noise_calibration_mode=False)
            t+=1
        errors.append(rmse(P, T[0, :2]))
        # Northeastward 
        u = np.array([1,1,0]) / np.linalg.norm([1,1,0])
        u *= v
        while t < time:    
            P = propagate(P, u, noise=np.diag(noise), noise_calibration_mode=True)
            T = propagate(T, u, noise=np.diag([0,0,0]), noise_calibration_mode=False)
            t+=1
        errors.append(rmse(P, T[0, :2]))

print(f"RMSE: {np.mean(errors)}")

In [None]:
from matplotlib import pyplot as plt

if not os.path.exists(".db/plots"):
    os.makedirs(".db/plots")

plt.hist(errors, bins=15, density=True)
plt.xlabel("RMSE (m)")
plt.ylabel("Frequency")
plt.title("RMSE of Particle Filter")
plt.savefig(".db/plots/propagation_tuning.png")
plt.show()

In [None]:
config = {"velocity_noise": [noise[1], noise[1], 0]}

Next we need to develop the measurement value standard deviation. We'll first do some general examination of the data. Namely, investigating the sensor measurements to see if we can build a reasonable sensor model.

In [None]:
from src.gmt_tool import inflate_bounds, get_map_section, get_map_point
import numpy as np

tables = pdset.get_tables(".db/parsed.db")
bathy_tables = [table for table in tables if "_D_" in table]

d_bathy = np.array([])

for table in bathy_tables:
    data = pdset.table_to_df(".db/parsed.db", table)
    min_lon = data.LON.min()
    max_lon = data.LON.max()
    min_lat = data.LAT.min()
    max_lat = data.LAT.max()
    min_lon, min_lat, max_lon, max_lat = inflate_bounds(min_lon, min_lat, max_lon, max_lat, 0.25)
    bathy_map = get_map_section(min_lon, max_lon, min_lat, max_lat, 'relief', '15s', "temp")
    d_bathy = np.hstack([d_bathy, data['DEPTH'] - (-get_map_point(bathy_map, data.LON, data.LAT))])

config['bathy_mean_d'] = np.mean(d_bathy, where=~np.isnan(d_bathy))
config['bathy_std'] = np.std(d_bathy, where=~np.isnan(d_bathy))

plt.hist(d_bathy, bins=100, density=True)
plt.xlim([-250,250])
plt.xlabel("Depth Difference (m)")
plt.ylabel("Frequency")
plt.title("Bathymetry Difference")
plt.savefig(".db/plots/bathy_diff.png")
plt.show()

In [None]:
config

In [None]:
import json

with open("config.json", "w") as f:
    json.dump(config, f)

### Populate with velocities

In [None]:
from src.particle_filter import process_particle_filter, populate_velocities, plot_error, plot_estimate, summarize_results
import json
import src.process_dataset as pdset
from tqdm import tqdm
import pandas as pd
import os
import matplotlib.pyplot as plt

In [None]:
tables = pdset.get_tables(".db/parsed.db")
bathy_tables = [table for table in tables if "_D_" in table]
config = json.load(open("config.json", "r"))
config["n"] = 1000
config["cov"] = [1/60, 1/60, 0, config["velocity_noise"][0], config["velocity_noise"][1], 0]

with open("config.json", "w") as f:
    json.dump(config, f)

In [None]:
config

In [None]:
df = pdset.table_to_df(".db/parsed.db", bathy_tables[3])

In [None]:
len(df)

In [None]:
if not os.path.exists('.db/plots2/estimate/'):
    os.makedirs('.db/plots2/estimate/')

if not os.path.exists('.db/plots2/errors/'):
    os.makedirs('.db/plots2/errors/')

summary = None
annotations = {"recovery": 1852, "res":1852/4}
# for table in tqdm(bathy_tables):
#     print(f"Running {table}")
#     df = pdset.table_to_df(".db/parsed.db", table)
    
df = populate_velocities(df)
results, geo_map = process_particle_filter(df, config)
print("Run complete! Saving results...")
pdset.save_dataset(
    [results], ["test"], output_location=".db", output_format="db", dataset_name="results"
)


In [None]:
print("Results saved! Plotting...")
fig, ax = plot_estimate(geo_map, results)
fig.savefig(f".db/plots2/estimate/test_estimate.png")
plt.close(fig)
fig, ax = plot_error(results, annotations=annotations)
fig.savefig(f".db/plots2/errors/test_error.png")
plt.close(fig)

In [None]:
summary = None
results_tables = pdset.get_tables(".db/results.db")
for table in results_tables:
    df = pdset.table_to_df(".db/results.db", table)
    run = summarize_results(df, 1852)
    run["Name"] = table
    if summary is None:
        summary = run#.copy()
    else:
        summary = pd.concat([summary, run], ignore_index=True)
    summary.to_csv(".db/summary_recovery.csv")

In [None]:
summary = None
results_tables = pdset.get_tables(".db/results.db")
for table in results_tables:
    df = pdset.table_to_df(".db/results.db", table)
    run = summarize_results(df, 452)
    run["Name"] = table
    if summary is None:
        summary = run#.copy()
    else:
        summary = pd.concat([summary, run], ignore_index=True)
    summary.to_csv(".db/summary_resolution.csv")

# Data Post Processing

Use this section to load and post process the results data set

In [41]:
import src.process_dataset as pdset
import pandas as pd
import matplotlib.pyplot as plt
from src.particle_filter import summarize_results
import os
from datetime import timedelta

In [5]:
results_tables = pdset.get_tables(".db/results.db")
len(results_tables)

247

In [6]:
for table in results_tables:
    table_df = pdset.table_to_df(".db/results.db", table)
    summary = summarize_results(table, table_df, 1852)
    summary.to_csv(
        ".db/plots/summary.csv",
        mode="a",
        header=(not os.path.exists(".db/plots/summary.csv")),
    )

In [7]:
summary = pd.read_csv(".db/plots/summary.csv", header=0, dtype={"": int, "name":str, "start": str, "stop": str, "duration":str, "average_error":float, "max_error":float, "min_error":float})
summary["num"] = summary["Unnamed: 0"]
summary = summary.drop(columns=["Unnamed: 0"])
#summary['start'] = pd.to_datetime(summary['start'], format="%Y-%m-%d %H:%M:%S%z")
#summary['end'] = pd.to_datetime(summary['end'], format="%Y-%m-%d %H:%M:%S%z")
summary['start'] = pd.to_timedelta(summary['start'])
summary['end'] = pd.to_timedelta(summary['end'])
summary['duration'] = pd.to_timedelta(summary['duration'])
summary.head()

len(summary)

834

In [8]:
recovery = summary.loc[summary["min error"] > 452]
len(recovery)

606

In [9]:
# check to see if all the tables in results_tables are present in summary["name"] and if not capture the missing tables
missing = []
for table in results_tables:
    if table not in summary["name"].values:
        missing.append(table)

total = len(results_tables)
num_recoveries = total - len(missing)
print(f"There are {total} total trajectories. We were able to recover at least one position fix below drift error in {num_recoveries} ({num_recoveries / total :0.4f}) trajectories.")

There are 247 total trajectories. We were able to recover at least one position fix below drift error in 189 (0.7652) trajectories.


In [20]:
pixel = summary.loc[summary["min error"] <= 452]

# check to see if the tables in pixel are present in summary["name"] and if not capture the missing tables
missing = []
for table in results_tables:
    if table not in pixel["name"].values:
        missing.append(table)
below_pixel_fixes = len(pixel) - len(missing)

print(f"There are {len(pixel)} total below pixel resolution fixes. We were able to recover at least one position fix below drift error in {below_pixel_fixes} ({below_pixel_fixes/total :0.4f}) trajectories.")
print(f"mean duration: {pixel['duration'].mean()} and median duration: {pixel['duration'].median()}")
print(f"mean error: {pixel['min error'].mean()} and median error: {pixel['min error'].median()}")
print(f"minium duration: {pixel['duration'].min()} and maximum duration: {pixel['duration'].max()}")
print(f"minimum error: {pixel['min error'].min()} and maximum error: {pixel['min error'].max()}")
print(f"mean start: {pixel['start'].mean()} and median start: {pixel['start'].median()}")

There are 228 total below pixel resolution fixes. We were able to recover at least one position fix below drift error in 75 (0.3036) trajectories.
mean duration: 0 days 02:58:00.526315789 and median duration: 0 days 01:42:00
mean error: 292.35901430634124 and median error: 284.7940571599156
minium duration: 0 days 00:06:00 and maximum duration: 1 days 04:24:00
minimum error: 61.216608610523686 and maximum error: 451.9352437259879
mean start: 0 days 18:37:17.368421052 and median start: 0 days 05:42:30


In [36]:
summary.sort_values(by="min error").head(3)

Unnamed: 0,name,start,end,duration,average_error,min error,max error,num
495,index17_D_24,2 days 05:50:00,2 days 06:58:00,0 days 01:08:00,622.251331,61.216609,1747.333128,2
705,ew0114_D_11,0 days 09:29:00,0 days 13:11:00,0 days 03:42:00,858.262958,78.277494,1789.011131,7
663,wi933014_D_4,3 days 01:26:00,4 days 05:50:00,1 days 04:24:00,524.449202,101.456949,1837.765265,17


In [35]:
summary.sort_values(by="start").head(3)

Unnamed: 0,name,start,end,duration,average_error,min error,max error,num
489,index17_D_21,0 days 00:01:00,0 days 20:02:00,0 days 20:01:00,1343.368793,1118.905119,1809.413179,0
368,index17_D_3,0 days 00:01:00,0 days 04:52:00,0 days 04:51:00,716.910056,375.996868,1394.561033,0
295,78123006_D_89,0 days 00:07:00,0 days 02:10:00,0 days 02:03:00,1034.328965,603.635294,1390.767323,0


In [37]:
summary.sort_values(by="duration").head(3)

Unnamed: 0,name,start,end,duration,average_error,min error,max error,num
633,wi933014_D_40,0 days 01:28:00,0 days 01:29:00,0 days 00:01:00,1795.937051,1795.937051,1795.937051,2
808,wi933014_D_2,2 days 06:21:00,2 days 06:22:00,0 days 00:01:00,1851.429489,1851.429489,1851.429489,34
343,78123006_D_99,0 days 04:32:00,0 days 04:33:00,0 days 00:01:00,1319.921074,1319.921074,1319.921074,5


In [38]:
summary.sort_values(by="average_error").head(3)

Unnamed: 0,name,start,end,duration,average_error,min error,max error,num
485,ew0102_D_7,0 days 18:19:00,0 days 18:25:00,0 days 00:06:00,318.473064,234.463285,435.610345,21
763,ew0114_D_14,0 days 17:31:00,0 days 17:49:00,0 days 00:18:00,373.538106,241.040038,602.822923,17
47,wi933014_D_58,0 days 00:35:00,0 days 01:45:00,0 days 01:10:00,376.634907,162.525832,1368.393369,0


In [43]:
# Find the line in summary that has the closest to the mean duration
summary.loc[abs(summary["duration"] - summary["duration"].mean()) <= timedelta(minutes=1)]

Unnamed: 0,name,start,end,duration,average_error,min error,max error,num
44,wi933014_D_67,0 days 00:10:00,0 days 01:24:00,0 days 01:14:00,548.896266,303.288242,1537.028573,0
132,ew0102_D_5,0 days 09:10:00,0 days 10:23:00,0 days 01:13:00,881.868665,257.672911,1825.170658,10
138,ew0102_D_5,0 days 13:59:00,0 days 15:12:00,0 days 01:13:00,962.407998,457.759753,1620.329396,16
154,78123006_D_14,0 days 07:55:00,0 days 09:09:00,0 days 01:14:00,1228.805338,904.518903,1821.616206,5
296,78123006_D_87,0 days 01:07:00,0 days 02:20:00,0 days 01:13:00,1150.613494,713.402585,1751.733174,0
400,index17_D_16,0 days 01:26:00,0 days 02:39:00,0 days 01:13:00,826.778716,329.396133,1749.68641,0
537,index17_D_24,6 days 19:13:00,6 days 20:26:00,0 days 01:13:00,1158.909023,581.759518,1846.972948,44


In [44]:
summary["duration"].mean()

Timedelta('0 days 01:13:57.985611510')

In [15]:
print(f"On average we were able to recover a position fix with an mean duration of {summary['duration'].mean()}, median duration of {summary['duration'].median()} and a mean error of {summary['average_error'].mean()} and median error {summary['average_error'].median()}.")

print(f"Minimum duration {summary['duration'].min()} and maximum duration {summary['duration'].max()}.")
print(f"Minimum error {summary['average_error'].min()} and maximum error {summary['average_error'].max()}.")

On average we were able to recover a position fix with an mean duration of 0 days 01:13:57.985611510, median duration of 0 days 00:28:00 and a mean error of 1229.5081611263831 and median error 1260.5435978393161.
Minimum duration 0 days 00:01:00 and maximum duration 1 days 04:24:00.
Minimum error 318.47306422550645 and maximum error 1851.914764547153.


In [16]:
first = summary.loc[summary["num"] == 0]
#first.head()

print(f"The first position recover occurs with a mean of {first['start'].mean()} and median {first['start'].median()} after the start of the trajectory.")
print(f"with an mean duration of {first['duration'].mean()}, median duration of {first['duration'].median()} and a mean error of {first['average_error'].mean()} and median error {first['average_error'].median()}.")

print(f"Minimum duration {first['duration'].min()} and maximum duration {first['duration'].max()}.")
print(f"Minimum error {first['average_error'].min()} and maximum error {first['average_error'].max()}.")

The first position recover occurs with a mean of 0 days 02:00:52.698412698 and median 0 days 00:51:00 after the start of the trajectory.
with an mean duration of 0 days 01:57:46.031746031, median duration of 0 days 00:59:00 and a mean error of 1123.8220877382257 and median error 1125.1604285138226.
Minimum duration 0 days 00:01:00 and maximum duration 1 days 00:34:00.
Minimum error 376.63490730656713 and maximum error 1821.303947921932.
