In [1]:
%cd ..

/home/romet/projects/ut/milrem/waypoint_planner


In [2]:
import yaml
import tqdm.auto as tqdm
from pathlib import Path

import numpy as np
import pandas as pd
from collections import defaultdict 

from data.milrem_tracks import CLEANED_TRACKS
from data.process_milrem import get_metadata

import warnings
warnings.filterwarnings('ignore')

%load_ext autoreload
%autoreload 2

In [3]:
def calculate_distance(df):
    df['delta_x'] = df['camera_position_x'].diff()
    df['delta_y'] = df['camera_position_y'].diff()
    df['distance'] = np.sqrt(df['delta_x']**2 + df['delta_y']**2)
    total_distance = df['distance'].dropna().sum()
    return total_distance

def calculate_gnns_distance(df):
    df['delta_x'] = df['camera_position_x'].diff()
    df['delta_y'] = df['camera_position_y'].diff()
    df['distance'] = np.sqrt(df['delta_x']**2 + df['delta_y']**2)
    total_distance = df['distance'].dropna().sum()
    return total_distance

In [4]:
POSITIONAL_TOLERANCE = 2.0

def def_value(): 
    return {'unclean': 0.0, 'clean': 0.0} 

stats = defaultdict(def_value)
for track in tqdm.tqdm(CLEANED_TRACKS, desc="Trajectories processed"):
    if track[2] == 'train':
        track_path = Path("/home/romet/data2/datasets/extracted_datasets") / track[0]
        data_range = track[1]
        
        raw_metadata = pd.read_csv(track_path / 'csv/extracted_data.csv')
        raw_metadata.sort_values(by=["timestamp"], inplace=True)
        raw_metadata.reset_index(inplace=True)
        filtered_data = raw_metadata.dropna(subset=['camera_position_x', 'camera_position_y', 'camera_position_z']).copy()
    
        filtered_data['diff_x'] = filtered_data['camera_position_x'].diff().abs()
        filtered_data['diff_y'] = filtered_data['camera_position_y'].diff().abs()
        filtered_data['diff_z'] = filtered_data['camera_position_z'].diff().abs()
    
        filtered_metadata = filtered_data[(filtered_data['diff_x'] < POSITIONAL_TOLERANCE) &
                                     (filtered_data['diff_y'] < POSITIONAL_TOLERANCE) &
                                     (filtered_data['diff_z'] < POSITIONAL_TOLERANCE)]
        
        raw_len = calculate_distance(filtered_metadata)
    
        cleaned_metadata = get_metadata(track_path, data_range)
        cleaned_len = calculate_distance(cleaned_metadata)
        percentage = (cleaned_len / raw_len) * 100
        
        track_stat = stats[track[0]]
        track_stat['unclean'] = raw_len
        track_stat['clean'] += cleaned_len
        print(f"{track[0]} {raw_len:.0f}m | {cleaned_len:.0f}m | {percentage:.2f}%")

Trajectories processed:   0%|          | 0/92 [00:00<?, ?it/s]

2023-04-12-16-02-01 2561m | 2281m | 89.07%
2023-04-13-16-50-11 3387m | 2825m | 83.42%
2023-04-19-15-22-36 2929m | 2558m | 87.34%
2023-04-20-17-33-33 1975m | 1806m | 91.42%
2023-04-27-16-42-40 1676m | 1343m | 80.15%
2023-05-03-19-07-25 1926m | 901m | 46.78%
2023-05-03-19-07-25 1926m | 307m | 15.93%
2023-05-04-15-58-50 2315m | 1258m | 54.36%
2023-05-10-15-41-04 1328m | 917m | 69.07%
2023-05-11-17-08-21 662m | 659m | 99.55%
2023-05-17-15-30-02 625m | 385m | 61.63%
2023-05-18-16-40-47 496m | 392m | 79.09%
2023-05-18-16-57-00 1564m | 1365m | 87.28%
2023-05-23-15-40-24 2355m | 1810m | 76.84%
2023-05-25-16-33-18 2604m | 1655m | 63.56%
2023-05-30-15-42-35 2011m | 1821m | 90.58%
2023-06-01-18-10-55 2387m | 2076m | 86.99%
2023-06-06-15-41-21 2815m | 249m | 8.85%
2023-06-06-15-41-21 2815m | 1312m | 46.61%
2023-06-08-18-50-17 975m | 813m | 83.44%
2023-06-13-15-14-21 1089m | 252m | 23.12%
2023-06-13-15-14-21 1089m | 216m | 19.82%
2023-06-13-15-49-17 1983m | 212m | 10.70%
2023-06-13-15-49-17 1983m |

In [5]:
total_clean = 0.0
total_unclean = 0.0

print("track name          | uncleaned | cleaned | percentage")
print("------------------------------------------------------")
for track, track_stats in stats.items():
    clean = track_stats['clean']
    unclean = track_stats['unclean']
    percentage = (clean / unclean) * 100
    print(f"{track} | {unclean:6.0f}m   | {clean:6.0f}m | {percentage:3.2f}%")

    total_clean += clean
    total_unclean += unclean

print("------------------------------------------------------")
total_percentage = (total_clean / total_unclean) * 100
print(f"Total               | {total_unclean:6.0f}m   | {total_clean:6.0f}m | {total_percentage:3.2f}%")

track name          | uncleaned | cleaned | percentage
------------------------------------------------------
2023-04-12-16-02-01 |   2561m   |   2281m | 89.07%
2023-04-13-16-50-11 |   3387m   |   2825m | 83.42%
2023-04-19-15-22-36 |   2929m   |   2558m | 87.34%
2023-04-20-17-33-33 |   1975m   |   1806m | 91.42%
2023-04-27-16-42-40 |   1676m   |   1343m | 80.15%
2023-05-03-19-07-25 |   1926m   |   1208m | 62.71%
2023-05-04-15-58-50 |   2315m   |   1258m | 54.36%
2023-05-10-15-41-04 |   1328m   |    917m | 69.07%
2023-05-11-17-08-21 |    662m   |    659m | 99.55%
2023-05-17-15-30-02 |    625m   |    385m | 61.63%
2023-05-18-16-40-47 |    496m   |    392m | 79.09%
2023-05-18-16-57-00 |   1564m   |   1365m | 87.28%
2023-05-23-15-40-24 |   2355m   |   1810m | 76.84%
2023-05-25-16-33-18 |   2604m   |   1655m | 63.56%
2023-05-30-15-42-35 |   2011m   |   1821m | 90.58%
2023-06-01-18-10-55 |   2387m   |   2076m | 86.99%
2023-06-06-15-41-21 |   2815m   |   1561m | 55.45%
2023-06-08-18-50-17 |  

In [6]:
track_path = Path("/home/romet/data2/datasets/extracted_datasets") / "2023-08-25-15-48-47"
raw_metadata = pd.read_csv(track_path / 'csv/extracted_data.csv')

In [7]:
9120/85349

0.10685538201970732

In [8]:
Total               |  65798m   |   9120m | 13.86%

SyntaxError: invalid syntax (3832540540.py, line 1)