# recslam

> Tools for EDA of visual data collected in the RecSLAM project.


In [2]:
# default_exp tools.recslam_eda

In [3]:
#| hide
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [4]:
# | export
# | hide

In [48]:
# | export
# | hide

# basic imports
from __future__ import annotations

# sys and paths imports
import json
import os
import sys
from pathlib import Path
from ds_contrib.core.paths import list_paths

# typing imports
from abc import ABC, abstractmethod
from typing import Any, Generic
from ds_contrib.core.utils import T
from dataclasses import dataclass

# cv and image imports
import cv2
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from PIL import Image as PImage

# visualization imports
from ds_contrib.core.plotting import plot

# widgets imports
from IPython.display import display
import ipywidgets as widgets
from ds_contrib.tools.browser import ImageBrowser
from ds_contrib.tools.io.gscloud import GSBrowser

In [49]:
# | hide

# Resolve the repository root as the prefix of the working directory that ends at the
# "ds_contrib" path component (``index`` raises ValueError when that component is absent).
CWD = Path.cwd()
REPO_DIR = Path(*CWD.parts[: CWD.parts.index("ds_contrib") + 1])
CONFIGS_DIR = REPO_DIR.joinpath("configs")
ENV_DIR = CONFIGS_DIR.joinpath("env/local")

# Per-project storage settings, keyed by project alias.
with open(CONFIGS_DIR.joinpath("storage/gscloud/projects_vars.json")) as f:
    projects = json.load(f)

# Select the project to work with and the .env file that belongs to it.
project = projects["dev"]
env_path = ENV_DIR / f'{project["env"]}_roadly.env'

# Load the local .env file, then pick the credentials up from the environment.
_ = load_dotenv(env_path)
google_app_creds = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
roadly_cookie = os.environ.get("ROADLY_COOKIE")
print(f"Initial configuration has finished:\nProject: {project}")

Initial configuration has finished:
Project: {'project': 'roadly-project-dev', 'env': 'dev', 'coldline_name': 'standard'}


In [50]:
def json_decoder(path: Path) -> Any:
    """Read the file at ``path`` and return its parsed JSON content.

    Args:
        path: Location of a JSON document on the local filesystem.

    Returns:
        The object produced by ``json.load`` for the document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default encoding,
    # so non-ASCII content is read the same way on every machine.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def csv_decoder(path: Path) -> pd.DataFrame:
    """Read the CSV file at ``path`` into a DataFrame.

    Args:
        path: Location of a CSV file on the local filesystem.

    Returns:
        The table parsed by ``pandas.read_csv`` with its default options.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Let pandas open the file itself with an explicit UTF-8 encoding, rather than
    # handing it a text handle opened with the platform default encoding.
    return pd.read_csv(path, encoding="utf-8")

In [46]:
def get_reclslam_df(recslam_dir: Path, structure: dict):
    """Tabulate the entries of a RecSLAM directory layout.

    Args:
        recslam_dir: Directory that the entry paths are resolved against.
        structure: Mapping of entry name to a mapping holding at least the
            ``"desc"`` and ``"path"`` keys.

    Returns:
        DataFrame indexed by entry name (``name``) with the columns ``desc`` and
        ``path``, where ``path`` is ``recslam_dir / entry["path"]``.
    """
    rows = []
    for entry_name, entry in structure.items():
        rows.append((entry_name, entry["desc"], recslam_dir / entry["path"]))
    return pd.DataFrame.from_records(
        rows, columns=["name", "desc", "path"], index="name"
    )

In [14]:
# df = get_reclslam_df(SHARED_ROOT, recslam_structure)

In [15]:
# device = DeviceMetadataDTO.from_json(df.loc["device"]["path"])

In [16]:
# device.ultra_wide_camera.fps

In [17]:
# csv_decoder(df.loc["heading"]["path"])

In [18]:
# browser.cleanup()

1. Choose a RecSLAM recording path
2. Download the video and its metadata
3. Split the video into frames
4. Run the video browser
5. Export the dataset to CSV


In [19]:
# CSV with the sampled recordings. The location used to be a hard-coded, user-specific
# absolute path; it can now be overridden through the RECSLAM_SAMPLED_CSV environment
# variable, and falls back to the original location when the variable is not set.
SAMPLED_DATASETS_CSV = Path(
    os.getenv("RECSLAM_SAMPLED_CSV", "/Users/arseniy/Downloads/выборка.csv")
)
sampled_datasets = pd.read_csv(SAMPLED_DATASETS_CSV)

In [20]:
# Number of rows in the sampled-recordings table.
len(sampled_datasets)

123

In [21]:
# Display the table; the notebook rendering elides the middle rows and columns.
sampled_datasets

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Path,Datetime,Country,City,Speed,Route,iri_mean,iri_var,...,ultrawide,video_data,name,local_path,fps,frame_count,duration,Решение,Причина,video_url
0,0,3652,Alanya/2022-07-17_09-05-31_4453D774-04F,2022-07-17 10:05:32,Turkey,,13.399114,"LINESTRING (29.113155 36.661371, 29.113136 36....",10.881955,36.128544,...,True,False,2022-07-17_09-05-31_4453D774-04F,data/downloads/2022-07-17_09-05-31_4453D774-04...,5.000158,529,105.796667,Не подходит?,Брусчатка,https://storage.cloud.google.com/roadly-dev-st...
1,1,3537,2022-12-17_15-33-16_83AB9828-5B0,2022-12-17 14:33:16,Uzbekistan,,13.874303,"LINESTRING (69.074585 41.123241, 69.074628 41....",11.176429,23.879309,...,True,False,2022-12-17_15-33-16_83AB9828-5B0,data/downloads/2022-12-17_15-33-16_83AB9828-5B...,5.000035,1200,239.998333,Подходит,,https://storage.cloud.google.com/roadly-dev-st...
2,2,3471,2022-12-17_16-12-01_83AB9828-5B0,2022-12-17 15:12:01,Uzbekistan,Tashkent,9.621945,"LINESTRING (69.250847 41.292218, 69.250845 41....",8.533605,19.737184,...,True,False,2022-12-17_16-12-01_83AB9828-5B0,data/downloads/2022-12-17_16-12-01_83AB9828-5B...,5.000072,575,114.998333,Подходит,,https://storage.cloud.google.com/roadly-dev-st...
3,3,14,2022-12-17_15-44-08_83AB9828-5B0,2022-12-17 14:44:08,Uzbekistan,,13.776954,"LINESTRING (69.136316 41.178227, 69.136369 41....",10.047226,18.226671,...,True,False,2022-12-17_15-44-08_83AB9828-5B0,data/downloads/2022-12-17_15-44-08_83AB9828-5B...,5.000074,1129,225.796667,Подходит,,https://storage.cloud.google.com/roadly-dev-st...
4,4,3936,2022-12-17_13-32-09_83AB9828-5B0,2022-12-17 12:32:10,Uzbekistan,,10.560251,"LINESTRING (69.0679 41.129758, 69.067904 41.12...",10.391300,16.992052,...,True,False,2022-12-17_13-32-09_83AB9828-5B0,data/downloads/2022-12-17_13-32-09_83AB9828-5B...,5.000132,316,63.198333,Подходит,,https://storage.cloud.google.com/roadly-dev-st...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,118,4086,Antalya/2022-03-08_12-00-17_4453,2022-03-08 13:00:16,Turkey,,8.116465,"LINESTRING (32.096358 36.488346, 32.096334 36....",3.158568,0.611353,...,True,False,2022-03-08_12-00-17_4453,data/downloads/2022-03-08_12-00-17_4453.mp4,5.000138,605,120.996667,Не подходит,Мало дефектов,https://storage.cloud.google.com/roadly-dev-st...
119,119,3805,Dubai/2022-05-28_22-55-34_2A24,2022-05-28 22:55:34,United Arab Emirates,,9.440000,"LINESTRING (55.37714 25.142569, 55.37721 25.14...",4.130825,0.607832,...,True,False,2022-05-28_22-55-34_2A24,data/downloads/2022-05-28_22-55-34_2A24.mp4,1.000000,7,7.000000,Не подходит,это просто стол,https://storage.cloud.google.com/roadly-dev-st...
120,120,4105,Antalya/2022-03-08_11-57-55_4453,2022-03-08 12:57:54,Turkey,,6.728861,"LINESTRING (32.100665 36.485887, 32.100655 36....",3.172593,0.463560,...,True,False,2022-03-08_11-57-55_4453,data/downloads/2022-03-08_11-57-55_4453.mp4,0.000000,0,0.000000,,,https://storage.cloud.google.com/roadly-dev-st...
121,121,3603,Alanya/2022-06-22_21-35-56_4453D774-04F,2022-06-22 21:35:57,Georgia,Batumi,9.462348,"LINESTRING (41.61358 41.60342, 41.61361 41.603...",3.623128,0.448510,...,True,False,2022-06-22_21-35-56_4453D774-04F,data/downloads/2022-06-22_21-35-56_4453D774-04...,5.000222,375,74.996667,Не подходит,Ночь\Лужи,https://storage.cloud.google.com/roadly-dev-st...


In [22]:
# Inspect a single recording: take the first row whose `name` matches and show it
# as a {column: value} dict.
sampled_datasets.loc[sampled_datasets["name"] == "2022-02-21_13-18-38_4453"].iloc[
    0
].to_dict()

{'Unnamed: 0.1': 91,
 'Unnamed: 0': 4084,
 'Path': 'Antalya/2022-02-21_13-18-38_4453',
 'Datetime': '2022-02-21 14:18:39',
 'Country': 'Russia',
 'City': nan,
 'Speed': 18.26199036734694,
 'Route': 'LINESTRING (48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80355 55.75171, 48.80353 55.75172, 48.80349 55.75173, 48.80344 55.75175, 48.80339 55.75178, 48.80334 55.75181, 48.80329 55.75185, 48.80324 55.7519, 48.8032 55.75195, 48.80318 55.752, 48.80318 55.75205, 48.80318 55.75209, 48.80319 55.75214, 48.803364 55.752193, 48.80325 55.75226, 48.80332 55.75233, 48.80357 55.75244, 48.8037 55.75254, 48.80384 55.75266, 48.80402 55.75278, 48.80422 55.75292, 48.80443 55.753

In [23]:
# Recording names in sorted order; passed to the dropdown as its options further down.
items = sampled_datasets["name"].sort_values()

In [24]:
from abc import ABC, abstractmethod
from typing import Any


class Metadata(ABC):
    """Abstract per-item metadata lookup: ``meta[item]`` yields a dict of fields."""

    @abstractmethod
    def __getitem__(self, item) -> dict[str, Any]:
        """Return the metadata fields stored for ``item``; subclasses must override."""
        raise NotImplementedError


class DataFrameMetadata(Metadata):
    """Metadata lookup backed by a pandas DataFrame, one row per item.

    The frame passed in is never modified; the instance works on its own frame.
    """

    def __init__(self, df: pd.DataFrame, index_column=None):
        """Store ``df`` for lookups, optionally re-indexed by ``index_column``.

        Args:
            df: Table holding the metadata rows.
            index_column: Column label(s) forwarded to ``DataFrame.set_index`` to
                become the lookup key. ``None`` keeps the existing index of ``df``.
        """
        if index_column is None:
            # Defensive copy so later changes to the caller's frame do not leak in.
            # Warning: this may slow down the init for large frames.
            self._df = df.copy()
        else:
            # Compare against None rather than truthiness so that falsy labels
            # (e.g. the integer column 0) are honoured. set_index returns a new
            # frame, so neither an extra copy nor inplace mutation is needed.
            self._df = df.set_index(index_column)

    def __getitem__(self, item) -> dict[str, Any]:
        """Return the row labelled ``item`` as a ``{column: value}`` dict.

        Raises:
            KeyError: If ``item`` is not a label of the index.
        """
        return self._df.loc[item].to_dict()

    def _repr_html_(self):
        """Rich notebook representation: the HTML rendering of the underlying frame."""
        return self._df._repr_html_()

In [25]:
# Metadata lookup over the sampled table, keyed by the `name` column.
meta = DataFrameMetadata(sampled_datasets, index_column="name")

In [26]:
# First row of the table, shown as a Series (one line per column).
sampled_datasets.iloc[0]

Unnamed: 0.1                                                    0
Unnamed: 0                                                   3652
Path                      Alanya/2022-07-17_09-05-31_4453D774-04F
Datetime                                      2022-07-17 10:05:32
Country                                                    Turkey
City                                                          NaN
Speed                                                   13.399114
Route           LINESTRING (29.113155 36.661371, 29.113136 36....
iri_mean                                                10.881955
iri_var                                                 36.128544
ride_quality                                      RideQuality.BAD
wide                                                         True
ultrawide                                                    True
video_data                                                  False
name                             2022-07-17_09-05-31_4453D774-04F
local_path

In [27]:
# Look up one recording by name through the metadata wrapper; returns a dict.
meta["2022-02-21_13-18-38_4453"]

{'Unnamed: 0.1': 91,
 'Unnamed: 0': 4084,
 'Path': 'Antalya/2022-02-21_13-18-38_4453',
 'Datetime': '2022-02-21 14:18:39',
 'Country': 'Russia',
 'City': nan,
 'Speed': 18.26199036734694,
 'Route': 'LINESTRING (48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80356 55.7517, 48.80355 55.75171, 48.80353 55.75172, 48.80349 55.75173, 48.80344 55.75175, 48.80339 55.75178, 48.80334 55.75181, 48.80329 55.75185, 48.80324 55.7519, 48.8032 55.75195, 48.80318 55.752, 48.80318 55.75205, 48.80318 55.75209, 48.80319 55.75214, 48.803364 55.752193, 48.80325 55.75226, 48.80332 55.75233, 48.80357 55.75244, 48.8037 55.75254, 48.80384 55.75266, 48.80402 55.75278, 48.80422 55.75292, 48.80443 55.753

In [28]:
import os
from typing import Any, Generic
import ipywidgets as widgets
from IPython.display import display

from ds_contrib.core.utils import T


class MetaDataBox:
    """Read-only text area that shows the metadata fields of one item."""

    def __init__(self, meta: Metadata | None, exclude_keys=("Route",)) -> None:
        """Create the (initially empty) text area.

        Args:
            meta: Lookup queried by ``set_item_meta``. May be ``None``, in which
                case the box always reports that the metadata is empty.
            exclude_keys: Iterable of field names that are never printed. Defaults
                to ``("Route",)``, the field that was previously hard-coded here.
        """
        self.item_meta_box = widgets.Textarea(
            value="",
            description="",
            disabled=True,
            layout=widgets.Layout(width="80%", height="30em"),
            placeholder="Row content will be displayed here",
        )
        self._meta = meta
        self._exclude_keys = frozenset(exclude_keys)

    def set_item_meta(self, index_id):
        """Render the metadata of ``index_id`` into the text area.

        Fields are printed one per line as ``key: value``, sorted by key, skipping
        the excluded keys. When there is no metadata source, or the lookup returns
        an empty mapping, the box shows "Metadata is empty".
        """
        # Guard against a missing metadata source instead of failing on None[index_id].
        items = self._meta[index_id] if self._meta is not None else None
        if items:
            meta_str = "\n".join(
                f"{k}: {v}"
                for k, v in sorted(items.items())
                if k not in self._exclude_keys
            )
        else:
            meta_str = "Metadata is empty"
        self.item_meta_box.value = meta_str

    @property
    def widget(self):
        """The underlying Textarea widget, for embedding into a layout."""
        return self.item_meta_box


class ItemsDropdownWithMeta(Generic[T]):
    """Dropdown over ``items`` with a text box showing the selected item's metadata."""

    def __init__(
        self, items: list[T], meta: Metadata | None = None, description=None
    ) -> None:
        """Build the widgets and wire the selection callback.

        Args:
            items: Options offered by the dropdown.
            meta: Optional metadata lookup; when ``None`` the metadata box is
                left untouched.
            description: Optional label of the dropdown; ``None`` means no label.
        """
        self._items = items
        self._meta = meta
        self._description = description
        self._setup_gui()
        self._setup_callbacks()
        # Show the metadata of the pre-selected item right away; the observer only
        # fires on later changes, which used to leave the box empty at start.
        self._show_meta(self.dropdown.value)

    def _setup_gui(self):
        """Create the dropdown, the metadata box and the VBox that stacks them."""
        self.dropdown = widgets.Dropdown(
            options=self._items,
            # `description` is a string trait, so the None default is mapped to "".
            description=self._description if self._description is not None else "",
            disabled=False,
        )
        self._metadata_box = MetaDataBox(self._meta)
        self._widget = widgets.VBox([self.dropdown, self._metadata_box.widget])

    def _setup_callbacks(self):
        """Refresh the metadata box whenever the dropdown value changes."""
        self.dropdown.observe(self._cb_on_dropdown_change, names="value")

    def _show_meta(self, item):
        """Update the metadata box for ``item``; no-op without metadata or selection."""
        if self._meta is None or item is None:
            return
        self._metadata_box.set_item_meta(item)

    def _cb_on_dropdown_change(self, change):
        """Observer for the dropdown's ``value`` trait."""
        self._show_meta(change.new)

    @property
    def widget(self):
        """The composite widget (dropdown above metadata box) to display."""
        return self._widget

    @property
    def value(self) -> T:
        """The item currently selected in the dropdown."""
        return self.dropdown.value

In [30]:
# Interactive picker: dropdown over the recording names, with the metadata of the
# selected recording (looked up by `name`) shown underneath.
dropdown = ItemsDropdownWithMeta(
    items, meta=DataFrameMetadata(sampled_datasets, "name"), description="Videos"
)
display(dropdown.widget)

VBox(children=(Dropdown(description='Videos', options=('2021-11-21_15-17-06_4453', '2022-02-04_14-45-39_4453',…

In [31]:
# Video URL of the recording currently selected in the dropdown (first matching row).
chosen_video = sampled_datasets.loc[
    sampled_datasets["name"] == dropdown.value, "video_url"
].values[0]

In [32]:
# Re-read the credentials variable (the configuration cell reads the same one) and
# create the storage browser for the selected project.
google_app_creds = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
browser = GSBrowser(project=project["project"], credentials=google_app_creds)

In [33]:
# Fetch the chosen video through the storage browser.
# NOTE(review): presumably the file lands under browser.downloads_path - confirm.
browser.download_file(chosen_video)

In [36]:
from ds_contrib.core.paths import list_paths


# Directory of the downloaded recording: parent of the first path listed under the
# downloads location. Indexing [0] raises IndexError if nothing was listed.
LOCAL_RECSLAM_PATH = list(list_paths(browser.downloads_path, recursive=True))[0].parent

In [37]:
from ds_contrib.core.data.video import sample_frames_from_video


# Extract frames from the recording's "video_2" entry into a "frames" subdirectory.
# NOTE(review): time_delta=2 is presumably the sampling interval in seconds - confirm
# against sample_frames_from_video.
sample_frames_from_video(
    LOCAL_RECSLAM_PATH / "video_2", LOCAL_RECSLAM_PATH / "frames", time_delta=2
)

Sampling frames:   0%|          | 0/241 [00:00<?, ?it/s]

In [38]:
# from ds_contrib.core.paths import list_paths


# list(list_paths(LOCAL_RECSLAM_PATH / "frames", recursive=True, sort=True))

In [39]:
from ds_contrib.core.paths import list_paths
from ds_contrib.tools.browser import ImageBrowser


# Browse the extracted frames; paths are listed recursively with sort=True.
ib = ImageBrowser(
    list(list_paths(LOCAL_RECSLAM_PATH / "frames", recursive=True, sort=True))
)
ib.browse()

VBox(children=(Text(value='[1] Image: /Users/arseniy/Projects/dev/ds_contrib/dev_notebooks/annotation/tmpvs8rj…

In [47]:
# pd.read_csv("test.csv")

In [40]:
# Remove the downloaded files once done.
# NOTE(review): downloads_dir is presumably a temporary-directory object backing
# browser.downloads_path - confirm against GSBrowser.
browser.downloads_dir.cleanup()