In [1]:
import sys
sys.path.append('..')

from root import ROOT
from data.fwf_dataset import FwfDataset

from omegaconf import OmegaConf
import os
import json
import numpy as np
import pandas as pd

In [2]:
# Assemble the run configuration in three layers: defaults, split file, label schema.
default_cfg_path = os.path.join(ROOT, 'config/default.yaml')
cfg = OmegaConf.load(default_cfg_path)

# Set the project root before cfg.data.split is read below (order kept on purpose;
# presumably the path entries interpolate general.root -- TODO confirm).
cfg.general.root = ROOT

split_cfg = OmegaConf.load(cfg.data.split)
cfg = OmegaConf.merge(cfg, split_cfg)

# The class dictionary stored in the dataset root becomes cfg.data.label_schema.
class_dict_path = os.path.join(cfg.data.dataset_root, 'class_dict.json')
with open(class_dict_path, 'r') as class_dict_file:
    label_schema = json.load(class_dict_file)
cfg = OmegaConf.merge(cfg, OmegaConf.create({'data': {'label_schema': label_schema}}))


# print(OmegaConf.to_yaml(cfg))

In [3]:
# Build a single dataset over the training and validation projects combined,
# using the validation transforms from the config.
validation_transforms = cfg.data.preprocessing._transformsValidation_
project_list = cfg.data._trainProjects_ + cfg.data._valProjects_
ds = FwfDataset(cfg, validation_transforms, project_list)

Loading '2024-03-22_FW_Koenigshuegel.FwfProj'; Bounding box IDs = default
Loading '2024-04-05_FW_Westbahnhof_02.FwfProj'; Bounding box IDs = default
Loading '2024-04-05_FW_Westbahnhof_03.FwfProj'; Bounding box IDs = default
Loading '2024-04-05_FW_Westbahnhof_04.FwfProj'; Bounding box IDs = default
Loading '2024-04-05_FW_Westbahnhof_05.FwfProj'; Bounding box IDs = default
Loading '2024-05-10_FW_RWTH_Zentrum_01.FwfProj'; Bounding box IDs = default
Loading '2024-07-31_FW_Bruecke_Koenigstr.FwfProj'; Bounding box IDs = [0, 2]
Loading '2024-08-02_FW_Bruecke_A44_VerlautenheidenerStr.FwfProj'; Bounding box IDs = default
Loading '2024-08-02_FW_Bruecke_Deltourserb.FwfProj'; Bounding box IDs = default
Loading '2024-08-02_FW_Bruecke_Kasinostrasse.FwfProj'; Bounding box IDs = [1]
Loading '2024-08-02_FW_Bruecke_RotheErde.FwfProj'; Bounding box IDs = default
Loading '2024-08-02_FW_Bruecke_Rottstrasse.FwfProj'; Bounding box IDs = default
Loading '2023-08-28_FW_EingangBauing.FwfProj'; Bounding box IDs 

In [8]:


# Stack the per-project label arrays along axis 0.
per_project_labels = [project['labels'] for project in ds.projects]
labels = np.concatenate(per_project_labels, axis=0)

# Flatten the label schema into one list of class names, level after level.
nodes = []
for level in cfg.data.label_schema.values():
    nodes.extend(level)


In [24]:
# Total number of class names across all levels of the label schema.
len(nodes)

41

In [25]:
# Smallest flattened sink index. `counts_levels` is built in the In [33] cell
# further down, so that cell has to be executed first.
counts_levels['sink'].min()

3

In [30]:
# Show the name -> id mapping stored under `labels_2` in the label schema.
print(OmegaConf.to_yaml(cfg.data.label_schema.labels_2))

_unspecified: 0
bicycle: 1
car: 2
electricalAppliance: 3
facadeSurface: 4
fence: 5
frameElement: 6
kerbStone: 7
naturalGround: 8
nonTiledPaving: 9
pipeLike: 10
scooter: 11
shaft: 12
sign: 13
stair: 14
tiledPaving: 15
trashCan: 16
vegetation: 17



[0, 3, 11, 29, 41]

In [26]:
# Inspect the per-level index offsets; the variable is defined in the In [33] cell below.
# NOTE(review): the saved output ends at 58 while `nodes` has 41 entries, so it
# looks stale -- re-run to confirm.
level_lengths_cumsum

[0, 3, 11, 32, 58]

In [33]:


# Offsets for a flattened node index: the local label ids of level k are shifted
# by the number of labels in all preceding levels, which matches the order in
# which `nodes` lists the class names.
level_lengths_cumsum = [0] + list(np.cumsum([len(x) for x in cfg.data.label_schema.values()]))

# Number of hierarchy levels is taken from the schema instead of being hard-coded.
# Assumes `labels` has one column per schema level, in schema order -- TODO confirm.
n_levels = len(level_lengths_cumsum) - 1

# One frame of (source, sink, count) rows per pair of adjacent levels.
per_level_counts = []
for source_i in range(n_levels - 1):
    sink_i = source_i + 1

    # Count how often each (parent label, child label) combination occurs.
    pairs = pd.DataFrame(data=labels[:, [source_i, sink_i]], columns=['source', 'sink'])
    counts = pairs.value_counts().reset_index(name='count')

    # Shift the per-level label ids to the flattened index.
    counts['source'] += level_lengths_cumsum[source_i]
    counts['sink'] += level_lengths_cumsum[sink_i]
    per_level_counts.append(counts)

counts_levels = pd.concat(per_level_counts, axis=0, ignore_index=True)
print(counts_levels)

    source  sink     count
0        0     4  67587320
1        0     7  34117310
2        1     9  21380184
3        0     8   3834174
4        2     5   1854635
..     ...   ...       ...
95      21    33      3674
96      24    29      3471
97      27    34       375
98      20    29        92
99      19    39        12

[100 rows x 3 columns]


In [52]:
import plotly.graph_objects as go

# Strip underscores from the class names before using them as node labels.
nodes = [name.replace("_", "") for name in nodes]

# Monochrome Sankey diagram: every flattened class index is a node and the
# co-occurrence counts between adjacent levels set the link widths.
node_style = dict(
    pad=25,
    thickness=10,
    line=dict(color="black", width=3),
    label=nodes,
    color='black',
)
link_style = dict(
    source=counts_levels['source'],  # positions in `nodes`
    target=counts_levels['sink'],
    value=counts_levels['count'],
    color='rgba(1,1,1,0.30)',
)
fig = go.Figure(data=[go.Sankey(node=node_style, link=link_style)])

# Large font and extra height keep the labels readable.
fig.update_layout(font_size=25, height=900)
fig.show()

In [67]:
import plotly.graph_objects as go
import random

# Fixed seed so the node colors (and therefore the figure) are the same on every
# run of this cell; change the value to get a different palette.
COLOR_SEED = 0
color_rng = random.Random(COLOR_SEED)


def random_rgb(rng=color_rng):
    """Return one (r, g, b) tuple with each channel drawn from 0..255 via `rng`."""
    return tuple(rng.randint(0, 255) for _ in range(3))


def random_color(rng=color_rng):
    """Return a random color as a string of the form 'rgb(r, g, b)'."""
    r, g, b = random_rgb(rng)
    return f'rgb({r}, {g}, {b})'


# Strip underscores from the class names used as node labels
# (re-running is harmless: the second pass finds nothing to remove).
nodes = [n.replace("_", "") for n in nodes]

# Keep the numeric channels so node and link colors are formatted from the same
# values instead of parsing the 'rgb(...)' strings back into integers.
node_rgb = [random_rgb() for _ in nodes]
node_colors = [f'rgb({r}, {g}, {b})' for r, g, b in node_rgb]

# Each link takes the color of its source node at 15 % opacity.
link_colors = [
    'rgba({},{},{}, 0.15)'.format(*node_rgb[src])
    for src in counts_levels['source']
]

fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=25,
        thickness=30,
        line=dict(color="black", width=0),
        label=nodes,
        color=node_colors  # one seeded random color per node
    ),
    link=dict(
        source=counts_levels['source'],
        target=counts_levels['sink'],
        value=counts_levels['count'],
        color=link_colors  # semi-transparent colors derived from the source nodes
    )
)])

fig.update_layout(
    font_size=25,
    height=900  # Increase the height of the plot
)

fig.show()

In [19]:
# Flattened class names, level after level, in the order built from the label schema.
nodes

['manMade',
 'natural',
 'scanArtefact',
 '_unspecified',
 'building',
 'movingObject',
 'naturalGround',
 'sealedSurface',
 'streetFurniture',
 'vegetation',
 'vehicle',
 '_unspecified',
 'bicycle',
 'car',
 'electricalAppliance',
 'facadeSurface',
 'fence',
 'frameElement',
 'kerbStone',
 'naturalGround',
 'nonTiledPaving',
 'pipeLike',
 'scooter',
 'shaft',
 'sign',
 'stair',
 'tiledPaving',
 'trashCan',
 'vegetation',
 '_unspecified',
 'asphalt',
 'brick',
 'cable',
 'concrete',
 'marking',
 'mesh',
 'metal',
 'naturalStone',
 'poster',
 'treeTrunk',
 'vegetation']

In [18]:
# Show the name -> id mapping stored under `labels_3` in the label schema.
print(OmegaConf.to_yaml(cfg.data.label_schema.labels_3))

_unspecified: 0
asphalt: 1
brick: 2
cable: 3
concrete: 4
marking: 5
mesh: 6
metal: 7
naturalStone: 8
poster: 9
treeTrunk: 10
vegetation: 11



In [7]:
import plotly.graph_objects as go

# Toy Sankey diagram with three levels (A, B, C) of two nodes each.
demo_labels = ["A1", "A2", "B1", "B2", "C1", "C2"]
demo_node_colors = ["red", "red", "blue", "blue", "green", "green"]

# Link endpoints are positions in `demo_labels`; the first link runs 0 -> 2, i.e. A1 -> B1.
demo_sources = [0, 1, 0, 2, 3, 3]
demo_targets = [2, 3, 3, 4, 4, 5]
demo_values = [8, 4, 2, 8, 4, 2]
demo_link_colors = ['rgba(255,0,255,0.3)', "red", "blue", "blue", "green", "green"]

demo_node = dict(
    pad=60,
    thickness=10,
    line=dict(color="black", width=3),
    label=demo_labels,
    color=demo_node_colors,
)
demo_link = dict(
    source=demo_sources,
    target=demo_targets,
    value=demo_values,
    color=demo_link_colors,
)
fig = go.Figure(data=[go.Sankey(node=demo_node, link=demo_link)])

fig.update_layout(title_text="Label structure", font_size=30)
fig.show()