In [None]:
import pandas as pd
import os
import glob
import numpy as np

# Create dataframe of Unity bboxes

In [None]:
# Collect one record per Unity annotation file. The paths are sorted because
# glob returns them in an arbitrary, filesystem-dependent order, which would
# otherwise make the row order of df_unity differ between machines.
unity_paths = sorted(glob.glob('../data/unity/t8/*.txt'))
unity_annots = []
for path in unity_paths:
    with open(path, 'r') as f:
        lines = f.readlines()
    # Keep only files with exactly 8 lines (one line per cell); skip the rest.
    if len(lines) != 8:
        continue
    unity_annots.append({
        # The id is the second '_'-separated token of the file name.
        'id': int(os.path.basename(path).split('_')[1]),
        'label': lines,
    })
# Naming the columns keeps 'id' and 'label' present even when no file
# qualifies, so later cells that index df_unity['label'] do not hit a KeyError.
df_unity = pd.DataFrame(unity_annots, columns=['id', 'label'])

In [None]:
def parse_unity_label(label):
    """Turn the raw lines of one Unity annotation into per-cell records.

    Each line is expected to look like
    ``<cx> <cy> <depth> <extra> [(x, y), (x, y), ...]``: four
    single-space-separated fields, then a '['-introduced list of integer mask
    coordinates. The last two characters of the line (presumably the closing
    ']' and the newline) are discarded before the coordinates are read.

    Args:
        label: Iterable of annotation lines, one per cell.

    Returns:
        A list with one ``[cx, cy, w, h, depth]`` entry per line, where ``w``
        and ``h`` are the max-minus-min spread of the mask's x and y
        coordinates.

    Raises:
        ValueError: If a line does not match the expected layout.
    """
    def spread(values):
        # Distance between the two extreme coordinates along one axis
        return max(values) - min(values)

    records = []
    for line in label:
        header, outline = line.split('[')
        # The fourth header field is read but not used
        cx_text, cy_text, depth_text, _ = header.strip().split(' ')
        cx, cy, depth = int(cx_text), int(cy_text), int(depth_text)
        # Walk the mask outline, collecting x and y values separately
        xs, ys = [], []
        for chunk in outline[:-2].split('), ('):
            x_text, y_text = chunk.replace('(', '').replace(')', '').split(', ')
            xs.append(int(x_text))
            ys.append(int(y_text))
        records.append([cx, cy, spread(xs), spread(ys), depth])
    return records

# Swap the raw text lines held in 'label' for their parsed per-cell records.
# Note: this cell is not re-runnable on its own, because a second run would
# feed already-parsed lists to parse_unity_label, which expects strings.
parsed_labels = df_unity['label'].apply(parse_unity_label)
df_unity['label'] = parsed_labels
df_unity

# Load existing data on outcomes and adjacency

In [None]:
# Read the clinical table and the t8 adjacency table, in that order.
df_clinical, df_adjacency = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/clinical.csv', '../data/adj-t8.csv')
)

In [None]:
def parse_adjacency(x):
    """Decode a serialised adjacency matrix into nested lists of 0/1 flags.

    Rows in ``x`` are separated by '/', and values within a row by spaces.
    Blank rows and empty tokens (left by repeated spaces) are skipped. A value
    becomes 0 when it equals zero as a float, and 1 otherwise.

    Args:
        x: The adjacency matrix as a single string.

    Returns:
        A list of rows, each a list of ints that are either 0 or 1.
    """
    matrix = []
    for raw_row in x.split('/'):
        stripped = raw_row.strip()
        if stripped == '':
            continue
        flags = []
        for token in stripped.split(' '):
            if token == '':
                continue
            # Binarise: any non-zero weight counts as an edge
            flags.append(1 if float(token) != 0 else 0)
        matrix.append(flags)
    return matrix
# Replace each serialised adjacency string with its nested 0/1 list form.
adjacency_parsed = df_adjacency['adjacency'].apply(parse_adjacency)
df_adjacency['adjacency'] = adjacency_parsed
df_adjacency

# Compute distance matrix

In [None]:
def compute_distance_matrix_without_adjacency(cells, img_side_len=500, dist_per_pixel=0.55, no_slices=11, dist_per_focal=15):
    """Build the pairwise distance matrix between cells, ignoring adjacency.

    Every cell is a sequence whose items 0 and 1 are its x and y centre and
    whose item 4 is its depth index. The x/y differences are divided by
    ``img_side_len`` and the depth difference is scaled by
    ``dist_per_focal / (img_side_len * dist_per_pixel)``; the result is the
    Euclidean norm of those three components.

    Args:
        cells: Sequence of cell records, e.g. ``[cx, cy, w, h, depth]``.
        img_side_len: Image side length used to normalise x and y
            (presumably in pixels).
        dist_per_pixel: Presumably the physical size of one pixel, in the same
            unit as ``dist_per_focal``.
        no_slices: Kept for backward compatibility only. It cancels out of the
            depth scaling and no longer affects the result; the earlier
            formula divided by ``no_slices - 1`` and so failed for 1.
        dist_per_focal: Presumably the physical spacing between neighbouring
            depth slices.

    Returns:
        A square list of lists of plain Python floats, where entry ``[i][j]``
        is the distance between ``cells[i]`` and ``cells[j]``. Plain floats
        (rather than NumPy scalars) keep the text form of the matrix stable
        when it is written out as a string, e.g. through ``DataFrame.to_csv``.
    """
    # Depth change per slice index, expressed as a fraction of the image side
    depth_step = dist_per_focal / (img_side_len * dist_per_pixel)
    return [
        [
            float(np.sqrt(
                ((a[0] - b[0]) / img_side_len) ** 2
                + ((a[1] - b[1]) / img_side_len) ** 2
                + ((a[4] - b[4]) * depth_step) ** 2
            ))
            for b in cells
        ]
        for a in cells
    ]

In [None]:
# Attach each row's pairwise cell-distance matrix, computed from the parsed
# 'label' records with the function's default geometry parameters.
distance_matrices = df_unity['label'].apply(compute_distance_matrix_without_adjacency)
df_unity['distance'] = distance_matrices
df_unity

# Merge dataframes

In [None]:
# Join the three tables on 'id'. merge defaults to an inner join, so only ids
# present in all of clinical, adjacency and Unity data survive.
df = (
    df_clinical
    .merge(df_adjacency, on='id')
    .merge(df_unity, on='id')
)
df

In [None]:
# Persist the merged dataset. With to_csv defaults the index is written as the
# first column, and list-valued cells are stored as their string form.
output_csv = '../data/adjacency_and_bbox_dataset.csv'
df.to_csv(output_csv)