In [1]:
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
import math
from tqdm import tqdm

from PIL import Image, ImageOps

In [2]:
# Root folder of the dataset; every other path in this notebook hangs off it.
DATA_DIR = "/root/autodl-tmp/cervical_spine/"
# Sub-folders of DATA_DIR used below (plain strings: no placeholders, so no f-prefix).
IMAGES_DIR = os.path.join(DATA_DIR, "train_axial_images_jpeg95")
LABEL_DIR = os.path.join(DATA_DIR, "segmentation_axial_labels")
XRAY_DIR = os.path.join(DATA_DIR, "segmentation_sagittal_xray_labels")


In [3]:
# Collect the names of the entries directly under LABEL_DIR; each entry name
# is used as a study UID by the cells below.
# '*' is used instead of '**' because '**' only recurses when glob is called
# with recursive=True; without that flag both patterns match the same entries.
# Sorting makes the UID order (and any later positional lookup) reproducible,
# since glob returns paths in arbitrary order.
label_dir_paths = sorted(glob.glob(os.path.join(LABEL_DIR, '*')))
# basename() instead of split("/") so the UID extraction is separator-agnostic.
UIDs = [os.path.basename(path) for path in label_dir_paths]
len(UIDs)

0

In [4]:
# Read the X-ray metadata, keep one row per UID, index by UID, and keep only
# the "aspect" value of the studies listed in UIDs.
xray_df = (
    pd.read_csv(os.path.join(DATA_DIR, 'meta_xray.csv'))
    .drop_duplicates(subset='UID')
    .set_index('UID')
    .loc[UIDs, "aspect"]
)
print(len(xray_df))
xray_df.head()

FileNotFoundError: [Errno 2] No such file or directory: '/root/autodl-tmp/cervical_spine/meta_xray.csv'

In [None]:
# Load the inferred sagittal boundaries for the studies in UIDs and prefix the
# xmin/xmax columns with "sagittal_" so they stay distinguishable after merging.
sagittal_df = (
    pd.read_csv(os.path.join(DATA_DIR, 'infer_sagittal_boundary.csv'))
    .set_index('UID')
    .loc[UIDs, ['xmin','xmax']]
    .add_prefix('sagittal_')
)
sagittal_df.tail()

In [None]:
# Load the inferred coronal boundaries for the studies in UIDs and prefix the
# xmin/xmax columns with "coronal_" so they stay distinguishable after merging.
coronal_df = (
    pd.read_csv(os.path.join(DATA_DIR, 'infer_coronal_boundary.csv'))
    .set_index('UID')
    .loc[UIDs, ['xmin','xmax']]
    .add_prefix('coronal_')
)
coronal_df.tail()

In [None]:
# Join the sagittal bounds, coronal bounds and aspect side by side, aligned on
# their shared UID index.
boundary_df = pd.concat([sagittal_df, coronal_df, xray_df], axis='columns')
boundary_df.tail()

In [None]:
# Upper limit, in pixels, that the max bounds are clamped to.
# NOTE(review): assumes every axial slice is 512 pixels wide and high — confirm.
IMAGE_SIZE = 512

# Derive the axial bounding box: the coronal bounds give the x extent and the
# sagittal bounds give the y extent. Mins are clamped at 0 and maxes at
# IMAGE_SIZE. .assign() builds a new frame instead of mutating the one the
# previous cell displayed.
boundary_df = boundary_df.assign(
    xmin=boundary_df["coronal_xmin"].clip(lower=0),
    ymin=boundary_df["sagittal_xmin"].clip(lower=0),
    xmax=boundary_df["coronal_xmax"].clip(upper=IMAGE_SIZE),
    ymax=boundary_df["sagittal_xmax"].clip(upper=IMAGE_SIZE),
)
print(len(boundary_df))
boundary_df.tail()

In [None]:
# Sanity check: draw the bounding box of one study on top of one of its slices.
SAMPLE_ROW = 10            # position in boundary_df of the study to inspect
SAMPLE_SLICE = '247.jpeg'  # file name of the slice to display

s = boundary_df.iloc[SAMPLE_ROW]
print(s.name)

fig, ax = plt.subplots()
ax.axis('off')
# Context manager so the image file handle is released once the slice is drawn.
with Image.open(os.path.join(IMAGES_DIR, s.name, SAMPLE_SLICE)) as img:
    ax.imshow(img, cmap='bone')
# Vertical lines mark xmin/xmax, horizontal lines mark ymin/ymax.
ax.axvline(s["xmin"])
ax.axvline(s["xmax"])
ax.axhline(s["ymin"])
ax.axhline(s["ymax"]);  # trailing ';' hides the Line2D repr

In [None]:
# Per-study training table, indexed by StudyInstanceUID; its 'num_slices'
# column is read when the bounding boxes are expanded per slice.
train_df = (
    pd.read_csv(os.path.join(DATA_DIR, 'train_df_with_slices.csv'))
    .set_index('StudyInstanceUID')
)
train_df.head()

In [None]:
# Expand each per-study bounding box into one row per axial slice: the bbox
# columns are repeated num_slices times and 'axial_index' numbers the copies
# 0 .. num_slices - 1.
bbox_columns = ['xmin','ymin','xmax','ymax', 'aspect']
per_study_frames = []
for UID in tqdm(boundary_df.index):
    num_slices = train_df.loc[UID, 'num_slices']
    per_study_frames.append(
        boundary_df.loc[[UID] * num_slices, bbox_columns]
        .assign(axial_index=np.arange(num_slices))
    )

# Concatenate once at the end: growing the frame inside the loop re-copies all
# accumulated rows on every iteration. pd.concat rejects an empty list, so fall
# back to an empty frame when there are no studies.
df = pd.concat(per_study_frames) if per_study_frames else pd.DataFrame()

print(len(df))
df.tail()

In [None]:
# Write the per-slice bounding boxes next to the other dataset files
# (the index is written as well, since to_csv keeps it by default).
bbox_csv_path = os.path.join(DATA_DIR, 'segmentation_axial_bbox.csv')
df.to_csv(bbox_csv_path)