In [1]:
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
import math
from tqdm import tqdm

from PIL import Image, ImageOps

In [2]:
# DATA_DIR = "/root/autodl-tmp/cervical_spine/"
DATA_DIR = "/Volumes/SSD970/"


In [3]:
xray_df = pd.read_csv(os.path.join(DATA_DIR, 'meta_xray.csv')).drop_duplicates(subset='UID').set_index('UID')
xray_df = xray_df["aspect"]
print(len(xray_df))
xray_df.head()

2012


UID
1.2.826.0.1.3680043.10005    2.091504
1.2.826.0.1.3680043.10014    3.418803
1.2.826.0.1.3680043.10001    2.461541
1.2.826.0.1.3680043.10016    1.136566
1.2.826.0.1.3680043.10032    1.951216
Name: aspect, dtype: float64

In [5]:
sagittal_df = pd.read_csv(os.path.join(DATA_DIR, 'infer_sagittal_boundary.csv')).set_index('UID')
sagittal_df = sagittal_df[['xmin','xmax']]
sagittal_df = sagittal_df.add_prefix('sagittal_')
print(len(sagittal_df))
sagittal_df.tail()

2012


Unnamed: 0_level_0,sagittal_xmin,sagittal_xmax
UID,Unnamed: 1_level_1,Unnamed: 2_level_1
1.2.826.0.1.3680043.9926,81.474365,330.137
1.2.826.0.1.3680043.9940,43.931564,433.2635
1.2.826.0.1.3680043.9994,132.35954,419.59164
1.2.826.0.1.3680043.9996,11.594978,377.10385
1.2.826.0.1.3680043.9997,6.481827,415.57568


In [7]:
coronal_df = pd.read_csv(os.path.join(DATA_DIR, 'infer_coronal_boundary.csv')).set_index('UID')
coronal_df = coronal_df[['xmin','xmax']]
coronal_df = coronal_df.add_prefix('coronal_')
print(len(coronal_df))
coronal_df.tail()

2012


Unnamed: 0_level_0,coronal_xmin,coronal_xmax
UID,Unnamed: 1_level_1,Unnamed: 2_level_1
1.2.826.0.1.3680043.9926,103.52358,379.6752
1.2.826.0.1.3680043.9940,68.295395,476.78656
1.2.826.0.1.3680043.9994,144.88084,375.04407
1.2.826.0.1.3680043.9996,103.145035,406.66434
1.2.826.0.1.3680043.9997,52.653015,453.41766


In [8]:
boundary_df = pd.concat((sagittal_df, coronal_df, xray_df), axis=1)
print(len(boundary_df))
boundary_df.tail()

2012


Unnamed: 0_level_0,sagittal_xmin,sagittal_xmax,coronal_xmin,coronal_xmax,aspect
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1.2.826.0.1.3680043.9926,81.474365,330.137,103.52358,379.6752,2.844444
1.2.826.0.1.3680043.9940,43.931564,433.2635,68.295395,476.78656,2.666667
1.2.826.0.1.3680043.9994,132.35954,419.59164,144.88084,375.04407,1.886792
1.2.826.0.1.3680043.9996,11.594978,377.10385,103.145035,406.66434,1.916167
1.2.826.0.1.3680043.9997,6.481827,415.57568,52.653015,453.41766,2.782613


In [9]:
boundary_df["xmin"] = boundary_df["coronal_xmin"].clip(lower=0)
boundary_df["ymin"] = boundary_df["sagittal_xmin"].clip(lower=0)
boundary_df["xmax"] = boundary_df["coronal_xmax"].clip(upper=512)
boundary_df["ymax"] = boundary_df["sagittal_xmax"].clip(upper=512)
print(len(boundary_df))
boundary_df.tail()

2012


Unnamed: 0_level_0,sagittal_xmin,sagittal_xmax,coronal_xmin,coronal_xmax,aspect,xmin,ymin,xmax,ymax
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1.2.826.0.1.3680043.9926,81.474365,330.137,103.52358,379.6752,2.844444,103.52358,81.474365,379.6752,330.137
1.2.826.0.1.3680043.9940,43.931564,433.2635,68.295395,476.78656,2.666667,68.295395,43.931564,476.78656,433.2635
1.2.826.0.1.3680043.9994,132.35954,419.59164,144.88084,375.04407,1.886792,144.88084,132.35954,375.04407,419.59164
1.2.826.0.1.3680043.9996,11.594978,377.10385,103.145035,406.66434,1.916167,103.145035,11.594978,406.66434,377.10385
1.2.826.0.1.3680043.9997,6.481827,415.57568,52.653015,453.41766,2.782613,52.653015,6.481827,453.41766,415.57568


In [10]:
s = boundary_df.iloc[10]
print(s.name)
img = Image.open(os.path.join('train_axial_images_jpeg95', s.name, '247.jpeg'))
plt.axis('off')
plt.imshow(img, cmap='bone')
plt.axvline(s.xmin)
plt.axvline(s.xmax)
plt.axhline(s.ymin)
plt.axhline(s.ymax)

1.2.826.0.1.3680043.10136


FileNotFoundError: [Errno 2] No such file or directory: 'train_axial_images_jpeg95/1.2.826.0.1.3680043.10136/247.jpeg'

In [None]:
train_df = pd.read_csv(os.path.join(DATA_DIR, 'train_df_with_slices.csv')).set_index('StudyInstanceUID')
train_df.head()

In [None]:
df = pd.DataFrame()
for i in tqdm(range(len(boundary_df))):
    s = boundary_df.iloc[i]
    UID = s.name
    num_slices = train_df.loc[UID, 'num_slices']
    new_df = boundary_df.loc[[UID] * num_slices, ['xmin','ymin','xmax','ymax', 'aspect']]
    new_df['axial_index'] = np.arange(num_slices)
    df = pd.concat([df, new_df])

print(len(df))
df.tail()

In [None]:
df.to_csv(os.path.join(DATA_DIR, 'train_axial_boundary.csv'))