In [180]:
import sys
sys.path.append("..")

import argparse
import functools
import pickle
from multiprocessing import Pool
import copy

import pydicom
import pandas as pd
from tqdm import tqdm
import numpy as np
import os
np.seterr(over='ignore')

from utils import misc


def create_record(item, dirname):
    #id, labels = item
    
    path = dirname+item
    dicom = pydicom.dcmread(path)
    
    record = {
        'ID': item[:-4],
        'labels': ' ',
        'n_label': 0,
    }
    record.update(misc.get_dicom_raw(dicom))

    raw = dicom.pixel_array
    slope = float(record['RescaleSlope'])
    intercept = float(record['RescaleIntercept'])
    center = misc.get_dicom_value(record['WindowCenter'])
    width = misc.get_dicom_value(record['WindowWidth'])
    bits= record['BitsStored']
    pixel = record['PixelRepresentation']

    image = misc.rescale_image(raw, slope, intercept, bits, pixel)
    doctor = misc.apply_window(image, center, width)
    brain = misc.apply_window(image, 40, 80)

    record.update({
        'raw_max': raw.max(),
        'raw_min': raw.min(),
        'raw_mean': raw.mean(),
        'raw_diff': raw.max() - raw.min(),
        'doctor_max': doctor.max(),
        'doctor_min': doctor.min(),
        'doctor_mean': doctor.mean(),
        'doctor_diff': doctor.max() - doctor.min(),
        'brain_max': brain.max(),
        'brain_min': brain.min(),
        'brain_mean': brain.mean(),
        'brain_diff': brain.max() - brain.min(),
        'brain_ratio': misc.get_windowed_ratio(image, 40, 80),
    })
    return record

tmp = []
for i in os.listdir('/home/islab/kaggle-rsna-intracranial-hemorrhage/TestingData/'):
    tmp.append(create_record(i, f"/home/islab/kaggle-rsna-intracranial-hemorrhage/TestingData/"))
    
test = pd.DataFrame(tmp)

In [181]:
test

Unnamed: 0,ID,labels,n_label,BitsAllocated,BitsStored,Columns,HighBit,ImageOrientationPatient,ImagePositionPatient,Modality,...,raw_diff,doctor_max,doctor_min,doctor_mean,doctor_diff,brain_max,brain_min,brain_mean,brain_diff,brain_ratio
0,Test_013,,0,16,16,512,15,"(1.000000, 0.000000, 0.000000, 0.000000, 1.000...","(-126.437378, -126.437378, -155.964325)",CT,...,4363,102.0,-32.0,-8.339268,134.0,80.0,0.0,12.221603,80.0,0.242413
1,Test_471,,0,16,12,512,11,"(1, 0, 0, 0, 1, 0)","(-112.5, 5.5, 156.199951)",CT,...,2558,76.0,-4.0,13.581676,80.0,80.0,0.0,16.409290,80.0,0.292892
2,Test_475,,0,16,12,512,11,"(1, 0, 0, 0, 1, 0)","(-74.5, -10.5, 194.300049)",CT,...,2205,80.0,0.0,14.474892,80.0,80.0,0.0,14.474892,80.0,0.204533
3,Test_420,,0,16,12,512,11,"(1, 0, 0, 0, 0.939692621, -0.342020143)","(-145.5, 60.887654, 285.486376)",CT,...,2515,80.0,0.0,14.151081,80.0,80.0,0.0,14.151081,80.0,0.297142
4,Test_385,,0,16,12,512,11,"(1, 0, 0, 0, 0.939692621, -0.342020143)","(-125, 59.5384224, 230.562313)",CT,...,4095,80.0,0.0,12.523060,80.0,80.0,0.0,12.523060,80.0,0.258347
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,Test_157,,0,16,16,512,15,"(1.000000, 0.000000, 0.000000, 0.000000, 0.933...","(-125.000, -119.798, 109.101)",CT,...,3587,115.0,-35.0,-12.334438,150.0,80.0,0.0,10.872108,80.0,0.192394
596,Test_266,,0,16,16,512,15,"(1.000000, 0.000000, 0.000000, 0.000000, 1.000...","(-126.408875, -126.408875, -285.040558)",CT,...,2829,102.0,-32.0,-4.948746,134.0,80.0,0.0,14.070625,80.0,0.247070
597,Test_335,,0,16,16,512,15,"(1.000000, 0.000000, 0.000000, 0.000000, 1.000...","(-125.000, -118.800, 4.250)",CT,...,3808,115.0,-35.0,-1.998295,150.0,80.0,0.0,16.670753,80.0,0.284008
598,Test_257,,0,16,16,512,15,"(1.000000, 0.000000, 0.000000, 0.000000, 0.927...","(-125.000000, -121.097977, 80.184433)",CT,...,4667,70.0,-10.0,8.661770,80.0,80.0,0.0,16.381516,80.0,0.226406


In [182]:
test_raw = test.sort_values(by=['ID']).reset_index(drop=True)

with open('test_raw.pkl', 'wb') as f:
    pickle.dump(test_raw, f)

In [183]:
import collections
from pprint import pprint

def show_distribution(dataset):
    counter = collections.defaultdict(int)
    for row in dataset.itertuples():
        for label in row.labels.split():
            counter[label] += 1
        if not row.labels:
            counter['negative'] += 1
        counter['all'] += 1
    pprint(counter)

    
def parse_position(df):
    expanded = df.ImagePositionPatient.apply(lambda x: pd.Series(x))
    expanded.columns = ['Position1', 'Position2', 'Position3']
    return pd.concat([df, expanded], axis=1)

def add_adjacent_labels(df):
    df = df.sort_values('PositionOrd')

    records = []
    print('making adjacent labels...')
    for index,group in tqdm(df.groupby('StudyInstanceUID')):

        labels = list(group.labels)
        for j,id in enumerate(group.ID):
            if j == 0:
                left = ''
            else:
                left = labels[j-1]
            if j+1 == len(labels):
                right = ''
            else:
                right = labels[j+1]

            records.append({
                'LeftLabel': left,
                'RightLabel': right,
                'ID': id,
            })
    return pd.merge(df, pd.DataFrame(records), on='ID')

show_distribution(test)

test = parse_position(test)

test['WindowCenter'] = test.WindowCenter.apply(lambda x: misc.get_dicom_value(x))
test['WindowWidth'] = test.WindowWidth.apply(lambda x: misc.get_dicom_value(x))
test['PositionOrd'] = test.groupby('SeriesInstanceUID')[['Position3']].rank() / test.groupby('SeriesInstanceUID')[['Position3']].transform('count')

test = add_adjacent_labels(test)
test = test[['ID', 'labels', 'PatientID', 'WindowCenter', 'WindowWidth', 'RescaleIntercept', 'RescaleSlope', 'Position3', 'PositionOrd', 'LeftLabel', 'RightLabel', 'BitsStored', 'PixelRepresentation', 'brain_ratio', 'brain_diff']]

show_distribution(test)

  0%|          | 0/567 [00:00<?, ?it/s]

defaultdict(<class 'int'>, {'all': 600})
making adjacent labels...


100%|██████████| 567/567 [00:00<00:00, 5507.64it/s]

defaultdict(<class 'int'>, {'all': 600})





In [184]:
test = test.sort_values(by=['ID']).reset_index(drop=True)

with open('test.pkl', 'wb') as f:
    pickle.dump(test, f)
test

Unnamed: 0,ID,labels,PatientID,WindowCenter,WindowWidth,RescaleIntercept,RescaleSlope,Position3,PositionOrd,LeftLabel,RightLabel,BitsStored,PixelRepresentation,brain_ratio,brain_diff
0,Test_001,,ID_9df0f9aa,35,135,-1024.0,1.0,37.500000,1.0,,,16,1,0.143860,80.0
1,Test_002,,ID_00859e11,40,150,-1024.0,1.0,76.016000,1.0,,,16,1,0.289101,80.0
2,Test_003,,ID_d28d9540,40,80,-1024.0,1.0,169.275374,1.0,,,12,0,0.102123,80.0
3,Test_004,,ID_78ce1b0d,30,80,-1024.0,1.0,216.173000,1.0,,,16,1,0.001240,80.0
4,Test_005,,ID_08faadaf,36,80,-1024.0,1.0,121.000000,1.0,,,12,0,0.309010,80.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,Test_596,,ID_ac57e226,30,80,-1024.0,1.0,95.537000,0.5,,,16,1,0.278336,80.0
596,Test_597,,ID_4ecb1803,40,150,-1024.0,1.0,45.149000,1.0,,,16,1,0.198143,80.0
597,Test_598,,ID_b4d1c8f5,35,135,-1024.0,1.0,117.432632,1.0,,,16,1,0.183155,80.0
598,Test_599,,ID_991ad3c5,40,80,-1024.0,1.0,221.227522,1.0,,,12,0,0.330856,80.0
