In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from collections import Counter, defaultdict
import re, os, cv2, random, warnings, shutil,tqdm
import albumentations as albu

def seed_everything(seed=0):
    """Seed the random number generators used in this notebook.

    Applies `seed` to Python's `random`, NumPy's global RNG and TensorFlow,
    then exports `PYTHONHASHSEED` and `TF_DETERMINISTIC_OPS` to the process
    environment.

    Args:
        seed: Seed value handed to each library (default 0).
    """
    # Same call order as before: random -> numpy -> tensorflow.
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)

    # Environment flags are written only after all seeding calls succeeded.
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_DETERMINISTIC_OPS': '1',
    })

warnings.filterwarnings('ignore')

In [2]:
def _bytes_feature(value):
    """Wrap one string / bytes value in a `tf.train.Feature` backed by a BytesList.

    Args:
        value: A bytes object, or an eager tensor holding one.

    Returns:
        A `tf.train.Feature` whose `bytes_list` contains exactly `value`.
    """
    eager_tensor_type = type(tf.constant(0))
    # BytesList won't unpack a string from an EagerTensor, so pull the raw
    # value out with .numpy() first.
    raw = value.numpy() if isinstance(value, eager_tensor_type) else value
    payload = tf.train.BytesList(value=[raw])
    return tf.train.Feature(bytes_list=payload)

def _int64_feature(value):
    """Wrap one bool / enum / int / uint value in a `tf.train.Feature` backed by an Int64List.

    Args:
        value: The single integer-like value to store.

    Returns:
        A `tf.train.Feature` whose `int64_list` contains exactly `value`.
    """
    payload = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=payload)

def serialize_example(image,ETT_Abnormal,ETT_Borderline,ETT_Normal, NGT_Abnormal, NGT_Borderline, NGT_Incompletely_Imaged, NGT_Normal, CVC_Abnormal,CVC_Borderline, CVC_Normal, SwanGanzCatheterPresent, unique_id, patient_id):
    """Pack one sample into a serialized `tf.train.Example`.

    Args:
        image: Encoded image bytes, stored under the 'image' key.
        ETT_Abnormal ... SwanGanzCatheterPresent: The eleven label values,
            each stored as an int64 feature under its CSV column name.
        unique_id: Bytes stored under 'StudyInstanceUID'.
        patient_id: Bytes stored under 'PatientID'.

    Returns:
        The serialized Example as a bytes string.
    """
    # (feature key, value) pairs for the int64 labels, in the original key order.
    int_fields = (
        ('ETT - Abnormal', ETT_Abnormal),
        ('ETT - Borderline', ETT_Borderline),
        ('ETT - Normal', ETT_Normal),
        ('NGT - Abnormal', NGT_Abnormal),
        ('NGT - Borderline', NGT_Borderline),
        ('NGT - Incompletely Imaged', NGT_Incompletely_Imaged),
        ('NGT - Normal', NGT_Normal),
        ('CVC - Abnormal', CVC_Abnormal),
        ('CVC - Borderline', CVC_Borderline),
        ('CVC - Normal', CVC_Normal),
        ('Swan Ganz Catheter Present', SwanGanzCatheterPresent),
    )

    feature = {'image': _bytes_feature(image)}
    for key, label_value in int_fields:
        feature[key] = _int64_feature(label_value)
    feature['StudyInstanceUID'] = _bytes_feature(unique_id)
    feature['PatientID'] = _bytes_feature(patient_id)

    features = tf.train.Features(feature=feature)
    return tf.train.Example(features=features).SerializeToString()

In [3]:
def write_TFREC(n_folds=5):
    """Write one TFRecord file of resized, JPEG-encoded images per fold.

    Relies on the notebook-level globals `sgkf` (fold table), `PATH` (image
    directory prefix), `IMSIZE` (target size passed to `cv2.resize`) and
    `IMG_QUALITY` (JPEG quality). For every fold `k` in `range(n_folds)` the
    rows of `sgkf` with `fold == k` are written to
    `train_{IMSIZE[0]}_{k}.tfrec` in the current working directory.

    Args:
        n_folds: Number of folds / output files to write (default 5, the
            previously hard-coded value).

    Raises:
        OSError: If an image cannot be read from disk or cannot be
            JPEG-encoded.
    """
    # Label columns, in the positional order expected by serialize_example.
    label_columns = (
        'ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
        'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
        'Swan Ganz Catheter Present',
    )

    for tfrec_num in range(n_folds):
        print('\nWriting TFRecord %i of %i...'%(tfrec_num, n_folds))
        samples = sgkf[sgkf['fold'] == tfrec_num]
        n_samples = len(samples)
        print(f'{n_samples} samples')
        tfrec_path = f"train_{IMSIZE[0]}_{tfrec_num}.tfrec"
        with tf.io.TFRecordWriter(tfrec_path) as writer:
            # iterrows() is a generator, so tqdm needs `total` to show progress.
            for _, sample in tqdm.tqdm(samples.iterrows(), total=n_samples):
                patient_id = sample["PatientID"]
                image_name = sample["StudyInstanceUID"] + ".jpg"
                img_path = f'{PATH}{image_name}'

                # cv2.imread signals failure by returning None instead of raising;
                # fail here with the offending path rather than inside cv2.resize.
                img = cv2.imread(img_path)
                if img is None:
                    raise OSError(f'Could not read image (missing or unreadable): {img_path}')
                img = cv2.resize(img, IMSIZE)

                encoded_ok, encoded = cv2.imencode('.jpg', img, (cv2.IMWRITE_JPEG_QUALITY, IMG_QUALITY))
                if not encoded_ok:
                    raise OSError(f'Could not JPEG-encode image: {img_path}')
                # tobytes() replaces the deprecated ndarray.tostring() alias.
                img_bytes = encoded.tobytes()

                label_values = [sample[column] for column in label_columns]
                # NOTE: the 'StudyInstanceUID' feature receives image_name, i.e. the
                # UID with the '.jpg' suffix, exactly as before.
                example = serialize_example(img_bytes, *label_values, str.encode(image_name), str.encode(patient_id))
                writer.write(example)

In [4]:
# Names of the eleven target columns in the folds CSV.
LABELS = [
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present',
]

# Fold table: one row per StudyInstanceUID with its labels, PatientID and `fold`.
sgkf = pd.read_csv("../input/ranzcr-sgkf-data/train_folds.csv")

# Preview the first rows.
sgkf.head()

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present,PatientID,fold
0,1.2.826.0.1.3680043.8.498.26697628953273228189...,0,0,0,0,0,0,1,0,0,0,0,ec89415d1,3
1,1.2.826.0.1.3680043.8.498.46302891597398758759...,0,0,1,0,0,1,0,0,0,1,0,bf4c6da3c,3
2,1.2.826.0.1.3680043.8.498.23819260719748494858...,0,0,0,0,0,0,0,0,1,0,0,3fc1c97e5,3
3,1.2.826.0.1.3680043.8.498.68286643202323212801...,0,0,0,0,0,0,0,1,0,0,0,c31019814,0
4,1.2.826.0.1.3680043.8.498.10050203009225938259...,0,0,0,0,0,0,0,0,0,1,0,207685cd1,0


In [5]:
# Encoding settings read by write_TFREC.
IMSIZE = (512, 512)  # size handed to cv2.resize; IMSIZE[0] is also used in the output file name
IMG_QUALITY = 100    # value passed with cv2.IMWRITE_JPEG_QUALITY

# Location of the training images.
database_base_path = '../input/ranzcr-clip-catheter-line-classification/'
PATH = f'{database_base_path}train/'

# List the directory once and report how many entries it contains.
IMGS = os.listdir(PATH)
print(f'Image samples: {len(IMGS)}')

Image samples: 30083


In [6]:
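# Disabled by default: uncomment to (re)generate the TFRecord files. This reads,
# resizes and re-encodes the images of every fold.
#write_TFREC()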