In [None]:
import pandas as pd
import numpy as np
import tqdm
import os
import glob
import cv2

## Data Splitting

In [None]:
# CheXpert label table for MIMIC-CXR (subject_id, study_id and one column per
# finding), read from the mounted Google Drive.
chexpert = pd.read_csv('/content/drive/My Drive/mimic-cxr/mimic-cxr-2.0.0-chexpert.csv')

In [None]:
# Image-level metadata (dicom_id, subject_id, study_id, ViewPosition, ...),
# read from the mounted Google Drive.
total_meta = pd.read_csv("/content/drive/My Drive/mimic-cxr/mimic-cxr-2.0.0-metadata.csv")

In [None]:
chexpert.count()

subject_id                    227827
study_id                      227827
Atelectasis                    57666
Cardiomegaly                   66799
Consolidation                  23076
Edema                          65833
Enlarged Cardiomediastinum     21837
Fracture                        5831
Lung Lesion                     8287
Lung Opacity                   58425
No Finding                     75455
Pleural Effusion               87272
Pleural Other                   2902
Pneumonia                      59185
Pneumothorax                   53848
Support Devices                70281
dtype: int64

In [None]:
chexpert[(chexpert['Pleural Effusion'] == 1) &(chexpert['Support Devices'] == 1)]

Unnamed: 0,subject_id,study_id,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
38,10001884,50712381,,1.0,,1.0,,,,,,1.0,,,0.0,1.0
55,10001884,56722923,,1.0,,1.0,,,,,,1.0,,,0.0,1.0
81,10002428,50292543,1.0,,,1.0,,,,1.0,,1.0,,,,1.0
82,10002428,50444997,1.0,,,,,,,1.0,,1.0,,,,1.0
83,10002428,50862960,,,,,0.0,,,,,1.0,,,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
227768,19997473,59017551,,1.0,,1.0,,,,,,1.0,,,0.0,1.0
227773,19997911,58942262,,,,1.0,,,,,,1.0,,,,1.0
227794,19998562,56823087,,,,,,,,,,1.0,,,,1.0
227799,19998843,54376373,1.0,,,,,,,,,1.0,,,,1.0


In [None]:
chexpert.head()

Unnamed: 0,subject_id,study_id,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
0,10000032,50414267,,,,,,,,,1.0,,,,,
1,10000032,53189527,,,,,,,,,1.0,,,,,
2,10000032,53911762,,,,,,,,,1.0,,,,,
3,10000032,56699142,,,,,,,,,1.0,,,,,
4,10000764,57375967,,,1.0,,,,,,,,,-1.0,,


In [None]:
total_meta.head()

Unnamed: 0,dicom_id,subject_id,study_id,PerformedProcedureStepDescription,ViewPosition,Rows,Columns,StudyDate,StudyTime,ProcedureCodeSequence_CodeMeaning,ViewCodeSequence_CodeMeaning,PatientOrientationCodeSequence_CodeMeaning
0,02aa804e-bde0afdd-112c0b34-7bc16630-4e384014,10000032,50414267,CHEST (PA AND LAT),PA,3056,2544,21800506,213014.531,CHEST (PA AND LAT),postero-anterior,Erect
1,174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962,10000032,50414267,CHEST (PA AND LAT),LATERAL,3056,2544,21800506,213014.531,CHEST (PA AND LAT),lateral,Erect
2,2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab,10000032,53189527,CHEST (PA AND LAT),PA,3056,2544,21800626,165500.312,CHEST (PA AND LAT),postero-anterior,Erect
3,e084de3b-be89b11e-20fe3f9f-9c8d8dfe-4cfd202c,10000032,53189527,CHEST (PA AND LAT),LATERAL,3056,2544,21800626,165500.312,CHEST (PA AND LAT),lateral,Erect
4,68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714,10000032,53911762,CHEST (PORTABLE AP),AP,2705,2539,21800723,80556.875,CHEST (PORTABLE AP),antero-posterior,


In [None]:
# Attach the study-level labels to every image row. This is the default inner
# join, so rows whose study_id is absent from either table are dropped.
meta_merged = pd.merge(total_meta, chexpert, on='study_id')

In [None]:
meta_merged.head()

In [None]:
# Drop the metadata columns that are not needed downstream (and the duplicated
# subject id produced by the merge), then restore the plain `subject_id` name.
meta_merged = (
    meta_merged
    .drop(columns=[
        'subject_id_y',
        'PatientOrientationCodeSequence_CodeMeaning',
        'ViewCodeSequence_CodeMeaning',
        'ProcedureCodeSequence_CodeMeaning',
        'StudyTime',
        'StudyDate',
        'Columns',
        'Rows',
        'PerformedProcedureStepDescription',
    ])
    .rename(columns={'subject_id_x': 'subject_id'})
)

In [None]:
# Unique subject ids owning at least one lateral image (ViewPosition LATERAL or LL).
subject_with_la = (
    meta_merged
    .loc[meta_merged['ViewPosition'].isin(['LATERAL', 'LL']), 'subject_id']
    .unique()
)

In [None]:
# Unique subject ids owning at least one frontal image (ViewPosition AP or PA).
subject_with_ap = (
    meta_merged
    .loc[meta_merged['ViewPosition'].isin(['AP', 'PA']), 'subject_id']
    .unique()
)

In [None]:
# Subjects that have both a frontal and a lateral image, as a plain list.
subject_with_both = list(set(subject_with_ap).intersection(set(subject_with_la)))

In [None]:
len(subject_with_both)

52576

In [None]:
len(meta_merged['subject_id'].unique())

65379

In [None]:
# Frontal-only pool: subjects with an AP/PA image that are NOT in
# subject_with_both. np.setdiff1d returns the sorted unique ids.
subject_with_ap_1 = np.setdiff1d(subject_with_ap,subject_with_both)

In [None]:
# Shuffle both subject pools in place before they are sliced into
# train / val / test.
# The RNG is seeded, and the set-derived list is sorted first, so that
# re-running the notebook reproduces exactly the same subject-level split.
# Without this, every run would assign subjects to different partitions.
SPLIT_SEED = 42
subject_with_both.sort()
np.random.seed(SPLIT_SEED)
np.random.shuffle(subject_with_both)
np.random.shuffle(subject_with_ap_1)

In [None]:
print(len(subject_with_both))
print(len(subject_with_ap_1))

52576
11369


In [None]:
# 70 / 15 / 15 split boundaries (train end, val end) for the pool of subjects
# that have both frontal and lateral images.
train_cutpt_la = int(len(subject_with_both) * 0.7)
val_cutpt_la = train_cutpt_la + int(len(subject_with_both) * 0.15)

# Same boundaries for the frontal-only pool. They must be derived from the
# pool that is actually sliced (subject_with_ap_1), not from subject_with_ap.
# subject_with_ap also contains every "both" subject, so its 70% mark lies past
# the end of subject_with_ap_1: all frontal-only subjects would land in train
# and the val / test slices would be empty.
train_cutpt_ap = int(len(subject_with_ap_1) * 0.7)
val_cutpt_ap = train_cutpt_ap + int(len(subject_with_ap_1) * 0.15)

In [None]:
# Subject-level split of the "both views" pool: every image row follows the
# partition its subject id was assigned to.
train_la = meta_merged.loc[meta_merged['subject_id'].isin(subject_with_both[:train_cutpt_la])]
val_la = meta_merged.loc[meta_merged['subject_id'].isin(subject_with_both[train_cutpt_la:val_cutpt_la])]
test_la = meta_merged.loc[meta_merged['subject_id'].isin(subject_with_both[val_cutpt_la:])]

In [None]:
# Subject-level split of the frontal-only pool, sliced at the *_ap cut points.
train_ap = meta_merged.loc[meta_merged['subject_id'].isin(subject_with_ap_1[:train_cutpt_ap])]
val_ap = meta_merged.loc[meta_merged['subject_id'].isin(subject_with_ap_1[train_cutpt_ap:val_cutpt_ap])]
test_ap = meta_merged.loc[meta_merged['subject_id'].isin(subject_with_ap_1[val_cutpt_ap:])]

In [None]:
train_la.head()

Unnamed: 0,dicom_id,subject_id,study_id,ViewPosition,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
0,02aa804e-bde0afdd-112c0b34-7bc16630-4e384014,10000032,50414267,PA,,,,,,,,,1.0,,,,,
1,174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962,10000032,50414267,LATERAL,,,,,,,,,1.0,,,,,
2,2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab,10000032,53189527,PA,,,,,,,,,1.0,,,,,
3,e084de3b-be89b11e-20fe3f9f-9c8d8dfe-4cfd202c,10000032,53189527,LATERAL,,,,,,,,,1.0,,,,,
4,68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714,10000032,53911762,AP,,,,,,,,,1.0,,,,,


In [None]:
# Order each partition by subject, then study.
# The frames are boolean-mask selections of meta_merged, so sorting them with
# inplace=True triggers pandas' SettingWithCopyWarning; rebinding the sorted
# result gives the same frames without mutating a derived slice.
train_la = train_la.sort_values(by=['subject_id', 'study_id'])
val_la = val_la.sort_values(by=['subject_id', 'study_id'])
test_la = test_la.sort_values(by=['subject_id', 'study_id'])

In [None]:
# Persist the "both views" partitions to the working directory. The row index
# is written as well (pandas' default), so it comes back as an extra leading
# column when these files are read with pd.read_csv.
train_la.to_csv("train_set_both.csv", index=True)
val_la.to_csv("val_set_both.csv", index=True)
test_la.to_csv("test_set_both.csv", index=True)

In [None]:
# Full partitions = frontal-only rows followed by "both views" rows, with a
# fresh 0..n-1 index.
train_all = pd.concat((train_ap, train_la), axis=0, ignore_index=True)
val_all = pd.concat((val_ap, val_la), axis=0, ignore_index=True)
test_all = pd.concat((test_ap, test_la), axis=0, ignore_index=True)

In [None]:
# Order the combined partitions by subject, then study.
train_all = train_all.sort_values(by=['subject_id', 'study_id'])
val_all = val_all.sort_values(by=['subject_id', 'study_id'])
test_all = test_all.sort_values(by=['subject_id', 'study_id'])

In [None]:
# Persist the combined partitions to the working directory (row index included,
# which is pandas' default).
train_all.to_csv("train_set.csv", index=True)
val_all.to_csv("val_set.csv", index=True)
test_all.to_csv("test_set.csv", index=True)

## Dataloader

In [None]:
import tensorflow as tf

In [None]:
# Load the training partition from Drive.
# NOTE(review): presumably this is the train_set_both.csv written by the
# splitting section, copied to the AG-CNN/dataset folder — confirm.
train = pd.read_csv("/content/drive/My Drive/AG-CNN/dataset/train_set_both.csv")
# val = pd.read_csv("/content/drive/My Drive/AG-CNN/dataset/val_set.csv")
# test = pd.read_csv("test_set.csv")

In [None]:
def generate_path(row, image_root='/content/drive/My Drive/mimic-cxr/images512'):
  """Build the PNG path of one image row.

  The layout is ``<image_root>/p<XX>/p<subject_id>/s<study_id>/<dicom_id>.png``
  where ``XX`` is the first two digits of the subject id.

  Args:
    row: mapping / pandas row exposing 'subject_id', 'study_id' and 'dicom_id'.
      The two ids are passed through ``int`` first, so float-typed ids such as
      10000032.0 are rendered without a decimal part.
    image_root: directory containing the ``pXX`` folders. Defaults to the
      Drive location this notebook was written against.

  Returns:
    The image path as a string.
  """
  subject = str(int(row['subject_id']))
  study = str(int(row['study_id']))
  return '{}/p{}/p{}/s{}/{}.png'.format(
      image_root,
      subject[:2],
      subject,
      study,
      row['dicom_id']
  )

In [None]:
# Resolve the on-disk PNG path for every row (generate_path is applied row-wise).
train['path'] = train.apply(generate_path, axis=1)
# val['path'] = val.apply(lambda x : generate_path(x), axis=1)
# test['path'] = test.apply(lambda x : generate_path(x), axis=1)

In [None]:
# Keep only the AP-view rows.
train = train.loc[train['ViewPosition'].eq('AP')]

In [None]:
# Cap the working set at the first 20000 rows (positional slice).
train = train.iloc[:20000]

In [None]:
# Columns at positions 5..18 of `train`.
# NOTE(review): presumably these are the 14 CheXpert finding columns
# (Atelectasis .. Support Devices), given the layout of the split CSVs — confirm.
train_labels = train[train.columns[5:19]]

In [None]:
# train_labels.fillna(0, inplace=True)

# train_labels.replace(-1, 0, inplace=True)

In [None]:
# Single-view dataset: one (image path, label vector, study id) triple per row.
train_ds = tf.data.Dataset.from_tensor_slices(
    (train['path'], train_labels.to_numpy(), train['study_id'])
)

In [None]:
# Multi-view variant: (study id, frontal path, lateral path, label vector).
# NOTE(review): `train_merge` is not defined anywhere in the visible notebook,
# so this cell raises NameError on a fresh kernel. It also rebinds `train_ds`,
# replacing the single-view dataset built in the previous cell. Run only one of
# the two variants.
train_ds = tf.data.Dataset.from_tensor_slices((train_merge['study_id'],train_merge['frontal'], train_merge['side'], np.array(train_labels)))

In [None]:
def decode_img(img):
  """Decode PNG bytes into a 224x224, 3-channel float32 image tensor.

  Args:
    img: scalar string tensor holding the encoded PNG file contents.

  Returns:
    A float32 tensor of shape (224, 224, 3).
  """
  decoded = tf.image.decode_png(img, channels=3)
  # convert_image_dtype rescales integer pixel values into [0, 1].
  as_float = tf.image.convert_image_dtype(decoded, tf.float32)
  resized = tf.image.resize(as_float, (224,224))
  return resized

def process_path(path, label, sid):
  """Load and decode the image at `path`, passing label and study id through.

  Args:
    path: string tensor with the image file path.
    label: label tensor, returned unchanged.
    sid: study id tensor, returned unchanged.

  Returns:
    Tuple (decoded image, label, sid).
  """
  raw_bytes = tf.io.read_file(path)
  return decode_img(raw_bytes), label, sid

def process_path_mul(sid, front, side, label):
  """Load and decode a frontal / lateral image pair.

  Args:
    sid: study id tensor, returned unchanged.
    front: string tensor with the frontal image path.
    side: string tensor with the lateral image path.
    label: label tensor, returned unchanged.

  Returns:
    Tuple (frontal image, lateral image, label, sid). Note that the output
    order differs from the argument order.
  """
  img_front = decode_img(tf.io.read_file(front))
  img_side = decode_img(tf.io.read_file(side))
  return img_front, img_side, label, sid

In [None]:
# Decode images in parallel, silently drop elements whose read/decode fails,
# and cache the decoded tensors on local disk.
train_im = (
    train_ds
    .map(process_path, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .apply(tf.data.experimental.ignore_errors())
    .cache('/content/train_cache')
)

In [None]:
# Multi-view variant of the decoding pipeline (expects the 4-field train_ds).
# NOTE(review): this rebinds `train_im` and uses the same cache path
# ('/content/train_cache') as the single-view pipeline in the previous cell, so
# the two variants would share one cache file. The cells below
# (recompress_image, the TFRecord writer) take 3-element items, which this
# variant does not produce.
train_im = train_ds.map(process_path_mul, num_parallel_calls=tf.data.experimental.AUTOTUNE)\
    .apply(tf.data.experimental.ignore_errors()).cache('/content/train_cache')

### Create TFRecord

In [None]:
len(train)

75080

In [None]:
BS = 64  # presumably a batch size; not referenced in the visible cells — TODO confirm
# TR_SHARDS = 40
TR_SHARDS = 10  # divisor used to derive the number of records per shard
GCS_OUTPUT = '/content/drive/My Drive/AG-CNN/tfrecord_train_ap/'  # prefix the shard file names are appended to

In [None]:
import math

# Records per shard, rounded up so that TR_SHARDS shards are enough to hold
# every row of `train`.
tr_size = len(train)
tr_shard_size = math.ceil(tr_size / TR_SHARDS)

In [None]:
tr_shard_size

2000

In [None]:
def recompress_image(image, label, sid):
  """Re-encode a float image as JPEG bytes, passing label and sid through.

  Args:
    image: float image tensor; it is multiplied by 255 and cast to uint8
      (the cast truncates, it does not round).
    label: label tensor, returned unchanged.
    sid: study id tensor, returned unchanged.

  Returns:
    Tuple (JPEG-encoded string tensor, label, sid).
  """
  as_uint8 = tf.cast(image * 255, tf.uint8)
  jpeg_bytes = tf.image.encode_jpeg(as_uint8, optimize_size=True, chroma_downsampling=False)
  return jpeg_bytes, label, sid

In [None]:
def recompress_image_mul(image_front,image_side, label, sid):
  """Re-encode a frontal / lateral float image pair as JPEG bytes.

  Args:
    image_front: float frontal image tensor.
    image_side: float lateral image tensor.
    label: label tensor, returned unchanged.
    sid: study id tensor, returned unchanged.

  Returns:
    Tuple (frontal JPEG bytes, lateral JPEG bytes, label, sid).
  """
  def _to_jpeg(float_image):
    # Scale by 255, truncate to uint8, then JPEG-encode.
    as_uint8 = tf.cast(float_image * 255, tf.uint8)
    return tf.image.encode_jpeg(as_uint8, optimize_size=True, chroma_downsampling=False)

  return _to_jpeg(image_front), _to_jpeg(image_side), label, sid

In [None]:
# JPEG-encode every image, then batch so that one batch holds one shard's
# worth of records.
train_imc = (
    train_im
    .map(recompress_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(tr_shard_size)
    .prefetch(1)
)

In [None]:
# val_imc = val_im.map(recompress_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# val_imc = val_imc.batch(val_shard_size).prefetch(1)

In [None]:
# TFRecords can hold three kinds of data: byte strings, integers and floats.
# Each is stored as a list; a single value is simply a list of length 1.

def _bytestring_feature(list_of_bytestrings):
  """Wrap a list of byte strings in a `bytes_list` Feature."""
  payload = tf.train.BytesList(value=list_of_bytestrings)
  return tf.train.Feature(bytes_list=payload)

def _int_feature(list_of_ints): # int64
  """Wrap a list of integers in an `int64_list` Feature."""
  payload = tf.train.Int64List(value=list_of_ints)
  return tf.train.Feature(int64_list=payload)

def _float_feature(list_of_floats): # float32
  """Wrap a list of floats in a `float_list` Feature."""
  payload = tf.train.FloatList(value=list_of_floats)
  return tf.train.Feature(float_list=payload)
  

def to_tfrecord(tfrec_filewriter, img, label, sid):
  """Build the tf.train.Example for one record.

  Args:
    tfrec_filewriter: unused; the caller writes the returned Example itself.
    img: encoded image bytes, stored under "image".
    label: array-like with a `reshape` method; flattened and stored as floats
      under "label".
    sid: integer study id, stored under "study_id".

  Returns:
    A tf.train.Example with the three features above.
  """
  features = tf.train.Features(feature={
      "image": _bytestring_feature([img]),
      "label": _float_feature(label.reshape(-1)),
      "study_id": _int_feature([sid]),
  })
  return tf.train.Example(features=features)


In [None]:
print("Writing TFRecords")
# TFRecordWriter does not create missing directories, so make sure the output
# folder exists before the first shard is opened.
os.makedirs(GCS_OUTPUT, exist_ok=True)
for shard, (image, label, sid) in tqdm.tqdm(enumerate(train_imc)):
  # batch size used as shard size here
  shard_size = int(image.shape[0])
  # good practice to have the number of records in the filename
  filename = GCS_OUTPUT + "{:02d}-{}.tfrec".format(shard, shard_size)
  # Shards already on disk are left untouched, which lets an interrupted run
  # resume without rewriting them.
  if os.path.exists(filename):
    continue

  # Convert each batched tensor to NumPy once per shard. Calling .numpy()
  # inside the record loop would re-materialise the whole batch for every
  # single record.
  images_np = image.numpy()
  labels_np = label.numpy()
  sids_np = sid.numpy()

  with tf.io.TFRecordWriter(filename) as out_file:
    for i in tqdm.tqdm(range(shard_size)):
      example = to_tfrecord(out_file, images_np[i], labels_np[i], sids_np[i])
      out_file.write(example.SerializeToString())
  # Report only after the writer has been closed, i.e. the file is complete.
  print("Wrote file {} containing {} records".format(filename, shard_size))

0it [00:00, ?it/s]

Writing TFRecords



  0%|          | 0/2000 [00:00<?, ?it/s][A
  0%|          | 3/2000 [00:00<01:16, 26.12it/s][A
  0%|          | 7/2000 [00:00<01:12, 27.39it/s][A
  1%|          | 11/2000 [00:00<01:10, 28.39it/s][A
  1%|          | 15/2000 [00:00<01:07, 29.51it/s][A
  1%|          | 19/2000 [00:00<01:05, 30.29it/s][A
  1%|          | 23/2000 [00:00<01:22, 23.87it/s][A
  1%|▏         | 27/2000 [00:00<01:16, 25.95it/s][A
  2%|▏         | 31/2000 [00:01<01:11, 27.73it/s][A
  2%|▏         | 34/2000 [00:01<01:31, 21.57it/s][A
  2%|▏         | 38/2000 [00:01<01:22, 23.92it/s][A
  2%|▏         | 41/2000 [00:01<01:17, 25.27it/s][A
  2%|▏         | 45/2000 [00:01<01:11, 27.33it/s][A
  2%|▏         | 49/2000 [00:01<01:07, 28.97it/s][A
  3%|▎         | 53/2000 [00:02<01:23, 23.37it/s][A
  3%|▎         | 57/2000 [00:02<01:16, 25.56it/s][A
  3%|▎         | 61/2000 [00:02<01:10, 27.50it/s][A
  3%|▎         | 64/2000 [00:02<01:29, 21.67it/s][A
  3%|▎         | 68/2000 [00:02<01:20, 23.91it/s][A
  4

Wrote file /content/drive/My Drive/AG-CNN/tfrecord_train_ap/00-2000.tfrec containing 2000 records



  0%|          | 0/2000 [00:00<?, ?it/s][A
  0%|          | 2/2000 [00:00<03:23,  9.82it/s][A
  0%|          | 4/2000 [00:00<03:21,  9.92it/s][A
  0%|          | 5/2000 [00:00<04:53,  6.80it/s][A
  0%|          | 9/2000 [00:00<03:42,  8.94it/s][A
  1%|          | 13/2000 [00:00<02:54, 11.37it/s][A
  1%|          | 16/2000 [00:01<02:32, 13.01it/s][A
  1%|          | 19/2000 [00:01<02:06, 15.64it/s][A
  1%|          | 23/2000 [00:01<01:46, 18.63it/s][A
  1%|▏         | 27/2000 [00:01<01:30, 21.69it/s][A
  2%|▏         | 30/2000 [00:01<02:13, 14.71it/s][A
  2%|▏         | 34/2000 [00:01<01:50, 17.84it/s][A
  2%|▏         | 38/2000 [00:01<01:33, 20.89it/s][A
  2%|▏         | 42/2000 [00:02<01:23, 23.53it/s][A
  2%|▏         | 46/2000 [00:02<01:14, 26.10it/s][A
  2%|▎         | 50/2000 [00:02<01:09, 28.07it/s][A
  3%|▎         | 54/2000 [00:02<01:05, 29.87it/s][A
  3%|▎         | 58/2000 [00:02<01:04, 30.32it/s][A
  3%|▎         | 62/2000 [00:02<01:01, 31.54it/s][A
  3%|

Wrote file /content/drive/My Drive/AG-CNN/tfrecord_train_ap/01-2000.tfrec containing 2000 records



  0%|          | 0/2000 [00:00<?, ?it/s][A
  0%|          | 4/2000 [00:00<00:56, 35.13it/s][A
  0%|          | 8/2000 [00:00<00:56, 35.28it/s][A
  0%|          | 10/2000 [00:00<02:23, 13.89it/s][A
  1%|          | 14/2000 [00:00<01:57, 16.89it/s][A
  1%|          | 18/2000 [00:00<01:38, 20.07it/s][A
  1%|          | 22/2000 [00:00<01:26, 23.00it/s][A
  1%|▏         | 26/2000 [00:01<01:16, 25.75it/s][A
  2%|▏         | 30/2000 [00:01<01:09, 28.18it/s][A
  2%|▏         | 34/2000 [00:01<01:05, 29.90it/s][A
  2%|▏         | 38/2000 [00:01<01:03, 30.69it/s][A
  2%|▏         | 42/2000 [00:01<01:01, 31.89it/s][A
  2%|▏         | 46/2000 [00:01<00:59, 32.74it/s][A
  2%|▎         | 50/2000 [00:02<01:43, 18.90it/s][A
  3%|▎         | 54/2000 [00:02<01:29, 21.81it/s][A
  3%|▎         | 58/2000 [00:02<01:18, 24.74it/s][A
  3%|▎         | 62/2000 [00:02<01:11, 27.24it/s][A
  3%|▎         | 66/2000 [00:02<01:05, 29.38it/s][A
  4%|▎         | 70/2000 [00:02<01:01, 31.14it/s][A
  4

Wrote file /content/drive/My Drive/AG-CNN/tfrecord_train_ap/02-2000.tfrec containing 2000 records



  0%|          | 0/2000 [00:00<?, ?it/s][A
  0%|          | 4/2000 [00:00<00:58, 34.29it/s][A
  0%|          | 8/2000 [00:00<00:57, 34.36it/s][A
  0%|          | 10/2000 [00:00<02:19, 14.29it/s][A
  1%|          | 13/2000 [00:00<01:57, 16.93it/s][A
  1%|          | 17/2000 [00:00<01:39, 19.92it/s][A
  1%|          | 21/2000 [00:00<01:26, 22.84it/s][A
  1%|          | 24/2000 [00:01<02:13, 14.86it/s][A
  1%|▏         | 27/2000 [00:01<01:53, 17.38it/s][A
  2%|▏         | 31/2000 [00:01<01:36, 20.37it/s][A
  2%|▏         | 35/2000 [00:01<01:24, 23.17it/s][A
  2%|▏         | 38/2000 [00:01<02:10, 15.08it/s][A
  2%|▏         | 42/2000 [00:02<01:49, 17.86it/s][A
  2%|▏         | 46/2000 [00:02<01:33, 20.84it/s][A
  2%|▎         | 50/2000 [00:02<01:22, 23.57it/s][A
  3%|▎         | 53/2000 [00:03<03:48,  8.50it/s][A
  3%|▎         | 56/2000 [00:03<02:59, 10.83it/s][A
  3%|▎         | 60/2000 [00:03<02:23, 13.52it/s][A
  3%|▎         | 64/2000 [00:03<01:57, 16.48it/s][A
  3

Wrote file /content/drive/My Drive/AG-CNN/tfrecord_train_ap/03-2000.tfrec containing 2000 records



  0%|          | 0/2000 [00:00<?, ?it/s][A
  0%|          | 4/2000 [00:00<01:02, 32.19it/s][A
  0%|          | 7/2000 [00:00<01:03, 31.40it/s][A
  0%|          | 9/2000 [00:00<02:31, 13.15it/s][A
  1%|          | 13/2000 [00:00<02:06, 15.77it/s][A
  1%|          | 17/2000 [00:00<01:45, 18.73it/s][A
  1%|          | 21/2000 [00:00<01:31, 21.72it/s][A
  1%|▏         | 25/2000 [00:01<02:03, 15.93it/s][A
  1%|▏         | 28/2000 [00:01<01:46, 18.50it/s][A
  2%|▏         | 32/2000 [00:01<01:32, 21.28it/s][A
  2%|▏         | 36/2000 [00:01<01:22, 23.85it/s][A
  2%|▏         | 40/2000 [00:01<01:14, 26.23it/s][A
  2%|▏         | 44/2000 [00:02<01:54, 17.14it/s][A
  2%|▏         | 48/2000 [00:02<01:37, 20.10it/s][A
  3%|▎         | 52/2000 [00:02<01:25, 22.85it/s][A
  3%|▎         | 55/2000 [00:02<02:15, 14.35it/s][A
  3%|▎         | 59/2000 [00:02<01:52, 17.30it/s][A
  3%|▎         | 63/2000 [00:03<01:35, 20.30it/s][A
  3%|▎         | 67/2000 [00:03<02:04, 15.48it/s][A
  4%

Wrote file /content/drive/My Drive/AG-CNN/tfrecord_train_ap/04-2000.tfrec containing 2000 records



  0%|          | 0/2000 [00:00<?, ?it/s][A
  0%|          | 3/2000 [00:00<01:07, 29.44it/s][A
  0%|          | 7/2000 [00:00<01:06, 30.15it/s][A
  0%|          | 9/2000 [00:00<04:06,  8.06it/s][A
  1%|          | 13/2000 [00:01<03:11, 10.37it/s][A
  1%|          | 17/2000 [00:01<02:31, 13.05it/s][A
  1%|          | 21/2000 [00:01<02:03, 15.98it/s][A
  1%|▏         | 25/2000 [00:01<01:44, 18.93it/s][A
  1%|▏         | 29/2000 [00:01<01:31, 21.47it/s][A
  2%|▏         | 33/2000 [00:01<01:21, 24.06it/s][A
  2%|▏         | 37/2000 [00:01<01:15, 26.05it/s][A
  2%|▏         | 41/2000 [00:02<02:39, 12.30it/s][A
  2%|▏         | 45/2000 [00:02<02:09, 15.11it/s][A
  2%|▏         | 49/2000 [00:02<01:47, 18.14it/s][A
  3%|▎         | 53/2000 [00:02<01:32, 21.12it/s][A
  3%|▎         | 57/2000 [00:02<01:22, 23.58it/s][A
  3%|▎         | 61/2000 [00:03<01:15, 25.84it/s][A
  3%|▎         | 65/2000 [00:03<01:10, 27.61it/s][A
  3%|▎         | 69/2000 [00:03<01:52, 17.22it/s][A
  4%

Wrote file /content/drive/My Drive/AG-CNN/tfrecord_train_ap/05-2000.tfrec containing 2000 records



  0%|          | 0/2000 [00:00<?, ?it/s][A
  0%|          | 4/2000 [00:00<00:58, 34.17it/s][A
  0%|          | 8/2000 [00:00<00:58, 33.98it/s][A
  1%|          | 12/2000 [00:00<00:58, 34.08it/s][A
  1%|          | 16/2000 [00:00<00:58, 33.96it/s][A
  1%|          | 20/2000 [00:00<00:58, 33.82it/s][A
  1%|          | 24/2000 [00:00<00:58, 33.76it/s][A
  1%|▏         | 28/2000 [00:00<00:58, 33.72it/s][A
  2%|▏         | 32/2000 [00:00<00:58, 33.37it/s][A
  2%|▏         | 36/2000 [00:01<00:58, 33.49it/s][A
  2%|▏         | 40/2000 [00:01<00:58, 33.63it/s][A
  2%|▏         | 44/2000 [00:01<00:57, 33.79it/s][A
  2%|▏         | 48/2000 [00:01<00:57, 33.88it/s][A
  3%|▎         | 52/2000 [00:01<00:57, 33.93it/s][A
  3%|▎         | 56/2000 [00:01<00:57, 34.02it/s][A
  3%|▎         | 60/2000 [00:01<00:57, 34.03it/s][A
  3%|▎         | 64/2000 [00:01<00:57, 33.77it/s][A
  3%|▎         | 68/2000 [00:02<00:57, 33.64it/s][A
  4%|▎         | 72/2000 [00:02<00:57, 33.59it/s][A
  4

Wrote file /content/drive/My Drive/AG-CNN/tfrecord_train_ap/06-2000.tfrec containing 2000 records



  0%|          | 0/2000 [00:00<?, ?it/s][A
  0%|          | 4/2000 [00:00<00:59, 33.33it/s][A
  0%|          | 8/2000 [00:00<00:59, 33.72it/s][A
  1%|          | 12/2000 [00:00<00:58, 34.01it/s][A
  1%|          | 15/2000 [00:02<06:29,  5.10it/s][A
  1%|          | 19/2000 [00:02<04:50,  6.82it/s][A
  1%|          | 23/2000 [00:02<03:40,  8.98it/s][A
  1%|▏         | 27/2000 [00:02<02:51, 11.53it/s][A
  2%|▏         | 31/2000 [00:02<02:16, 14.38it/s][A
  2%|▏         | 35/2000 [00:02<01:53, 17.32it/s][A
  2%|▏         | 39/2000 [00:02<01:36, 20.31it/s][A
  2%|▏         | 43/2000 [00:02<01:27, 22.46it/s][A
  2%|▏         | 47/2000 [00:03<01:19, 24.60it/s][A
  3%|▎         | 51/2000 [00:03<01:12, 26.72it/s][A
  3%|▎         | 55/2000 [00:04<03:15,  9.94it/s][A
  3%|▎         | 59/2000 [00:04<02:34, 12.53it/s][A
  3%|▎         | 63/2000 [00:04<02:05, 15.42it/s][A
  3%|▎         | 67/2000 [00:04<01:45, 18.38it/s][A
  4%|▎         | 71/2000 [00:04<01:31, 21.18it/s][A
  4

Wrote file /content/drive/My Drive/AG-CNN/tfrecord_train_ap/07-2000.tfrec containing 2000 records


In [None]:
def read_tfrecord(example, image_size=(224, 224)):
    """Parse one serialized Example produced by `to_tfrecord`.

    Only the "image" and "label" features are read. The records written by this
    notebook carry no "size" feature, so the image shape is taken from
    `image_size` rather than from the record. Requesting a missing
    FixedLenFeature would make parsing fail.

    Args:
        example: scalar string tensor holding a serialized tf.train.Example.
        image_size: (height, width) of the stored JPEG. Defaults to the
            224x224 size produced by `decode_img`.

    Returns:
        Tuple (image, label): a uint8 tensor of shape (height, width, 3) and a
        float32 tensor of shape (1, 14).
    """
    features = {
        "image": tf.io.FixedLenFeature([], tf.string),  # tf.string = bytestring (not text string)
        "label": tf.io.VarLenFeature(tf.float32),       # a variable number of floats
    }
    # decode the TFRecord
    example = tf.io.parse_single_example(example, features)

    # FixedLenFeature fields are ready to use as-is;
    # VarLenFeature fields require additional sparse-to-dense decoding.
    image = tf.image.decode_jpeg(example['image'], channels=3)
    # decode_jpeg yields an unknown static shape; pin it to the stored size.
    image = tf.reshape(image, [image_size[0], image_size[1], 3])
    label = tf.reshape(tf.sparse.to_dense(example['label']), [-1, 14])
    return image, label

In [None]:
# Read the shards back. Several TFRecord files are read concurrently and
# deterministic ordering is switched off, which lets tf.data reorder elements
# for throughput.

option_no_order = tf.data.Options()
option_no_order.experimental_deterministic = False

filenames = tf.io.gfile.glob(GCS_OUTPUT + "*.tfrec")
train_dsr = (
    tf.data.TFRecordDataset(filenames, num_parallel_reads=tf.data.experimental.AUTOTUNE)
    .with_options(option_no_order)
    .map(read_tfrecord, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .shuffle(300)
)