# Create TFRecord Dataset

In [0]:
!pip install -q efficientnet
!pip install -q iterative-stratification

In [0]:
import os 
import pandas as pd
import math, re, gc
import numpy as np 
import pickle
from datetime import datetime, timedelta
import tensorflow as tf
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import efficientnet.tfkeras as efn
from matplotlib import pyplot as plt
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
import tensorflow.keras.layers as L

# Shorthand for tf.data's AUTOTUNE constant.
AUTO = tf.data.experimental.AUTOTUNE

# Report which TensorFlow build this notebook is running against.
print('TensorFlow version', tf.__version__)

In [0]:
# Try to resolve a TPU cluster; a ValueError from this step is treated as
# "no TPU available" and leaves `tpu` set to None.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # No TPU resolved: use whatever strategy tf.distribute.get_strategy() returns.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print('Replicas:', strategy.num_replicas_in_sync)

In [0]:
# --- Notebook configuration ---

# Root directory that holds the image folders and train.csv.
DATA_PATH = '../input/aimongo-img/'

# EPOCHS and IMAGE_SIZE are not read by the cells visible in this notebook
# section; presumably they are consumed further downstream (TODO confirm).
EPOCHS = 5
IMAGE_SIZE = [512, 512]

# Global batch size: 16 samples for every replica of the active strategy.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync

In [0]:
# Collect the JPEG paths of the training and dev image folders.
# os.path.join builds each pattern without a doubled '/' even though
# DATA_PATH ends with a slash, and sorted() pins the file order so that the
# index-based fold split performed later is reproducible from run to run.
TRAINING_FILENAMES = sorted(
    tf.io.gfile.glob(os.path.join(DATA_PATH, 'C1-P1_Train', '*.jpg'))
)
TEST_FILENAMES = sorted(
    tf.io.gfile.glob(os.path.join(DATA_PATH, 'C1-P1_Dev', '*.jpg'))
)

In [0]:
# Load the training annotations and build an image_id -> class-index lookup.
TRAINING_LABEL = pd.read_csv(DATA_PATH +'/train.csv')
img_map = {'A': 0, 'B': 1, 'C': 2}  # label letter -> integer class index
label_map = {
    image_id: class_index
    for image_id, class_index in zip(TRAINING_LABEL['image_id'],
                                     TRAINING_LABEL['label'].map(img_map))
}

# Resolve each training file's label through its bare file name (the last
# '/'-separated path component), keeping the order of TRAINING_FILENAMES.
train_img_list = [path.rsplit('/', 1)[-1] for path in TRAINING_FILENAMES]
LABEL = list(map(label_map.__getitem__, train_img_list))

In [0]:
%%capture
mskf = MultilabelStratifiedKFold(n_splits=5,random_state=0)

In [0]:
# Index of the single fold whose split is materialised as train / validation.
fix_fold = 4

# The splitter stratifies on a one-hot (indicator) encoding of the labels.
one_hot = pd.get_dummies(LABEL).values

for fold, (trn_, val_) in enumerate(mskf.split(TRAINING_FILENAMES, one_hot)):
    if fold != fix_fold:
        continue
    # Index through numpy arrays so the fold's index arrays can be applied
    # directly, then convert the selections back to plain lists.
    filenames, labels = np.array(TRAINING_FILENAMES), np.array(LABEL)
    TRAIN_IMG, TRAIN_LABEL = list(filenames[trn_]), list(labels[trn_])
    VAL_IMG, VAL_LABEL = list(filenames[val_]), list(labels[val_])

In [0]:
# Serialise the training split into a single TFRecord file.
# Every record stores the raw bytes of one image file under 'image' and its
# integer class index under 'label'.
tr_tfrecord_file = 'train.tfrecords'
with tf.io.TFRecordWriter(tr_tfrecord_file) as writer:
    for image_path, label in zip(TRAIN_IMG, TRAIN_LABEL):
        # Read through a context manager so the file handle is closed
        # right away instead of lingering until garbage collection.
        with open(image_path, 'rb') as image_file:
            image_bytes = image_file.read()
        feature = {
            'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes])),
            'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
        }
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(example.SerializeToString())
# No explicit writer.close(): leaving the with-block closes the writer.

In [0]:
# Serialise the validation split into a single TFRecord file.
# Every record stores the raw bytes of one image file under 'image' and its
# integer class index under 'label'.
val_tfrecord_file = 'val.tfrecords'
with tf.io.TFRecordWriter(val_tfrecord_file) as writer:
    for image_path, label in zip(VAL_IMG, VAL_LABEL):
        # Read through a context manager so the file handle is closed
        # right away instead of lingering until garbage collection.
        with open(image_path, 'rb') as image_file:
            image_bytes = image_file.read()
        feature = {
            'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes])),
            'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
        }
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(example.SerializeToString())
# No explicit writer.close(): leaving the with-block closes the writer.

In [0]:
# Serialise the unlabeled dev images into a single TFRecord file.
# Every record stores the raw bytes of one image file under 'image' and the
# UTF-8 encoded bare file name under 'image_name'.
dev_tfrecord_file = 'dev.tfrecords'
# File name = last '/'-separated component of each path, as bytes.
IMG_NAME = [img_file.split('/')[-1].encode('utf8') for img_file in TEST_FILENAMES]
with tf.io.TFRecordWriter(dev_tfrecord_file) as writer:
    for image_path, img_name in zip(TEST_FILENAMES, IMG_NAME):
        # Read through a context manager so the file handle is closed
        # right away instead of lingering until garbage collection.
        with open(image_path, 'rb') as image_file:
            image_bytes = image_file.read()
        feature = {
            'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes])),
            'image_name': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_name])),
        }
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(example.SerializeToString())
# No explicit writer.close(): leaving the with-block closes the writer.