In [1]:
import tensorflow.compat.v2 as tfc
import tensorflow as tf

import pathlib
import numpy as np
import pandas as pd
import IPython.display as display

# Switch TensorFlow to eager execution through the TF1 compatibility API.
# The helpers in this notebook rely on it: _bytes_feature calls .numpy() on
# tensor inputs and create_example reads the shape of a decoded image directly.
tf.compat.v1.enable_eager_execution()

In [2]:
def _bytes_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` holding a ``BytesList``.

    Args:
        value: The payload to store. An eager tensor (detected by comparing
            against the type of ``tf.constant(0)``) is unwrapped with
            ``.numpy()``; a ``str`` is encoded with ``str.encode`` (UTF-8 by
            default); anything else is passed through unchanged.

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` contains exactly one item.
    """
    eager_tensor_type = type(tf.constant(0))
    if isinstance(value, eager_tensor_type):
        # Unwrap the tensor before it is handed to BytesList.
        value = value.numpy()

    if isinstance(value, str):
        value = str.encode(value)

    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)

In [3]:
def _int64_feature(value):
    """Wrap a single integer-like value in a ``tf.train.Feature``.

    Args:
        value: The scalar to store; it is placed as the only element of an
            ``Int64List``.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` contains exactly one item.
    """
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)

In [4]:
def get_labels(img_name, df):
    """Build the label features for one image from the attribute table.

    Args:
        img_name: File name to look up in the ``filename`` column of ``df``.
        df: DataFrame with a ``filename`` column plus one column per label.

    Returns:
        dict mapping every column name of ``df`` to a ``tf.train.Feature``
        built from the first row whose ``filename`` equals ``img_name``.
        String cells become bytes features; every other cell becomes an
        int64 feature.

    Raises:
        IndexError: if no row of ``df`` has ``filename == img_name``.
    """
    row = df.loc[df['filename'] == img_name]
    if row.empty:
        # Same exception type the bare ``.values[0]`` lookup would raise, but
        # with a message that names the offending file.
        raise IndexError(
            'no row with filename %r in the attribute table' % (img_name,)
        )

    labels = {}
    for column in row:
        value = row[column].values[0]

        if isinstance(value, str):
            # Key by the actual column name so that several string columns
            # cannot overwrite one another under a fixed 'filename' key.
            labels[column] = _bytes_feature(value)
            continue

        if isinstance(value, (bool, np.bool_)):
            # Store boolean flags as plain 0/1 ints rather than relying on
            # Int64List to coerce (NumPy) bool scalars.
            value = int(value)
        labels[column] = _int64_feature(value)

    return labels

In [5]:
def create_example(img_string, labels):
    """Assemble a ``tf.train.Example`` for one PNG image and its labels.

    Args:
        img_string: Encoded PNG bytes. They are decoded once to read the
            image shape and stored unchanged under the ``'raw'`` key.
        labels: dict of feature name -> ``tf.train.Feature``. These are merged
            on top of the ``'height'``/``'width'``/``'depth'`` entries, so a
            label with one of those names replaces the shape entry.

    Returns:
        A ``tf.train.Example`` holding the shape features, the label features
        and the raw image bytes.
    """
    decoded_shape = tf.image.decode_png(img_string).shape

    # Axes 0, 1 and 2 of the decoded image shape, in that order.
    shape_features = {
        name: _int64_feature(decoded_shape[axis])
        for axis, name in enumerate(('height', 'width', 'depth'))
    }

    merged = dict(shape_features, **labels)
    # 'raw' is assigned last, so it wins over a label with the same name.
    merged['raw'] = _bytes_feature(img_string)

    features = tf.train.Features(feature=merged)
    return tf.train.Example(features=features)

In [6]:
# Dataset locations, given relative to the notebook's working directory.
data_dir = pathlib.Path('../../datasets/celeba')
img_dir = data_dir / 'img_align_celeba_png'
# Note: despite the *_dir name this points at the attribute CSV file.
attr_dir = data_dir / 'attr_celeba.csv'

In [7]:
# Load the attribute table and recode its -1 / 1 flags as False / True.
# The two replacements run in the same order as before (-1 first, then 1) and
# apply to every column of the frame, not only to the attribute columns.
df = (
    pd.read_csv(attr_dir)
    .replace(to_replace=-1, value=False)
    .replace(to_replace=1, value=True)
)

In [10]:
# Preview the recoded attribute table; as the last expression of the cell it
# is rendered with pandas' rich (truncated) display.
df

Unnamed: 0,filename,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,000001.png,False,True,True,False,False,False,False,False,False,...,False,True,True,False,True,False,True,False,False,True
1,000002.png,False,False,False,True,False,False,False,True,False,...,False,True,False,False,False,False,False,False,False,True
2,000003.png,False,False,False,False,False,False,True,False,False,...,False,False,False,True,False,False,False,False,False,True
3,000004.png,False,False,True,False,False,False,False,False,False,...,False,False,True,False,True,False,True,True,False,True
4,000005.png,False,True,True,False,False,False,True,False,False,...,False,False,False,False,False,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202594,202595.png,False,False,True,False,False,False,True,False,False,...,False,False,False,False,False,False,True,False,False,True
202595,202596.png,False,False,False,False,False,True,True,False,False,...,False,True,True,False,False,False,False,False,False,True
202596,202597.png,False,False,False,False,False,False,False,False,True,...,False,True,False,False,False,False,False,False,False,True
202597,202598.png,False,True,True,False,False,False,True,False,True,...,False,True,False,True,True,False,True,False,False,True


In [8]:
# Collect the PNG files in a stable, sorted order. Path.glob yields entries in
# no particular order, so sorting makes the record order reproducible across
# runs; the '*.png' pattern skips stray non-image entries in the directory.
img_paths = sorted(img_dir.glob('*.png'))

In [9]:
# Build one tf.train.Example per image. All examples are kept in memory so the
# writer cell can serialise them afterwards.
examples = []
counter = 0
total_images = len(img_paths)
for counter, img_path in enumerate(img_paths, start=1):
    # read_bytes() opens and closes the file, so no handle is left dangling.
    img_string = img_path.read_bytes()
    # Path.name is the final path component on every platform, unlike
    # splitting the string form on '/'.
    examples.append(create_example(img_string, get_labels(img_path.name, df)))
    # Report progress against the real number of images found.
    print('%d / %d\r' % (counter, total_images), end='', flush=True)
    

202599 / 202599

In [11]:
# Serialise every in-memory example into a single TFRecord file.
record_file = 'images.tfrecords'
counter = 0
total_examples = len(examples)
with tf.io.TFRecordWriter(record_file) as writer:
    for counter, tf_example in enumerate(examples, start=1):
        writer.write(tf_example.SerializeToString())

        # Report progress against the real number of examples being written.
        print('%d / %d\r' % (counter, total_examples), end='', flush=True)
    

202599 / 202599