In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder


# Image directories: the training pool and the held-out dev set used as test.
datasetDir = "../DataP2_crop/Train"
testDir = "../DataP2_crop/Dev"

# Annotation CSVs matching the two image directories above.
datasetCSV = "../DataP2_crop/train.csv"
testCSV = "../DataP2_crop/dev.csv"

# Column names are supplied explicitly, so the first CSV row is read as data.
# NOTE(review): x, y, w, h look like a crop/bounding box -- confirm.
header = ["ImageName", "Level", "x", "y", "w", "h"]
dataset_df = pd.read_csv(datasetCSV, names=header)  # load the annotation rows
test_df = pd.read_csv(testCSV, names=header)


# Encode the level labels as integers (A, B, C -> 0, 1, 2).
labelencoder = LabelEncoder()
dataset_df['Level'] = labelencoder.fit_transform(dataset_df['Level'])
# Reuse the mapping fitted on the training labels instead of re-fitting on the
# test labels: re-fitting would assign different integers whenever the test
# CSV does not contain exactly the same set of levels. `transform` raises
# ValueError on a level that was never seen during fitting.
test_df['Level'] = labelencoder.transform(test_df['Level'])




# Randomly shuffle the training pool, then split it 70% / 30% into a training
# subset and a validation subset.
# NOTE: no random seed is set here, so the split differs from run to run.
data_num = dataset_df.shape[0]
indexes = np.random.permutation(data_num)  # shuffled positions 0 .. data_num - 1

# Compute the split point once so both slices are guaranteed to agree on it.
split_at = int(data_num * 0.7)
train_indexes = indexes[:split_at]
valid_indexes = indexes[split_at:]

# The permutation holds row positions, so select positionally with `.iloc`;
# label-based `.loc` only gives the same rows while the index is the default
# 0..n-1 range.
train_df = dataset_df.iloc[train_indexes]
valid_df = dataset_df.iloc[valid_indexes]

# Report the size and the first rows of each split.
print("Train_df shape: {}".format(train_df.shape[0]))
print("Train_df:\n{}".format(train_df.head()))
print('-' * 30)
print("Valid_df shape: {}".format(valid_df.shape[0]))
print("Valid_df:\n {}".format(valid_df.head()))
print('-' * 30)
print("Test_df shape: {}".format(test_df.shape[0]))
print("Test_df:\n{}".format(test_df.head()))

def _write_tfrecord(tfrecord_path, filenames, labels, split_name):
    """Serialize image files and their labels into one TFRecord file.

    Every record is a `tf.train.Example` with two features: 'image', the raw
    bytes of the image file exactly as stored on disk, and 'label', a single
    int64 value.

    Args:
        tfrecord_path: Path of the TFRecord file to create.
        filenames: Iterable of image file paths.
        labels: Iterable of integer labels, paired with `filenames` by
            position (pairing stops at the shorter of the two).
        split_name: Name of the split, used only in the progress message.
    """
    with tf.io.TFRecordWriter(tfrecord_path) as writer:
        print("Transforming {} Dataset to tfrecord".format(split_name))
        for filename, label in zip(filenames, labels):
            # Read under a context manager so each file handle is closed
            # right away instead of being left open for every image.
            with open(filename, 'rb') as image_file:
                image = image_file.read()
            feature = {
                'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])),
                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
            }
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())


# 1. Convert the train dataset to train.tfrecord
train_file = [os.path.join(datasetDir, name) for name in train_df['ImageName']]  # path of every image
train_label = list(train_df['Level'])  # label of every image
train_tfrecord = "train.tfrecord"
_write_tfrecord(train_tfrecord, train_file, train_label, "Train")

# 2. Convert the valid dataset to valid.tfrecord
# Validation images live in the same directory as the training images.
valid_file = [os.path.join(datasetDir, name) for name in valid_df['ImageName']]
valid_label = list(valid_df['Level'])
valid_tfrecord = "valid.tfrecord"
_write_tfrecord(valid_tfrecord, valid_file, valid_label, "Valid")

# 3. Convert the test dataset to test.tfrecord
test_file = [os.path.join(testDir, name) for name in test_df['ImageName']]
test_label = list(test_df['Level'])
test_tfrecord = "test.tfrecord"
_write_tfrecord(test_tfrecord, test_file, test_label, "Test")
 

Train_df shape: 31499
Train_df:
       ImageName  Level    x    y    w    h
3120   52165.jpg      0    9    2  818  686
17306  49190.jpg      2  403   36  452  417
31237  75680.jpg      1  367  177  307  285
43933  12325.jpg      1  415  248  341  289
14511  69093.jpg      0   26   91  498  404
------------------------------
Valid_df shape: 13501
Valid_df:
        ImageName  Level    x    y    w    h
23683  36411.jpg      0  326   29  411  358
1406   91514.jpg      1   62  159  546  518
9773   06123.jpg      0  469  253  270  238
16098  20961.jpg      1  473   26  501  607
29654  41257.jpg      0  254  367  413  334
------------------------------
Test_df shape: 7000
Test_df:
   ImageName  Level    x    y    w    h
0  32783.jpg      0  479  268  346  261
1  24935.jpg      1  493  282  319  257
2  30841.jpg      0  493  189  327  269
3  57713.jpg      0  463  258  337  276
4  85593.jpg      0  453  290  331  275
Transforming Train Dataset to tfrecord
Transforming Valid Dataset to tfrecor