In [1]:
import os
import pandas as pd
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm

In [4]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [7]:
# tpu 사용을 위한 준비
# tpu_name = 'grpc://'+os.environ['COLAB_TPU_ADDR']

# resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_name)

# tf.config.experimental_connect_to_cluster(resolver)
# tf.tpu.experimental.initialize_tpu_system(resolver)

# strategy = tf.distribute.TPUStrategy(resolver)

# gpu사용
strategy = tf.distribute.MirroredStrategy(['GPU:0'])

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
data_dir = './../../data/4d-block/'
train_df = pd.read_csv(os.path.join(data_dir,'train.csv'))
test_df = pd.read_csv(os.path.join(data_dir,'test.csv'))

In [None]:
train_df.head()

Unnamed: 0,id,img_path,A,B,C,D,E,F,G,H,I,J
0,TRAIN_00000,./train/TRAIN_00000.jpg,1,0,0,0,0,0,0,0,0,0
1,TRAIN_00001,./train/TRAIN_00001.jpg,1,0,0,0,0,0,0,0,0,0
2,TRAIN_00002,./train/TRAIN_00002.jpg,1,0,0,0,0,0,0,0,0,0
3,TRAIN_00003,./train/TRAIN_00003.jpg,1,0,0,0,0,0,0,0,0,0
4,TRAIN_00004,./train/TRAIN_00004.jpg,1,0,0,0,0,0,0,0,0,0


In [None]:
train_df.isnull().sum()

id          0
img_path    0
A           0
B           0
C           0
D           0
E           0
F           0
G           0
H           0
I           0
J           0
dtype: int64

In [None]:
train, val = train_test_split(train_df, test_size=0.35, random_state=2023)

In [None]:
train.reset_index(drop=True, inplace=True)
val.reset_index(drop=True, inplace=True)

In [None]:
train.shape, val.shape

((21446, 12), (11548, 12))

In [None]:
def _bytes_feature(value):
    if isinstance(value,type(tf.constant(0))):
        value = value.numpy()
    return tf.train.Feature(bytes_list = tf.train.BytesList(value = [value]))

def _float_feature(value):
    return tf.train.Feature(float_list = tf.train.FloatList(value = [value]))

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

In [None]:
train.head(3)

Unnamed: 0,id,img_path,A,B,C,D,E,F,G,H,I,J
0,TRAIN_15562,./train/TRAIN_15562.jpg,1,1,0,1,0,0,1,0,1,1
1,TRAIN_18654,./train/TRAIN_18654.jpg,0,0,1,1,1,1,1,1,0,0
2,TRAIN_17763,./train/TRAIN_17763.jpg,0,1,1,1,0,0,0,1,1,1


In [88]:
writer_image = tf.io.TFRecordWriter(os.path.join(data_dir,'image_train.tfrecord'))

for i in tqdm(range(train.shape[0])):
    src = cv2.imread(os.path.join(data_dir,train.iloc[i,1]))
    dst = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)
    dst = cv2.resize(dst,(img_size,img_size), interpolation = cv2.INTER_CUBIC)
    bimage = dst.tobytes()

    classes = np.array(train.loc[i,'A':'J'], dtype=np.uint8).tobytes()

    example = tf.train.Example(
        features = tf.train.Features(
            feature={
            'image':_bytes_feature(bimage),
            'class': _bytes_feature(classes)
            }
        )
    )

    writer_image.write(example.SerializeToString())

writer_image.close()

NameError: ignored

In [None]:
writer_image = tf.io.TFRecordWriter(os.path.join(data_dir,'image_val.tfrecord'))

for i in tqdm(range(val.shape[0])):
    src = cv2.imread(os.path.join(data_dir,val.iloc[i,1]))
    dst = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)
    dst = cv2.resize(dst,(img_size,img_size), interpolation = cv2.INTER_CUBIC)
    bimage = dst.tobytes()

    classes = np.array(val.loc[i,'A':'J'], dtype=np.uint8).tobytes()

    example = tf.train.Example(
        features = tf.train.Features(
            feature={
            'image':_bytes_feature(bimage),
            'class': _bytes_feature(classes)
            }
        )
    )

    writer_image.write(example.SerializeToString())

writer_image.close()

100%|███████████████████████████████████████████████████████████████████████████| 11548/11548 [00:42<00:00, 274.72it/s]


In [None]:
writer_image = tf.io.TFRecordWriter(os.path.join(data_dir,'image_test.tfrecord'))

for i in tqdm(range(test_df.shape[0])):
    src = cv2.imread(os.path.join(data_dir,test_df.iloc[i,1]))
    dst = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)
    dst = cv2.resize(dst,(img_size,img_size), interpolation = cv2.INTER_CUBIC)
    bimage = dst.tobytes()

    classes = np.array(test_df.loc[i,'A':'J'], dtype=np.uint8).tobytes()

    example = tf.train.Example(
        features = tf.train.Features(
            feature={
            'image':_bytes_feature(bimage),
            'class': _bytes_feature(classes)
            }
        )
    )

    writer_image.write(example.SerializeToString())

writer_image.close()

100%|█████████████████████████████████████████████████████████████████████████████| 1460/1460 [00:09<00:00, 147.62it/s]
