# Import

In [1]:
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
import numpy as np
import pathlib
import os
import datetime

AUTOTUNE = tf.data.experimental.AUTOTUNE

In [2]:
print("tensorflow version check : ", tf.__version__)
print("gpu check : ",tf.test.is_gpu_available())

tensorflow version check :  2.0.0
gpu check :  True


# Data processing Pipeline

## Data 불러오기
### 이미지 파일들을 그대로 불러오면 용량이 너무 크니 경로를 지정하여 다룸

In [3]:
train_dir = pathlib.Path('../../before_folder/image_train/Training')
private_test_dir = pathlib.Path('../../before_folder/image_train/PrivateTest')
public_test_dir = pathlib.Path('../../before_folder/image_train/PublicTest')

In [4]:
# 각 경로 별 데이터 갯수 확인
print(len(list(train_dir.glob('*/*.jpg'))))
print(len(list(public_test_dir.glob('*/*.jpg'))))
print(len(list(private_test_dir.glob('*/*.jpg'))))

28708
3589
3589


#### dataset 만들기

In [5]:
list_ds = tf.data.Dataset.list_files(str(train_dir/'*/*.jpg'))
val_list_ds = tf.data.Dataset.list_files(str(public_test_dir/'*/*.jpg'))

In [6]:
#확인하기
for f in list_ds.take(5) : 
    print(f.numpy())

b'..\\..\\before_folder\\image_train\\Training\\Fear\\21462.jpg'
b'..\\..\\before_folder\\image_train\\Training\\Angry\\16652.jpg'
b'..\\..\\before_folder\\image_train\\Training\\Happy\\17203.jpg'
b'..\\..\\before_folder\\image_train\\Training\\Surprise\\25483.jpg'
b'..\\..\\before_folder\\image_train\\Training\\Fear\\11999.jpg'


## Data labeling을 위한 전처리

In [7]:
CLASS_NAMES = np.array([item.name for item in train_dir.glob('*')])

CLASS_NAMES

array(['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise'],
      dtype='<U8')

In [8]:
CLASS_NAME_TO_ID = {}
for id, name in enumerate(CLASS_NAMES):
    CLASS_NAME_TO_ID[name] = id
    
print(CLASS_NAME_TO_ID)

{'Angry': 0, 'Disgust': 1, 'Fear': 2, 'Happy': 3, 'Neutral': 4, 'Sad': 5, 'Surprise': 6}


In [9]:
keys = list(CLASS_NAME_TO_ID.keys())
ids = list(CLASS_NAME_TO_ID.values())
print(keys,ids)

['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise'] [0, 1, 2, 3, 4, 5, 6]


In [10]:
table = tf.lookup.StaticHashTable(
    initializer= tf.lookup.KeyValueTensorInitializer(
    keys= tf.constant(keys),
    values=tf.constant(ids),),
    default_value=tf.constant(-1),
    name="class_weight")

In [11]:
BATCH_SIZE = 64
IMG_HEIGHT = 96
IMG_WIDTH = 96

## data labeling을 위한 함수 선언

In [12]:
def get_label(file_path) :
    parts = tf.strings.split(file_path, '\\')
    return table.lookup(parts[-2])

In [13]:
def decode_img(img):
    img = tf.image.decode_jpeg(img,channels=1)
    img = tf.image.convert_image_dtype(img, tf.float32)
    return tf.image.resize(img,[IMG_WIDTH,IMG_HEIGHT])

In [14]:
def process_path(file_path):
    label = get_label(file_path)
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    return img, label

### labeling 된 데이터셋 만들기

In [15]:
labeled_ds = list_ds.shuffle(10000).map(process_path, num_parallel_calls=AUTOTUNE).batch(BATCH_SIZE)
val_labeled_ds = val_list_ds.map(process_path,num_parallel_calls=AUTOTUNE).batch(BATCH_SIZE)

In [16]:
for image, label in labeled_ds.take(1) :
    print("image shape : ", image.numpy().shape)
    print("Label : ", label.numpy())
    
for i in val_labeled_ds.take(1):
    print('val_img_shape : ',i[0].shape)

image shape :  (64, 96, 96, 1)
Label :  [4 3 4 6 3 4 0 4 2 2 2 3 3 3 2 3 4 3 4 0 3 4 0 0 4 0 3 5 2 3 5 0 4 4 2 3 3
 2 5 5 3 3 3 3 0 3 5 4 5 4 0 3 3 3 6 0 4 0 3 2 3 0 2 5]
val_img_shape :  (64, 96, 96, 1)


# Model 만들기  

In [17]:
def create_model() :
    inputs = keras.Input(shape=(96, 96,1), name='img')

    feature = tf.keras.layers.Conv2D(96, 3, padding = 'same', input_shape=(IMG_HEIGHT, IMG_WIDTH ,3))(inputs)
    feature = tf.keras.layers.BatchNormalization()(feature)
    feature = tf.keras.layers.ReLU()(feature)
    feature = tf.keras.layers.Conv2D(128, 3, padding='same')(feature)
    feature = tf.keras.layers.BatchNormalization()(feature)
    feature = tf.keras.layers.ReLU()(feature)
    feature = tf.keras.layers.MaxPooling2D()(feature)
    feature = tf.keras.layers.Dropout(0.25)(feature)


    feature = tf.keras.layers.Conv2D(96, 3, padding='same')(feature)
    feature = tf.keras.layers.BatchNormalization()(feature)
    feature = tf.keras.layers.ReLU()(feature)
    feature = tf.keras.layers.MaxPooling2D()(feature)
    feature = tf.keras.layers.Dropout(0.25)(feature)

    #feature = tf.keras.layers.Flatten()(feature)
    feature = tf.keras.layers.GlobalAveragePooling2D()(feature)
    feature = tf.keras.layers.Dense(96, activation='relu')(feature)
    feature = tf.keras.layers.Dense(32, activation='relu')(feature)
    outputs = tf.keras.layers.Dense(7, activation='softmax')(feature)

    model = keras.Model(inputs=inputs, outputs=outputs)
    
    return model

In [18]:
model = create_model()

In [19]:
train_len = len(list(list_ds))
val_len = len(list(val_list_ds))

steps_per_epoch = train_len // BATCH_SIZE
validation_steps = val_len // BATCH_SIZE
type(labeled_ds)

tensorflow.python.data.ops.dataset_ops.DatasetV1Adapter

In [26]:
checkpoint_dir = pathlib.Path("../../before_folder/model_save/cp-{epoch:04d}-{val_loss:.2f}.hdf5")

log_dir = pathlib.Path("../../before_folder/logs/"+datetime.datetime.now().strftime("%Y%m%d"))
os.makedirs(log_dir)

In [27]:
EPOCH = 60

def train_model(model):
    
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir = log_dir,
                                                         histogram_freq = 1)
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                                   period=5)
    early_stopping_checkpoint = tf.keras.callbacks.EarlyStopping(patience=5)
    
    history = model.fit(labeled_ds.repeat(),
                        epochs=EPOCH,
                        steps_per_epoch = steps_per_epoch,
                        validation_data = val_labeled_ds,
                        validation_steps = validation_steps,
                        callbacks = [tensorboard_callback,
                                     model_checkpoint_callback,
                                     early_stopping_checkpoint])
    
    return history

In [30]:
history = train_model(model)

Train for 448 steps, validate for 56 steps
Epoch 1/60
  1/448 [..............................] - ETA: 6:55 - loss: 1.8400 - accuracy: 0.2031

ProfilerNotRunningError: Cannot stop profiling. No profiler is running.

In [26]:
print(len(model.trainable_variables))

model.save_weights(checkpoint_path.format(epoch=0))
print("model save on")
print("-----------------Training Start-----------------")
model.fit(labeled_ds,epochs= 100, callbacks=[cp_callback],
         validation_data=val_labeled_ds,
         verbose=1) #60 #100 #100 #100

18
model save on
-----------------Training Start-----------------
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 00009: saving model to ../../before_folder/model_save/cp-0009.ckpt
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 00019: saving model to ../../before_folder/model_save/cp-0019.ckpt
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100


KeyboardInterrupt: 

# Model Test

In [20]:
model.evaluate(val_labeled_ds)



[1.726860144682098, 0.48509336]