# 베트남 음식 분류하기

데이터셋: [30VNFoods](https://www.kaggle.com/datasets/quandang/vietnamese-foods)
- 30가지 베트남 음식 이미지를 모아놓은 것
- Banh xeo, Pho 등 다양한 이미지들이 있다.

![image.png](attachment:image.png)

30종류의 베트남 음식 중 10개의 카테고리를 선별해 실습을 진행할 예정이다.


**평가문항**  
1. 주어진 함수들과 TensorFlow Data API를 이용해서 dataloader를 구현했다.
    - dataloader를 만드는 과정이 에러 없이 수행되었다
2. 베트남 음식 사진 분류를 위한 모델을 구현했다.	
    - EfficientNetB0 backbone과 Dense 레이어를 결합하여 모델을 성공적으로 구현했다.
3. TensorFlow GradientTape을 이용해서 custom trainer 클래스를 구현했다.
    - 학습이 진행되면서 training accuracy가 점차 증가하였다.

In [1]:
import os
import pathlib
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

## Load Data

### 손상된 이미지 파일 삭제

In [2]:
print(os.getcwd()) # Print the current working directory

/aiffel/aiffel/vnfoods


In [3]:
# Root folder of the dataset. The trailing slash is kept so that the split
# sub-folders below can be derived by plain string formatting.
data_path = '/aiffel/aiffel/vnfoods/data/30vnfoods/'
train_path, test_path = (
    '{}{}/'.format(data_path, split) for split in ('Train', 'Test')
)

In [4]:
# Leading bytes every file must have to be kept; files whose header differs
# are reported and deleted.
JPEG_MAGIC = b'\xff\xd8\xff'

for path in [train_path, test_path]:
    for food in os.listdir(path):
        food_path = os.path.join(path, food)
        # Skip stray non-directory entries (e.g. .DS_Store) instead of
        # crashing in os.listdir().
        if not os.path.isdir(food_path):
            continue

        for image in os.listdir(food_path):
            image_path = os.path.join(food_path, image)
            if not os.path.isfile(image_path):
                continue

            # Only the header is needed, so do not load the whole file.
            # The local is named `header` so the builtin `bytes` is not
            # shadowed for the rest of the notebook.
            with open(image_path, 'rb') as f:
                header = f.read(len(JPEG_MAGIC))

            if header != JPEG_MAGIC:
                print(image_path)
                os.remove(image_path)

/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Banh cuon/88.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Banh cuon/781.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Banh cuon/26.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Banh cuon/634.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Banh cuon/45.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Banh cuon/677.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Banh cuon/350.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Banh mi/860.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Banh mi/623.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Bun dau mam tom/351.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Bun dau mam tom/175.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Chao long/86.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Bun rieu/647.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Bun rieu/106.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Bun rieu/758.jpg
/aiffel/aiffel/vnfoods/data/30vnfoods/Train/Bun rieu/89.jpg
/aiffel/a

### 삭제되고 난 후의 훈련 데이터 수

In [4]:
# Count the entries found in every class folder of the training split.
classes = os.listdir(train_path)
train_length = sum(
    len(os.listdir(os.path.join(train_path, food))) for food in classes
)

print('training data의 개수: '+str(train_length))

training data의 개수: 9775


## 해결할 문제

### 문제1: dataloader 구현하기
주어진 함수들과 TensorFlow Data API를 이용해서 dataloader 구현

#### `process_path` 함수
- file_path로 부터 class label을 만들고, 이미지를 읽는 함수
- 이미지 크기를 (224, 224)로 맞춘다.

In [5]:
def process_path(file_path, class_names, img_shape=(224, 224)):
    """Read one image file and build its label from the parent folder name.

    Args:
        file_path: Path of the image file. The second-to-last path component
            (the containing folder) is used as the class name.
        class_names: Class names that the folder name is compared against.
        img_shape: Target size handed to ``tf.image.resize``.

    Returns:
        A tuple ``(img, label)``: the decoded 3-channel image converted to
        float32 and resized, and the element-wise comparison of the folder
        name with ``class_names``.
    """
    # NOTE(review): convert_image_dtype presumably rescales pixels to [0, 1],
    # while the Keras EfficientNet backbone used in this notebook is
    # documented to expect [0, 255] inputs - confirm this scaling is intended.
    raw = tf.io.read_file(file_path)
    decoded = tf.image.decode_jpeg(raw, channels=3)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    img = tf.image.resize(as_float, img_shape)

    path_parts = tf.strings.split(file_path, os.path.sep)
    label = path_parts[-2] == class_names
    return img, label

#### `prepare_for_training` 함수
TensorFlow Data API를 이용해 data batch를 만드는 함수

In [6]:
def prepare_for_training(ds, batch_size=32, cache=True, shuffle_buffer_size=1000,
                         repeat_count=1000):
    """Turn a dataset of single examples into a shuffled, batched pipeline.

    Args:
        ds: Dataset to transform; it must provide ``cache``, ``shuffle``,
            ``repeat``, ``batch`` and ``prefetch``.
        batch_size: Number of examples per batch.
        cache: Falsy to skip caching, a string to cache to that file name,
            any other truthy value to use the argument-less ``ds.cache()``.
        shuffle_buffer_size: Buffer size passed to ``ds.shuffle``.
        repeat_count: Value passed to ``ds.repeat``. Defaults to 1000, the
            value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        The dataset after cache -> shuffle -> repeat -> batch -> prefetch.
    """
    if cache:
        if isinstance(cache, str):
            ds = ds.cache(cache)
        else:
            ds = ds.cache()

    ds = ds.shuffle(buffer_size=shuffle_buffer_size)
    ds = ds.repeat(repeat_count)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    return ds

#### `load_data` 함수
- 데이터를 만들기 위해 필요한 함수들을 호출하고 데이터를 리턴해주는 함수
- TensorFlow Dataset 객체를 생성하고 process_path 함수로 이미지와 라벨을 묶은 다음,
- prepare_for_training 함수로 batch가 적용된 Dataset 객체를 만들어준다.

In [7]:
def load_data(data_path, batch_size=32):
    """Build the batched dataset for one split directory.

    Every sub-directory of ``data_path`` is treated as one class. All files
    matching ``<data_path>/*/*`` are mapped through ``process_path`` and the
    result is handed to ``prepare_for_training``.

    Args:
        data_path: Directory that contains one sub-directory per class.
        batch_size: Batch size forwarded to ``prepare_for_training``.

    Returns:
        The dataset returned by ``prepare_for_training``.
    """
    # os.listdir() returns entries in arbitrary order. Sorting gives every
    # call (e.g. Train and Test) the same class -> label-index mapping.
    # Non-directories are dropped so stray files never become a class.
    class_names = sorted(
        cls for cls in os.listdir(data_path)
        if cls != '.DS_Store' and os.path.isdir(os.path.join(data_path, cls))
    )
    data_path = pathlib.Path(data_path)

    list_ds = tf.data.Dataset.list_files(str(data_path/'*/*'))
    labeled_ds = list_ds.map(lambda x: process_path(x, class_names, img_shape=(224, 224)))
    ds = prepare_for_training(labeled_ds, batch_size=batch_size)

    return ds

### 문제2: 모델 구현하기
- EfficientNetB0 backbone과 Dense 레이어를 결합하여 모델 구현
    - Classification 문제로 접근할 것이기 때문에 맨 마지막 Dense 레이어에 우리가 원하는 클래스 개수만큼을 지정해 준다.
- 다른 방법: 다른 모델을 사용하여 backbone 구현

In [8]:
from tensorflow.keras.applications import EfficientNetB0

class Model(tf.keras.Model):
    """Image classifier: EfficientNetB0 backbone plus a small dense head.

    The backbone output goes through global average pooling, batch
    normalization and dropout (rate 0.5) before a softmax ``Dense`` layer
    with ``num_classes`` units.
    """

    def __init__(self, num_classes=5, freeze=False):
        """Create the backbone and the classification head.

        Args:
            num_classes: Number of units of the final softmax layer.
            freeze: If true, the backbone is marked as not trainable.
        """
        super().__init__()
        self.base_model = EfficientNetB0(include_top=False, weights='imagenet')
        if freeze:
            self.base_model.trainable = False
        self.top = tf.keras.Sequential([tf.keras.layers.GlobalAveragePooling2D(name="avg_pool"),
                                       tf.keras.layers.BatchNormalization(),
                                       tf.keras.layers.Dropout(0.5, name="top_dropout")])
        self.classifier = tf.keras.layers.Dense(num_classes, activation="softmax", name="pred")

    def call(self, inputs, training=True):
        """Run the forward pass.

        Args:
            inputs: Batch of images fed to the backbone.
            training: Forwarded to the backbone and to the BN/dropout head.
                The default stays True for backward compatibility, so pass
                ``training=False`` explicitly when evaluating.

        Returns:
            Output of the softmax classification layer.
        """
        # Forward the flag explicitly instead of relying on Keras' implicit
        # call-context propagation.
        x = self.base_model(inputs, training=training)
        x = self.top(x, training=training)
        return self.classifier(x)

### 문제3: custom trainer 구현하기
- TensorFlow GradientTape를 이용해서 custom trainer 클래스 구현
- 학습이 진행되면서 training accuracy가 점차 증가할 수 있도록 한다.

In [9]:
class Trainer:
    """Minimal custom training loop built on ``tf.GradientTape``."""

    def __init__(self, model, epochs, batch, ds_length, loss_fn, optimizer):
        """Store the training configuration.

        Args:
            model: Callable model exposing ``trainable_weights``.
            epochs: Number of epochs to run.
            batch: Batch size of the dataset given to ``train``.
            ds_length: Number of samples in one pass over the data. Together
                with ``batch`` it bounds the number of steps per epoch.
            loss_fn: Callable ``loss_fn(y_true, y_pred)``.
            optimizer: Optimizer providing ``apply_gradients``.
        """
        self.model = model
        self.epochs = epochs
        self.batch = batch
        self.ds_length = ds_length
        self.loss_fn = loss_fn
        self.optimizer = optimizer

    def train(self, train_dataset, train_metric):
        """Train ``self.model`` for ``self.epochs`` epochs.

        Args:
            train_dataset: Iterable yielding ``(x_batch, y_batch)`` pairs.
            train_metric: Metric updated after every step, reported at the
                end of each epoch and then reset.
        """
        # One epoch is ceil(ds_length / batch) steps. Without this bound an
        # epoch would consume the entire dataset, however often it repeats.
        steps_per_epoch = None
        if self.ds_length and self.batch:
            steps_per_epoch = -(-self.ds_length // self.batch)

        for epoch in range(self.epochs):
            print("\nStart of epoch %d" % (epoch+1,))
            # Train on one batch at a time.
            for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    # Use the model handed to the constructor, not a global.
                    predictions = self.model(x_batch_train, training=True)
                    loss_value = self.loss_fn(y_batch_train, predictions)
                trainable_weights = self.model.trainable_weights
                grads = tape.gradient(loss_value, trainable_weights)
                self.optimizer.apply_gradients(zip(grads, trainable_weights))
                # Update the training metric.
                train_metric.update_state(y_batch_train, predictions)
                # Log every 5 batches.
                if step % 5 == 0:
                    print(
                        "Training loss (for one batch) at step %d: %.4f"
                        % (step, float(loss_value))
                    )
                    print("Seen so far: %d samples" % ((step + 1) * self.batch))
                    print(train_metric.result().numpy())

                if steps_per_epoch is not None and step + 1 >= steps_per_epoch:
                    break

            # Report the accuracy accumulated over this epoch, read from the
            # metric passed in as an argument.
            train_acc = train_metric.result()
            print("Training acc over epoch: %.4f" % (float(train_acc),))

            # Reset so the next epoch's accuracy does not include this one.
            # `reset_state` is the newer name, `reset_states` the older one.
            reset_metric = getattr(train_metric, "reset_state", None) or train_metric.reset_states
            reset_metric()

### 모델 학습

In [None]:
# Training configuration.
train_path = "/aiffel/aiffel/vnfoods/data/30vnfoods/Train"
epoch, batch = 10, 16

# Network, input pipeline, objective, optimizer and accuracy metric.
model = Model(num_classes=10)
dataset = load_data(train_path, batch)
loss_function = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()

# Wire everything into the custom trainer and start training.
trainer = Trainer(model, epoch, batch, train_length, loss_function, optimizer)
trainer.train(dataset, train_acc_metric)


Start of epoch 1
Training loss (for one batch) at step 0: 4.2520
Seen so far: 16 samples
0.0
Training loss (for one batch) at step 5: 2.9466
Seen so far: 96 samples
0.15625
Training loss (for one batch) at step 10: 3.1226
Seen so far: 176 samples
0.2215909
Training loss (for one batch) at step 15: 2.8298
Seen so far: 256 samples
0.2421875
Training loss (for one batch) at step 20: 1.8093
Seen so far: 336 samples
0.2827381
Training loss (for one batch) at step 25: 2.1559
Seen so far: 416 samples
0.3221154
Training loss (for one batch) at step 30: 1.1919
Seen so far: 496 samples
0.3467742
Training loss (for one batch) at step 35: 3.4917
Seen so far: 576 samples
0.36458334
Training loss (for one batch) at step 40: 2.3134
Seen so far: 656 samples
0.3871951
Training loss (for one batch) at step 45: 2.0515
Seen so far: 736 samples
0.4035326
Training loss (for one batch) at step 50: 1.7643
Seen so far: 816 samples
0.41911766
Training loss (for one batch) at step 55: 1.6483
Seen so far: 896 sa

**Error**
1. ResourceExhaustedError: failed to allocate memory [Op:Mul]
 > 해결: 인터넷 캐시 지움  
 밑에 방법은 해결을 하지 못했다.
 
2. Kernel Restarting  
The kernel appears to have died. It will restart automatically. 
 > 해결: 배치 사이즈 바꿈  
 거의 6시간 동안 학습을 돌렸더니 에러가 발생해서 다시 돌리는 중..  
 95% 정확도까지 올라갔다.

### 모델 테스트

In [None]:
test_path = "/aiffel/aiffel/vnfoods/data/30vnfoods/Test"

test_ds = load_data(data_path=test_path)

# Evaluate on 10 batches and print "<correct>/<batch size>" for each one.
for x_batch_test, y_batch_test in test_ds.take(10):
    # Model.call defaults to training=True, so inference mode must be
    # requested explicitly to keep the model's Dropout and
    # BatchNormalization layers out of training mode while measuring accuracy.
    prediction = model(x_batch_test, training=False)
    true_classes = tf.argmax(y_batch_test, axis=1)
    predicted_classes = tf.argmax(prediction, axis=1)
    num_correct = np.array(tf.equal(true_classes, predicted_classes)).sum()
    print("{}/{}".format(num_correct, true_classes.shape[0]))