In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras

# resnet ablation study
## resnet의 구조
![resnet](https://github.com/user-attachments/assets/065687c6-2711-4296-8844-7bf3099437b9)
![resnet_block](https://github.com/user-attachments/assets/e9ed45ab-5189-49cb-8205-4626c1ec89c8)
![resnet_structure](https://github.com/user-attachments/assets/2a0b8e9e-8a3c-465f-92e4-ea9a9a5c69fd)

## 작성해볼 resnet은 34와 50
### 공통점
- conv block을 반복해서 쌓은 형태
- 7x7커널의 conv1과 3x3의 maxpool층을 거친 후 conv block으로 진행  

### 차이점
- conv block의 내부 구조가 다름
    - 34에서는 3x3커널을 사용한 2개의 conv 레이어로 구성되어 있지만 50에서는 1x1, 3x3, 1x1의 3개의 conv 레이어로 구성되어 있음
    - bottleneck 구조
        - 계산 효율성을 높이고 네트워크의 깊이를 증가 시킬 수 있음  
        
## 코드 진행
1. resnet의 conv블럭과 이것을 포함한 resnet 빌드함수 구현
    - 34층 50층을 선택할 수 있도록 is_50 구성
    - plain모델과 residual connection기능을 구현한 is_plain 구성
2. 데이터셋 구성
3. resnet_34부터 plain_34, resnet_50, plain_50순으로 빌드 및 훈련, 기록
    - 각 모델별 사용 후 메모리 해제를 통해 메모리 관리
4. 결과 시각화

In [2]:
def build_resnet_block(input_layer, num_cnn=3, channel=64, block_num=1,is_50 = False,is_plain = False):
    x = input_layer
    if is_50:
        is50_n = 1
    else:
        is50_n = 0
    if is_plain:
        isplain_n = 1
    else:
        isplain_n = 0
        
    if not is_50:
        for i in range(num_cnn):
            identity = x
            if block_num !=0 and i==0:
                stride = 2
            else:
                stride = 1
            x = keras.layers.Conv2D(filters=channel, kernel_size = 3, activation='relu', kernel_initializer='he_normal', strides=stride, padding = 'same', name=f'stage{block_num}_{i}_conv1_{is50_n}{isplain_n}')(x)
            x = keras.layers.BatchNormalization()(x)
            x = keras.layers.Activation('relu')(x)

            x = keras.layers.Conv2D(filters=channel, kernel_size = 3, activation='relu', kernel_initializer='he_normal', padding = 'same', name=f'stage{block_num}_{i}_conv2_{is50_n}{isplain_n}')(x)
            x = keras.layers.BatchNormalization()(x)

            #플레인 모델에서는 잔차연결x
            if not is_plain:
                # identity mapping의 channel 수를 맞추기 위해 1x1 Convolution을 사용합니다.
                if i==0:
                    identity = keras.layers.Conv2D(filters=x.shape[-1], kernel_size=1, strides=stride, padding='same', name=f'identity_{block_num}_{i}_{is50_n}{isplain_n}')(identity)
                #add
                x = keras.layers.Add()([x, identity])
                x = keras.layers.Activation('relu')(x)
    else:
        for i in range(num_cnn):
            identity = x
            if block_num !=0 and i==0:
                stride = 2
            else:
                stride = 1
            x = keras.layers.Conv2D(filters=channel, kernel_size = 1, activation='relu', kernel_initializer='he_normal', strides=stride, padding = 'same', name=f'stage{block_num}_{i}_conv1_{is50_n}{isplain_n}')(x)
            x = keras.layers.BatchNormalization()(x)
            x = keras.layers.Activation('relu')(x)

            x = keras.layers.Conv2D(filters=channel, kernel_size = 3, activation='relu', kernel_initializer='he_normal', padding = 'same', name=f'stage{block_num}_{i}_conv2_{is50_n}{isplain_n}')(x)
            x = keras.layers.BatchNormalization()(x)
            x = keras.layers.Activation('relu')(x)

            x = keras.layers.Conv2D(filters=channel*4, kernel_size = 1, activation='relu', kernel_initializer='he_normal', padding = 'same', name=f'stage{block_num}_{i}_conv3_{is50_n}{isplain_n}')(x)
            x = keras.layers.BatchNormalization()(x)

            #플레인 모델에서는 잔차연결x
            if not is_plain:
                identity = keras.layers.Conv2D(filters=channel*4, kernel_size = 1, activation='relu', kernel_initializer='he_normal', strides=stride, padding = 'same', name=f'identity_{block_num}_{i}_{is50_n}{isplain_n}')(identity)
                identity = keras.layers.BatchNormalization()(identity)
                #add
                x = keras.layers.Add()([x, identity])
                x = keras.layers.Activation('relu')(x)
    return x

In [3]:
def build_resnet(input_shape=(32,32,3),
              num_cnn_list=[3,4,6,3],
              channel_list=[64,128,256,512],
              num_classes=10,is_50 = False, is_plain = False):

    assert len(num_cnn_list) == len(channel_list)

    input_layer = keras.layers.Input(shape=input_shape)
    output = input_layer
    output = keras.layers.Conv2D(filters=64, kernel_size = 7, activation='relu', strides = 2, padding = 'same')(output)
    output = keras.layers.BatchNormalization()(output)
    output = keras.layers.Activation('relu')(output)

    # conv2 pooling
    output = keras.layers.MaxPooling2D(pool_size = 3, strides = 2, padding = 'same')(output)

    for i, (num_cnn, channel) in enumerate(zip(num_cnn_list, channel_list)):
        output = build_resnet_block(
            output,
            num_cnn=num_cnn,
            channel=channel,
            block_num=i,
            is_50 = is_50,
            is_plain = is_plain
        )

    output = keras.layers.AveragePooling2D(pool_size=2, padding = 'same', name='avg_pool')(output)
    output = keras.layers.Flatten(name='flatten')(output)
    output = keras.layers.Dense(num_classes, activation='softmax', name='fc1000')(output)

    model = keras.models.Model(inputs=input_layer, outputs=output)
    return model

In [4]:
resnet_34 = build_resnet(input_shape=(224,224,3))
resnet_34.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 112, 112, 64) 9472        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 112, 112, 64) 256         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 112, 112, 64) 0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [5]:
import tensorflow_datasets as tfds
import urllib3
urllib3.disable_warnings()

In [6]:
# 데이터셋 로드 및 전처리
def preprocess(image, label, target_size=(224, 224)):
    image = tf.image.resize(image, target_size)
    image = image / 255.0  # Normalize to [0, 1]
    return image, label

def load_data(dataset_name='cats_vs_dogs', batch_size=32, target_size=(224, 224)):
    (train_ds, val_ds), ds_info = tfds.load(
        dataset_name,
        split=['train[:80%]', 'train[80%:]'],
        as_supervised=True,
        with_info=True,
    )
    num_classes = ds_info.features['label'].num_classes
    train_ds = train_ds.map(lambda x, y: preprocess(x, y, target_size)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    val_ds = val_ds.map(lambda x, y: preprocess(x, y, target_size)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return train_ds, val_ds, num_classes

In [7]:
BATCH_SIZE = 32
EPOCH = 15

setattr(tfds.image_classification.cats_vs_dogs, '_URL',"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip")
dataset_name = 'cats_vs_dogs'

ds_train, ds_test, num_classes = load_data(dataset_name, batch_size=BATCH_SIZE, target_size=(224, 224))

In [8]:
resnet_34.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.SGD(lr=0.01, clipnorm=1.),
    metrics=['accuracy'],
)

history_res34 = resnet_34.fit(
    ds_train,
    epochs=EPOCH,
    validation_data=ds_test,
    verbose=1,
    use_multiprocessing=True,
)



Epoch 1/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 2/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 3/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 4/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 5/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 6/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 7/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 8/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 9/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 10/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 11/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 12/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 13/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 14/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 15/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9




In [9]:
del resnet_34

plain_34 = build_resnet(input_shape=(224,224,3),is_plain=True)
plain_34.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 112, 112, 64)      9472      
_________________________________________________________________
batch_normalization_33 (Batc (None, 112, 112, 64)      256       
_________________________________________________________________
activation_33 (Activation)   (None, 112, 112, 64)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 56, 56, 64)        0         
_________________________________________________________________
stage0_0_conv1_01 (Conv2D)   (None, 56, 56, 64)        36928     
_________________________________________________________________
batch_normalization_34 (Batc (None, 56, 56, 64)        256 

In [None]:
plain_34.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.SGD(lr=0.01, clipnorm=1.),
    metrics=['accuracy'],
)

history_plain34 = plain_34.fit(
    ds_train,
    epochs=EPOCH,
    validation_data=ds_test,
    verbose=1,
    use_multiprocessing=True,
)

Epoch 1/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 2/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 3/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 4/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


Epoch 5/15

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9








Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 128 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9




In [None]:
del plain_34

resnet_50 = build_resnet(input_shape=(224,224,3),is_50=True)
resnet_50.summary()

In [None]:
resnet_50.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.SGD(lr=0.01, clipnorm=1.),
    metrics=['accuracy'],
)

history_res50 = resnet_50.fit(
    ds_train,
    epochs=EPOCH,
    validation_data=ds_test,
    verbose=1,
    use_multiprocessing=True,
)

In [None]:
del resnet_50

plain_50 = build_resnet(input_shape=(224,224,3),is_50=True, is_plain=True)
plain_50.summary()

In [None]:
plain_50.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.SGD(lr=0.01, clipnorm=1.),
    metrics=['accuracy'],
)

history_plain50 = plain_50.fit(
    ds_train,
    epochs=EPOCH,
    validation_data=ds_test,
    verbose=1,
    use_multiprocessing=True,
)

In [None]:
import matplotlib.pyplot as plt

fig, axes = plt.subplots(2, 2, figsize=(15, 15))

# loss
axes[0][0].plot(history_plain34.history['loss'], 'r', label='plain34')
axes[0][0].plot(history_res34.history['loss'], 'b', label='res34')
axes[0][0].set_title('Model Training Loss', fontsize=14)
axes[0][0].set_ylabel('Loss', fontsize=12)
axes[0][0].set_xlabel('Epoch', fontsize=12)
axes[0][0].legend(loc='upper left', fontsize=10)
axes[0][0].grid(True, linestyle='--', alpha=0.7)

# acc
axes[0][1].plot(history_plain34.history['accuracy'], 'r', label='plain34')
axes[0][1].plot(history_res34.history['accuracy'], 'b', label='res34')
axes[0][1].set_title('Model Training Accuracy', fontsize=14)
axes[0][1].set_ylabel('Accuracy', fontsize=12)
axes[0][1].set_xlabel('Epoch', fontsize=12)
axes[0][1].legend(loc='upper left', fontsize=10)
axes[0][1].grid(True, linestyle='--', alpha=0.7)

# val_loss
axes[1][0].plot(history_plain34.history['val_loss'], 'r', label='plain34')
axes[1][0].plot(history_res34.history['val_loss'], 'b', label='res34')
axes[1][0].set_title('Model Validation Loss', fontsize=14)
axes[1][0].set_ylabel('Loss', fontsize=12)
axes[1][0].set_xlabel('Epoch', fontsize=12)
axes[1][0].legend(loc='upper left', fontsize=10)
axes[1][0].grid(True, linestyle='--', alpha=0.7)

# var_acc
axes[1][1].plot(history_plain34.history['val_accuracy'], 'r', label='plain34')
axes[1][1].plot(history_res34.history['val_accuracy'], 'b', label='res34')
axes[1][1].set_title('Model Validation Accuracy', fontsize=14)
axes[1][1].set_ylabel('Accuracy', fontsize=12)
axes[1][1].set_xlabel('Epoch', fontsize=12)
axes[1][1].legend(loc='upper left', fontsize=10)
axes[1][1].grid(True, linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(15, 15))

# loss
axes[0][0].plot(history_plain50.history['loss'], 'r', label='plain50')
axes[0][0].plot(history_res50.history['loss'], 'b', label='res50')
axes[0][0].set_title('Model Training Loss', fontsize=14)
axes[0][0].set_ylabel('Loss', fontsize=12)
axes[0][0].set_xlabel('Epoch', fontsize=12)
axes[0][0].legend(loc='upper left', fontsize=10)
axes[0][0].grid(True, linestyle='--', alpha=0.7)

# acc
axes[0][1].plot(history_plain50.history['accuracy'], 'r', label='plain50')
axes[0][1].plot(history_res50.history['accuracy'], 'b', label='res50')
axes[0][1].set_title('Model Training Accuracy', fontsize=14)
axes[0][1].set_ylabel('Accuracy', fontsize=12)
axes[0][1].set_xlabel('Epoch', fontsize=12)
axes[0][1].legend(loc='upper left', fontsize=10)
axes[0][1].grid(True, linestyle='--', alpha=0.7)

# val_loss
axes[1][0].plot(history_plain50.history['val_loss'], 'r', label='plain50')
axes[1][0].plot(history_res50.history['val_loss'], 'b', label='res50')
axes[1][0].set_title('Model Validation Loss', fontsize=14)
axes[1][0].set_ylabel('Loss', fontsize=12)
axes[1][0].set_xlabel('Epoch', fontsize=12)
axes[1][0].legend(loc='upper left', fontsize=10)
axes[1][0].grid(True, linestyle='--', alpha=0.7)

# var_acc
axes[1][1].plot(history_plain50.history['val_accuracy'], 'r', label='plain50')
axes[1][1].plot(history_res50.history['val_accuracy'], 'b', label='res50')
axes[1][1].set_title('Model Validation Accuracy', fontsize=14)
axes[1][1].set_ylabel('Accuracy', fontsize=12)
axes[1][1].set_xlabel('Epoch', fontsize=12)
axes[1][1].legend(loc='upper left', fontsize=10)
axes[1][1].grid(True, linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()

### cifar-10
![34cifar10](https://github.com/user-attachments/assets/579d0686-4a8f-419d-8033-42d26ae4878d)
![50cifar10](https://github.com/user-attachments/assets/0e767743-47df-42b4-b0d7-e1e6f1a561a4)

### 회고
- cifar-10의 데이터를 사용할 때보다 cats_vs_dogs를 사용할 때 이미지의 크기에 비례한 훈련속도 증가가 있었습니다.
- 처음 실수로 resnet_50에서 약 1.4억개의 파라미터를 가진 모델을 만들어서 훈련이 진행되었는데 마찬가지로 훈련시간이 비례적으로 증가했습니다.
    - cifar-10에서 약 2100만개의 파라미터 : 약 20초
    - cifar-10에서 약 1.4억개의 파라미터 : 약 120초
- cifar-10에서 validation loss와 validation accuracy가 많이 요동치는 모습을 보여줬지만 에폭수가 적어서 그렇다고 생각했습니다.
- cifar-10에서 학습이 진행됨에 따라 res_34가 plain_34보다 좋은 성능을 보여주었습니다.
- cifar-10에서 res_50과 plain_50은 결과가 비슷했습니다. 더 많은 에폭을 두고 결과를 관찰해야 할 것 같습니다.
- 모델을 구성할 때 논문상의 conv3_1, conv4_1, conv5_1에서 다운샘플링을 진행하는데 이를 위해서 1x1의 커널을 갖는 conv층을 하나 두게 되었습니다. 이걸 conv층을 두지않고 다르게 처리할 수 있는지를 공부해야 합니다.
    - 파라미터 수가 증가하기 때문에 파라미터가 증가하지 않고 다운샘플링하는 법을 찾아봐야 할 것입니다.
    - conv레이어의 필터가 1x1이라 하더라도 3채널간의 convolution은 일어나기 때문에 좀더 데이터가 가공되지 않는 방향으로의 방법이 궁금합니다.
- cats_vs_dogs결과는 학습시간이 너무 오래 걸려서 완료 되는데로 추가하겠습니다.