In [1]:
import pandas as pd
import numpy as np
import os

import matplotlib.pyplot as plt
import seaborn as sns
import shutil
import glob
import seaborn as sns
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.preprocessing import image


In [2]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('submission.csv')


In [3]:
train_df.head()  

# image 경로와 label 존재 

Unnamed: 0,image,label
0,google_pork_belly_89.jpg,pork_belly
1,google_pasta_877.jpg,pasta
2,google_janchi_guksu_211.jpg,janchi_guksu
3,google_pizza_598.jpg,pizza
4,naver_pasta_316.jpg,pasta


In [4]:
train_df['label'].unique()  # 총 12개의 클래스 존재 

array(['pork_belly', 'pasta', 'janchi_guksu', 'pizza', 'pork_cutlet',
       'cake', 'jajangmyeon', 'steak', 'ramen', 'tteokbokki',
       'grilled_eel', 'fried_chicken'], dtype=object)

In [5]:
train_df['label'].value_counts()

cake             1184
pasta            1072
steak            1048
fried_chicken    1034
pizza             963
grilled_eel       951
ramen             881
pork_cutlet       869
janchi_guksu      856
tteokbokki        841
jajangmyeon       833
pork_belly        789
Name: label, dtype: int64

In [6]:
from sklearn.model_selection import train_test_split

data1, data2 = train_test_split(train_df, test_size=0.2, stratify=train_df['label'])  # 80:20
data1, data3 = train_test_split(data1, test_size=0.25, stratify=data1['label'])       # 80 => 60 : 20 
data1, data4 = train_test_split(data1, test_size=0.35, stratify=data1['label'])       # 60 => 40 : 20
data1, data5 = train_test_split(data1, test_size=0.5, stratify=data1['label'])



data1 = data1.reset_index(drop=True)
data2 = data2.reset_index(drop=True)
data3 = data3.reset_index(drop=True)
data4 = data4.reset_index(drop=True)
data5 = data5.reset_index(drop=True)

In [7]:
path = os.path.join('train')
cls = lambda x : os.path.join(path, '_'.join(x.split('_')[1:-1])+'\\'+x)

data1['image'] = data1['image'].map(lambda x : cls(x))
data2['image'] = data2['image'].map(lambda x : cls(x))
data3['image'] = data3['image'].map(lambda x : cls(x))
data4['image'] = data4['image'].map(lambda x : cls(x))
data5['image'] = data5['image'].map(lambda x : cls(x))


In [9]:
# train set 
set1 = pd.concat([data2, data3, data4, data5], ignore_index=True) 
set2 = pd.concat([data1, data3, data4, data5], ignore_index=True)
set3 = pd.concat([data1, data2, data4, data5], ignore_index=True)
set4 = pd.concat([data1, data2, data3, data5], ignore_index=True)
set5 = pd.concat([data1, data2, data3, data4], ignore_index=True)

print(set1.shape)  
print(set2.shape)
print(set3.shape)
print(set4.shape)
print(set5.shape)

(9114, 2)
(9056, 2)
(9057, 2)
(8943, 2)
(9114, 2)


### Data aug

In [34]:
height, width, channel = (224, 224, 3)
labels = train_df.label.unique().tolist()


train_datagen= ImageDataGenerator(rescale=1./255,
                              rotation_range=30,
                              width_shift_range=0.2,
                              height_shift_range=0.2,
                              shear_range=0.2,
                              zoom_range=[0.8, 1],
                              horizontal_flip=True)

valid_datagen = ImageDataGenerator(rescale=1./255)


In [35]:
valid_list = [data1, data2, data3, data4, data5]
train_list = [set1, set2, set3, set4, set5]

# Modeling

In [29]:
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import Input, Dense, Activation, GlobalAveragePooling2D, Dropout, BatchNormalization, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
import tensorflow as tf 


In [36]:
# 조기멈춤
stop = EarlyStopping(patience=5)

# 자동 저장
checkpoint = ModelCheckpoint("Xceptionfold_model/model_weights", monitor='val_accuracy',
                            save_weights_only=True, mode='max',verbose=2,save_best_only=True)

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=3, min_lr=0.0003)

callbacks = [stop, checkpoint, reduce_lr]

In [43]:
base_model = Xception(include_top=False, input_tensor=Input(shape=(224, 224, 3)), weights='imagenet', classes=12)
base_model.trainable = False   # 가중치 동결 후 학습 

model = tf.keras.models.Sequential([
    base_model,
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dense(1024, activation='relu'),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dense(len(labels), activation='softmax'),  # output

])


In [44]:
scores = []
best_model = []

for iter in range(5):
    print(f"\n========={iter+1}번째 학습=========\n")

    train_generator = train_datagen.flow_from_dataframe(train_list[iter],
                                                   x_col='image',
                                                   y_col='label',
                                                   batch_size=64,
                                                   color_mode= 'rgb',
                                                   target_size=(height, width), shuffle=True)

    valid_generator = valid_datagen.flow_from_dataframe(valid_list[iter],
                                                   x_col='image',
                                                   y_col='label',
                                                   batch_size=64,
                                                   color_mode= 'rgb',
                                                   target_size=(height, width), shuffle=True) 
    
    model.compile(optimizer=Adam(0.001), loss='categorical_crossentropy', metrics=['accuracy'])

    print("\n======== train start! ========\n")
    history = model.fit(train_generator, 
                        epochs=10, 
                        callbacks=callbacks,
                        validation_data=valid_generator)

    best_model.append(model)
    scores.append(history.history['val_accuracy'])




Found 9114 validated image filenames belonging to 12 classes.
Found 2207 validated image filenames belonging to 12 classes.


Epoch 1/10
  6/143 [>.............................] - ETA: 2:00 - loss: 2.3811 - accuracy: 0.2240



Epoch 1: val_accuracy improved from -inf to 0.77753, saving model to Xceptionfold_model\model_weights
Epoch 2/10
Epoch 2: val_accuracy improved from 0.77753 to 0.79203, saving model to Xceptionfold_model\model_weights
Epoch 3/10
Epoch 3: val_accuracy improved from 0.79203 to 0.80743, saving model to Xceptionfold_model\model_weights
Epoch 4/10
Epoch 4: val_accuracy improved from 0.80743 to 0.81106, saving model to Xceptionfold_model\model_weights
Epoch 5/10
Epoch 5: val_accuracy did not improve from 0.81106
Epoch 6/10
Epoch 6: val_accuracy did not improve from 0.81106
Epoch 7/10
Epoch 7: val_accuracy improved from 0.81106 to 0.82148, saving model to Xceptionfold_model\model_weights
Epoch 8/10
Epoch 8: val_accuracy did not improve from 0.82148
Epoch 9/10
Epoch 9: val_accuracy improved from 0.82148 to 0.83416, saving model to Xceptionfold_model\model_weights
Epoch 10/10
Epoch 10: val_accuracy did not improve from 0.83416


Found 9056 validated image filenames belonging to 12 classes.
Foun



Epoch 1: val_accuracy improved from 0.83416 to 0.87594, saving model to Xceptionfold_model\model_weights
Epoch 2/10
Epoch 2: val_accuracy improved from 0.87594 to 0.87770, saving model to Xceptionfold_model\model_weights
Epoch 3/10
Epoch 3: val_accuracy did not improve from 0.87770
Epoch 4/10
Epoch 4: val_accuracy did not improve from 0.87770
Epoch 5/10
Epoch 5: val_accuracy improved from 0.87770 to 0.89934, saving model to Xceptionfold_model\model_weights
Epoch 6/10
Epoch 6: val_accuracy did not improve from 0.89934
Epoch 7/10
Epoch 7: val_accuracy did not improve from 0.89934
Epoch 8/10
Epoch 8: val_accuracy did not improve from 0.89934
Epoch 9/10
Epoch 9: val_accuracy did not improve from 0.89934
Epoch 10/10
Epoch 10: val_accuracy did not improve from 0.89934


Found 9057 validated image filenames belonging to 12 classes.
Found 2264 validated image filenames belonging to 12 classes.


Epoch 1/10
 12/142 [=>............................] - ETA: 1:44 - loss: 0.3074 - accuracy: 0.9062



Epoch 1: val_accuracy improved from 0.89934 to 0.91387, saving model to Xceptionfold_model\model_weights
Epoch 2/10
Epoch 2: val_accuracy did not improve from 0.91387
Epoch 3/10
Epoch 3: val_accuracy did not improve from 0.91387
Epoch 4/10
Epoch 4: val_accuracy did not improve from 0.91387
Epoch 5/10
Epoch 5: val_accuracy did not improve from 0.91387
Epoch 6/10
Epoch 6: val_accuracy did not improve from 0.91387


Found 8943 validated image filenames belonging to 12 classes.
Found 2378 validated image filenames belonging to 12 classes.






Epoch 1/10
Epoch 1: val_accuracy improved from 0.91387 to 0.92893, saving model to Xceptionfold_model\model_weights
Epoch 2/10
Epoch 2: val_accuracy did not improve from 0.92893
Epoch 3/10
Epoch 3: val_accuracy did not improve from 0.92893
Epoch 4/10
Epoch 4: val_accuracy did not improve from 0.92893
Epoch 5/10
Epoch 5: val_accuracy did not improve from 0.92893
Epoch 6/10
Epoch 6: val_accuracy improved from 0.92893 to 0.93103, saving model to Xceptionfold_model\model_weights
Epoch 7/10
Epoch 7: val_accuracy did not improve from 0.93103
Epoch 8/10
Epoch 8: val_accuracy improved from 0.93103 to 0.93440, saving model to Xceptionfold_model\model_weights
Epoch 9/10
Epoch 9: val_accuracy did not improve from 0.93440
Epoch 10/10
Epoch 10: val_accuracy did not improve from 0.93440


Found 9114 validated image filenames belonging to 12 classes.
Found 2207 validated image filenames belonging to 12 classes.


Epoch 1/10
 11/143 [=>............................] - ETA: 1:42 - loss: 0.2871 - accurac



Epoch 1: val_accuracy improved from 0.93440 to 0.95242, saving model to Xceptionfold_model\model_weights
Epoch 2/10
Epoch 2: val_accuracy did not improve from 0.95242
Epoch 3/10
Epoch 3: val_accuracy did not improve from 0.95242
Epoch 4/10
Epoch 4: val_accuracy did not improve from 0.95242
Epoch 5/10
Epoch 5: val_accuracy did not improve from 0.95242
Epoch 6/10
Epoch 6: val_accuracy did not improve from 0.95242


In [46]:
model.save("Xceptionfold.h5")

### Tuning

In [47]:
tuned_scores = []
tuend_best_model = []

for layer in model.layers[:-20]:
    layer.trainable = True

In [51]:
for iter in range(5):
    print(f"\n========={iter+1}번째 학습=========\n")

    train_generator = train_datagen.flow_from_dataframe(train_list[iter],
                                                        x_col='image',
                                                        y_col='label',
                                                        batch_size=64,
                                                        color_mode='rgb',
                                                        target_size=(height, width), shuffle=True)

    valid_generator = valid_datagen.flow_from_dataframe(valid_list[iter],
                                                        x_col='image',
                                                        y_col='label',
                                                        batch_size=64,
                                                        color_mode='rgb',
                                                        target_size=(height, width), shuffle=True)
    
    model.compile(optimizer=Adam(0.00001), loss='categorical_crossentropy', metrics=['accuracy'])

    print("\n======== train start! ========\n")
    history = model.fit(train_generator, 
                        epochs=20, 
                        callbacks=callbacks,
                        validation_data=valid_generator)

    best_model.append(model)
    scores.append(history.history['val_accuracy'])




Found 9114 validated image filenames belonging to 12 classes.
Found 2207 validated image filenames belonging to 12 classes.


Epoch 1/20
 25/143 [====>.........................] - ETA: 1:30 - loss: 0.1897 - accuracy: 0.9385



Epoch 1: val_accuracy improved from 0.95242 to 0.96783, saving model to Xceptionfold_model\model_weights
Epoch 2/20

In [None]:
model.save("Xception_tuned_fold.h5")

# Test 

In [None]:
batch_size = len(test_df["image"])
height, width, channel = 224, 224, 3

path = os.path.join('test')

test_image = np.zeros((batch_size, height, width, channel))
print(test_image.shape)

cnt = 0


In [None]:
for i in range(len(test_df["image"])):
    path_img = test_df['image'][i]
    img = image.load_img(os.path.join(path, path_img),
                         target_size=(height, width))

    img_tensor = image.img_to_array(img)
    img_tensor = np.array(img_tensor, dtype="float32")

    img_tensor /= 255

    img_tensor = np.expand_dims(img_tensor, axis=0)

    test_image[i] = img_tensor

    if (i % 100 == 0):
        print(str(i) + " 완료")  # 진행 상황 확인용


In [None]:

pred2label = {}
for x in train_generator.class_indices.keys():
    pred2label[train_generator.class_indices[x]] = x


predictions = model.predict(test_image)
test_df['label'] = [pred2label[np.argmax(pred)] for pred in predictions]

print(test_df)
test_df.to_csv('/submissions/Xception_5fold.csv', index=False)
