<a href="https://colab.research.google.com/github/hyesukim1/German-Load-Sign-Classification/blob/main/%EA%B5%90%ED%86%B5%ED%91%9C%EC%A7%80%ED%8C%90%EB%B6%84%EB%A5%98_%EB%AA%A8%EB%8D%B8%20%EB%B9%84%EA%B5%90.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load data
### Download the dataset from Kaggle using the Kaggle API

In [None]:
!pip install kaggle # install the Kaggle command-line / API client

# Upload kaggle.json (the Kaggle API token) into the Colab session via the upload prompt
from google.colab import files 
files.upload()

In [None]:
# Create the Kaggle configuration directory and copy the uploaded token into it
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# Make the token readable by the owner only, so that no permission warning is raised
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Check that the kaggle.json file is present
!ls -lha kaggle.json # the listing should show kaggle.json

! kaggle competitions list

In [None]:
! kaggle datasets download -d meowmeowmeowmeowmeow/gtsrb-german-traffic-sign\

In [None]:
!unzip gtsrb-german-traffic-sign.zip # extract the downloaded zip archive

# Import the packages & Check the data set

In [None]:
import pandas as pd
import numpy as np

import os
import pathlib

import cv2 #영상처리에 사용하는 오픈소스 라이브러리, 컴퓨터가 사람 눈처럼 인식할 수 있게 처리
from PIL import Image # 파이썬 이미지 처리 pillow 라이브러리
from tensorflow.keras.preprocessing import image

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator #imagedatagenerater는 이미지를 학습시킬 때 학습 데이터의 양이 적을 경우 학습데이터를 조금씩 변형 시켜서 학습데이터의 양을 늘리는 방식중 하나
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img, load_img
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
from tensorflow.keras.models import Sequential

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
style.use('fivethirtyeight')#그래프 스타일 지정

#난수 랜덤성 고정
np.random.seed(42)

%matplotlib inline

In [None]:
# Load the three CSV index files as DataFrames and print a schema summary of each.
meta_df, train_df, test_df = (
    pd.read_csv(csv_name) for csv_name in ('Meta.csv', 'Train.csv', 'Test.csv')
)

separator = '===================================='
for position, frame in enumerate((meta_df, train_df, test_df)):
    if position:
        print(separator)
    # DataFrame.info() prints its own report and returns None, which print() then echoes.
    print(frame.info())

# Roi는 Region of interest의 약자로 데이터에서 표지판이 있는 부분을 의미

In [None]:
# Draw the annotated region of interest (ROI) on one training image.
# The Roi.* columns give the bounding box of the sign inside the picture. Cropping to
# that box could improve classification, but this project skips that step.
from PIL import Image
from PIL import ImageDraw

# The picture and its ROI must come from the same Train.csv row
# (the original opened row 0 but drew the box of row 1).
sample_row = train_df.iloc[0]

img_sample = Image.open('/content/' + sample_row['Path'])

draw = ImageDraw.Draw(img_sample)
roi_box = [int(sample_row[column]) for column in ('Roi.X1', 'Roi.Y1', 'Roi.X2', 'Roi.Y2')]
draw.rectangle(roi_box, outline="red")

# Enlarge for display only; the box above was drawn in the original pixel coordinates.
img_sample_resized = img_sample.resize((300, 300))
img_sample_resized

In [None]:
# Dataset folders inside the Colab working directory.
# Tip: in Colab, right-clicking a file in the left-hand file browser lets you copy its path.
content_root = pathlib.Path('/content')
data_dir = content_root / 'Meta'
train_path = content_root / 'Train'
test_path = content_root / 'Test'

# Data preprocessing
- Data transformation: normalization
- Label the image data
- Categorical variable: one-hot encoding
- Image size distribution (the size and resolution of the images differ)


- Normalize pixel values to the range 0–1 instead of 0–255

In [None]:
# Number of classes: the data loaders read one numerically named sub-folder of Train/
# per class. Only such folders are counted, so stray entries (for example a
# .ipynb_checkpoints directory) cannot inflate the class count.
NUM_CATEGORIES = sum(
    1
    for entry in os.scandir(train_path)
    if entry.is_dir() and entry.name.isdigit()
)
NUM_CATEGORIES

In [None]:
# Human-readable class labels: class id -> "<id>_<sign name>".
_SIGN_NAMES = (
    'Speed limit (20km/h)',
    'Speed limit (30km/h)',
    'Speed limit (50km/h)',
    'Speed limit (60km/h)',
    'Speed limit (70km/h)',
    'Speed limit (80km/h)',
    'End of speed limit (80km/h)',
    'Speed limit (100km/h)',
    'Speed limit (120km/h)',
    'No passing',
    'No passing veh over 3.5 tons',
    'Right-of-way at intersection',
    'Priority road',
    'Yield',
    'Stop',
    'No vehicles',
    'Veh > 3.5 tons prohibited',
    'No entry',
    'General caution',
    'Dangerous curve left',
    'Dangerous curve right',
    'Double curve',
    'Bumpy road',
    'Slippery road',
    'Road narrows on the right',
    'Road work',
    'Traffic signals',
    'Pedestrians',
    'Children crossing',
    'Bicycles crossing',
    'Beware of ice/snow',
    'Wild animals crossing',
    'End speed + passing limits',
    'Turn right ahead',
    'Turn left ahead',
    'Ahead only',
    'Go straight or right',
    'Go straight or left',
    'Keep right',
    'Keep left',
    'Roundabout mandatory',
    'End of no passing',
    'End no passing veh > 3.5 tons',
)

# The position in the tuple is the class id, which is also prefixed to the label text.
classes = {class_id: f'{class_id}_{name}' for class_id, name in enumerate(_SIGN_NAMES)}

In [None]:
# Frequency of each image width in the training set.
plt.figure(figsize=(30, 10))
ax = sns.countplot(data=train_df, x="Width")

In [None]:
# Histogram of training-image widths grouped into 10-pixel bins.
# A very small target size loses detail from the large images, while a very large one
# emphasises how little information the small images carry.
# The bin edges reach up to the widest image: pd.cut assigns NaN to values outside the
# bins and value_counts() would then silently drop those images from the chart.
width_bins = np.arange(0, train_df['Width'].max() + 10, 10)
df_cutWidth = pd.cut(train_df['Width'], width_bins).value_counts(sort=False)

fig, ax = plt.subplots(figsize=(20, 10))
bar_positions = range(len(df_cutWidth))
ax.bar(bar_positions, df_cutWidth.values)
ax.set_xticks(bar_positions)
ax.set_xticklabels(df_cutWidth.index)
# plt.show() rather than fig.show(): the latter warns on non-interactive (inline) backends.
plt.show()

In [None]:
# Preprocessing target: the source images differ in size, so every image is resized
# to one common height and width.
IMG_HEIGHT, IMG_WIDTH = 32, 32
channels = 3

# Data Exploration & Visualization

In [None]:
# Show the reference picture of every class (Meta/<class id>.png) in a 7x7 grid.
plt.figure(figsize=(10, 15))
for class_id in range(43):
    plt.subplot(7, 7, class_id + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pictogram = plt.imread(f"/content/Meta/{class_id}.png")
    plt.imshow(pictogram)
    plt.xlabel(class_id)

In [None]:
# Show one sample photo from every class folder of the training set.
# pathlib lets the folder be handled as an object (joining, globbing) instead of a raw string.
# The shared train_path is used instead of a second, differently-cased '/content/train' literal.
img_dir = train_path
plt.figure(figsize=(30, 30))
for i in range(NUM_CATEGORIES):
    plt.subplot(7, 7, i + 1)
    plt.grid(False)
    plt.xticks([])  # no tick marks
    plt.yticks([])
    # glob('<class>/*') matches every file in the class folder ('*' = any string);
    # sorting makes the displayed sample deterministic.
    sign = sorted(img_dir.glob(f'{i}/*'))[0]
    img = load_img(sign, target_size=(40, 40))
    plt.imshow(img)
plt.show()

In [None]:
# Bar chart of the number of training images per class, in ascending order.
# The folders are read from train_path (the same directory whose files are counted below);
# only numerically named entries are class folders, anything else is skipped.
folders = [name for name in os.listdir(train_path) if name.isdigit()]

# (image count, class label) pairs; sorting orders them by image count.
count_label_pairs = sorted(
    (len(os.listdir(train_path / folder)), classes[int(folder)])
    for folder in folders
)
train_num = [count for count, _ in count_label_pairs]
class_num = [label for _, label in count_label_pairs]

# Plot
plt.figure(figsize=(21, 10))
plt.bar(class_num, train_num)
plt.xticks(class_num, rotation='vertical')
plt.show()

In [None]:
# Compare the class balance of the train and test subsets side by side.
fig, axs = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(25, 6))

for axis, frame, subset in zip(axs, (train_df, test_df), ('Train', 'Test')):
    # The data is passed by keyword: newer seaborn releases no longer accept it positionally.
    sns.countplot(
        x=frame['ClassId'],
        ax=axis,
        order=frame['ClassId'].value_counts(ascending=True).index,  # rarest class first
        palette="Blues",
    )
    # Labels are set after plotting because countplot overwrites the axis labels.
    axis.set_title(f'{subset} classes distribution')
    axis.set_xlabel('Class ID')
    axis.set_ylabel('Count')

# The train and test subsets have similar class distributions.

# CNN 모델

In [None]:
def load_data(data_dir, target_size=(IMG_HEIGHT, IMG_WIDTH)):
    """Load every image below ``data_dir`` together with its class id.

    Args:
        data_dir: Directory holding one sub-folder per class, named
            "0" .. str(NUM_CATEGORIES - 1).
        target_size: (height, width) each image is resized to while loading.
            Defaults to the CNN input size.

    Returns:
        A pair ``(images, labels)``: a list of image arrays (as returned by
        ``img_to_array``) and a parallel list of integer class ids.
    """
    images = []
    labels = []
    for category in range(NUM_CATEGORIES):
        category_dir = os.path.join(data_dir, str(category))
        # os.listdir returns entries in arbitrary order; sorting keeps the sample order,
        # and therefore the seeded train/validation split, reproducible.
        for file_name in sorted(os.listdir(category_dir)):
            img = load_img(os.path.join(category_dir, file_name), target_size=target_size)
            images.append(img_to_array(img))  # convert the image to a NumPy array
            labels.append(category)

    return images, labels


images, labels = load_data(train_path)

image_data = np.array(images)
image_labels = np.array(labels)

In [None]:
# Hold out 30% of the labelled training images as a validation set (shuffled, fixed seed).
x_train, x_val, y_train, y_val = train_test_split(
    image_data,
    image_labels,
    test_size=0.3,
    random_state=42,
    shuffle=True,
)
for split in (x_train, x_val, y_train, y_val):
    print(split.shape)

In [None]:
# x_train, x_val, y_train, y_val = train_test_split(image_data, image_labels, test_size=0.4)
# x_train = x_train.astype('float32')/255 
# x_val = x_val.astype('float32')/255
# y_train = keras.utils.to_categorical(y_train, NUM_CATEGORIES)
# y_val = keras.utils.to_categorical(y_val, NUM_CATEGORIES)

In [None]:
# Scale the pixel values from 0..255 down to 0..1.0.
x_train, x_val = (pixels.astype('float32') / 255 for pixels in (x_train, x_val))

# One-hot encode the integer class labels.
y_train, y_val = (
    keras.utils.to_categorical(class_ids, NUM_CATEGORIES) for class_ids in (y_train, y_val)
)

for encoded in (y_train, y_val):
    print(encoded.shape)

In [None]:
# Baseline model: a plain three-stage CNN.
model = Sequential([
    # First convolutional stage: extracts features from the input image.
    Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.25),

    # Second convolutional stage.
    Conv2D(filters=64, kernel_size=3, activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.25),  # randomly zeroes 25% of its inputs during training

    # Third convolutional stage: the filters extract features, ReLU adds the non-linearity.
    Conv2D(filters=64, kernel_size=3, activation='relu'),

    # Flatten the feature maps and classify with dense layers.
    Flatten(),
    Dense(units=64, activation='relu'),
    Dense(NUM_CATEGORIES, activation='softmax'),
])

model.summary()

In [None]:
# Deeper CNN with batch normalisation. This rebinds `model`, replacing the baseline model.
model = keras.models.Sequential([
    # Stage 1: two 3x3 convolutions, pooling, batch normalisation over the channel axis.
    keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu',
                        input_shape=(IMG_HEIGHT, IMG_WIDTH, channels)),
    keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    keras.layers.MaxPool2D(pool_size=(2, 2)),
    keras.layers.BatchNormalization(axis=-1),

    # Stage 2: same pattern with more filters.
    keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    keras.layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    keras.layers.MaxPool2D(pool_size=(2, 2)),
    keras.layers.BatchNormalization(axis=-1),

    # Classifier head.
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(rate=0.5),

    # One output per class. The labels are one-hot encoded with NUM_CATEGORIES, so the
    # output width uses the same constant instead of a hard-coded 43.
    keras.layers.Dense(NUM_CATEGORIES, activation='softmax'),
])

In [None]:
# Compiling the model
# model.compile(
#     loss='categorical_crossentropy', # 다중분류의 로스 함수
#     optimizer='adam',
#     metrics=['accuracy']
# )

In [None]:
lr = 0.001
EPOCHS = 50

# Time-based learning-rate decay: lr_t = lr / (1 + decay_rate * step).
# This is the same formula as the legacy `decay=` optimizer argument. The `lr=` / `decay=`
# arguments are deprecated or rejected by current Keras optimizers, so a schedule is
# passed through `learning_rate=` instead.
lr_schedule = keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=lr,
    decay_steps=1,
    decay_rate=lr / (EPOCHS * 0.5),
)
opt = Adam(learning_rate=lr_schedule)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# Mild geometric augmentation. Flips stay disabled: mirroring a traffic sign can turn it
# into a different class (e.g. "Keep right" / "Keep left").
aug = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    horizontal_flip=False,
    vertical_flip=False,
    fill_mode="nearest")

In [None]:
# Train on augmented batches, validating on the held-out split after every epoch.
train_batches = aug.flow(x_train, y_train, batch_size=32)
history = model.fit(
    train_batches,
    steps_per_epoch=60,  # 60 batches of 32 augmented images are drawn per epoch
    epochs=EPOCHS,
    validation_data=(x_val, y_val),
)

# Final loss / accuracy on the validation split.
model.evaluate(x_val, y_val, verbose=2)

In [None]:
# Training vs. validation curves: figure 0 shows accuracy, figure 1 shows loss.
for figure_id, metric in enumerate(('accuracy', 'loss')):
    plt.figure(figure_id)
    plt.plot(history.history[metric], label=f'training {metric}')
    plt.plot(history.history[f'val_{metric}'], label=f'val {metric}')
    plt.title(metric.capitalize())
    plt.xlabel('epochs')
    plt.ylabel(metric)
    plt.legend()

# VGG-16 모델

In [None]:
def load_data(data_dir, target_size=(64, 64)):
    """Load every image below ``data_dir`` together with its class id.

    Args:
        data_dir: Directory holding one sub-folder per class, named
            "0" .. str(NUM_CATEGORIES - 1).
        target_size: (height, width) each image is resized to while loading.
            Defaults to 64x64, the input size of the models in this section.

    Returns:
        A pair ``(images, labels)``: a list of image arrays (as returned by
        ``img_to_array``) and a parallel list of integer class ids.
    """
    images = []
    labels = []
    for category in range(NUM_CATEGORIES):
        category_dir = os.path.join(data_dir, str(category))
        # os.listdir returns entries in arbitrary order; sorting keeps the sample order,
        # and therefore any seeded split of the data, reproducible.
        for file_name in sorted(os.listdir(category_dir)):
            img = load_img(os.path.join(category_dir, file_name), target_size=target_size)
            images.append(img_to_array(img))  # convert the image to a NumPy array
            labels.append(category)

    return images, labels


images, labels = load_data(train_path)

image_data = np.array(images)
image_labels = np.array(labels)

In [None]:
# 60/40 train/validation split of the 64x64 images. The seed is fixed (as in the first
# split) so that re-running the cell yields the same partition.
x_train, x_val, y_train, y_val = train_test_split(
    image_data, image_labels, test_size=0.4, random_state=42
)

# Scale pixels to 0..1 and one-hot encode the class ids.
x_train = x_train.astype('float32') / 255
x_val = x_val.astype('float32') / 255
y_train = keras.utils.to_categorical(y_train, NUM_CATEGORIES)
y_val = keras.utils.to_categorical(y_val, NUM_CATEGORIES)

In [None]:
from tensorflow.keras import layers

# VGG-16-style network built with the functional API for 64x64 RGB inputs.
inputs = keras.Input(shape=(64, 64, 3))

x = inputs
# (filters, number of stacked 3x3 convolutions) for the five VGG-16 convolutional blocks;
# every block ends with 2x2 max pooling (64 -> 32 -> 16 -> 8 -> 4 -> 2 pixels per side).
for filters, conv_count in ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3)):
    for _ in range(conv_count):
        x = layers.Conv2D(filters, 3, activation='relu', padding="same")(x)
    x = layers.MaxPooling2D(2)(x)

# Classifier head. The original additionally ran the ImageNet head (Dense 4096, 4096 and a
# 1000-way softmax) on the un-flattened feature map before this head, which put a softmax
# in the middle of the network; it is removed. The hidden layers now use ReLU, because
# stacked Dense layers without an activation collapse into a single linear map.
x = layers.Flatten()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dense(NUM_CATEGORIES, activation='softmax')(x)
outputs = x

model_1 = keras.Model(inputs, outputs)
model_1.summary()

In [None]:
# EPOCHS = 30
# lr = 0.001
# opt = Adam(lr=lr, decay=lr / (EPOCHS * 0.5))

# aug = ImageDataGenerator(
#     rotation_range=10,
#     zoom_range=0.15,
#     width_shift_range=0.1,
#     height_shift_range=0.1,
#     shear_range=0.15,
#     horizontal_flip=False,
#     vertical_flip=False,
#     fill_mode="nearest")

# model_1.compile(optimizer='adam',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])

# history_1 = model_1.fit(x_train, y_train, epochs=3, validation_data = (x_val, y_val), steps_per_epoch=60)
# model_1.evaluate(x_val,  y_val, verbose=2)

In [None]:
from time import time
classes = 43
batch = 256
epochs = 3
learning_rate = 0.0001

def results(model):
  adam = Adam(lr=learning_rate)

  model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

  start = time()
  history = model.fit(x_train, y_train, batch_size=batch, epochs=epochs, validation_split=0.2, shuffle = True, verbose=1)
  train_time = time() - start

  model.summary()

  plt.figure(figsize=(12, 12))
  plt.subplot(3, 2, 1)
  plt.plot(history.history['accuracy'], label = 'train_accuracy')
  plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
  plt.xlabel('epoch')
  plt.ylabel('accuracy')
  plt.legend()
  plt.subplot(3, 2, 2)
  plt.plot(history.history['loss'], label = 'train_loss')
  plt.plot(history.history['val_loss'], label = 'val_loss')
  plt.xlabel('epoch')
  plt.ylabel('loss')
  plt.legend()
  plt.show()

  start = time()
  test_loss, test_acc = model.evaluate(x_test, y_test)
  test_time = time() - start
  print('\nTrain time: ', train_time)
  print('Test accuracy:', test_acc)
  print('Test loss:', test_loss)
  print('Test time: ', test_time)

In [None]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import BatchNormalization

model = Sequential()
model.add(VGG19(weights='imagenet', include_top=False, input_shape=(64,64,3)))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(1024, activation='sigmoid'))
model.add(Dense(43, activation='softmax'))

results(model)

In [None]:
plt.figure(0)
plt.plot(history_1.history['accuracy'], label='training accuracy')
plt.plot(history_1.history['val_accuracy'], label='val accuracy')
plt.title('Accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()

plt.figure(1)
plt.plot(history_1.history['loss'], label='training loss')
plt.plot(history_1.history['val_loss'], label='val loss')
plt.title('Loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()

# MobileNet

In [None]:
# All layers come from tensorflow.keras: mixing in classes from the standalone `keras`
# package (as the original import did) can break a tf.keras Sequential model.
from tensorflow.keras.layers import (
    AvgPool2D, BatchNormalization, Conv2D, Dense, DepthwiseConv2D, Dropout,
    Flatten, GlobalAvgPool2D, MaxPool2D, ReLU, Softmax,
)


def _add_bn_relu(net, dropout_rate=None):
    """Append BatchNormalization, an optional Dropout and ReLU to ``net``."""
    net.add(BatchNormalization())
    if dropout_rate:
        net.add(Dropout(dropout_rate))
    net.add(ReLU())


def _add_separable_block(net, filters, strides, dropout_rate=None):
    """Append a 3x3 depthwise convolution followed by a 1x1 pointwise convolution."""
    net.add(DepthwiseConv2D((3, 3), strides=strides, padding='same'))
    _add_bn_relu(net, dropout_rate)
    net.add(Conv2D(filters, (1, 1)))
    _add_bn_relu(net, dropout_rate)


# MobileNet-style network for 64x64 RGB inputs.
model = Sequential()
model.add(Conv2D(32, (3, 3), strides=2, input_shape=(64, 64, 3), padding='same'))
_add_bn_relu(model, 0.25)

# (pointwise filters, depthwise stride) of the separable blocks that use dropout.
for filters, strides in ((64, 1), (128, 2), (128, 1), (256, 2), (256, 1), (512, 2)):
    _add_separable_block(model, filters, strides, 0.25)

# Five identical 512-filter blocks without dropout.
for _ in range(5):
    _add_separable_block(model, 512, 1)

for filters, strides in ((1024, 2), (1024, 2)):
    _add_separable_block(model, filters, strides, 0.25)

# Classifier head. GlobalAvgPool2D already yields a flat vector, so no Flatten is needed.
# The original also applied Dropout to the class logits (between Dense and Softmax),
# which randomly zeroes whole class scores during training; that layer is removed.
model.add(GlobalAvgPool2D())
model.add(Dense(NUM_CATEGORIES))
model.add(Softmax())

EPOCHS = 30

lr = 0.001
# Time-based decay, lr_t = lr / (1 + decay_rate * step): the same formula as the legacy
# `decay=` argument, which (like `lr=`) current Keras optimizers deprecate or reject.
opt = Adam(learning_rate=keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=lr,
    decay_steps=1,
    decay_rate=lr / (EPOCHS * 0.5),
))

model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

model.summary()

In [None]:
# history = model.fit(aug.flow(x_train, y_train, batch_size =32), epochs=5, validation_data = (x_val, y_val), steps_per_epoch=60)
# model_1.evaluate(x_val,  y_val, verbose=2)

# plt.figure(0)
# plt.plot(history.history['accuracy'], label='training accuracy')
# plt.plot(history.history['val_accuracy'], label='val accuracy')
# plt.title('Accuracy')
# plt.xlabel('epochs')
# plt.ylabel('accuracy')
# plt.legend()

# plt.figure(1)
# plt.plot(history.history['loss'], label='training loss')
# plt.plot(history.history['val_loss'], label='val loss')
# plt.title('Loss')
# plt.xlabel('epochs')
# plt.ylabel('loss')
# plt.legend()

In [None]:
# Short training run (5 epochs of 60 steps each), validated on the held-out split.
history = model.fit(
    x_train,
    y_train,
    steps_per_epoch=60,
    epochs=5,
    validation_data=(x_val, y_val),
)
model.evaluate(x_val, y_val, verbose=2)

# Training vs. validation curves: figure 0 shows accuracy, figure 1 shows loss.
for figure_id, metric in enumerate(('accuracy', 'loss')):
    plt.figure(figure_id)
    plt.plot(history.history[metric], label=f'training {metric}')
    plt.plot(history.history[f'val_{metric}'], label=f'val {metric}')
    plt.title(metric.capitalize())
    plt.xlabel('epochs')
    plt.ylabel(metric)
    plt.legend()

In [None]:
# Variant trained without unifying the hyper-parameters with the other models.
# All layers come from tensorflow.keras: mixing in classes from the standalone `keras`
# package (as the original import did) can break a tf.keras Sequential model.
from tensorflow.keras.layers import (
    AvgPool2D, BatchNormalization, Conv2D, Dense, DepthwiseConv2D,
    Flatten, GlobalAvgPool2D, MaxPool2D, ReLU, Softmax,
)


def _add_plain_separable_block(net, filters, strides):
    """Append depthwise 3x3 and pointwise 1x1 convolutions, each followed by BN + ReLU."""
    net.add(DepthwiseConv2D((3, 3), strides=strides, padding='same'))
    net.add(BatchNormalization())
    net.add(ReLU())
    net.add(Conv2D(filters, (1, 1)))
    net.add(BatchNormalization())
    net.add(ReLU())


# MobileNet-style network for 64x64 RGB inputs (no dropout in this variant).
model = Sequential()
model.add(Conv2D(32, (3, 3), strides=2, input_shape=(64, 64, 3), padding='same'))
model.add(BatchNormalization())
model.add(ReLU())

# (pointwise filters, depthwise stride) for each separable block, in order.
_BLOCK_SPECS = (
    [(64, 1), (128, 2), (128, 1), (256, 2), (256, 1), (512, 2)]
    + [(512, 1)] * 5
    + [(1024, 2), (1024, 2)]
)
for filters, strides in _BLOCK_SPECS:
    _add_plain_separable_block(model, filters, strides)

# Classifier head. GlobalAvgPool2D already yields a flat vector, so no Flatten is needed.
# One logit per class: the original Dense(1) + Softmax always outputs 1.0 and cannot be
# trained against the one-hot labels with categorical cross-entropy.
model.add(GlobalAvgPool2D())
model.add(Dense(NUM_CATEGORIES))
model.add(Softmax())

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()


# Stronger augmentation than in the earlier runs. The generator class is referenced by
# its imported name, because the module alias `image` is rebound by a later cell.
# horizontal_flip is off: mirroring turns some signs into another class
# (e.g. "Turn right ahead" / "Turn left ahead", "Keep right" / "Keep left").
aug = ImageDataGenerator(
    rotation_range=40,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=False,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest'
)
testDataGen = ImageDataGenerator(

)

In [None]:
# Generators for streaming images from disk.
# - rescale brings the raw 0..255 pixels into the 0..1 range used for the in-memory
#   training arrays.
# - horizontal_flip is off: mirroring turns some signs into another class
#   (e.g. "Keep right" / "Keep left").
# - The class is referenced by its imported name, because the module alias `image` is
#   rebound by a later cell.
trainDataGen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=40,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=False,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest'
)
# Test images are only rescaled, never augmented.
testDataGen = ImageDataGenerator(
    rescale=1. / 255
)

In [None]:
# Stream the images listed in Train.csv / Test.csv from disk and train on them.

# class_mode='categorical' yields the one-hot labels that categorical cross-entropy
# expects (the original 'raw' mode yields bare integers), and it needs string labels.
train_labels_df = train_df.assign(ClassId=train_df['ClassId'].astype(str))
test_labels_df = test_df.assign(ClassId=test_df['ClassId'].astype(str))
# An explicit class list keeps output index i equal to class id i; the default would
# sort the strings alphanumerically ('0', '1', '10', '11', ...).
class_names = [str(class_id) for class_id in range(NUM_CATEGORIES)]
# Resize to the model's own input size; the original 224x224 did not match the network.
input_size = model.input_shape[1:3]

trainDataGenerator = trainDataGen.flow_from_dataframe(
    dataframe=train_labels_df,
    directory='/content/',
    x_col='Path',
    y_col='ClassId',
    target_size=input_size,
    batch_size=64,
    class_mode='categorical',
    classes=class_names
)

testDataGenerator = testDataGen.flow_from_dataframe(
    dataframe=test_labels_df,
    directory='/content/',
    x_col='Path',
    y_col='ClassId',
    target_size=input_size,
    batch_size=16,
    class_mode='categorical',
    classes=class_names
)

# Model.fit accepts generators directly; fit_generator is deprecated and absent from
# current Keras releases.
history = model.fit(
    trainDataGenerator,
    steps_per_epoch=500,
    epochs=3,
    validation_data=testDataGenerator,
#     validation_steps=800,
    verbose=1)

In [None]:

# Train on augmented in-memory batches and report the validation metrics.
augmented_batches = aug.flow(x_train, y_train, batch_size=64)
history = model.fit(
    augmented_batches,
    steps_per_epoch=500,
    epochs=16,
    validation_data=(x_val, y_val),
)
model.evaluate(x_val, y_val, verbose=2)

In [None]:
# Training vs. validation curves: figure 0 shows accuracy, figure 1 shows loss.
for figure_id, metric in enumerate(('accuracy', 'loss')):
    plt.figure(figure_id)
    plt.plot(history.history[metric], label=f'training {metric}')
    plt.plot(history.history[f'val_{metric}'], label=f'val {metric}')
    plt.title(metric.capitalize())
    plt.xlabel('epochs')
    plt.ylabel(metric)
    plt.legend()

# Model Evaluation

In [None]:
# Predict the official test set with `model` and report the accuracy.
Y_test = pd.read_csv('/content/Test.csv')
test_labels = Y_test["ClassId"].values
test_images = Y_test["Path"].values

a = pathlib.Path('/content/')

# Preprocess exactly as for training: resize to the model's input size and scale the
# pixels to 0..1. (The original always resized to 32x32 and fed raw 0..255 values.)
input_size = model.input_shape[1:3]
# The loop variable is not called `image`, so the keras `image` module alias survives.
output = [
    img_to_array(load_img(os.path.join(a, img_path), target_size=input_size))
    for img_path in test_images
]

X_test = np.array(output, dtype='float32') / 255
y_prob = model.predict(X_test)  # point `model` at the most accurate model before running
pred = y_prob.argmax(axis=-1)

# Accuracy on the test data
print('Test Data accuracy: ',accuracy_score(test_labels, pred)*100)

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the test-set predictions.
labels = test_df["ClassId"].to_numpy()
print(classification_report(labels, pred))

In [None]:
# Show the first 25 test images with their actual and predicted class ids;
# the caption is green for a correct prediction and red for a wrong one.
plt.figure(figsize=(13, 13))

start_index = 0
for offset in range(25):
    sample = start_index + offset
    prediction, actual = pred[sample], test_labels[sample]
    col = 'g' if prediction == actual else 'r'

    plt.subplot(5, 5, offset + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel('Actual={} || Pred={}'.format(actual, prediction), color=col)
    plt.imshow(X_test[sample])
plt.show()

In [None]:
# 분류 잘 안된 것 시각화
# rows = 3
# cols = 4
# fig, axs = plt.subplots(rows, cols, sharex=True, sharey=True, figsize=(25, 8))
# visualize = train_df.sample(rows*cols)

# analys_df_copy = analys_df[analys_df['prediction_type'] == 'Wrong'].copy()
# analys_df_copy = analys_df_copy.sample(frac=1)

# idx = 0
# for i in range(rows):
#     for j in range(cols):
#         img = cv2.imread(analys_df_copy.iloc[idx]['image'])
#         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#         img = cv2.resize(img, (100, 100))
        
#         gt = analys_df_copy.iloc[idx]['gt']
#         pred = analys_df_copy.iloc[idx]['prediction']
        
#         axs[i,j].imshow(img)
#         axs[i,j].set_title('Predicted: {}\nGround truth {}'.format(labels[pred], labels[gt]), fontsize=14)
#         axs[i,j].get_xaxis().set_visible(False)
#         axs[i,j].get_yaxis().set_visible(False)
#         idx += 1
        
# fig.suptitle("Wrong prediction", fontsize=30, y=2.1, x=0.515);
# plt.subplots_adjust(left=None, bottom=None, right=0.9, top=1.9, wspace=None, hspace=None)