In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Library 설치**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import pickle
import joblib

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras import models, layers, callbacks
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization

# ignoring warnings
import warnings
warnings.simplefilter("ignore")

import os, cv2, json
from PIL import Image
from tqdm import tqdm
import albumentations as A
import random

**데이터 불러오기**

In [None]:
# Common base path: train.csv, the train_images/ folder and the saved model
# are all addressed relative to this directory.
path = '/content/drive/MyDrive/usg_2023/usg1_dataset/usg1_dataset/'

In [None]:
# Load the training annotations; later cells read its 'image_name' and
# 'label' columns.
train_labels = pd.read_csv(path + 'train.csv')

**탐색적 자료 분석**

In [None]:
# Check for class imbalance: map every label to the number of rows it has.
label_counts = train_labels['label'].value_counts()
class_dict = dict(zip(label_counts.index, label_counts.values))
class_dict

In [None]:
# Visualize the class imbalance as a bar chart of rows per label.
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize = (6, 4))

# Keep only the bottom spine so the bars sit on a single baseline.
for side in ['top', 'right', 'left']:
    ax.spines[side].set_visible(False)
ax.spines['bottom'].set_color('black')

# One colour per class, materialized as a list: a bare `reversed(...)`
# iterator can only be consumed once, and a fixed 5-colour palette would
# repeat colours when there are more than 5 classes.
n_classes = train_labels['label'].nunique()
bar_colors = list(reversed(sns.color_palette("viridis", n_classes)))

# The labels must be passed as the keyword `x`; recent seaborn releases
# only accept `data` positionally.
sns.countplot(x = train_labels['label'], ax = ax, edgecolor = 'black',
              palette = bar_colors)
plt.xlabel('Classes', fontfamily = 'serif', size = 15)
plt.ylabel('Count', fontfamily = 'serif', size = 15)
plt.xticks(fontfamily = 'serif', size = 12)
plt.yticks(fontfamily = 'serif', size = 12)
ax.grid(axis = 'y', linestyle = '--', alpha = 0.9)
plt.show()

**데이터 전처리**

In [None]:
# Load every training image listed in train.csv (in row order) and resize
# it to 150x150, the input size of the model defined further down.
# NOTE: cv2.imread decodes images in BGR channel order; images used at
# inference time should be loaded the same way.
train_data = []
for filename in tqdm(train_labels['image_name'], total=train_labels.shape[0]):
    image_path = path + 'train_images/' + filename
    img = cv2.imread(image_path)
    # cv2.imread signals a missing/corrupt file by returning None instead
    # of raising; fail here with the offending path rather than inside
    # cv2.resize with an opaque assertion error.
    if img is None:
        raise FileNotFoundError('Could not read training image: ' + image_path)
    train_data.append(cv2.resize(img, (150, 150)))

In [None]:
# Sanity check: number of images loaded so far.
len(train_data)

In [None]:
# Labels as a plain Python list so that labels of augmented samples can be
# appended during oversampling.
Y_train = list(train_labels['label'].values)

In [None]:
# Image augmentation pipeline built with albumentations. Each step is
# applied independently with its own probability `p` where one is given.
augmentation_steps = [
    A.Blur(p=0.5, blur_limit=(3, 7)),
    A.Rotate(p=0.5, limit=(-20, 20)),
    A.ShiftScaleRotate(
        p=0.5,
        shift_limit=0.1,
        scale_limit=0.2,
        rotate_limit=20,
    ),
    A.ImageCompression(
        p=0.5,
        always_apply=False,
        quality_lower=56,
        quality_upper=100,
        compression_type=1,
    ),
    A.GaussNoise(mean=0, var_limit=(10.0, 50.0)),
]
transform = A.Compose(augmentation_steps)

In [None]:
# Size of the largest class; used as the oversampling target for every class.
max_classnum = max(class_dict.values())
max_classnum

In [None]:
# Oversample with augmentation until every class has max_classnum samples.
# NOTE(review): this runs before the train/validation split, so augmented
# copies of one source image can end up in both sets and make the
# validation metrics optimistic.
for key in tqdm(class_dict):
    # Row labels of the original images of this class; they double as
    # positions in train_data because the images were loaded in row order.
    idxlist = train_labels[train_labels['label']==key].index
    # Compute the deficit from the current Y_train rather than from the
    # original counts, so re-running this cell does not append the same
    # deficit a second time and unbalance the classes again.
    n_missing = max_classnum - Y_train.count(key)
    for _ in range(n_missing):
        transformed = transform(image=train_data[random.choice(idxlist)])
        train_data.append(transformed['image'])
        Y_train.append(key)

In [None]:
# Sanity check: images and labels must have the same length after oversampling.
len(train_data), len(Y_train)

In [None]:
# Stack the list of images into a single array and display its shape.
X_train = np.array(train_data)
X_train.shape

In [None]:
# Convert the label list to an array. Y_train is rebound from list to
# ndarray here, so the oversampling cell (which uses list.append) cannot be
# re-run after this one.
Y_train = np.array(Y_train)
Y_train.shape

In [None]:
# Stratified 80/20 train/validation split with a fixed seed.
# train_test_split comes from the imports cell at the top of the notebook.
x_train, x_val, y_train, y_val = train_test_split(X_train, Y_train, test_size=0.2, stratify = Y_train, random_state=42)
x_train.shape, x_val.shape, y_train.shape, y_val.shape

In [None]:
# One-hot encode the labels for the categorical_crossentropy loss.
# NOTE(review): num_classes is not passed, so the width is inferred from the
# labels; presumably they are 0..9 to match the 10-unit softmax — confirm.
# This overwrites y_train / y_val, so run it only once per split.
y_train = tf.keras.utils.to_categorical(y_train)
y_val = tf.keras.utils.to_categorical(y_val)
y_val

In [None]:
# Shapes after one-hot encoding the labels.
x_train.shape, x_val.shape, y_train.shape, y_val.shape

## 모델

**모델 학습**

In [None]:
# Stop when val_loss has not improved by at least 0.0005 for 7 consecutive
# epochs, and roll the model back to the best weights seen.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.0005,
    patience=7,
    restore_best_weights=True,
)

# CNN classifier for 150x150 3-channel images with a 10-way softmax output.
model = Sequential([
    Input(shape=(150, 150, 3)),

    # Convolutional stage 1
    Conv2D(filters=16, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Conv2D(filters=16, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling2D(strides=(2, 2)),
    Conv2D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),

    # Convolutional stage 2
    Conv2D(filters=32, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Conv2D(filters=32, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling2D(strides=(2, 2)),
    Dropout(rate=0.25),

    # Fully connected classification head
    Flatten(),
    Dense(units=512, activation='relu'),
    Dropout(rate=0.25),
    Dense(units=1024, activation='relu'),
    Dropout(rate=0.5),
    Dense(units=10, activation='softmax'),
])

model.summary()

In [None]:
# Confirm the training tensor shape matches the model's (150, 150, 3) input.
x_train.shape

In [None]:
# Compile with Adam and categorical cross-entropy (labels are one-hot), then
# train for at most 55 epochs; early stopping may end training sooner.
model.compile(optimizer = 'Adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(x_train,y_train, batch_size=32,
                           epochs=55,
                           verbose=2,
                           validation_data=(x_val, y_val),
                           # `callbacks` is documented as a list of callback
                           # instances, so wrap the single callback.
                           callbacks=[early_stopping])

In [None]:
# Helpers for inspecting the loss and accuracy curves
def plot_loss_curve(history):
    """Plot training and validation loss per epoch.

    Args:
        history: mapping with 'loss' and 'val_loss' sequences, one value per
            epoch (e.g. the `.history` attribute of the object returned by
            `model.fit`).
    """
    plt.figure(figsize = (5,3))

    plt.plot(history['loss'])
    plt.plot(history['val_loss'])

    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['train','val'], loc = 'upper right')
    # Call show(): a bare `plt.show` only references the function, so the
    # figure was never explicitly rendered at this point.
    plt.show()
    
def plot_accuracy_curve(history):
    """Plot training and validation accuracy per epoch.

    Args:
        history: mapping with 'accuracy' and 'val_accuracy' sequences, one
            value per epoch (e.g. the `.history` attribute of the object
            returned by `model.fit`).
    """
    plt.figure(figsize = (5,3))

    plt.plot(history['accuracy'])
    plt.plot(history['val_accuracy'])

    plt.title('model Accuracy')
    plt.xlabel('epoch')
    plt.ylabel('Acc')
    plt.legend(['train','val'], loc = 'upper right')
    # Call show(): a bare `plt.show` only references the function, so the
    # figure was never explicitly rendered at this point.
    plt.show()

In [None]:
# Plot both curves and print the metrics recorded for the last epoch run.
# NOTE(review): early stopping uses restore_best_weights=True, so the final
# model weights may come from an earlier epoch than the values printed here.
epoch_metrics = history.history

plot_loss_curve(epoch_metrics)
last_train_loss = epoch_metrics['loss'][-1]
last_val_loss = epoch_metrics['val_loss'][-1]
print('train loss ={}, validation loss = {}'.format(last_train_loss, last_val_loss))

plot_accuracy_curve(epoch_metrics)
last_train_acc = epoch_metrics['accuracy'][-1]
last_val_acc = epoch_metrics['val_accuracy'][-1]
print('train Acc ={}, validation Acc = {}'.format(last_train_acc, last_val_acc))

**모델 저장**

In [None]:
# Save the trained model in HDF5 format under the shared dataset path.
model_dir = path + '문제1/모델/'
# Create the target folder first; the save is expected to fail if the
# directory does not exist yet.
os.makedirs(model_dir, exist_ok=True)
model.save(model_dir + 'model.h5')