In [None]:
# 라이브러리 import
!pip install tensorflow
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input, decode_predictions
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, recall_score
import numpy as np
import os
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
from google.colab import drive
drive.mount('/content/drive')

%cd "/content"
!mkdir img4
!unzip '/content/drive/MyDrive/Colab Notebooks/img4.zip' -d '/content/img4'

In [None]:
# 데이터 디렉토리 설정
base_dir = '/content/img4'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

In [None]:
# data generator
train_gen = ImageDataGenerator(rescale=1./255,
    shear_range=0.2,         # 이미지 굴절 비율
    brightness_range = (0.8,1.2), # 이미지 밝기
    horizontal_flip=True,    # 이미지 수평 반전 여부
)

img_gen = ImageDataGenerator(rescale=1./255)

train_set = train_gen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical')

val_set = img_gen.flow_from_directory(
    validation_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical',
    shuffle=False)

test_set = img_gen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical',
    shuffle=False)

Found 32855 images belonging to 5 classes.
Found 8268 images belonging to 5 classes.
Found 1670 images belonging to 5 classes.


In [None]:
# VGG16 모델 설정 및 학습
vgg16 = VGG16(weights='imagenet', include_top=False)

for layer in vgg16.layers:
    layer.trainable = False

x = vgg16.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.7)(x)   # dropout 레이어
predictions = Dense(5, activation='softmax')(x)
model = Model(inputs=vgg16.input, outputs=predictions)

adam = Adam(lr=0.0001)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5




In [None]:
# 라벨과 언라벨 데이터 비율 설정
labeled_ratio = 0.1  # 라벨 데이터 비율
unlabeled_ratio = 0.9  # 언라벨 데이터 비율

In [None]:
# 라벨 데이터 개수 계산
num_labeled_data = int(len(train_set) * labeled_ratio)
num_unlabeled_data = int(len(train_set) * unlabeled_ratio)

In [None]:
# 라벨과 언라벨 데이터 분할
labeled_data_generator = train_gen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=num_labeled_data,
    class_mode='categorical'
)

unlabeled_data_generator = train_gen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=num_unlabeled_data,
    class_mode='categorical',
    shuffle=False
)


Found 32855 images belonging to 5 classes.
Found 32855 images belonging to 5 classes.


In [None]:
# 라벨 데이터로 모델 학습
model.fit(
    labeled_data_generator,
    epochs=10,
    validation_data=val_set
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f9f8c849de0>

In [None]:
# Pseudo Labeling을 위한 예측 결과 생성
test_set.reset()
pseudo_labels = model.predict(test_set)
pseudo_labels = np.argmax(pseudo_labels, axis=1)



In [None]:
from tensorflow.keras.utils import to_categorical

# Pseudo Labeling된 unlabeled data를 기존 labeled data에 추가
labeled_data = np.concatenate((labeled_data_generator[0][0], unlabeled_data_generator[0][0]))
labeled_labels = np.concatenate((labeled_data_generator[0][1], to_categorical(pseudo_labels, num_classes=5)))

In [None]:
# 모델 재설정 및 학습
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Pseudo Labeling된 데이터로 모델 재학습
model.fit(
    labeled_data[:num_labeled_data],
    labeled_labels[:num_labeled_data],
    epochs=10,
    validation_data=val_set
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f9f8c2e31f0>

In [None]:
# 모델 저장
%cd "/content/drive/MyDrive/DL"
model.save('semi_vgg16_epoch10.h5')

/content/drive/MyDrive/DL


In [None]:
import tensorflow as tf

from keras.utils import load_img, img_to_array, array_to_img
from PIL import Image
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import OneHotEncoder

images = []
labels = []

def load_image():
  global images, labels
  dir = "/content/img4/test/"
  class_dir = ['노균병','노균병유사','정상','흰가루병','흰가루병유사']
  for index in range(len(class_dir)):
    list_dir = dir+class_dir[index]+"/"
    for j in os.listdir(list_dir):
      if j == ".ipynb_checkpoints":
        continue
      img = load_img(list_dir+j)
      test_array = img_to_array(img)
      test_x = tf.expand_dims(img,0)
      images.append(test_x)
      labels.append(index)
load_image()
labels = np.array(labels)

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

y = []
for i in range(len(images)):
  predictions = model.predict(images[i]/255)
  pred = np.argmax(predictions)
  y.append(pred)
class_names = list(test_set.class_indices.keys())
confusion_matrix = confusion_matrix(y, labels)
print(confusion_matrix)
print('Classification Report:')
print(classification_report(y, labels, target_names=class_names))

[[125   0   0   0   0]
 [190  55  15   0   9]
 [ 11  97 367   4  39]
 [ 72   2   1 380   4]
 [  1 245  16  15  22]]
Classification Report:
              precision    recall  f1-score   support

         노균병       0.31      1.00      0.48       125
       노균병유사       0.14      0.20      0.16       269
          정상       0.92      0.71      0.80       518
        흰가루병       0.95      0.83      0.89       459
      흰가루병유사       0.30      0.07      0.12       299

    accuracy                           0.57      1670
   macro avg       0.52      0.56      0.49      1670
weighted avg       0.65      0.57      0.58      1670



In [None]:
from sklearn import metrics

confusion_matrix = metrics.confusion_matrix(y, labels) #test_set
print(confusion_matrix)

[[125   0   0   0   0]
 [190  55  15   0   9]
 [ 11  97 367   4  39]
 [ 72   2   1 380   4]
 [  1 245  16  15  22]]
