In [None]:
# 라이브러리 import
!pip install tensorflow

from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, recall_score
import numpy as np
import os
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
from google.colab import drive
drive.mount('/content/drive')

%cd "/content"
!mkdir img4
!unzip '/content/drive/MyDrive/Colab Notebooks/img4.zip' -d '/content/img4'

In [3]:
# 데이터 디렉토리 설정
base_dir = '/content/img4'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

In [4]:
# data generator
train_gen = ImageDataGenerator(rescale=1./255,
    shear_range= 0.2,
    brightness_range = (0.8,1.2),
    horizontal_flip=True,
)

img_gen = ImageDataGenerator(rescale=1./255)

train_set = train_gen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical')

val_set = img_gen.flow_from_directory(
    validation_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical',
    shuffle=False)

test_set = img_gen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical',
    shuffle=False)

Found 32855 images belonging to 5 classes.
Found 8268 images belonging to 5 classes.
Found 1670 images belonging to 5 classes.


In [5]:
# ResNet50 모델 설정 및 학습
res = ResNet50(weights='imagenet', include_top=False)

for layer in res.layers:
    layer.trainable = False

x = res.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.7)(x)   # dropout 레이어
predictions = Dense(5, activation='softmax')(x)
model = Model(inputs=res.input, outputs=predictions)

adam = Adam(lr=0.0001)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5




In [6]:
# 라벨과 언라벨 데이터 비율 설정
labeled_ratio = 0.1  # 라벨 데이터 비율
unlabeled_ratio = 0.9  # 언라벨 데이터 비율

In [8]:
# 라벨 데이터 개수 계산
num_labeled_data = int(len(train_set) * labeled_ratio)
num_unlabeled_data = int(len(train_set) * unlabeled_ratio)

In [9]:
# 라벨과 언라벨 데이터 분할
labeled_data_generator = train_gen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=num_labeled_data,
    class_mode='categorical'
)

unlabeled_data_generator = train_gen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=num_unlabeled_data,
    class_mode='categorical',
    shuffle=False
)


Found 32855 images belonging to 5 classes.
Found 32855 images belonging to 5 classes.


In [10]:
# 라벨 데이터로 모델 학습
model.fit(
    labeled_data_generator,
    epochs=10,
    validation_data=val_set
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fcc9cbcc070>

In [11]:
# Pseudo Labeling을 위한 예측 결과 생성
test_set.reset()
pseudo_labels = model.predict(test_set)
pseudo_labels = np.argmax(pseudo_labels, axis=1)



In [12]:
from tensorflow.keras.utils import to_categorical

# Pseudo Labeling된 unlabeled data를 기존 labeled data에 추가
labeled_data = np.concatenate((labeled_data_generator[0][0], unlabeled_data_generator[0][0]))
labeled_labels = np.concatenate((labeled_data_generator[0][1], to_categorical(pseudo_labels, num_classes=5)))

In [13]:
# 모델 재설정 및 학습
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

In [14]:
# Pseudo Labeling된 데이터로 모델 재학습
model.fit(
    labeled_data[:num_labeled_data],
    labeled_labels[:num_labeled_data],
    epochs=10,
    validation_data=val_set
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fcc644fa050>

In [15]:
# 모델 저장
%cd "/content/drive/MyDrive/DL"
model.save('semi_resnet50_epoch10.h5')

/content/drive/MyDrive/DL


In [16]:
import tensorflow as tf

from keras.utils import load_img, img_to_array, array_to_img
from PIL import Image
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import OneHotEncoder

images = []
labels = []

def load_image():
  global images, labels
  dir = "/content/img4/test/"
  class_dir = ['노균병','노균병유사','정상','흰가루병','흰가루병유사']
  for index in range(len(class_dir)):
    list_dir = dir+class_dir[index]+"/"
    for j in os.listdir(list_dir):
      if j == ".ipynb_checkpoints":
        continue
      img = load_img(list_dir+j)
      test_array = img_to_array(img)
      test_x = tf.expand_dims(img,0)
      images.append(test_x)
      labels.append(index)

load_image()
labels = np.array(labels)

In [17]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

y = []
for i in range(len(images)):
  predictions = model.predict(images[i]/255)
  print(predictions)
  # break
  # pred = np.argmax(predictions, axis=1)
  pred = np.argmax(predictions)
  y.append(pred)
print(y)
# print(one_label)
class_names = list(test_set.class_indices.keys())
confusion_matrix = confusion_matrix(y, labels)
print('Classification Report:')
print(classification_report(y, labels, target_names=class_names))

[[0.31386793 0.19364177 0.22705789 0.15054855 0.11488389]]
[[0.2589748  0.2531761  0.21525835 0.14065948 0.13193123]]
[[0.19361095 0.2864363  0.37252086 0.11096472 0.03646717]]
[[0.25240266 0.29175383 0.26736382 0.1324352  0.05604447]]
[[0.34003752 0.20130777 0.26301    0.17326105 0.02238367]]
[[0.29082114 0.21751213 0.2760257  0.14589831 0.06974271]]
[[0.21176757 0.283271   0.32457358 0.15002121 0.03036665]]
[[0.27229327 0.22574878 0.29381818 0.1375404  0.0705994 ]]
[[0.2534798  0.26690668 0.20831712 0.12739426 0.14390215]]
[[0.26422405 0.22403933 0.2689326  0.12060042 0.12220358]]
[[0.29738867 0.20157573 0.2690692  0.16006231 0.07190406]]
[[0.27005547 0.24174508 0.22617877 0.12266093 0.1393597 ]]
[[0.21237235 0.31415975 0.27779093 0.10919891 0.08647805]]
[[0.3443486  0.19503671 0.22956622 0.16803695 0.06301154]]
[[0.24272516 0.22294784 0.19135274 0.11602805 0.2269462 ]]
[[0.27467138 0.25148317 0.25400078 0.1441519  0.07569273]]
[[0.26820648 0.27510417 0.23033622 0.14155114 0.08480199

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [18]:
from sklearn import metrics

confusion_matrix = metrics.confusion_matrix(y, labels) #test_set
print(confusion_matrix)

[[163  45  62 289  14]
 [ 88  39  35   7  12]
 [116 121 243   4  33]
 [  0   0   0   0   0]
 [ 32 194  59  99  15]]
