In [18]:
import zipfile
import os
import glob

from PIL import Image

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
import numpy as np

# 데이터 압축 풀기

In [50]:
# 이미지 압출 푸는 소스
def unzip_file(root_path,foldername):
    path = root_path+foldername
    zip_file_names = [fn for fn in os.listdir(path) if fn.endswith('zip')]
    print(zip_file_names)
    count = 0
    for zip_name in zip_file_names:
        with zipfile.ZipFile(path+'/'+zip_name) as myzip:            
            image_list = myzip.infolist()
            for i, image in enumerate(image_list):
                image.filename = f'{foldername}_{i+count}'
                myzip.extract(image, path=root_path+"extracted/"+foldername)
            count += len(image_list)
            print(f'count{count}')
         

In [51]:
path = os.getenv('HOME')+'/aiffel/rock_scissor_paper_data/'
hand_type = ['rock', 'paper', 'scissor']
for foldername in hand_type:
    print(foldername)
    unzip_file(path,foldername)


rock
['rock.zip', 'rocks_complete.zip', 'rock(1).zip', 'rock(3).zip', 'rock(2).zip']
count100
count1205
count1305
count1405
count1505
paper
['papers_complete.zip', 'paper(2).zip', 'paper.zip', 'paper(1).zip', 'paper(3).zip']
count1108
count1208
count1308
count1408
count1508
scissor
['scissor(2).zip', 'scissor.zip', 'scissors.zip', 'scissors_complete.zip', 'scissor(1).zip']
count100
count200
count300
count1294
count1394


## 파일 경로 설정

In [3]:
root_path = os.getenv('HOME')+'/aiffel/rock_scissor_paper_data/'
extracted_path = root_path + "extracted/"
test_path = root_path + "test/"

# 데이터 준비

## 사이즈 전처리

In [53]:
img = Image.open(path+'extracted/rock/rock_0')
img.size

(224, 224)

In [54]:
def resize_image(img_path):
    images=glob.glob(img_path+"/"+"*")  
    print(f"{len(images)} will be resized")
    
    target_size = (28,28)
    for img in images:
        old_image = Image.open(img)
        new_image = old_image.resize(target_size, Image.ANTIALIAS)
        new_image.save(img, "JPEG")
        
    print(f"{len(images)} image resized")

In [55]:
for foldername in os.listdir(extracted_path):
    print(foldername)
    resize_image(extracted_path+foldername)

rock
1505 will be resized
1505 image resized
paper
1508 will be resized
1508 image resized
scissor
1394 will be resized
1394 image resized


In [56]:
img = Image.open(extracted_path+'rock/rock_0')
img.size

(28, 28)

## 가위, 바위, 보 가져오는 함수

In [138]:
""" 이미지 load
Args:
    img_root_path: 이미지 루트 경로
    number_of_data: 로드할 데이터 개수
Returns:
    이미지 데이터, 해당 이미지의 라벨
"""
def load_data(img_root_path, number_of_data = 300):
    
        """ 원하는 타입의 이미지 데이터를 원하는 만큼 랜덤하게 뽑아내는 generator
        Args:
            path: 이미지 가져올 위치
            hand_type: 분류 id -> {0:'scissor', 1:'rock', 2:'paper'}
            count_data: 랜덤하게 뽑을 개수
        Yield:
            이미지 데이터를 int32 타입의 ndarray로 반환
        """
        def get_image_generator(path, hand_type, count_data):
            _path = path+hand_type+'/*'
            result = np.array([])
            rng = np.random.default_rng()
            test = rng.choice(np.array(glob.glob(_path)), count_data, replace=False)
            for file in test:
                img = np.array(Image.open(file),dtype=np.int32)  
                yield img
    
    
        img_size=  28
        color=3
        # container 생성
        imgs = np.zeros(number_of_data * img_size * img_size * color, dtype=np.int32).reshape(number_of_data, img_size,img_size,color)
        labels = np.zeros(number_of_data, dtype=np.int32)
        # 가위 = 0, 바위 = 1, 보 = 2    
        count = 0
        # number_of_data가 3의 배수가 아니여서 쓰레기 값이 들어가는 것을 막기 위해서 나머지만큼 클래스 순서대로 채운다.
        quot, remainder = divmod(number_of_data, 3)
        hand_type = {0:'scissor', 1:'rock', 2:'paper'}
        for idx, _type in hand_type.items():
            # 나머지와 동일한 type의 개수를 나머지만큼 더한다.
            if idx == remainder:
                _quot = quot+remainder
            else:
                _quot = quot
                
            for img in get_image_generator(img_root_path, _type, _quot):
                imgs[count,:,:,:] = img
                labels[count] = idx
                count += 1
            
        return imgs, labels
        
        
""" 테스트 이미지 load
Args:
    number_of_data: 로드할 데이터 개수
Returns:
    이미지 데이터, 해당 이미지의 라벨
"""        
def load_test_data(number_of_data = 300):
    _path = test_path
    return load_data(_path, number_of_data)

""" 훈련 이미지 load
Args:
    number_of_data: 로드할 데이터 개수
Returns:
    이미지 데이터, 해당 이미지의 라벨
""" 
def load_train_data(number_of_data = 300):
    _path = extracted_path
    return load_data(_path, number_of_data)    

# 딥러닝 네트워크 설계하기

In [139]:
""" 이미지 증강 레이어 생성 함수
Returns:
    모델
""" 
def make_augmentaion():
    model = keras.models.Sequential()
    model.add(keras.layers.experimental.preprocessing.RandomRotation(0.1))
    return model

In [140]:
""" 이미지 딥러닝 네트워크 생성 함수
Args:
    n_conv_1 = 1번째 합성곱층 하이퍼파라미터
    n_conv_2 = 2번째 합성곱층 하이퍼파라미터
    n_conv_3 = 3번째 합성곱층 하이퍼파라미터
    n_dense = Dense 층 하이퍼파라미터
Returns:
    모델
""" 
def make_model(n_conv_1 = 16, n_conv_2 = 32, n_conv_3=32, n_dense = 32):
    model = keras.models.Sequential()
    # 입력부
    model.add(keras.layers.Conv2D(n_conv_1,(3,3), activation='relu', input_shape=(28,28,3), strides=1))
    model.add(keras.layers.MaxPool2D((2,2)))
    model.add(make_augmentaion())
    model.add(keras.layers.Conv2D(n_conv_2, (3,3), activation='relu'))
    model.add(keras.layers.MaxPool2D((2,2)))
    model.add(keras.layers.Conv2D(n_conv_3, (3,3), activation='relu'))
    model.add(keras.layers.MaxPool2D((2,2)))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(n_dense, activation='relu'))
    model.add(keras.layers.Dropout(0.1))
    # 출력부
    model.add(keras.layers.Dense(3, activation='softmax'))
    model.summary()
    return model

# 모델 훈련

In [150]:
model = make_model()
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
for _ in range(5):
    x_train, y_train = load_train_data(1000)
    # 정규화
    x_train_norm = x_train/255.0
    model.fit(x_train_norm, y_train, epochs=15)
    test_loss, test_accuracy = model.evaluate(x_train_norm, y_train, verbose=2)
    print(test_loss, test_accuracy)


Model: "sequential_60"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_82 (Conv2D)           (None, 26, 26, 16)        448       
_________________________________________________________________
max_pooling2d_81 (MaxPooling (None, 13, 13, 16)        0         
_________________________________________________________________
sequential_61 (Sequential)   (None, 13, 13, 16)        0         
_________________________________________________________________
conv2d_83 (Conv2D)           (None, 11, 11, 32)        4640      
_________________________________________________________________
max_pooling2d_82 (MaxPooling (None, 5, 5, 32)          0         
_________________________________________________________________
conv2d_84 (Conv2D)           (None, 3, 3, 32)          9248      
_________________________________________________________________
max_pooling2d_83 (MaxPooling (None, 1, 1, 32)        

Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
32/32 - 0s - loss: 0.1880 - accuracy: 0.9270
0.18802893161773682 0.9269999861717224


# 모델 평가

In [151]:
# 테스트 데이터 load
x_test, y_test = load_test_data()
# 정규화
x_test_norm = x_test/255.0

In [152]:
test_loss, test_accuracy = model.evaluate(x_test_norm, y_test, verbose=2)
print(test_loss, test_accuracy)

10/10 - 0s - loss: 1.8913 - accuracy: 0.7233
1.8912900686264038 0.7233333587646484
