# import package

In [1]:
# 資料處理
import pandas as pd
import numpy as np
# 影像讀取
from keras.preprocessing import image
# 進度條
from tqdm import tqdm

Using TensorFlow backend.


# 影像

## 定義&讀取

In [2]:
def load_image(file_name, dataset_name, dataset_path, input_size, array_name):
    data = pd.read_csv(file_name)
    dataset_name = []
    for i in tqdm(range(data.shape[0])):
        # target_size為載入圖片後將大小縮放至設定的大小
        img = image.load_img(dataset_path + data["File_name"][i], 
                             target_size = input_size)
        img = image.img_to_array(img)
        img = img/255 # 圖像歸一化(x-0/255-0)
        dataset_name.append(img)
    array_name = np.array(dataset_name)
    return array_name

In [3]:
train_image = load_image("D:/10979104/碩論程式整理/data_csv/train_image.csv"
                         , "train_image"
                         , "D:/10979104/碩論程式整理/train_data/"
                         , (299,299,3)
                         , "X_train")
valid_image = load_image("D:/10979104/碩論程式整理/data_csv/valid_image.csv"
                         , "valid_image"
                         , "D:/10979104/碩論程式整理/valid_data/"
                         , (299,299,3)
                         , "X_valid")
k_train = np.vstack([train_image, valid_image])

100%|█████████████████████████████████████████████████████████████████████████████| 3328/3328 [00:21<00:00, 157.45it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 832/832 [00:09<00:00, 88.76it/s]


In [4]:
k_test = load_image("D:/10979104/碩論程式整理/data_csv/test_image.csv"
                         , "test_image"
                         , "D:/10979104/碩論程式整理/test_data/"
                         , (299,299,3)
                         , "X_test")

100%|█████████████████████████████████████████████████████████████████████████████| 1040/1040 [00:10<00:00, 101.70it/s]


In [5]:
train_label = pd.read_csv("D:/10979104/碩論程式整理/data_csv/train_label.csv")
valid_label = pd.read_csv("D:/10979104/碩論程式整理/data_csv/valid_label.csv")
k_test_label = pd.read_csv("D:/10979104/碩論程式整理/data_csv/test_label.csv")

In [6]:
k_train_label = pd.concat([train_label, valid_label], ignore_index=True)
k_train_label = np.array(k_train_label)
k_test_label = k_test_label.values.tolist()
k_test_label = np.array(k_test_label)

# model

## 讀取模型

In [7]:
from keras.models import load_model
from keras.models import Sequential
from keras.models import model_from_json
def load_model(file_name, weight):
    with open(file_name, "r")as text_file:
        json_string = text_file.read()
        model = Sequential()
        model = model_from_json(json_string)
        model.load_weights(weight, by_name = False)
    return model

In [8]:
model = load_model("CNN_50_b100_Adam.json", "CNN_50_b100_Adam.h5")

# K-fold

## 參數設定

In [9]:
from sklearn.model_selection import KFold
from sklearn.metrics import roc_curve, auc

In [10]:
# 疊數
num_folds = 10

# 放置各疊分數
acc_per_fold = []
loss_per_fold = []
auc_per_fold = []

# 設定K-fold代碼
fold_no = 1

In [11]:
kfold = KFold(n_splits=num_folds, shuffle=True)

### 釋放記憶體

In [18]:
# 釋放記憶體
import gc
# del(img)
# del(train_image, valid_image)
gc.collect()

16587

### 確認target類型

In [None]:
from sklearn.utils.multiclass import type_of_target

In [None]:
print(type_of_target(k_test_label))

### k-fold & result

In [19]:
for train, valid in kfold.split(k_train, k_train_label):
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print('--------------------------------------------')
    print(f'Training for fold {fold_no}...')
    
    history = model.fit(k_train[train],
                        k_train_label[train],
                        batch_size=100,
                        epochs=1,
                        verbose=1)
    score = model.evaluate(k_test,
                           k_test_label,
                           verbose=0)
    
    y_score = model.predict_proba(k_test)
    
#     y_list = k_test_label.values.tolist() # 先將標籤轉為list
#     y_npa = np.array(y_list)
    
    
    fpr, tpr, thresholds = roc_curve(k_test_label.ravel(), y_score.ravel())
    
    roc = auc(fpr, tpr)
    
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {score[0]}; {model.metrics_names[1]} of {score[1]*100}%')
    
    acc_per_fold.append(score[1]*100)
    loss_per_fold.append(score[0])
    auc_per_fold.append(roc)
    
    fold_no = fold_no + 1
    
    print('---------------------------------------------------')
    print('score of fold')
    
    for i in range(0, len(acc_per_fold)):
        print('-------------------------------------------------')
        print(f'>fold {i+1} - loss: {loss_per_fold[i]} - Accuracy: {acc_per_fold[i]}% - Auc: {auc_per_fold[i]}')
    
    print('-------------------------------------------------------')
    print('Average scores for all folds: ')
    print(f'> Accuracy: {np.mean(acc_per_fold)} (+-{np.std(acc_per_fold)})')
    print(f'> Loss: {np.mean(acc_per_fold)} (+-{np.std(loss_per_fold)})')
    print(f'> Auc: {np.mean(auc_per_fold)}')      

--------------------------------------------
Training for fold 1...
Epoch 1/1
Score for fold 1: loss of 0.6323962750169463; accuracy of 87.9807710647583%
---------------------------------------------------
score of fold
-------------------------------------------------
>fold 1 - loss: 0.6323962750169463 - Accuracy: 87.9807710647583% - Auc: 0.9790475530078896
-------------------------------------------------------
Average scores for all folds: 
> Accuracy: 87.9807710647583 (+-0.0)
> Loss: 87.9807710647583 (+-0.0)
> Auc: 0.9790475530078896
--------------------------------------------
Training for fold 2...
Epoch 1/1
Score for fold 2: loss of 0.8212475477950647; accuracy of 88.07692527770996%
---------------------------------------------------
score of fold
-------------------------------------------------
>fold 1 - loss: 0.6323962750169463 - Accuracy: 87.9807710647583% - Auc: 0.9790475530078896
-------------------------------------------------
>fold 2 - loss: 0.8212475477950647 - Accurac

Epoch 1/1
Score for fold 9: loss of 0.8003071075571755; accuracy of 87.21153736114502%
---------------------------------------------------
score of fold
-------------------------------------------------
>fold 1 - loss: 0.6323962750169463 - Accuracy: 87.9807710647583% - Auc: 0.9790475530078896
-------------------------------------------------
>fold 2 - loss: 0.8212475477950647 - Accuracy: 88.07692527770996% - Auc: 0.9762439903846153
-------------------------------------------------
>fold 3 - loss: 0.8267542837330928 - Accuracy: 87.88461685180664% - Auc: 0.9769827107988166
-------------------------------------------------
>fold 4 - loss: 0.7522109628465334 - Accuracy: 88.07692527770996% - Auc: 0.9747232495069034
-------------------------------------------------
>fold 5 - loss: 0.764246276285065 - Accuracy: 87.40384578704834% - Auc: 0.976666050295858
-------------------------------------------------
>fold 6 - loss: 0.6858436795347818 - Accuracy: 88.65384459495544% - Auc: 0.973980984960552