## 评估测试模型效果，加载模型

In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sklearn
import sys
import tensorflow as tf
import time

from tensorflow import keras

# Record the TensorFlow and interpreter versions, then the name and version
# of every library this notebook relies on.
print(tf.__version__)
print(sys.version_info)
for module in (mpl, np, pd, sklearn, tf, keras):
    print(f'{module.__name__} {module.__version__}')

2.0.0
sys.version_info(major=3, minor=6, micro=2, releaselevel='final', serial=0)
matplotlib 3.1.2
numpy 1.18.0
pandas 0.25.3
sklearn 0.22
tensorflow 2.0.0
tensorflow_core.keras 2.2.4-tf


In [2]:
# Load the already-trained model from disk and print its layer summary.
model = keras.models.load_model('./save_model/resnet50_0320')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             multiple                  23587712  
_________________________________________________________________
dense (Dense)                multiple                  20490     
Total params: 23,608,202
Trainable params: 23,555,082
Non-trainable params: 53,120
_________________________________________________________________


In [3]:
# Category names; a name's position in this list is the integer class index
# used when predictions are decoded back to labels.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# CSV that enumerates the test image ids.
test_csv_file = './cifar10/sampleSubmission.csv'

# The test folder is mixed with roughly 290k invalid images added to prevent
# cheating. Because the folder is so large, loading it ran into problems, so
# an absolute directory is used here.
test_folder = 'C:/test'

def parse_csv_file(filepath, folder):
    """Parse an ``id,label`` CSV file into ``(image path, label)`` pairs.

    The first line of the file is treated as a header and skipped. Every
    remaining non-blank row yields one tuple: ``folder`` joined with
    ``<id>.png``, and the label text exactly as it appears in the file.

    Args:
        filepath: Path of the CSV file to read.
        folder: Directory that is prepended to every image file name.

    Returns:
        A list of ``(image_full_path, label_str)`` tuples in file order.
        An empty or header-only file gives an empty list.

    Raises:
        ValueError: If a non-blank row does not contain exactly two fields.
    """
    import csv  # local import: the notebook's import cell does not provide it

    results = []
    # newline='' lets the csv module do its own line-ending handling.
    with open(filepath, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # drop the header row; tolerate an empty file
        for row in reader:
            # Blank (or whitespace-only) lines, e.g. a trailing empty line,
            # carry no record and must not abort the whole parse.
            if not any(field.strip() for field in row):
                continue
            if len(row) != 2:
                raise ValueError(
                    '%s: line %d: expected "id,label", got %r'
                    % (filepath, reader.line_num, row))
            image_id, label_str = row
            image_full_path = os.path.join(folder, image_id + '.png')
            results.append((image_full_path, label_str))
    return results

import pprint

test_csv_info = parse_csv_file(test_csv_file, test_folder)

# Preview the first five (path, label) pairs, then the total number of entries.
pprint.pprint(test_csv_info[:5])
print(len(test_csv_info))

[('C:/test\\1.png', 'cat'),
 ('C:/test\\2.png', 'cat'),
 ('C:/test\\3.png', 'cat'),
 ('C:/test\\4.png', 'cat'),
 ('C:/test\\5.png', 'cat')]
300000


In [4]:
# Wrap the (path, label) pairs in a DataFrame. The column names are given to
# the constructor rather than assigned afterwards, so an empty parse result
# still yields a well-formed (empty) two-column frame instead of raising a
# length-mismatch error.
test_df = pd.DataFrame(test_csv_info, columns=['filepath', 'class'])
print(test_df.head())

        filepath class
0  C:/test\1.png   cat
1  C:/test\2.png   cat
2  C:/test\3.png   cat
3  C:/test\4.png   cat
4  C:/test\5.png   cat


In [5]:
# Input geometry and batching used for inference.
height = 32
width = 32
channels = 3
batch_size = 32
num_classes = 10

# The only preprocessing is multiplying every pixel value by 1/255.
test_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255)

# The 'filepath' column is built from the absolute test_folder, so no base
# directory should be joined in front of it: `directory` is None, which is
# what flow_from_dataframe documents for absolute paths. (The previous './'
# only worked because joining a prefix onto an absolute path discards the
# prefix.)
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    directory=None,
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,         # presumably only affects shuffling / random transforms; neither is enabled here
    shuffle=False,  # keep file order so prediction row i belongs to test_df row i
    class_mode="sparse")
test_num = test_generator.samples
print(test_num)

Found 300000 validated image filenames belonging to 10 classes.
300000


In [7]:
%%time
# Parallel inference over the test generator. `workers` is the number of
# parallel workers (16 here): they are processes when
# use_multiprocessing = True and threads when it is False (as here).
test_predict = model.predict_generator(test_generator,
                                       workers = 16,
                                       use_multiprocessing = False,
                                       verbose = 1)

Wall time: 1min 51s


In [8]:
# Sanity check of the prediction array: its shape, then the first five rows.
print(test_predict.shape)
print(test_predict[:5])

(300000, 10)
[[1.5346359e-01 4.2915123e-04 8.1389570e-01 3.2077865e-03 5.1078358e-04
  1.1555092e-02 8.6429166e-03 1.3687456e-03 1.1281075e-03 5.7981159e-03]
 [9.9995577e-01 1.2881770e-07 3.1638981e-05 6.7410315e-06 2.2979153e-11
  7.4596824e-07 3.5974704e-06 2.9561619e-07 1.0550895e-06 2.2434120e-08]
 [5.3581505e-08 9.8996818e-01 3.3947944e-09 1.8416473e-07 6.4708289e-10
  7.5173432e-09 2.3868357e-05 2.0996008e-10 1.5832169e-08 1.0007801e-02]
 [2.5378698e-05 1.4155447e-05 9.9151812e-06 8.2114102e-06 1.1300784e-05
  1.5836937e-05 9.7873526e-05 6.5446568e-07 9.9980921e-01 7.5528478e-06]
 [2.6436636e-01 1.1524212e-06 7.3422664e-01 2.6001662e-06 4.8840931e-04
  1.9189991e-07 4.2207054e-07 1.4134720e-05 8.8338088e-04 1.6574500e-05]]


In [9]:
# For every row take the column with the largest score as the predicted
# class index, then translate each index into its class name.
test_predict_class_indices = test_predict.argmax(axis=1)
print(test_predict_class_indices[:20])
test_predict_class = [class_names[idx] for idx in test_predict_class_indices]
print(test_predict_class[:20])

[2 0 1 8 2 3 0 3 0 3 2 6 7 6 5 2 5 4 0 3]
['bird', 'airplane', 'automobile', 'ship', 'bird', 'cat', 'airplane', 'cat', 'airplane', 'cat', 'bird', 'frog', 'horse', 'frog', 'dog', 'bird', 'dog', 'deer', 'airplane', 'cat']


In [11]:
def generate_submissions(filename, predict_class):
    """Write predictions to ``filename`` as an ``id,label`` CSV file.

    A header line ``id,label`` is written first, followed by one line per
    prediction. Ids are the 1-based position of each prediction, so the
    caller is expected to pass predictions already ordered by image id.
    An existing file is overwritten.

    Args:
        filename: Path of the CSV file to create. Missing parent directories
            are created.
        predict_class: Iterable of predicted labels, in id order.
    """
    # open() does not create directories; make sure the target one exists so
    # a fresh checkout does not fail after the expensive prediction step.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(filename, 'w') as f:
        f.write('id,label\n')
        for image_id, label in enumerate(predict_class, start=1):
            f.write('%d,%s\n' % (image_id, label))

# Write the predicted class names to the submission CSV.
output_file = "./cifar10/out_submit/submission_jandy_resnet50.csv"
generate_submissions(output_file, test_predict_class)