In [1]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sklearn
import sys
import tensorflow as tf
import time

from tensorflow import keras

# Report the TensorFlow and interpreter versions, followed by the name and
# version string of each library imported above.
print(tf.__version__)
print(sys.version_info)
for module in (mpl, np, pd, sklearn, tf, keras):
    print(f"{module.__name__} {module.__version__}")

2.0.0
sys.version_info(major=3, minor=6, micro=2, releaselevel='final', serial=0)
matplotlib 3.1.2
numpy 1.18.0
pandas 0.25.3
sklearn 0.22
tensorflow 2.0.0
tensorflow_core.keras 2.2.4-tf


In [2]:
# CIFAR-10 category names. The list order matters: position i in this list is
# the label string associated with class index i.
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

# Training-set locations. (The variable name keeps its original spelling so
# that any existing reference to it continues to work.)
train_lables_file = './cifar10/trainLabels.csv'
train_folder = 'cifar10\\train'

# Test-set locations. The sample-submission CSV supplies the list of test ids.
test_csv_file = './cifar10/sampleSubmission.csv'
# NOTE: the test images are read from an absolute, machine-specific folder
# instead of the project-relative 'cifar10\\test'. Adjust this on other machines.
test_folder = 'C:\\test'

def parse_csv_file(filepath, folder):
    """Parse a two-column ``id,label`` CSV into ``(image_path, label)`` pairs.

    The first row of the file is treated as a header and skipped. Each
    remaining non-empty row becomes a tuple whose first element is
    ``<folder>/<id>.png`` and whose second element is the label string.

    Args:
        filepath: Path of the CSV file to read.
        folder: Directory that is joined in front of each ``<id>.png`` name.

    Returns:
        A list of ``(image_full_path, label_str)`` tuples in file order.
        An empty or header-only file yields an empty list.

    Raises:
        ValueError: If a non-empty row does not contain exactly two columns.
    """
    import csv  # local import keeps this definition self-contained

    results = []
    # newline='' lets the csv module do its own line-ending handling.
    with open(filepath, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header; harmless when the file is empty
        for row in reader:
            if not row:
                # Blank lines (e.g. a trailing newline at EOF) carry no data.
                continue
            if len(row) != 2:
                raise ValueError(
                    'expected 2 columns (id,label) at line %d of %s, got %d'
                    % (reader.line_num, filepath, len(row)))
            image_id, label_str = row
            image_full_path = os.path.join(folder, image_id + '.png')
            results.append((image_full_path, label_str))
    return results

import pprint

# Build the (image path, label) list for the test set, then preview the first
# five entries and the total number of entries.
test_csv_info = parse_csv_file(test_csv_file, test_folder)
pprint.pprint(test_csv_info[:5])
print(len(test_csv_info))



[('C:\\test\\1.png', 'cat'),
 ('C:\\test\\2.png', 'cat'),
 ('C:\\test\\3.png', 'cat'),
 ('C:\\test\\4.png', 'cat'),
 ('C:\\test\\5.png', 'cat')]
300000


In [3]:

# Build the frame with its column names in a single step. Passing `columns=`
# to the constructor (rather than assigning `.columns` afterwards) also works
# when `test_csv_info` is empty, where the post-hoc assignment would raise a
# length-mismatch error.
test_df = pd.DataFrame(test_csv_info, columns=['filepath', 'class'])
print(test_df.head())

        filepath class
0  C:\test\1.png   cat
1  C:\test\2.png   cat
2  C:\test\3.png   cat
3  C:\test\4.png   cat
4  C:\test\5.png   cat


In [4]:
# Image geometry, batching and class-count settings for the test pipeline.
height, width, channels = 32, 32, 3
batch_size = 32
num_classes = 10

# The only preprocessing configured here is multiplying pixel values by 1/255.
test_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)

# Create a batch iterator over the files listed in test_df. Shuffling is
# disabled; the submission step later assigns ids by position, so presumably
# the order must stay the same as in the CSV.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    shuffle=False,
    class_mode="sparse",
)

# Number of samples the generator reports.
test_num = test_generator.samples
print(test_num)

Found 300000 validated image filenames belonging to 10 classes.
300000


In [5]:
# Load the previously saved Keras model from the current directory.
# The path is built with os.path.join rather than written as a backslash
# literal: '\k' is not a valid string escape (it triggers a warning on recent
# Python versions) and a hard-coded backslash only works on Windows.
model = keras.models.load_model(os.path.join('.', 'keras_save_model_500'))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 128)       3584      
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 128)       512       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 128)       147584    
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 128)       512       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 128)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 16, 256)       295168    
_________________________________________________________________
batch_normalization_2 (Batch (None, 16, 16, 256)       1

In [6]:
# Parallelised inference using 16 workers. When use_multiprocessing is True the
# workers are processes; when it is False (as here) they are threads.
test_predict = model.predict_generator(
    test_generator,
    workers=16,
    use_multiprocessing=False,
    verbose=1,
)



In [7]:
# Inspect the prediction array: its shape first, then the first five rows.
print(test_predict.shape)
print(test_predict[:5])

(300000, 10)
[[3.7200740e-04 2.2641630e-03 4.2493917e-05 2.0375755e-04 2.5678903e-04
  7.5843811e-05 1.9839363e-05 7.8211895e-05 9.1589038e-04 9.9577099e-01]
 [8.3627814e-01 7.6947487e-03 1.9166196e-02 1.5120008e-02 1.6069705e-02
  6.6126212e-03 1.0863190e-02 5.9641171e-03 5.7401914e-02 2.4829419e-02]
 [4.2894937e-12 1.3300299e-05 1.4284742e-16 2.9355161e-15 7.7310080e-17
  1.2928743e-16 4.0629365e-17 1.9250694e-16 1.1269588e-10 9.9998665e-01]
 [3.5777140e-13 4.1062709e-12 1.4247181e-14 8.8806654e-14 1.7812117e-14
  3.3035976e-13 1.7265602e-15 1.3299573e-11 1.0000000e+00 2.9331641e-12]
 [9.9989331e-01 7.7734410e-07 8.8038314e-06 1.0437548e-06 5.2451692e-06
  6.0542277e-07 8.0431028e-06 2.5361697e-07 5.2082960e-05 2.9714151e-05]]


In [8]:
# For every prediction row, take the index of the largest value.
test_predict_class_indices = test_predict.argmax(axis=1)
print(test_predict_class_indices[:20])

# Translate each integer index into the corresponding entry of class_names.
test_predict_class = [class_names[idx] for idx in test_predict_class_indices]
print(test_predict_class[:20])

[9 0 9 8 0 3 0 6 1 3 2 7 6 4 5 0 5 2 0 6]
['truck', 'airplane', 'truck', 'ship', 'airplane', 'cat', 'airplane', 'frog', 'automobile', 'cat', 'bird', 'horse', 'frog', 'deer', 'dog', 'airplane', 'dog', 'bird', 'airplane', 'frog']


In [9]:
def generate_submissions(filename, predict_class):
    """Write predicted labels to a submission CSV.

    The file starts with the header line ``id,label``; each following line is
    ``<id>,<label>`` where ids start at 1 and follow the order of
    ``predict_class``.

    Args:
        filename: Path of the CSV file to create (an existing file is
            overwritten).
        predict_class: Iterable of labels, one per sample, in id order. Any
            iterable is accepted (list, tuple, generator, ...).
    """
    with open(filename, 'w') as f:
        f.write('id,label\n')
        # enumerate(start=1) yields the 1-based ids without manual indexing.
        f.writelines('%d,%s\n' % (sample_id, label)
                     for sample_id, label in enumerate(predict_class, start=1))

# Destination of the submission file for this run; write the predicted labels there.
output_file = "./cifar10/submission_500_again22.csv"
generate_submissions(filename=output_file, predict_class=test_predict_class)