#### Convolution1D is mainly used for NLP
#### Convolution2D is mainly used for computer vision (CV)

In [1]:
import numpy as np
import os
import cv2
import pickle

In [2]:
# import keras
# print(keras.__version__)

In [3]:
from keras.utils import np_utils, conv_utils
# from keras.utils.conv_utils import normalize_data_format
# normalize_data_format function was moved to keras.backend.common from keras.utils.conv_utils since keras 2.2.1
from keras.backend.common import normalize_data_format
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, Flatten, Dropout, Dense,Activation
from keras.optimizers import Adam

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
from keras.layers import BatchNormalization

In [5]:
# get the names of different kinds of fruits

def get_name_list(filepath):
    """Return the names of the immediate sub-directories of ``filepath``.

    Args:
        filepath: Directory to scan (``str`` or ``bytes`` path).

    Returns:
        A list of ``str`` directory names, in the order ``os.listdir``
        reports them. Plain files are skipped.
    """
    out = []
    for entry in os.listdir(filepath):
        if not os.path.isdir(os.path.join(filepath, entry)):
            continue
        # os.listdir() yields str for a str path on Python 3, and str has no
        # .decode(); only byte names (bytes path / Python 2) need decoding.
        if isinstance(entry, bytes):
            entry = entry.decode('gbk')
        out.append(entry)
    return out

In [6]:
# put the names of all folders and files in a list

def eachFile(filepath):
    """Return every entry name (files and folders) found directly in ``filepath``.

    The names are returned as a new list, in the order ``os.listdir``
    reports them.
    """
    return [entry for entry in os.listdir(filepath)]

In [7]:
#  get_data used to convert target image dataset to numpy array

def get_data(data_name, train_percentage=0.7, resize=True,data_format=None):
    """Load the image dataset as grayscale NumPy arrays split into train/test.

    Reads the module globals ``Width``, ``Height``, ``pic_dir_out`` and
    ``pic_dir_data``. Every sub-folder of ``pic_dir_data`` is treated as one
    class; its readable images are converted to grayscale and the first
    ``train_percentage`` share of the folder's entries goes to the training
    set, the rest to the test set.

    The result is cached as a pickle in ``pic_dir_out``. The cache file name
    is built only from ``data_name``, ``Width`` and ``Height``, so an existing
    cache is returned as-is even if ``train_percentage``, ``resize`` or
    ``data_format`` differ from the call that created it.

    Args:
        data_name: Prefix of the cache file name.
        train_percentage: Fraction of each folder's entries used for training.
        resize: If true, resize every image to ``Width`` x ``Height``.
        data_format: ``'channels_last'``, ``'channels_first'`` or ``None``
            (resolved by ``normalize_data_format``).

    Returns:
        ``(X_train, y_train), (X_test, y_test)`` where the ``X`` arrays stack
        the images along axis 0 and the ``y`` arrays hold integer labels.
    """
    file_name = os.path.join(pic_dir_out, data_name+str(Width)+"X"+str(Height)+".pkl")
    if os.path.exists(file_name):
        # NOTE(security): pickle.load can execute arbitrary code; this is only
        # acceptable because the cache file is produced by this same function.
        with open(file_name, "rb") as cache_file:
            (X_train, y_train),(X_test, y_test) = pickle.load(cache_file)
        return (X_train, y_train),(X_test, y_test)
    data_format = normalize_data_format(data_format)
    # pic_dir_data is the path of the dataset; its entries are the class
    # folders (for example apple, banana etc.)
    pic_dir_set = eachFile(pic_dir_data)
    X_train = []
    y_train = []
    X_test = []
    y_test = []
    label = 0
    for pic_dir in pic_dir_set:
        class_dir = os.path.join(pic_dir_data, pic_dir)
        print(class_dir)  # print the current entry with a proper path separator
        if not os.path.isdir(class_dir):
            continue
        # pic_set contains all the entries in the current folder
        pic_set = eachFile(class_dir)
        pic_index = 0
        train_count = int(len(pic_set)*train_percentage)
        for pic_name in pic_set:
            pic_path = os.path.join(class_dir, pic_name)
            if not os.path.isfile(pic_path):
                continue
            img = cv2.imread(pic_path)
            if img is None:
                # not a readable image
                continue
            # BGR (OpenCV's loading order) to gray scale
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            if resize:
                img = cv2.resize(img, (Width, Height))
            # cv2.resize takes (width, height) but the resulting array is
            # (rows, cols) == (height, width); take the layout from the array
            # itself so non-square sizes and resize=False are handled too.
            rows, cols = img.shape
            if data_format == 'channels_last':
                img = img.reshape(-1, rows, cols, 1)
            elif data_format == 'channels_first':
                img = img.reshape(-1, 1, rows, cols)
            if pic_index < train_count:
                X_train.append(img)
                y_train.append(label)
            else:
                X_test.append(img)
                y_test.append(label)
            pic_index += 1
        if len(pic_set) != 0:
            label += 1
    X_train = np.concatenate(X_train, axis=0)
    X_test = np.concatenate(X_test, axis=0)
    y_train = np.array(y_train)
    y_test = np.array(y_test)
    with open(file_name, 'wb') as cache_file:
        pickle.dump([(X_train, y_train), (X_test, y_test)], cache_file)
    return (X_train, y_train), (X_test, y_test)

In [16]:
def main():
    """Train, save and evaluate the fruit-classification CNN.

    Assigns the module globals ``Width``, ``Height``, ``pic_dir_out`` and
    ``pic_dir_data`` (read by ``get_data``), loads the grayscale dataset,
    scales the pixels by 1/255, one-hot encodes the labels, builds and
    compiles a Sequential convolutional network, fits it for 50 epochs,
    writes the weights and the full model into ``pic_dir_out`` and finally
    prints the loss and accuracy measured on the test split.
    """
    global Width, Height, pic_dir_out, pic_dir_data
    Width = 100
    Height = 100
    pic_dir_out = 'C:/Users/butte/Jupyter/Final_Project/pic/pic_out'
    pic_dir_data = 'C:/Users/butte/Jupyter/Final_Project/pic/pic_dataset'
    num_classes = 3  # must be supplied by hand

    train_split, test_split = get_data("grey_data_", 0.7, data_format='channels_last')
    X_train, y_train = train_split
    X_test, y_test = test_split
    print("read and transform images successfully")

    # Preprocessing: scale pixel values by 1/255 before feeding the network.
    X_train = X_train / 255.
    X_test = X_test / 255.
    print('X_train.shape after normalization:', X_train.shape)
    print('X_test.shape after normalization:', X_test.shape)

    # Turn each integer class label into a one-hot vector of length num_classes.
    y_train = np_utils.to_categorical(y_train, num_classes)
    y_test = np_utils.to_categorical(y_test, num_classes)

    # ---------------------------------------------------------------- model
    net = Sequential()

    def add_conv_relu_bn(filters):
        # One CONV -> RELU -> BatchNorm unit, appended to the network.
        net.add(Convolution2D(filters, (3, 3), strides=(1, 1), padding='same',
                              data_format='channels_last'))
        net.add(Activation('relu'))
        net.add(BatchNormalization(axis=-1))

    # Stage 1: a single conv unit that also declares the input shape.
    net.add(Convolution2D(
        input_shape=(Height, Width, 1),
        filters=32,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        data_format='channels_last',
    ))
    net.add(Activation('relu'))
    net.add(BatchNormalization(axis=-1))
    net.add(MaxPooling2D(pool_size=(3, 3), strides=(3, 3), data_format='channels_last'))
    net.add(Dropout(0.25))

    # Stages 2 and 3: (CONV -> RELU -> BN) * 2 -> POOL -> Dropout,
    # with 64 and then 128 filters.
    for filters in (64, 128):
        add_conv_relu_bn(filters)
        add_conv_relu_bn(filters)
        net.add(MaxPooling2D((2, 2), (2, 2), data_format='channels_last'))
        net.add(Dropout(0.25))

    # Classifier head: Flatten -> Dense -> BN -> Dropout -> softmax.
    net.add(Flatten())
    net.add(Dense(1024, activation='relu'))
    net.add(BatchNormalization())
    net.add(Dropout(0.5))
    # softmax turns the final layer's outputs into values in (0, 1)
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        optimizer=Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # ------------------------------------------------------------- training
    print("\nTraining----------------------------")
    # cm is the index of the weight file to resume from; weights are only
    # loaded when cm >= 1. New weights are saved under index cm + 1.
    cm = 0
    resume_suffix = str(cm) if cm != 0 else ''
    save_suffix = str(cm + 1) if (cm + 1) != 0 else ''
    if cm >= 1:
        net.load_weights(os.path.join(pic_dir_out, 'cnn_model_' + resume_suffix + '.h5'))

    net.fit(X_train, y_train, epochs=50, batch_size=128)
    net.save_weights(os.path.join(pic_dir_out, 'cnn_model_' + save_suffix + '.h5'))
    net.save(os.path.join(pic_dir_out, 'my_model.h5'))

    # -------------------------------------------------------------- testing
    print("\nTesting----------------------------")
    loss, accuracy = net.evaluate(X_test, y_test)
    print('test loss:', loss)
    print('test accuracy:', accuracy)

#     class_name_list = get_name_list(pic_dir_data) #list the name of all kinds of fruits in the dataset directory
#     predict = model.predict(X_test, batch_size=128)

#### 在构建网络模型的时候，给每一层都定义一个名字，这样在复用之前的参数权重的时候，除了官网给的先加载权重，再冻结权重之外，你可以通过简单的修改层的名字来达到加载之前训练的权重的目的。save_weights将每一次迭代后的模型参数保存下来。

In [17]:
# Run the training pipeline only when this file is executed directly.
if __name__ == "__main__":
    main()

C:/Users/butte/Jupyter/Final_Project/pic/pic_datasetapple
C:/Users/butte/Jupyter/Final_Project/pic/pic_datasetbanana
C:/Users/butte/Jupyter/Final_Project/pic/pic_datasetorange
read and transform images successfully
X_train.shape after normalization: (153, 100, 100, 1)
X_test.shape after normalization: (67, 100, 100, 1)

Training----------------------------
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50

Testing----------------------------
test loss: 2.

In [10]:
# file = open('C:/Users/butte/Jupyter/Final_Project/pic/pic_out/grey_data_100X100.pkl',"rb")
# data = pickle.load(file)
# print(data)
# file.close()

In [11]:
# file_model=open('C:/Users/butte/Jupyter/Final_Project/pic/pic_out/finalized_model.sav',"rb")
# loaded_model = pickle.load(file_model)

# def predict_specific_image(img, model):
#     img = imread()
    

In [18]:
import h5py
from keras.models import load_model

In [19]:
from keras.preprocessing.image import img_to_array

In [20]:
# Classify one image with the model saved by main().
# Width and Height are the module globals assigned inside main(), so main()
# must have run in this session before this cell.
img_path = 'C:\\Users\\butte\\Jupyter\\Final_Project\\pic\\pic_pred\\banana_1.jpg'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here with a clear message.
    raise IOError('could not read image: ' + img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = cv2.resize(img, (Width, Height))
# The resized array is (Height, Width); add a batch axis and a channel axis
# instead of hard-coding 100x100, so the shape follows Width/Height.
img = img.reshape(-1, Height, Width, 1)
# Same 1/255 scaling that main() applies to the training data.
img = img.astype("float") / 255.0
model = load_model('C:\\Users\\butte\\Jupyter\\Final_Project\\pic\\pic_out\\my_model.h5')
pred = model.predict(img)
print(pred)

[[6.9242069e-24 1.0000000e+00 2.2542204e-17]]
