# Classifier for Deed

## Part 0: Packages requirement

opencv-python        4.1.2.30   
six                  1.13.0             
slim                 0.1                
tensorboard          2.0.2              
tensorflow           2.0.0              
tensorflow-estimator 2.0.1              
tensorflow-gpu       2.0.0   

In [106]:
#set PATH=C
#echo %PATH% 

# here is NETS package pwd
#PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
%env PYTHONPATH =  '~/models-master/research/slim'

env: PYTHONPATH='~/models-master/research/slim'


In [107]:
%matplotlib inline
import cv2
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from PIL import Image
import os

## Part 1: Preprocessing

In [108]:
def img_show_old(img,name):
    cv2.namedWindow(name, cv2.WINDOW_NORMAL)
    cv2.imshow(name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def img_show(img, code=cv2.COLOR_BGR2RGB):
    cv_rgb = cv2.cvtColor(img, code)
    fig, ax = plt.subplots(figsize=(16, 10))
    ax.imshow(cv_rgb)
    fig.show()


In [109]:
def img_to_arr(img,x,y):
    img = Image.fromarray(cv2.cvtColor(img,cv2.COLOR_BGR2RGB)).convert('L')
    #img.show()  
    #img = Image.open('iCard_021873_1_Daba_Ayehush_H-Copy1.jpg').convert('L')
    if img.size[0] != x or img.size[1] != y:
        img = img.resize((x, y))

    arr = []

    for i in range(y):
        for j in range(x):
            # mnist 里的颜色是0代表白色（背景），1.0代表黑色
            #print(img.getpixel((j, i)))
            pixel = 1.0 - float(img.getpixel((j, i)))/255.0
            # pixel = 255.0 - float(img.getpixel((j, i))) # 如果是0-255的颜色值
            arr.append(pixel)
            
    return arr

#img_to_arr(img_list[0],300,300)

In [111]:
img_path = sorted(os.listdir('All_Data/'))
#Returns a list of all folders with participant numbers
img_list =[]
for path in img_path:
    img  = cv2.imread('All_Data/' + path) 
    img_list.append(img)
    
text_list = []    
for img in img_list:
    text_list.append(img_to_arr(img,500,500))
    
np.array(text_list).shape

(520, 250000)

In [112]:
img_data = pd.read_csv('label_data.csv')

img_group_list = []

img_group = []
for i in np.array(img_data.iloc[:,[1]]).tolist():
    if(i == ["N"]):
        img_group.append([0])
    elif(i == ["Y"]):
        img_group.append([1])
img_group_list.append(img_group)  

img_group = []
for i in np.array(img_data.iloc[:,[2]]).tolist():
    if(i == ["N"]):
        img_group.append([0])
    elif(i == ["Y"]):
        img_group.append([1])
img_group_list.append(img_group)   


len(img_group_list[1])

520

In [117]:
input_train = np.array(text_list)
input_train = input_train.reshape(input_train.shape[0], 500, 500, 1)
input_train.shape

input_train[0:450].shape

(450, 500, 500, 1)

## Part 2: Modeling

In [118]:
'''
  Keras CNN
'''
import tensorflow
import tensorflow.keras as keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import backend as K
from tensorflow.keras import activations

# Model configuration
img_width, img_height = 500, 500
batch_size = 250
no_epochs = 100
no_classes = 3
validation_split = 0.2
verbosity = 1

def kares_data(input_data,input_target,img_width,img_height,no_classes):
        # Load dataset
    input_data = np.array(input_data)
    input_train = input_data[0:450]
    target_train = input_target[0:450]
    
    input_test = input_data[450:520]
    target_test = input_target[450:520]
    
    # Reshape data based on channels first / channels last strategy.
    # This is dependent on whether you use TF, Theano or CNTK as backend.
    # Source: https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py
    if K.image_data_format() == 'channels_first':
        input_train = input_train.reshape(input_train.shape[0], 1, img_width, img_height)
        input_test = input_test.reshape(input_test.shape[0], 1, img_width, img_height)
        input_shape = (1, img_width, img_height)
    else:
        input_train = input_train.reshape(input_train.shape[0], img_width, img_height, 1)
        input_test = input_test.reshape(input_test.shape[0], img_width, img_height, 1)
        input_shape = (img_width, img_height, 1)
    
    # Parse numbers as floats
    input_train = input_train.astype('float32')
    input_test = input_test.astype('float32')
    
    # Normalize data
    input_train = input_train / 255
    input_test = input_test / 255
    
    # Convert target vectors to categorical targets
    target_train = keras.utils.to_categorical(target_train, no_classes)
    target_test = keras.utils.to_categorical(target_test, no_classes)
    
    return input_train,input_test,target_train,target_test,input_shape
    
def kares_setup(input_train,input_test,target_train,target_test,input_shape,img_width, img_height,batch_size,no_epochs,no_classes,validation_split,verbosity):
    
    # Create the model
    model = Sequential()
    model.add(Conv2D(6, kernel_size=(5, 5), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(10, kernel_size=(5, 5), activation='relu'))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(no_classes, activation='softmax'))
    
    # Compile the model
    model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

    # Fit data to model
    model.fit(input_train, target_train,
          batch_size=batch_size,
          epochs=no_epochs,
          verbose=verbosity,
          validation_split=validation_split)

    # Generate generalization metrics
    score = model.evaluate(input_test, target_test, verbose=0)
    print(f'Test loss: {score[0]} / Test accuracy: {score[1]}')
    
    return model



In [122]:
# Hand writing classifier
keras.backend.clear_session()

input_train,input_test,target_train,target_test,input_shape = kares_data(text_list,img_group_list[0],500,500,2)

model_handwriting = kares_setup(input_train,input_test,target_train,target_test,input_shape, 500, 500, 250, 50, 2 , 0.2 ,1)

model_handwriting.save('model_hw.h5')


Train on 360 samples, validate on 90 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test loss: 0.5373307279178074 / Test accuracy: 0.9142857193946838


In [123]:
# Fraction classifier
keras.backend.clear_session()

input_train,input_test,target_train,target_test,input_shape = kares_data(text_list,img_group_list[1],500,500,2)

model_f = kares_setup(input_train,input_test,target_train,target_test,input_shape, 500, 500, 250, 50, 2 , 0.2 ,1)

model_f.save('model_f.h5')

Train on 360 samples, validate on 90 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test loss: 0.6759024006979806 / Test accuracy: 0.6571428775787354
