# Fruit Identification

### Import Libraries

In [1]:
import pandas as pd
import numpy as np

import cv2
import os
import random

from sklearn.model_selection import  train_test_split
from sklearn.metrics import confusion_matrix

from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Dense

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Import Data

In [2]:
directory = "fruits-360/Training/"
directory2 = "fruits-360/Test/"
classes = ["Apple Golden 1","Avocado","Banana","Cherry 1","Cocos","Kiwi","Lemon","Mango","Orange"]
img_size = 100


def load_images(root, class_names, size):
    """Load every image under ``root/<class name>/`` as a grayscale array.

    Parameters
    ----------
    root : str
        Directory that contains one sub-folder per class.
    class_names : list of str
        Sub-folder names; a class's position in this list is its integer label.
    size : int
        Each image is resized to ``size`` x ``size`` pixels.

    Returns
    -------
    list
        ``[image_array, label]`` pairs, ordered by class and then by file name.
        Files that OpenCV cannot decode are reported and skipped.
    """
    samples = []
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(root, class_name)
        # os.listdir gives no ordering guarantee; sort so runs are repeatable.
        for file_name in sorted(os.listdir(class_dir)):
            file_path = os.path.join(class_dir, file_name)
            image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals an unreadable file by returning None;
                # passing that on to cv2.resize would raise.
                print('Skipping unreadable image: ' + file_path)
                continue
            samples.append([cv2.resize(image, (size, size)), label])
    return samples


all_arrays = load_images(directory, classes, img_size)
all_arrays2 = load_images(directory2, classes, img_size)

In [3]:
# Keep only the image half of every [image, label] pair, in loading order.
fruits_array_train = [features for features, _ in all_arrays]

# NOTE(review): presumably sample indices and display names for a preview
# grid -- neither is used by the cells visible here; confirm before removing.
location = [
    [1, 500, 1150],
    [1500, 2000, 2500],
    [3000, 3500, 4000],
]
fruit_names = [
    "Apple",
    "Avocado",
    "Banana",
    "Cherry",
    "Cocos",
    "Kiwi",
    "Lemon",
    "Mango",
    "Orange",
]

### Preprocessing

In [4]:
# Shuffle with a fixed seed so a fresh kernel reproduces the same sample order
# for the same loading order. A private Random instance is used so the global
# `random` state is left untouched.
SEED = 42
shuffle_rng = random.Random(SEED)

# Training set: shuffle in place, then separate images from labels.
shuffle_rng.shuffle(all_arrays)
x_train = np.array([features for features, _ in all_arrays])
y_train = [label for _, label in all_arrays]

# Test set: same treatment.
shuffle_rng.shuffle(all_arrays2)
x_test = np.array([features for features, _ in all_arrays2])
y_test = [label for _, label in all_arrays2]

In [5]:
# Normalization and reshaping: add a trailing single-channel axis and divide
# the pixel values by 255.
image_shape = (-1, img_size, img_size, 1)
x_train = x_train.reshape(image_shape) / 255
x_test = x_test.reshape(image_shape) / 255

for caption, images in (("shape of x_train= ", x_train), ("shape of x_test=  ", x_test)):
    print(caption, images.shape)

shape of x_train=  (4306, 100, 100, 1)
shape of x_test=   (1445, 100, 100, 1)


In [6]:
# One-hot encode the integer labels. The width is taken from the class list
# rather than a literal so the encoding follows any change to `classes`.
num_classes = len(classes)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

In [7]:
# Hold out 20% of the training samples as a validation set; the fixed
# random_state makes the split repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

### Model

In [8]:
# CNN
# The input size and the number of output units are derived from `img_size`
# and `classes` instead of being hard-coded, so the network stays in sync with
# the data-loading settings.
model = Sequential([
    # Convolution / pooling stages with 16, 32 and 64 (twice) filters.
    Conv2D(filters=16, kernel_size=(2, 2), padding='same', activation='relu',
           input_shape=(img_size, img_size, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=32, kernel_size=(2, 2), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(2, 2), padding='same', activation='relu'),
    Conv2D(filters=64, kernel_size=(2, 2), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),
    # Dense classifier head: one softmax unit per class.
    Flatten(),
    Dense(units=150, activation='relu'),
    Dropout(0.4),
    Dense(units=len(classes), activation='softmax'),
])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [9]:
# Print the per-layer table of output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 100, 100, 16)      80        
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 50, 50, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 50, 50, 32)        2080      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 25, 25, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 25, 25, 64)        8256      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 25, 25, 64)        16448     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 12, 12, 64)        0         
__________

In [10]:
# Compile Model: RMSprop optimizer, categorical cross-entropy loss, and
# accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [11]:
# Train Model
# 30 epochs took too long to run, so training is limited to 10.
# The validation split is passed in so every epoch also reports loss and
# accuracy on data the model is not trained on; previously x_val / y_val were
# created but never used. The returned History object is kept for inspection.
history = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(x_val, y_val),
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x20780f65630>

In [12]:
# Test the Model
loss, accuracy = model.evaluate(x_test,y_test)
print('Loss: '+str(loss))
print('Accuracy: '+str(accuracy*100)+'%')

# The predicted class is the index of the largest softmax output. No 0.5
# threshold is applied first: a sample whose best probability is below 0.5
# would become an all-False row, and argmax would then report class 0.
y_pred = model.predict(x_test)
y_pred_labels = y_pred.argmax(axis=1)
y_true_labels = y_test.argmax(axis=1)

# Store the result under its own name. Re-binding `confusion_matrix` would
# shadow the imported function and make this cell fail when run a second time.
conf_matrix = confusion_matrix(y_true_labels, y_pred_labels)
print(conf_matrix)

Loss: 0.07640891293583009
Accuracy: 98.4083044982699%
[[137   0   0   0   0   0  23   0   0]
 [  0 143   0   0   0   0   0   0   0]
 [  0   0 166   0   0   0   0   0   0]
 [  0   0   0 164   0   0   0   0   0]
 [  0   0   0   0 166   0   0   0   0]
 [  0   0   0   0   0 156   0   0   0]
 [  0   0   0   0   0   0 164   0   0]
 [  0   0   0   0   0   0   0 166   0]
 [  0   0   0   0   0   0   0   0 160]]
