<a href="https://colab.research.google.com/github/ayandalab/Fruit-Recognition_NN/blob/main/Fruits_Recognition_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google_drive_downloader import GoogleDriveDownloader as gdd

gdd.download_file_from_google_drive(file_id='101lHWlBKAsXjPkVBTz1_Ge9S2rwVPTUU',
dest_path='content/fruits_data.zip',
unzip=True)

Downloading 101lHWlBKAsXjPkVBTz1_Ge9S2rwVPTUU into content/fruits_data.zip... Done.
Unzipping...Done.


In [2]:
import pandas as pd # Data analysis and manipultion tool
import numpy as np # Fundamental package for linear algebra and multidimensional arrays
import tensorflow as tf # Deep Learning Tool
import os # OS module in Python provides a way of using operating system dependent functionality
import cv2 # Library for image processing
from sklearn.model_selection import train_test_split # For splitting the data into train and validation set

In [3]:
labels = pd.read_csv("/content/content/fruits_data/Training_set.csv") # loading the labels
labels.head() # will display the first five rows in labels dataframe

Unnamed: 0,filename,label
0,Image_1.jpg,Pear 2
1,Image_2.jpg,Tomato Heart
2,Image_3.jpg,Plum 3
3,Image_4.jpg,Pear Stone
4,Image_5.jpg,Cherry 2


In [4]:
labels.tail() # will display the last five rows in labels dataframe

Unnamed: 0,filename,label
47379,Image_47380.jpg,Grape Pink
47380,Image_47381.jpg,Melon Piel de Sapo
47381,Image_47382.jpg,Grape White 3
47382,Image_47383.jpg,Avocado
47383,Image_47384.jpg,Pear Abate


In [5]:
file_paths = [[fname, '/content/content/fruits_data/train/' + fname] for fname in labels['filename']]

In [6]:
# Confirm if number of images is same as number of labels given
if len(labels) == len(file_paths):
    print('Number of labels i.e. ', len(labels), 'matches the number of filenames i.e. ', len(file_paths))
else:
    print('Number of labels does not match the number of filenames')

Number of labels i.e.  47384 matches the number of filenames i.e.  47384


In [7]:
images = pd.DataFrame(file_paths, columns=['filename', 'filepaths'])
images.head()

Unnamed: 0,filename,filepaths
0,Image_1.jpg,/content/content/fruits_data/train/Image_1.jpg
1,Image_2.jpg,/content/content/fruits_data/train/Image_2.jpg
2,Image_3.jpg,/content/content/fruits_data/train/Image_3.jpg
3,Image_4.jpg,/content/content/fruits_data/train/Image_4.jpg
4,Image_5.jpg,/content/content/fruits_data/train/Image_5.jpg


In [8]:
train_data = pd.merge(images, labels, how = 'inner', on = 'filename')
train_data.head()

Unnamed: 0,filename,filepaths,label
0,Image_1.jpg,/content/content/fruits_data/train/Image_1.jpg,Pear 2
1,Image_2.jpg,/content/content/fruits_data/train/Image_2.jpg,Tomato Heart
2,Image_3.jpg,/content/content/fruits_data/train/Image_3.jpg,Plum 3
3,Image_4.jpg,/content/content/fruits_data/train/Image_4.jpg,Pear Stone
4,Image_5.jpg,/content/content/fruits_data/train/Image_5.jpg,Cherry 2


In [9]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
train_data['label'] = le.fit_transform(train_data['label'])

In [10]:
data = [] # initialize an empty numpy array
image_size = 100 # image size taken is 100 here. one can take other size too
for i in range(len(train_data)):

    img_array = cv2.imread(train_data['filepaths'][i], cv2.IMREAD_GRAYSCALE) # converting the image to gray scale

    new_img_array = cv2.resize(img_array, (image_size, image_size)) # resizing the image array
    data.append([new_img_array, train_data['label'][i]])

In [11]:
# image pixels of a image
data[5]

[array([[255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        ...,
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255]], dtype=uint8), 124]

In [12]:
np.random.shuffle(data)

In [13]:
x = []
y = []
for image in data:
    x.append(image[0])
    y.append(image[1])

# converting x & y to numpy array as they are list
x = np.array(x)
y = np.array(y)

In [14]:
np.unique(y, return_counts=True)

(array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
         13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
         26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
         39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
         52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
         65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
         78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
         91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
        104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
        117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
        130]),
 array([344, 311, 336, 345, 337, 344, 319, 345, 344, 300, 343, 344, 470,
        345, 299, 344, 343, 315, 343, 315, 324, 343, 344, 344, 343, 491,
        344, 517, 517, 344, 344, 344, 315, 343, 343, 315, 324, 275, 328,
        343, 328, 491, 208, 343, 689

In [15]:
x = x.reshape(-1, 100, 100, 1)

In [16]:
# split the data
X_train, X_val, y_train, y_val = train_test_split(x,y,test_size=0.3, random_state = 42)

In [17]:
cnn = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(100, 100, 1)),
tf.keras.layers.MaxPooling2D((2, 2)),

tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),

# tf.keras.layers.Flatten(input_shape=(100, 100, 1)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(64, activation='relu'),
tf.keras.layers.Dense(131, activation='sigmoid')
])

In [18]:
cnn.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])

In [19]:
cnn.fit(X_train, y_train, epochs=3, batch_size=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f0cb2e8b990>

In [20]:
cnn.evaluate(X_val, y_val)



[0.4148562550544739, 0.9274761080741882]

In [21]:
# Loading the order of the image's name that has been provided
test_image_order = pd.read_csv("/content/content/fruits_data/Testing_set.csv")
test_image_order.head()

Unnamed: 0,filename
0,Image_1.jpg
1,Image_2.jpg
2,Image_3.jpg
3,Image_4.jpg
4,Image_5.jpg


In [22]:
file_paths = [[fname, '/content/content/fruits_data/test/' + fname] for fname in test_image_order['filename']]

In [23]:
# Confirm if number of images is same as number of labels given
if len(test_image_order) == len(file_paths):
    print('Number of image names i.e. ', len(test_image_order), 'matches the number of file paths i.e. ', len(file_paths))
else:
    print('Number of image names does not match the number of filepaths')

Number of image names i.e.  20308 matches the number of file paths i.e.  20308


In [24]:
test_images = pd.DataFrame(file_paths, columns=['filename', 'filepaths'])
test_images.head()

Unnamed: 0,filename,filepaths
0,Image_1.jpg,/content/content/fruits_data/test/Image_1.jpg
1,Image_2.jpg,/content/content/fruits_data/test/Image_2.jpg
2,Image_3.jpg,/content/content/fruits_data/test/Image_3.jpg
3,Image_4.jpg,/content/content/fruits_data/test/Image_4.jpg
4,Image_5.jpg,/content/content/fruits_data/test/Image_5.jpg


In [25]:
test_pixel_data = [] # initialize an empty numpy array
image_size = 100 # image size taken is 100 here. one can take other size too
for i in range(len(test_images)):

    img_array = cv2.imread(test_images['filepaths'][i], cv2.IMREAD_GRAYSCALE) # converting the image to gray scale

    new_img_array = cv2.resize(img_array, (image_size, image_size)) # resizing the image array

    test_pixel_data.append(new_img_array)

In [26]:
test_pixel_data = np.array(test_pixel_data)

In [27]:
test_pixel_data = test_pixel_data.reshape(-1, 100, 100, 1)

In [28]:
pred = cnn.predict(test_pixel_data)

In [29]:
# The predicted values are the probabilities value
pred[0]

array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 9.0412896e-29, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e

In [30]:
prediction = []
for value in pred:
    prediction.append(np.argmax(value))

In [31]:
predictions = le.inverse_transform(prediction)

In [32]:
res = pd.DataFrame({'filename': test_images['filename'], 'label': predictions}) # prediction is nothing but the final predictions of your model on input features of your new unseen test data
res.to_csv("submission.csv", index = False)

# To download the csv file locally
from google.colab import files
files.download('submission.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>