In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import glob

from sklearn.ensemble import RandomForestClassifier

In [None]:
# Seed NumPy's global random generator once, up front.
np.random.seed(1000)

# data already split into train, validation, test
data_dir = "/content/drive/MyDrive/APS360 Project/vegetable dataset/"
train_dir, valid_dir, test_dir = (
    data_dir + split_name + "/"
    for split_name in ("training", "validation", "test")
)

# 20 vegetable classes; the position in this list is the integer label
# produced by word_to_index. Spellings (e.g. 'raddish') are kept as-is
# because they are compared against the dataset folder names.
vegetable_classes = [
    'asparagus',
    'bell pepper',
    'broccoli',
    'cabbage',
    'carrot',
    'celery',
    'chilli pepper',
    'corn',
    'cucumber',
    'eggplant',
    'lettuce',
    'mushroom',
    'onion',
    'peas',
    'potato',
    'pumpkin',
    'raddish',
    'spinach',
    'sweet potato',
    'tomato',
]

In [None]:
# convert string class names to int
def word_to_index(label, classes):
  """Return the position of the first entry in `classes` equal to `label`.

  Returns None when no entry matches.
  """
  matching_positions = (
      position for position, name in enumerate(classes) if name == label
  )
  return next(matching_positions, None)

In [None]:
def rescale_img(img, label, image_file, width=64, height=64):
  """Resize an image to `width` x `height` pixels with cv2.INTER_AREA.

  Args:
    img: image handed straight to cv2.resize (in this notebook, the value
      returned by cv2.imread).
    label: class name; only printed when resizing fails.
    image_file: source path; only printed when resizing fails.
    width: target width in pixels (default 64).
    height: target height in pixels (default 64).

  Returns:
    The resized image. If cv2.resize raises, `label` and `image_file` are
    printed and `img` is returned unchanged, so callers must be prepared
    for an un-resized (or None) result.
  """
  dim = (width, height)
  try:
    return cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
  except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit can still stop a long loading loop.
    print(label)
    print(image_file)
    return img

In [None]:
# random forest wants nparrays as input; samples are gathered in plain lists first
train_data, train_labels = [], []

In [None]:
# Accumulators for the test split (feature vectors and integer labels)
test_data, test_labels = [], []

In [None]:
# Load every test image as a flattened pixel vector plus its integer class label.
# The accumulators are (re)created here so the cell can be re-run: test_data is
# replaced by an ndarray at the end and would no longer support .append().
test_data = []
test_labels = []

for label in os.listdir(test_dir):
  i = word_to_index(label, vegetable_classes)
  print(i)
  if i is None:
    # Entry does not match any known class: report it and skip it so that
    # None never ends up mixed in with the integer labels.
    print(label)
    continue

  image_files = [
      path
      for pattern in ("/*.jpg", "/*.jpeg", "/*.png", "/*.JPG")
      for path in glob.glob(test_dir + label + pattern)
  ]

  for j, image_file in enumerate(image_files):
    # Progress marker every 32 files
    if j != 0 and j % 32 == 0:
      print(label + " ok")
    img = cv2.imread(image_file)
    if img is None:
      # cv2.imread signals an unreadable file by returning None, not by raising
      print("could not read " + image_file)
      continue
    img = rescale_img(img, label, image_file)
    # Keep the flattened ndarray directly; converting to a Python list first
    # only costs time and memory before np.array() below.
    test_data.append(img.flatten())
    test_labels.append(i)

test_data = np.array(test_data)

1
bell pepper ok
2
broccoli ok
3
cabbage ok
4
carrot ok
6
chilli pepper ok
7
corn ok
8
cucumber ok
9
eggplant ok
10
lettuce ok
12
onion ok
13
peas ok
14
potato ok
16
raddish ok
17
spinach ok
18
sweet potato ok
19
tomato ok
11
mushroom ok
0
asparagus ok
5
celery ok
15
pumpkin ok


In [None]:
# Load every training image as a flattened pixel vector plus its integer class label.
# The accumulators are (re)created here so the cell can be re-run: train_data is
# replaced by an ndarray at the end and would no longer support .append().
train_data = []
train_labels = []

for label in os.listdir(train_dir):
  i = word_to_index(label, vegetable_classes)
  print(i)
  if i is None:
    # Entry does not match any known class: report it and skip it so that
    # None never ends up mixed in with the integer labels.
    print(label)
    continue

  image_files = [
      path
      for pattern in ("/*.jpg", "/*.jpeg", "/*.png", "/*.JPG")
      for path in glob.glob(train_dir + label + pattern)
  ]

  for j, image_file in enumerate(image_files):
    # Progress marker every 99 files
    if j != 0 and j % 99 == 0:
      print(label + " ok")
    img = cv2.imread(image_file)
    if img is None:
      # cv2.imread signals an unreadable file by returning None, not by raising
      print("could not read " + image_file)
      continue
    img = rescale_img(img, label, image_file)
    # Keep the flattened ndarray directly; converting to a Python list first
    # only costs time and memory before np.array() below.
    train_data.append(img.flatten())
    train_labels.append(i)

train_data = np.array(train_data)

1
bell pepper ok
2
broccoli ok
3
cabbage ok
4
carrot ok
6
chilli pepper ok
7
corn ok
8
cucumber ok
9
eggplant ok
10
12
onion ok
13
peas ok
14
potato ok
15
pumpkin ok
16
raddish ok
17
spinach ok
18
19
tomato ok
11
mushroom ok
0
asparagus ok
5
celery ok


In [None]:
# Sanity check: (number of training images, flattened pixel values per image)
train_data.shape

(2016, 12288)

In [None]:
# Random Forest with 250 trees.
# No random_state is passed; the only seed set in this notebook is
# np.random.seed(1000) in the configuration cell.
# NOTE(review): presumably the forest draws from that global NumPy state,
# so results depend on everything that consumed random numbers before — confirm.
model = RandomForestClassifier(n_estimators=250)

# Fit the model to training data
model.fit(train_data, train_labels)

In [1]:
# Score the fitted forest on the test split and report it as a percentage
score = model.score(test_data, test_labels)
print("Random Forest Test Accruacy: {:.2f}%".format(100 * score))

Random Forest Test Accruacy: 60.12%


In [None]:
# validation data for potato and sweet potato (feature vectors and labels per class)
potato, potato_labels = [], []
sweet_potato, sweet_potato_labels = [], []

In [None]:
# Load the validation images of the 'potato' class as flattened pixel vectors.
label = 'potato'
i = word_to_index(label, vegetable_classes)
print(i)
if i is None:
  print(label)

potato_files = [
    path
    for pattern in ("/*.jpg", "/*.jpeg", "/*.png", "/*.JPG")
    for path in glob.glob(valid_dir + label + pattern)
]

# Start from empty lists so re-running this cell does not append duplicates.
potato = []
potato_labels = []

# Separate loop variable, so `potato_files` keeps referring to the full list.
for j, potato_file in enumerate(potato_files):
  # Progress marker every 32 files
  if j != 0 and j % 32 == 0:
    print(label + " ok")
  img = cv2.imread(potato_file)
  if img is None:
    # cv2.imread signals an unreadable file by returning None, not by raising
    print("could not read " + potato_file)
    continue
  img = rescale_img(img, label, potato_file)
  potato.append(img.flatten())
  potato_labels.append(i)

14
potato ok


In [None]:
# Load the validation images of the 'sweet potato' class as flattened pixel vectors.
label = 'sweet potato'
i = word_to_index(label, vegetable_classes)
print(i)
if i is None:
  print(label)

sweet_potato_files = [
    path
    for pattern in ("/*.jpg", "/*.jpeg", "/*.png", "/*.JPG")
    for path in glob.glob(valid_dir + label + pattern)
]

# Start from empty lists so re-running this cell does not append duplicates.
sweet_potato = []
sweet_potato_labels = []

# Separate loop variable, so `sweet_potato_files` keeps referring to the full list.
for j, sweet_potato_file in enumerate(sweet_potato_files):
  # Progress marker every 32 files
  if j != 0 and j % 32 == 0:
    print(label + " ok")
  img = cv2.imread(sweet_potato_file)
  if img is None:
    # cv2.imread signals an unreadable file by returning None, not by raising
    print("could not read " + sweet_potato_file)
    continue
  img = rescale_img(img, label, sweet_potato_file)
  sweet_potato.append(img.flatten())
  sweet_potato_labels.append(i)

18
sweet potato ok


In [None]:
# Score the fitted forest separately on the potato and sweet potato validation images
potato_score = model.score(potato, potato_labels)
sweet_potato_score = model.score(sweet_potato, sweet_potato_labels)
print(
    "Random Forest validation accruacy on potatos: {:.2f}%".format(potato_score*100),
    "Random Forest validation accruacy on sweet potatos: {:.2f}%".format(sweet_potato_score*100),
    sep="\n",
)

Random Forest validation accruacy on potatos: 35.29%
Random Forest validation accruacy on sweet potatos: 41.18%
