In [2]:
# Mount Google Drive at /content/drive so files stored there can be read from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# First, load the file names and their respective target labels into NumPy arrays.
from sklearn.datasets import load_files
import numpy as np

# Dataset roots on the mounted Drive, one directory per split.
train_dir = '/content/drive/My Drive/Dataset/plantdisease-tugasakhir/train'
test_dir = '/content/drive/My Drive/Dataset/plantdisease-tugasakhir/test'

def load_dataset(path):
    """Collect file paths and integer class labels from a directory tree.

    ``path`` is handed to ``sklearn.datasets.load_files``, which expects one
    sub-folder per class.

    Parameters
    ----------
    path : str
        Root directory of the split to index.

    Returns
    -------
    files : numpy.ndarray
        Paths of the files found under ``path``.
    targets : numpy.ndarray
        Integer label of each file, aligned with ``files``.
    target_labels : numpy.ndarray
        Class names; label ``i`` in ``targets`` refers to ``target_labels[i]``.
    """
    # Only the names and labels are used, so do not let load_files read the
    # raw bytes of every file into memory (its default behaviour).
    data = load_files(path, load_content=False)
    files = np.array(data['filenames'])
    targets = np.array(data['target'])
    target_labels = np.array(data['target_names'])
    return files, targets, target_labels
    
print('Loading..')
x_train, y_train, target_labels = load_dataset(train_dir)
# Keep the test split's class names under a real name instead of `_`, which
# IPython uses for the last cell output.
x_test, y_test, test_target_labels = load_dataset(test_dir)
print('Loading complete!')

# Integer labels are assigned separately for each directory, so they only refer
# to the same classes in both splits when the class folders are identical.
assert np.array_equal(target_labels, test_target_labels), \
    'train and test directories must contain the same class folders'

print('Training set size : ' , x_train.shape[0])
print('Testing set size : ', x_test.shape[0])


Loading..
Loading complete!
Training set size :  7510
Testing set size :  1875


In [4]:
# Count the distinct integer labels that occur in the training set.
no_of_classes = np.unique(y_train).size
no_of_classes

11

In [5]:
# The target labels are still integer class indices here. They need to be turned
# into one-hot vectors with one element per class (no_of_classes, i.e. 11 here).
print(y_train[:10])

[3 0 2 5 7 5 5 7 5 0]


In [6]:
from keras.utils import np_utils

# One-hot encode the integer labels of both splits, using the class count
# derived from the training labels for each of them.
y_train = np_utils.to_categorical(y_train, num_classes=no_of_classes)
y_test = np_utils.to_categorical(y_test, num_classes=no_of_classes)
y_train[0]  # exactly one entry is 1 (the sample's class); every other entry is 0

Using TensorFlow backend.


array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [7]:
# Split the held-out test set in two: the first VALID_SIZE samples become the
# validation set and the remaining samples stay as the test set.
VALID_SIZE = 1000
# With VALID_SIZE samples or fewer the test split would come out empty (which is
# also what happens when this cell is re-run on the already-split arrays), so
# fail loudly instead of continuing with an empty test set.
assert len(x_test) == len(y_test) and len(x_test) > VALID_SIZE, \
    'expected the full, unsplit test set (restart the kernel and run all cells in order)'
x_valid, x_test = x_test[:VALID_SIZE], x_test[VALID_SIZE:]
y_valid, y_test = y_test[:VALID_SIZE], y_test[VALID_SIZE:]
y_vaild = y_valid  # original (misspelled) name, kept for code that already refers to it
print('Vaildation X : ',x_valid.shape)
print('Vaildation y :',y_vaild.shape)
print('Test X : ',x_test.shape)
print('Test y : ',y_test.shape)

Vaildation X :  (1000,)
Vaildation y : (1000, 11)
Test X :  (875,)
Test y :  (875, 11)


In [8]:
x_train[0]
# At this point the training data is just the file names of the images; they still need to be converted into pixel arrays.

'/content/drive/My Drive/Dataset/plantdisease-tugasakhir/train/Apple___healthy/2345cf40-76a9-4bb3-9e7c-8d185dd41905___RS_HL 8123.JPG'

In [9]:
# So far the x sets only hold file names. Let's load the images and convert them into arrays.
from keras.preprocessing.image import array_to_img, img_to_array, load_img

def convert_image_to_array(files):
    """Load every image listed in ``files`` and return the pixel arrays as a list.

    Each path is opened with ``load_img`` and converted with ``img_to_array``;
    the returned list keeps the order of ``files``.
    """
    return [img_to_array(load_img(image_path)) for image_path in files]

# Decode the images of every split. Each name is rebound from an array of file
# paths to an array of pixel data, so this cell needs the path arrays from the
# earlier cells as input and is not meant to be run twice in a row.
print('Loading..')
x_train = np.array(convert_image_to_array(x_train))
print('Training set shape : ',x_train.shape)

x_valid = np.array(convert_image_to_array(x_valid))
print('Validation set shape : ',x_valid.shape)

x_test = np.array(convert_image_to_array(x_test))
print('Test set shape : ',x_test.shape)

# Shape of a single decoded image.
print('1st training image shape ',x_train[0].shape)

Loading..
Training set shape :  (7510, 256, 256, 3)
Validation set shape :  (1000, 256, 256, 3)
Test set shape :  (875, 256, 256, 3)
1st training image shape  (256, 256, 3)


In [10]:
print('1st training image as array',x_train[0]) # don't worry if you see only 255s in the abbreviated printout..
# there are elements with other values too :p

1st training image as array [[[113. 113. 137.]
  [110. 110. 134.]
  [ 86.  86. 110.]
  ...
  [ 93.  93. 117.]
  [ 94.  94. 118.]
  [ 95.  95. 119.]]

 [[ 95.  95. 119.]
  [100. 100. 124.]
  [ 94.  94. 118.]
  ...
  [ 86.  86. 110.]
  [ 88.  88. 112.]
  [ 93.  93. 117.]]

 [[105. 105. 129.]
  [ 95.  95. 119.]
  [ 89.  89. 113.]
  ...
  [100. 100. 124.]
  [ 92.  92. 116.]
  [ 83.  83. 107.]]

 ...

 [[156. 161. 183.]
  [156. 161. 183.]
  [155. 160. 182.]
  ...
  [193. 192. 208.]
  [194. 193. 209.]
  [195. 194. 210.]]

 [[155. 160. 182.]
  [154. 159. 181.]
  [154. 159. 181.]
  ...
  [192. 191. 207.]
  [193. 192. 208.]
  [193. 192. 208.]]

 [[153. 158. 180.]
  [153. 158. 180.]
  [153. 158. 180.]
  ...
  [190. 189. 205.]
  [190. 189. 205.]
  [190. 189. 205.]]]


In [0]:
# Re-scale so that all the pixel values lie within 0 to 1.
print('Loading..')
# copy=False skips the extra full-size copy that astype() would otherwise make
# when an array is already float32; the division still returns a new array, so
# the resulting values are unchanged.
x_train = x_train.astype('float32', copy=False) / 255
x_valid = x_valid.astype('float32', copy=False) / 255
x_test = x_test.astype('float32', copy=False) / 255
x_train[0]

Loading..
