Datasource: "Leafsnap: A Computer Vision System for Automatic Plant Species Identification,"  
Neeraj Kumar, Peter N. Belhumeur, Arijit Biswas, David W. Jacobs, W. John Kress, Ida C. Lopez, João V. B. Soares,  
Proceedings of the 12th European Conference on Computer Vision (ECCV),  
October 2012

In [1]:
tarurl = r'http://leafsnap.com/static/dataset/leafsnap-dataset.tar'

In [16]:
pwd

'C:\\Users\\User\\Documents\\GitHub\\datasets\\dataset\\images\\field'

In [2]:
%cd ../../datasets/

C:\Users\User\Documents\GitHub\datasets


In [None]:
!curl -O $tarurl 

In [None]:
!tar -xvf leafsnap-dataset.tar

## Imports

In [45]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
import skimage
import os
import math
import random

In [18]:
%cd ./dataset/images/field/

[WinError 3] The system cannot find the path specified: './dataset/images/field/'
C:\Users\User\Documents\GitHub\datasets\dataset\images\field


In [19]:
tree_specs = os.popen('ls').read()
tree_specs = tree_specs.strip().split(sep='\n')

In [38]:
!mkdir test,train,vdate

A subdirectory or file test already exists.
Error occurred while processing: test.
A subdirectory or file train already exists.
Error occurred while processing: train.
A subdirectory or file vdate already exists.
Error occurred while processing: vdate.


In [None]:
#move images into train, test, validate folders with subfolders for classes
test_ratio = .15
for spec in tree_specs:
    %cd ./$spec
    listing = os.popen('ls').read().strip().split(sep='\n')
    random.shuffle(listing) #inplace
    im_count = len(listing)
    test_size=val_size = math.ceil(test_ratio*im_count)
    train_size = im_count - (test_size + val_size)
    subfolders = ['train', 'test', 'vdate']
    for subfolder in subfolders:
        os.makedirs(os.path.join(os.path.dirname(os.getcwd()), subfolder, spec))
    for item in range(0, train_size):
        moved = listing.pop()
        !mv $moved ../train/$spec/$moved
    for item in range(0, test_size):
        moved = listing.pop()
        !mv $moved ../test/$spec/$moved
    for item in range(0, val_size):
        moved = listing.pop()
        !mv $moved ../vdate/$spec/$moved
    %cd ..
    !rmdir ./$spec

In [192]:
train_datagen = ImageDataGenerator(rescale=1/255)
train_augmented_datagen = ImageDataGenerator(rescale=1./255,
                                             rotation_range=30,
                                             zoom_range=0.15,
                                             width_shift_range=0.2,
                                             height_shift_range=0.2,
                                             shear_range=0.15,
                                             fill_mode='nearest',
                                             horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1/255)
vdate_datagen = ImageDataGenerator(rescale=1/255)

In [127]:
# Flow training images in batches of 128 using train_datagen generator
batch_size = 32
target_size = (256,256)
train_generator = train_datagen.flow_from_directory(
        r'./train/',  # This is the source directory for training images
        target_size=target_size, 
        batch_size=batch_size,
        color_mode='rgb',
        # Specify the classes explicitly
        classes = tree_specs,
        class_mode='categorical')

Found 1303 images belonging to 48 classes.


In [194]:
train_augmented_generator = train_augmented_datagen.flow_from_directory(
        r'./train/',  # This is the source directory for training images
        target_size=target_size, 
        batch_size=batch_size,
        color_mode='rgb',
        classes = tree_specs,
        class_mode='categorical',
        #augmentation
        )

Found 1303 images belonging to 48 classes.


In [128]:
test_generator = test_datagen.flow_from_directory(
        r'./test/',
        target_size=target_size,
        batch_size=batch_size,
        color_mode='rgb',
        classes=tree_specs,
        class_mode='categorical')

Found 272 images belonging to 48 classes.


In [129]:
vdate_generator = vdate_datagen.flow_from_directory(
        r'./vdate/',
        target_size=target_size,
        batch_size=batch_size,
        color_mode='rgb',
        classes=tree_specs,
        class_mode='categorical')

Found 272 images belonging to 48 classes.


In [121]:
#shape of image array
train_generator.next()[0].shape

(32, 256, 256, 3)

In [184]:
input_shape = train_generator.next()[0].shape[1:]

# Modeling
### Sequential Model

In [185]:
sequential_mod = tf.keras.models.Sequential()
sequential_mod.add(Flatten(input_shape=input_shape))
sequential_mod.add(Dense(128, activation='relu'))
sequential_mod.add(Dropout(.1))
sequential_mod.add(Dense(64, activation='relu'))
sequential_mod.add(Dropout(.1))
sequential_mod.add(Dense(48, activation='softmax'))

sequential_mod.summary()

sequential_mod.compile(loss='sparse_categorical_crossentropy',
              optimizer=tf.keras.optimizers.RMSprop(),
              metrics=['accuracy'])

Model: "sequential_46"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_13 (Flatten)         (None, 196608)            0         
_________________________________________________________________
dense_133 (Dense)            (None, 128)               25165952  
_________________________________________________________________
dropout_90 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_134 (Dense)            (None, 64)                8256      
_________________________________________________________________
dropout_91 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_135 (Dense)            (None, 48)                3120      
Total params: 25,177,328
Trainable params: 25,177,328
Non-trainable params: 0
_________________________________________

In [162]:
total_sample = train_generator.n

In [163]:
n_epochs = 10
history = sequential_mod.fit_generator(
        train_generator, 
        steps_per_epoch=int(total_sample/batch_size),  
        epochs=n_epochs,
        verbose=1)

Epoch 1/10


ValueError: Shape mismatch: The shape of labels (received (1536,)) should equal the shape of logits except for the last dimension (received (32, 48)).

### Convolution Model

In [186]:
conv_model = tf.keras.models.Sequential()
conv_model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
conv_model.add(Conv2D(32, (3, 3), activation='relu'))
conv_model.add(MaxPooling2D(pool_size=(2, 2)))
conv_model.add(Dropout(0.20))
conv_model.add(Flatten())
conv_model.add(Dense(128, activation='relu'))
conv_model.add(Dropout(0.2))
conv_model.add(Dense(48, activation='softmax'))

conv_model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=tf.keras.optimizers.Adadelta(),
              metrics=['accuracy'])
conv_model.summary()

Model: "sequential_47"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_17 (Conv2D)           (None, 254, 254, 32)      896       
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 252, 252, 32)      9248      
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 126, 126, 32)      0         
_________________________________________________________________
dropout_92 (Dropout)         (None, 126, 126, 32)      0         
_________________________________________________________________
flatten_14 (Flatten)         (None, 508032)            0         
_________________________________________________________________
dense_136 (Dense)            (None, 128)               65028224  
_________________________________________________________________
dropout_93 (Dropout)         (None, 128)             

In [172]:
#prevent error due to file errors
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [187]:
n_epochs = 10
conv_history = conv_model.fit_generator(
        train_generator, 
        steps_per_epoch=int(total_sample/batch_size),  
        epochs=n_epochs,
        verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [176]:
conv_history.history

{'loss': [3.795672606896641,
  3.757952099986943,
  3.7581600013630863,
  3.7287140352157606,
  3.7049942029732406,
  3.666092145733717,
  3.64963339333643,
  3.6518405988214524,
  3.6174685336022185,
  3.5936855615170145],
 'accuracy': [0.048780486,
  0.05743509,
  0.059008654,
  0.07631786,
  0.0920535,
  0.092840284,
  0.08733281,
  0.093627065,
  0.101494886,
  0.107789144]}

In [195]:
n_epochs = 20
conv_aug_history = conv_model.fit_generator(
        train_augmented_generator, 
        steps_per_epoch=int(total_sample/batch_size),  
        epochs=n_epochs,
        verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
