In [1]:
from os import listdir
from os.path import isfile, isdir, join

In [4]:
# List the contents of the dataset root directory /data/.
!ls /data/

__MACOSX				     PlantCLEF2016Test
PlantCLEF2015TestDataWithAnnotations	     PlantCLEF2016Test.tar.gz
PlantCLEF2015TestDataWithAnnotations.tar.gz  RunFilesToolAndResults
PlantCLEF2015TrainingData		     RunFilesToolAndResults.zip
PlantCLEF2015TrainingData.tar.gz


In [5]:
# Directory that is scanned for training images and their XML metadata files.
train_path = "/data/PlantCLEF2015TrainingData/train"

# Index every JPEG in train_path by its stem (file name without ".jpg").
# Each entry stores the image path and the path where the matching XML file
# is expected; the XML file's existence is not checked here.
#  - endswith(".jpg") requires a real ".jpg" suffix, not just a name whose
#    last three letters happen to be "jpg".
#  - only the trailing ".jpg" is stripped, so a name containing extra dots
#    keeps its full stem instead of being cut at the first dot (which could
#    make two different images collide on the same key).
#  - names starting with "." (hidden files) are skipped.
files = {
    stem: {
        "image": join(train_path, stem + ".jpg"),
        "xml": join(train_path, stem + ".xml"),
    }
    for stem in (
        name[:-len(".jpg")]
        for name in listdir(train_path)
        if name.endswith(".jpg") and not name.startswith(".")
    )
}

print(len(files))

91758


In [6]:
import xml.etree.ElementTree as ET
from tqdm import tqdm


# Attach the parsed XML metadata to every indexed image: each direct child of
# the XML root becomes one entry, keyed by its tag with the element text as value.
for media_id in tqdm(files):
    record = files[media_id]
    xml_root = ET.parse(record['xml']).getroot()
    record['data'] = {node.tag: node.text for node in xml_root}

100%|██████████| 91758/91758 [00:15<00:00, 5773.30it/s]


In [7]:
from pprint import pprint

# Spot-check a single record (key '9733') to see the stored paths and parsed metadata.
pprint(files['9733'])

{'data': {'Author': 'daniel barthelemy',
          'ClassId': '4736',
          'Content': 'Leaf',
          'Date': '2013-11-3',
          'Family': 'Rosaceae',
          'Genus': 'Cydonia',
          'ImageId2014': '36922',
          'Latitude': '43.13079',
          'LearnTag': 'Train',
          'Location': 'Toulon',
          'Longitude': '5.9022',
          'MediaId': '9733',
          'ObservationId': '13689',
          'ObservationId2014': '7746',
          'Species': 'Cydonia oblonga Mill.',
          'Vote': '4',
          'YearInCLEF': 'PlantCLEF2014'},
 'image': '/data/PlantCLEF2015TrainingData/train/9733.jpg',
 'xml': '/data/PlantCLEF2015TrainingData/train/9733.xml'}


In [8]:
import json

# Write the image index (paths plus parsed metadata) to a JSON file in the
# current working directory.
with open("plant_clef_data.json", "w") as out_file:
    json.dump(files, out_file)

# Import Data to TensorFlow

In [9]:
import tensorflow as tf
import tensorflow_addons as tfa

In [10]:
# Parallel lists: the image path of every record and the value of its XML
# 'Content' field, which is used as the class label.
img_files = []
labels    = []

for f in files:
    img_files.append(files[f]['image'])
    labels.append(files[f]['data']['Content'])

# Image size and batch size used by the input pipeline and the model.
IMG_WIDTH = 150
IMG_HEIGHT = 150
BATCH_SIZE = 64

# Map each distinct label string to a dense integer id in first-seen order,
# and build the per-image list of ids (Labels) aligned with img_files.
# Membership is tested with `in` rather than the truthiness of Map.get(label):
# the first label receives id 0, which is falsy, so a truthiness test would
# treat it as unseen on every later occurrence and keep assigning it new ids.
Map = {}
count = 0
Labels = []

for label in labels:
    if label not in Map:
        Map[label] = count
        count += 1
    Labels.append(Map[label])
    

In [11]:
def decode_img(img):
    """Turn JPEG-encoded bytes into a fixed-size float32 image tensor.

    Args:
        img: the JPEG-encoded contents of one image file.

    Returns:
        The decoded 3-channel image, converted to float32 and resized with
        ``tf.image.resize`` to ``[IMG_WIDTH, IMG_HEIGHT]``.
    """
    target_size = [IMG_WIDTH, IMG_HEIGHT]
    # Decode the compressed bytes into a 3-channel uint8 tensor.
    pixels_uint8 = tf.image.decode_jpeg(img, channels=3)
    # Convert to float32; convert_image_dtype rescales uint8 values into [0, 1].
    pixels_float = tf.image.convert_image_dtype(pixels_uint8, tf.float32)
    # NOTE(review): tf.image.resize reads `size` as [height, width]; the
    # constants are passed width-first here, which is harmless only while
    # they are equal -- confirm before using a non-square size.
    return tf.image.resize(pixels_float, target_size)

In [12]:
def process_path(file_path):
    """Read one image file and return it decoded and resized.

    Args:
        file_path: path of the image file to read.

    Returns:
        The result of ``decode_img`` applied to the file's raw contents.
    """
    raw_bytes = tf.io.read_file(file_path)
    return decode_img(raw_bytes)

In [13]:
# Dataset with one element per entry of img_files (the image file paths).
list_ds = tf.data.Dataset.from_tensor_slices(img_files)

In [14]:
# Read, decode and resize each image lazily as the dataset is iterated.
# num_parallel_calls lets tf.data process several files concurrently instead
# of one at a time; element order is still deterministic by default, so the
# images stay aligned with the label dataset they are later zipped with.
image_ds = list_ds.map(
    process_path,
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
)

In [15]:
# Dataset of the integer class ids in Labels, which is built in the same
# order as img_files.
label_ds = tf.data.Dataset.from_tensor_slices(Labels)

In [16]:
# Pair every image with its label element-wise.
train_ds = tf.data.Dataset.zip((image_ds, label_ds))

# Shuffle with a 1024-element buffer, group into batches of BATCH_SIZE, and
# prefetch so that the next batches are prepared while the current one is
# being consumed by the training step (does not change element values or order).
train_ds = (
    train_ds
    .shuffle(1024)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# Model Creation

In [17]:
# Embedding network: three strided 2x2 convolutions, a 256-unit linear
# projection, and L2 normalization of the resulting embedding vectors.
model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3)))

# The three convolution layers are identical except for their stride.
for conv_stride in (2, 3, 2):
    model.add(tf.keras.layers.Conv2D(
        filters=64,
        kernel_size=(2, 2),
        strides=conv_stride,
        activation='relu',
        padding='same',
    ))

model.add(tf.keras.layers.Flatten())

# Linear projection (no activation) to the 256-dimensional embedding.
model.add(tf.keras.layers.Dense(256, activation=None))

# L2 normalize embeddings
model.add(tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 75, 75, 64)        832       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 25, 25, 64)        16448     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 13, 13, 64)        16448     
_________________________________________________________________
flatten (Flatten)            (None, 10816)             0         
_________________________________________________________________
dense (Dense)                (None, 256)               2769152   
_________________________________________________________________
lambda (Lambda)              (None, 256)               0         
Total params: 2,802,880
Trainable params: 2,802,880
Non-trainable params: 0
______________________________________________

In [19]:
# Configure training: Adam with its default settings and the semi-hard
# triplet loss from TensorFlow Addons.
model.compile(
    loss=tfa.losses.TripletSemiHardLoss(),
    optimizer=tf.keras.optimizers.Adam(),
)

In [None]:
# Train the network for 20 epochs on train_ds and keep the object returned by fit().
history = model.fit(x=train_ds, epochs=20)

Epoch 1/20
    503/Unknown - 181s 361ms/step - loss: 0.8997