In [None]:
! rm -rf data/aerial-cactus-identification
! pip install kaggle
! kaggle competitions download -c aerial-cactus-identification -p data/
! unzip data/aerial-cactus-identification.zip -d data/aerial-cactus-identification
! unzip data/aerial-cactus-identification/train.zip -d data/aerial-cactus-identification
! unzip data/aerial-cactus-identification/test.zip -d data/aerial-cactus-identification

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.applications import imagenet_utils
import shutil
import os

In [3]:
train_set_path = "data/aerial-cactus-identification/train"
test_set_path = "data/aerial-cactus-identification"

In [4]:
train_set = pd.read_csv("data/aerial-cactus-identification/train.csv",sep=",", encoding="ISO-8859-1", low_memory=False,)

In [5]:
train_root_path = 'data/aerial-cactus-identification/train/'
valid_root_path = 'data/aerial-cactus-identification/valid/'

In [6]:
if os.path.isdir(train_root_path+'no_cactus') is False:
    os.makedirs(train_root_path+'no_cactus')
if os.path.isdir(train_root_path+'has_cactus') is False:
    os.makedirs(train_root_path+'has_cactus')
tam = len(list(train_set.itertuples()))
for i in list(train_set.itertuples())[:-5000]:
    if i.has_cactus == 1:
        shutil.move(train_root_path+i.id, train_root_path+'has_cactus')
    elif i.has_cactus == 0:
        shutil.move(train_root_path+i.id, train_root_path+'no_cactus')

In [7]:
if os.path.isdir(valid_root_path+'no_cactus') is False:
    os.makedirs(valid_root_path+'no_cactus')
if os.path.isdir(valid_root_path+'has_cactus') is False:
    os.makedirs(valid_root_path+'has_cactus')
for i in list(train_set.itertuples())[-5000:]:
    if i.has_cactus == 1:
        shutil.move(train_root_path+i.id, valid_root_path+'has_cactus')
    elif i.has_cactus == 0:
        shutil.move(train_root_path+i.id, valid_root_path+'no_cactus')

In [None]:
train_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input) \
    .flow_from_directory(directory=train_set_path, target_size=(224,224), classes=['no_cactus','has_cactus'], batch_size=10)
vald_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input) \
    .flow_from_directory(directory=valid_root_path, target_size=(224,224), classes=['no_cactus','has_cactus'], batch_size=10)
test_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input) \
    .flow_from_directory(directory=test_set_path, target_size=(224,224), classes=['test'], batch_size=10, shuffle=False)

Found 12500 images belonging to 2 classes.
Found 5000 images belonging to 2 classes.
Found 4000 images belonging to 1 classes.


In [None]:
class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
      if(logs.get('accuracy')>=0.9):
          self.model.stop_training = True
model = Sequential()
vgg16_model = tf.keras.applications.vgg16.VGG16()

for layer in vgg16_model.layers[:-1]:
  model.add(layer)

for layer in model.layers:
  layer.trainable = False

model.add(keras.layers.Dense(2, activation='sigmoid'))
model.compile(optimizer=Adam(), 
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])
model.fit(x=train_batches, epochs=10,callbacks=[myCallback()], validation_data=vald_batches, verbose=2)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Epoch 1/10
1250/1250 - 9229s - loss: 0.1226 - accuracy: 0.9530 - val_loss: 0.0911 - val_accuracy: 0.9632


<tensorflow.python.keras.callbacks.History at 0x7f7994ff6be0>

In [11]:
train_batches = ImageDataGenerator(rescale=1. / 255, horizontal_flip=True, vertical_flip=True) \
    .flow_from_directory(directory=train_set_path, target_size=(224,224), classes=['no_cactus','has_cactus'], batch_size=10)
vald_batches = ImageDataGenerator(rescale=1. / 255, horizontal_flip=True, vertical_flip=True) \
    .flow_from_directory(directory=valid_root_path, target_size=(224,224), classes=['no_cactus','has_cactus'], batch_size=10)
test_batches = ImageDataGenerator(rescale=1. / 255, horizontal_flip=True, vertical_flip=True) \
    .flow_from_directory(directory=test_set_path, target_size=(224,224), classes=['test'], batch_size=10, shuffle=False)
class myCallback2(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
      if(logs.get('accuracy')>=0.9):
          self.model.stop_training = True
model2 = Sequential()
input_shape = train_batches.image_shape
model2.add(tf.keras.layers.Conv2D(32, kernel_size=3, activation='sigmoid', input_shape=input_shape))
model2.add(tf.keras.layers.BatchNormalization())
model2.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(3,3), padding="same"))
model2.add(tf.keras.layers.Dropout(0.6))
model2.add(tf.keras.layers.Flatten())
model2.add(tf.keras.layers.Dense(128, activation='sigmoid'))
model2.add(tf.keras.layers.Dense(2, activation='sigmoid'))
model2.compile(optimizer=Adam(), 
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])
model2.fit(x=train_batches, epochs=10,callbacks=[myCallback2()], validation_data=vald_batches)

Found 12500 images belonging to 2 classes.
Found 5000 images belonging to 2 classes.
Found 4000 images belonging to 1 classes.
Epoch 1/10


<tensorflow.python.keras.callbacks.History at 0x7f28c7168da0>