# Get data

In [None]:
!wget https://storage.googleapis.com/aibootcamp/data/plants.zip

In [None]:
!ls

In [None]:
!unzip -q plants.zip

In [None]:
!ls

In [None]:
!rm plants.zip

In [None]:
!unzip -q plants/train.zip

In [None]:
!ls

In [None]:
!pip install Keras

In [None]:
!pip install tqdm

# Iterators

In [None]:
def fibonacci_generator():
  """Yield the Fibonacci sequence (0, 1, 1, 2, 3, ...) without ever stopping."""
  pair = (0, 1)
  while True:
    current, following = pair
    yield current
    # slide the window one step along the sequence
    pair = (following, current + following)
    

In [None]:
fib_gen = fibonacci_generator()

In [None]:
next(fib_gen)

In [None]:
next(fib_gen)

In [None]:
next(fib_gen)

In [None]:
next(fib_gen)

In [None]:
next(fib_gen)

# File moving

In [None]:
import os
from tqdm import tqdm

# Hold out a fixed number of images per class by moving them from
# train/<class>/ to validation/<class>/.
root_dir = 'train'
target_root = 'validation'
val_per_class = 12  # images held out of each class for validation

os.makedirs(target_root, exist_ok=True)

for plant in tqdm(os.listdir(root_dir)):
  plant_path = os.path.join(root_dir,plant)
  if not os.path.isdir(plant_path):
    # stray files in the train root are not class folders
    continue
  target_plant_path = os.path.join(target_root,plant)
  os.makedirs(target_plant_path, exist_ok=True)

  # Only top the class up to val_per_class, so re-running this cell does not
  # keep draining the training set.
  missing = max(0, val_per_class - len(os.listdir(target_plant_path)))

  # Slicing (rather than indexing 0..11) tolerates classes with fewer images.
  for fname in os.listdir(plant_path)[:missing]:
    source_path = os.path.join(plant_path,fname)
    dest_path = os.path.join(target_plant_path,fname)
    os.rename(source_path,dest_path)

In [None]:
!ls validation/Black-grass

# Keras generator & Logistic Regression

In [None]:
import datetime
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

In [None]:
import seaborn as sns
sns.set(); np.random.seed(0)
%matplotlib inline

In [None]:
from tqdm import tqdm

In [None]:
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import cv2

In [None]:
imgen = ImageDataGenerator(rescale=1/255)

In [None]:
train_generator = imgen.flow_from_directory('train',batch_size=32, target_size=(150,150))

In [None]:
validation_generator = imgen.flow_from_directory('validation',
                                                 batch_size=32, 
                                                 target_size=(150,150))

In [None]:
from keras.layers import Flatten,Dense, Activation
from keras.models import Sequential

In [None]:
model = Sequential()
model.add(Flatten(input_shape=(150,150,3)))
model.add(Dense(12))
model.add(Activation('softmax'))

In [None]:
model.compile(loss='categorical_crossentropy',optimizer='sgd', metrics = ['acc'])

In [None]:
# Take the step counts from the directory iterators instead of hard-coding
# the dataset sizes, so they stay correct if the train/validation split changes.
# max(1, ...) keeps Keras from being asked to run zero steps.
model.fit_generator(train_generator,
                    epochs=10,
                    steps_per_epoch=max(1, train_generator.samples // train_generator.batch_size),
                    validation_data=validation_generator,
                    validation_steps=max(1, validation_generator.samples // validation_generator.batch_size))

# Stack VGG

In [None]:
from keras.applications.vgg16 import VGG16

In [None]:
vgg_model = VGG16(include_top=False,input_shape=(150,150,3))

In [None]:
vgg_model.summary()

In [None]:
for layer in vgg_model.layers:
  layer.trainable = False

In [None]:
finetune = Sequential(layers = vgg_model.layers)

In [None]:
finetune.add(Flatten())
finetune.add(Dense(12))
finetune.add(Activation('softmax'))

In [None]:
finetune.summary()

In [None]:
finetune.compile(loss='categorical_crossentropy',
                 optimizer='adam', 
                 metrics = ['acc'])

In [None]:
# Take the step counts from the directory iterators instead of hard-coding
# the dataset sizes, so they stay correct if the train/validation split changes.
# max(1, ...) keeps Keras from being asked to run zero steps.
finetune.fit_generator(train_generator,
                    epochs=10,
                    steps_per_epoch=max(1, train_generator.samples // train_generator.batch_size),
                    validation_data=validation_generator,
                    validation_steps=max(1, validation_generator.samples // validation_generator.batch_size))

# Preprocess and Save VGG

In [None]:
!pip install bcolz

In [None]:
import bcolz
def save_array(fname, arr): 
  """Write `arr` to disk as a bcolz carray rooted at the path `fname`."""
  on_disk = bcolz.carray(arr, mode='w', rootdir=fname)
  # flush so the data is actually written out before we return
  on_disk.flush()
  
def load_array(fname): 
  """Open the bcolz carray stored at `fname` and return its full contents."""
  stored = bcolz.open(fname)
  # slicing with [:] reads the whole carray out of the on-disk container
  return stored[:]


In [None]:
# Run every training image through the frozen VGG16 base once and cache the
# resulting feature maps under train_proc/<class>/<file>.
source = 'train'
target = 'train_proc'

if not os.path.isdir(target):
  os.mkdir(target)

for plant in os.listdir(source):
  target_path = os.path.join(target,plant)
  if not os.path.isdir(target_path):
    os.mkdir(target_path)
  source_path = os.path.join(source,plant)
  print('Processing',plant)
  for file in tqdm(os.listdir(source_path)):
    img = cv2.imread(os.path.join(source_path,file))
    img = cv2.resize(img, (150, 150)) 
    # Scale pixels to [0, 1]. The validation features are computed with the
    # same scaling; without it the train and validation features come from
    # differently scaled inputs and are not comparable.
    img = img / 255
    # add the batch axis that predict() expects
    img = np.expand_dims(img,0)
    out = vgg_model.predict(img)
    save_array(os.path.join(target_path,file), out)

In [None]:
# Cache VGG16 feature maps for every validation image under
# validation_proc/<class>/<file>.
source = 'validation'
root_dir = 'validation_proc'

if not os.path.isdir(root_dir):
  os.mkdir(root_dir)

for plant in os.listdir(source):
  source_path = os.path.join(source,plant)
  target_path = os.path.join(root_dir,plant)
  if not os.path.isdir(target_path):
    os.mkdir(target_path)
  print('Processing',plant)
  for file in tqdm(os.listdir(source_path)):
    # load, resize to the network input size and scale pixels to [0, 1]
    img = cv2.resize(cv2.imread(os.path.join(source_path,file)), (150, 150)) / 255
    # add the batch axis before running the convolutional base
    out = vgg_model.predict(np.expand_dims(img,0))
    save_array(os.path.join(target_path,file), out)

In [None]:
ls

In [None]:
root_dir = 'train_proc'
dirs = os.listdir(root_dir)

In [None]:
paths = []
targets = []
for dir in dirs:
  path = os.path.join(root_dir,dir)
  for file in os.listdir(path):
    if file.endswith(".png"):
      paths.append(os.path.join(path,file))
      targets.append(dir)

In [None]:
nclasses = len(np.unique(targets))
nitems = len(targets)
print('Found {} items belonging to {} classes'.format(nitems,nclasses))

In [None]:
labelenc = LabelEncoder()
int_targets = labelenc.fit_transform(targets)
onehot_enc = OneHotEncoder(sparse=False)
int_targets = int_targets.reshape(len(int_targets), 1)
onehot_targets = onehot_enc.fit_transform(int_targets)

In [None]:
indices = np.arange(len(paths))
np.random.shuffle(indices)

In [None]:
def bcz_imgen(root_dir, batch_size = 32): 
  """Endlessly yield (features, one-hot labels) batches from saved arrays.

  `root_dir` must contain one sub-directory per class; every entry inside a
  class directory is loaded with `load_array`. The sample order is shuffled
  once up front and reused on every pass. A trailing group smaller than
  `batch_size` at the end of a pass is dropped.
  """
  paths, targets = [], []
  for class_name in os.listdir(root_dir):
    class_dir = os.path.join(root_dir, class_name)
    for entry in os.listdir(class_dir):
      paths.append(os.path.join(class_dir, entry))
      targets.append(class_name)

  # class names -> integer ids -> one-hot rows
  encoded = LabelEncoder().fit_transform(targets)
  encoded = encoded.reshape(len(encoded), 1)
  onehot_targets = OneHotEncoder(sparse=False).fit_transform(encoded)

  order = np.arange(len(paths))
  np.random.shuffle(order)

  while True:
    image_stack, target_stack = [], []
    for idx in order:
      image_stack.append(load_array(paths[idx]))
      target_stack.append(onehot_targets[idx])

      if len(image_stack) != batch_size:
        continue

      # each stored array is joined along its first axis to form the batch
      yield np.concatenate(image_stack, axis=0), np.stack(target_stack)
      image_stack, target_stack = [], []

In [None]:
train_gen = bcz_imgen('train_proc')
val_gen = bcz_imgen('validation_proc')

In [None]:
# pull one batch from the generator created above (`gen` is not defined yet here)
yld, tar = next(train_gen)

In [None]:
tar.shape

In [None]:
from keras.layers import Conv2D, Dropout
model = Sequential()
#model.add(Conv2D(256,1,input_shape=(4,4,512)))
#model.add(Activation('relu'))
#model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(12))
model.add(Activation('softmax'))

In [None]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc'])

In [None]:
model.fit_generator(train_gen,
                    epochs=2,
                    steps_per_epoch= 5515 // 32, 
                    validation_data=val_gen, 
                    validation_steps= 144//32)

In [None]:
os.listdir('train')

# Custom Generator

In [None]:
!wget https://storage.googleapis.com/aibootcamp/data/plants.zip

In [None]:
!unzip plants.zip

In [None]:
!unzip plants/train.zip

# Rule based preprocessing

In [None]:
def cieluv(img, target):
    """Per-pixel squared colour distance between `img` and one reference colour.

    Adapted from https://www.compuphase.com/cmetric.htm. Despite the name, no
    colour-space conversion happens here: the result is a weighted sum of the
    squared per-channel differences, with the first- and third-channel weights
    depending on the mean of the first channel.

    `img` is indexed as img[:, :, 0..2]; `target` is a 3-item colour in the
    same channel order. Returns a 2-D integer array. The final square root is
    left out for speed, so compare the result against squared thresholds.
    """
    pixels = img.astype('int')  # widen so differences and squares cannot wrap
    ref_r, ref_g, ref_b = target
    red, green, blue = (pixels[:, :, ch] for ch in range(3))

    mean_red = ((red + ref_r) / 2.).astype('int')

    diff_r = red - ref_r
    diff_g = green - ref_g
    diff_b = blue - ref_b

    # >> 8 divides the weighted terms by 256 in integer arithmetic
    weighted_r = ((512 + mean_red) * (diff_r * diff_r)) >> 8
    weighted_b = ((767 - mean_red) * (diff_b * diff_b)) >> 8

    return weighted_r + 4 * (diff_g * diff_g) + weighted_b

In [None]:
import matplotlib.pyplot as plt

In [None]:
def ocv_imgen(root_dir,batch_size = 32, 
              rescale = 1/255, 
              target_size = (150,150)):
  """Endlessly yield (images, one-hot labels) batches with colour masking.

  Each image is read with matplotlib, resized, masked so that pixels far from
  all four reference colours become black, and median-blurred.

  Args:
    root_dir: directory containing one sub-directory per class.
    batch_size: number of images per yielded batch.
    rescale: factor every pixel value is multiplied by (same meaning as the
      `rescale` argument of Keras' ImageDataGenerator).
    target_size: (height, width) the images are resized to.

  Yields:
    Tuple (images, targets): images has shape (batch_size, height, width, 3),
    targets holds the matching one-hot rows. A trailing group smaller than
    `batch_size` at the end of a pass is dropped.

  Raises:
    ValueError: on the first `next()` if `root_dir` holds fewer images than
      `batch_size`; no full batch could ever be formed and the generator
      would otherwise spin forever.
  """
  paths = []
  targets = []
  for class_name in os.listdir(root_dir):
    class_dir = os.path.join(root_dir,class_name)
    for fname in os.listdir(class_dir):
      paths.append(os.path.join(class_dir,fname))
      targets.append(class_name)

  if len(paths) < batch_size:
    raise ValueError(
      'Found {} images in {!r}, fewer than batch_size={}'.format(
        len(paths), root_dir, batch_size))

  labelenc = LabelEncoder()
  int_targets = labelenc.fit_transform(targets)
  onehot_enc = OneHotEncoder(sparse=False)
  int_targets = int_targets.reshape(len(int_targets), 1)
  onehot_targets = onehot_enc.fit_transform(int_targets)

  # shuffled once; the same order is reused on every pass
  indices = np.arange(len(paths))
  np.random.shuffle(indices)

  # cv2.resize wants (width, height) while target_size is (height, width)
  dsize = (target_size[1], target_size[0])

  while True:
    image_stack = []
    target_stack = []
    for index in indices:
      path = paths[index]
      target = onehot_targets[index]

      img = plt.imread(path)
      # assumes plt.imread returned floats in [0, 1] (as it does for PNG);
      # convert to 8-bit and drop any alpha channel
      img = np.round(img * 255).astype('ubyte')[:,:,:3]
      img = cv2.resize(img, dsize)

      # True where the pixel is far from every one of the reference colours
      img_filter = (
        (cieluv(img, (71, 86, 38)) > 1600)
        & (cieluv(img, (65,  79,  19)) > 1600)
        & (cieluv(img, (95,  106,  56)) > 1600)
        & (cieluv(img, (56,  63,  43)) > 500)
      )

      img[img_filter] = 0
      img = cv2.medianBlur(img, 9)

      image_stack.append(img)
      target_stack.append(target)
      if len(image_stack) == batch_size:
        images = np.stack(image_stack)
        # rescale is a multiplier; dividing by 1/255 would blow values up by 255
        images = np.multiply(images,rescale)
        yield images, np.stack(target_stack)
        image_stack = []
        target_stack = []

In [None]:
train_gen = ocv_imgen('train', batch_size=32)
val_gen = ocv_imgen('validation', batch_size=32)

In [None]:
finetune.fit_generator(train_gen,
                    epochs=10,
                    steps_per_epoch= 5515 // 32, 
                    validation_data=val_gen, 
                    validation_steps= 144//32)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# `gen` is not defined at this point; inspect a batch from the custom generator
a,b = next(train_gen)

In [None]:
a.shape

In [None]:
plt.rcParams['image.cmap'] = 'gray'
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('dark_background')
plt.imshow(a[0])

# Benchmarking generators, not used in book

In [None]:
# Keras generator: time 1000 batch fetches, in milliseconds

times = []
for i in tqdm(range(1000)):
  start = datetime.datetime.now()
  # the Keras directory iterator is named train_generator (`generator` never existed)
  dat = next(train_generator)
  end = datetime.datetime.now()
  diff = end - start
  ms_elapsed = diff.total_seconds() * 1000
  times.append(ms_elapsed)


In [None]:
sns.distplot(times)

In [None]:
mean_t = np.mean(times)
max_t = np.max(times)
min_t = np.min(times)
std_t = np.std(times)
print('Mean time: {:f} \n Max time: {:f} \n Min time: {:f} \n Standard deviation: {:f}'.format(mean_t,max_t,min_t,std_t))

In [None]:
# Custom generator: time 1000 batch fetches, in milliseconds
times = []
for i in tqdm(range(1000)):
  start = datetime.datetime.now()
  
  # train_gen is the custom generator built earlier (`gen` is not defined yet here)
  dat = next(train_gen)
  
  end = datetime.datetime.now()
  diff = end - start
  ms_elapsed = diff.total_seconds() * 1000
  times.append(ms_elapsed)


In [None]:
sns.distplot(times)

In [None]:
mean_t = np.mean(times)
max_t = np.max(times)
min_t = np.min(times)
std_t = np.std(times)
print('Mean time: {:f} \n Max time: {:f} \n Min time: {:f} \n Standard deviation: {:f}'.format(mean_t,max_t,min_t,std_t))

# Random Augmentation

In [None]:
train_datagen = ImageDataGenerator(
  rescale = 1/255,
  rotation_range=90,
  width_shift_range=0.2,
  height_shift_range=0.2,
  shear_range=0.2,
  zoom_range=0.1,
  horizontal_flip=True,
  fill_mode='nearest')


In [None]:
!ls train/Charlock

In [None]:
from keras.preprocessing import image
fname = 'train/Charlock/270209308.png'

In [None]:
img = image.load_img(fname, target_size=(150, 150))

In [None]:
img = image.img_to_array(img)

In [None]:
img = np.expand_dims(img,axis=0)

In [None]:
import matplotlib.pyplot as plt

gen = train_datagen.flow(img, batch_size=1)


for i in range(4):
    plt.figure(i)
    batch = next(gen)
    imgplot = plt.imshow(image.array_to_img(batch[0]))
    
plt.show()

In [None]:
train_datagen.flow(img, batch_size=1)

In [None]:
batch.shape

In [None]:
!pip install git+https://github.com/aleju/imgaug

In [None]:
import imgaug as ia
from imgaug import augmenters as iaa
import numpy as np

ia.seed(1)

# Example batch of images.
# The array has shape (32, 64, 64, 3) and dtype uint8.
images = np.array(
    [ia.quokka(size=(64, 64)) for _ in range(32)],
    dtype=np.uint8
)

seq = iaa.Sequential([
    iaa.Fliplr(0.5), # horizontal flips
    iaa.Crop(percent=(0, 0.1)), # random crops
    # Small gaussian blur with random sigma between 0 and 0.5.
    # But we only blur about 50% of all images.
    iaa.Sometimes(0.5,
        iaa.GaussianBlur(sigma=(0, 0.5))
    ),
    # Strengthen or weaken the contrast in each image.
    iaa.ContrastNormalization((0.75, 1.5)),
    # Add gaussian noise.
    # For 50% of all images, we sample the noise once per pixel.
    # For the other 50% of all images, we sample the noise per pixel AND
    # channel. This can change the color (not only brightness) of the
    # pixels.
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
    # Make some images brighter and some darker.
    # In 20% of all cases, we sample the multiplier once per channel,
    # which can end up changing the color of the images.
    iaa.Multiply((0.8, 1.2), per_channel=0.2),
    
], random_order=True) # apply augmenters in random order

images_aug = seq.augment_images(images)

In [None]:
batch = seq.augment_images(img)
# img holds floats in the 0..255 range; imshow clips float RGB data to [0, 1],
# so convert to a PIL image first (as the loop below does)
plt.imshow(image.array_to_img(batch[0]))

In [None]:
for i in range(4):
    plt.figure(i)
    batch = seq.augment_images(img)
    imgplot = plt.imshow(image.array_to_img(batch[0]))
    
plt.show()

In [None]:
def ocv_imgen_aug(root_dir,batch_size = 32, 
                  rescale = 1/255, 
                  target_size = (150,150)):
  """Endlessly yield augmented (images, one-hot labels) batches.

  Images are read with OpenCV, resized, stacked into a batch, passed through
  the module-level imgaug pipeline `seq`, and finally multiplied by `rescale`.

  Args:
    root_dir: directory containing one sub-directory per class.
    batch_size: number of images per yielded batch.
    rescale: factor every pixel value is multiplied by (same meaning as the
      `rescale` argument of Keras' ImageDataGenerator).
    target_size: (height, width) the images are resized to.

  Yields:
    Tuple (images, targets). A trailing group smaller than `batch_size` at
    the end of a pass is dropped.

  Raises:
    ValueError: on the first `next()` if `root_dir` holds fewer images than
      `batch_size`; no full batch could ever be formed and the generator
      would otherwise spin forever.
  """
  paths = []
  targets = []
  for class_name in os.listdir(root_dir):
    class_dir = os.path.join(root_dir,class_name)
    for fname in os.listdir(class_dir):
      paths.append(os.path.join(class_dir,fname))
      targets.append(class_name)

  if len(paths) < batch_size:
    raise ValueError(
      'Found {} images in {!r}, fewer than batch_size={}'.format(
        len(paths), root_dir, batch_size))

  labelenc = LabelEncoder()
  int_targets = labelenc.fit_transform(targets)
  onehot_enc = OneHotEncoder(sparse=False)
  int_targets = int_targets.reshape(len(int_targets), 1)
  onehot_targets = onehot_enc.fit_transform(int_targets)

  # shuffled once; the same order is reused on every pass
  indices = np.arange(len(paths))
  np.random.shuffle(indices)

  # cv2.resize wants (width, height) while target_size is (height, width)
  dsize = (target_size[1], target_size[0])

  while True:
    image_stack = []
    target_stack = []
    for index in indices:
      path = paths[index]
      target = onehot_targets[index]

      # NOTE: OpenCV loads images in BGR channel order
      img = cv2.imread(path)
      img = cv2.resize(img, dsize)

      image_stack.append(img)
      target_stack.append(target)
      if len(image_stack) == batch_size:
        images = np.stack(image_stack)

        # augment the whole batch at once, before scaling
        images = seq.augment_images(images)

        # rescale is a multiplier; dividing by 1/255 would blow values up by 255
        images = np.multiply(images,rescale)
        yield images, np.stack(target_stack)
        image_stack = []
        target_stack = []

In [None]:
gen = ocv_imgen_aug('train')

In [None]:
a, b = next(gen)

In [None]:
plt.imshow(a[2])

In [None]:
import numpy as np

from keras.applications.vgg16 import VGG16
from keras import backend as K


In [None]:
# dimensions of the generated pictures for each filter.
img_width = 224
img_height = 224


layer_name = 'block1_conv1'

# For conv layers
filter_index = 0

# For final layer
output_index = 184

In [None]:
# build the VGG16 network with ImageNet weights
model = VGG16(weights='imagenet')
print('Model loaded.')

model.summary()


In [None]:
# look-up table from layer name to layer object, leaving out the first
# entry of model.layers
layer_dict = {layer.name: layer for layer in model.layers[1:]}

In [None]:
def normalize(x):
    # utility function: scale a tensor by its root-mean-square value;
    # the epsilon keeps the division finite when the tensor is all zeros
    rms = K.sqrt(K.mean(K.square(x)))
    return x / (rms + K.epsilon())

In [None]:
# NOTE(review): the message prints filter_index, but the loss that is active
# below is driven by output_index.
print('Processing filter %d' % filter_index)


# output tensor of the layer selected by layer_name; only needed by the
# conv-layer loss that is commented out further down
layer_output = layer_dict[layer_name].output

# Active loss: mean of unit `output_index` of the model's final output.
# Comment this line out when switching to the conv-layer loss below.
loss = K.mean(model.output[:, output_index])


# Alternative loss for conv layers: mean activation of filter `filter_index`
# of the selected layer. Uncomment to use it instead of the loss above.
#loss = K.mean(layer_output[:, :, :, filter_index])


# this is the placeholder for the input images
input_img = model.input
# we compute the gradient of this loss with respect to the input picture
grads = K.gradients(loss, input_img)[0]



# normalization trick: we normalize the gradient
grads = normalize(grads)



# this function returns the loss and grads given the input picture
iterate = K.function([input_img], [loss, grads])




In [None]:
input_img_data = np.random.rand(1,img_height,img_width,3)

In [None]:
# step size for gradient ascent
alpha = 0.01


# we run gradient ascent for up to 500 steps, updating the input image in place
for i in range(500):
    loss_value, grads_value = iterate([input_img_data])
    # move the image along the gradient to increase the loss
    input_img_data += grads_value * alpha

    print('Current loss value:', loss_value)
    if loss_value <= 0.:
        # the loss got stuck at (or below) zero, so stop early
        print('warning zero loss')
        break

In [None]:
def deprocess_image(x):
    """Turn an optimised input array into a displayable uint8 image.

    The values are centred on 0, scaled to a standard deviation of 0.1,
    shifted by 0.5, clipped to [0, 1] and converted to the 0..255 range.

    Args:
        x: numeric array, e.g. one image taken from the optimised input batch.

    Returns:
        A new uint8 array with the same shape as `x`. The input is left
        untouched.
    """
    # Work on a private float copy: the in-place maths below would otherwise
    # overwrite the caller's data (a slice of a batch is a view on the batch)
    # and cannot run on an integer array at all.
    x = np.asarray(x)
    if np.issubdtype(x.dtype, np.floating):
        x = x.copy()
    else:
        x = x.astype(float)

    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + K.epsilon())
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to the 0..255 range; the values are already inside it after
    # the clip above, so no second clip is needed
    x *= 255

    return x.astype('uint8')

In [None]:
img = deprocess_image(input_img_data[0])
    

In [None]:
import matplotlib.pyplot as plt
plt.style.use(['dark_background'])
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111) 
plt.imshow(img)
ax.grid(False)

In [None]:
# scipy.misc.imsave no longer exists in recent SciPy releases; fall back to
# matplotlib's imsave, which takes the same (filename, array) arguments.
try:
  from scipy.misc import imsave
except ImportError:
  from matplotlib.pyplot import imsave

In [None]:
imsave('test.png',img)

In [None]:
!ls

In [None]:
from google.colab import files


files.download('test.png')