**Import and structure the mosquito image dataset**

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys

In [0]:
from google.colab import drive

drive.mount('/content/gdrive')
dataset_path = 'gdrive/My Drive/MosquitoNet/featured only'

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
# import tensorflow and check for gpu
%tensorflow_version 2.x
import tensorflow as tf

# for AlexNet
from tensorflow import keras
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPool2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import BatchNormalization

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [0]:
# path to dataset
dataset_path = 'gdrive/My Drive/MosquitoNet/featured only'

# lists to hold all filenames
filenames_headlegs = []
filenames_palps = []
filenames_wings = []

# list to hold all labels
labels = []

# iterate through original dataset folder
root, dirs, _ = next(os.walk(dataset_path))
count = 0
for subdir in dirs:
  count += 1
  root_subdir, _, _ = next(os.walk(os.path.join(root, subdir)))

  # extract label for all images in folder
  fname = os.path.join(root_subdir, 'label.txt')
  with open(fname) as fp:
    label = fp.read().split('\n')[0]
  labels.append(label)

  #  save image filepaths
  fname = os.path.join(root_subdir, 'head-legs.bmp')
  filenames_headlegs.append(fname)

  fname = os.path.join(root_subdir, 'palps.bmp')
  filenames_palps.append(fname)

  fname = os.path.join(root_subdir, 'wings.bmp')
  filenames_wings.append(fname)

print("Total examples:" + str(len(labels)))

Total examples:1327


In [0]:
def import_image(fname):
  image_string = tf.io.read_file(fname)
  image = tf.image.decode_bmp(image_string, channels=3)
  image = tf.image.convert_image_dtype(image, tf.float32)
  return image

In [0]:
# dataset will be a list of dictionaries
dataset = []

# iterate through filenames in dataset, create dictionary of images/label, add to dataset list
for i in range(0,count):
  curr_entry = {}
  curr_entry["label"] = labels[i]
  curr_entry["headlegs"] = import_image(filenames_headlegs[i])
  curr_entry["wings"] = import_image(filenames_wings[i])
  curr_entry["palps"] = import_image(filenames_palps[i])
  dataset.append(curr_entry)
  sys.stdout.write("\rAdded %i files" % i)

Added 1326 files

In [0]:
import pickle

# save the dataset dictionary structure using pickle
filename = 'gdrive/My Drive/MosquitoNet/mosquito_dataset.pkl'
outfile = open(filename,'wb')
pickle.dump(dataset,outfile)
outfile.close()

In [0]:
import pickle 

# uncomment to import with pickle
filename = 'gdrive/My Drive/MosquitoNet/mosquito_dataset.pkl'
infile = open(filename,'rb')
dataset = pickle.load(infile)
infile.close()

**AlexNet model**

In [0]:
# define AlexNet as keras sequential model
# ref: https://medium.com/datadriveninvestor/cnn-architecture-series-alexnet-with-implementation-part-ii-7f7afa2ac66a

n_classes = 4

model = keras.models.Sequential([
    # conv layer 1
    Conv2D(filters=96, input_shape=(224,224,3), kernel_size=(11,11), strides=(4,4), padding="valid", activation = "relu"),
    MaxPool2D(pool_size=(3,3), strides=(2,2), padding="valid"),

    # conv layer 2
    Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), padding="same", activation = "relu"),
    MaxPool2D(pool_size=(3,3), strides=(2,2), padding="valid"),

    # added batchnorm
    BatchNormalization(),

    # conv layer 3
    Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding="same", activation = "relu"),

    # conv layer 4
    Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding="same", activation = "relu"),
    
    # conv layer 5
    Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), padding="same", activation = "relu"),
    MaxPool2D(pool_size=(3,3), strides=(2,2), padding="valid"),

    # added batchnorm
    BatchNormalization(),

    # dense layers
    Flatten(),
    Dense(units = 9216, activation = "relu"),
    Dense(units = 4096, activation = "relu"),
    Dense(4096, activation = "relu"),

    # output
    Dense(n_classes, activation = "softmax")
])

In [0]:
# configure the learning process
model.compile(
    # define optimizer and set learning rate
    optimizer=keras.optimizers.Adam(),

    # choose a loss function
    loss=keras.losses.CategoricalCrossentropy(),

    # choose a metric to report
    metrics=['accuracy']
)

**Subset headlegs images from dataset**

In [0]:
# extract headlegs images from dictionary
x_full = [np.array(example["headlegs"]) for example in dataset]
x_full.extend([np.array(example["wings"]) for example in dataset])
x_full.extend([np.array(example["palps"]) for example in dataset])
y_full = [np.array(example["label"]) for example in dataset]
y_full.extend([np.array(example["label"]) for example in dataset])
y_full.extend([np.array(example["label"]) for example in dataset])

# create splits
from  sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_full, y_full, test_size=0.2, shuffle=True, random_state=0)

# convert to numpy array
x_train = np.array([np.array(el) for el in x_train])
x_test = np.array([np.array(el) for el in x_test])
y_train = np.array([np.array(el) for el in y_train])
y_test = np.array([np.array(el) for el in y_test])

# print summary
print('x_full: ' + str(len(x_full)) + ' samples, type: ' + str(type(x_full)))
print('y_full: ' + str(len(y_full)) + ' samples, type: ' + str(type(y_full)))
print('x_train: ' + str(len(x_train)) + ' samples, shape: ' + str(x_train.shape))
print('y_train: ' + str(len(y_train)) + ' samples, shape: ' + str(y_train.shape))
print('x_test: ' + str(len(x_test)) + ' samples, shape: ' + str(x_test.shape))
print('y_test: ' + str(len(y_test)) + ' samples, shape: ' + str(y_test.shape))

x_full: 3981 samples, type: <class 'list'>
y_full: 3981 samples, type: <class 'list'>
x_train: 3184 samples, shape: (3184, 224, 224, 3)
y_train: 3184 samples, shape: (3184,)
x_test: 797 samples, shape: (797, 224, 224, 3)
y_test: 797 samples, shape: (797,)


**Train AlexNet**

In [0]:
# preprocess dataset with keras
import tensorflow.keras

# create one-hot label vectors
from sklearn.preprocessing import OneHotEncoder
onehotencoder = OneHotEncoder()
y_train_onehot = onehotencoder.fit_transform(y_train.reshape(-1,1)).toarray()
y_test_onehot = onehotencoder.fit_transform(y_test.reshape(-1,1)).toarray()


# create ImageDataGenerator instances for train & test
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rotation_range=40,
    horizontal_flip=True,
    vertical_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2
)

test_datagen = keras.preprocessing.image.ImageDataGenerator(
    rotation_range=40,
    horizontal_flip=True,
    vertical_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2
)


In [0]:
# training parameters
BATCH_SIZE = 32
STEPS_PER_EPOCH = np.ceil(len(x_train) / BATCH_SIZE)
EPOCHS = 30

# create generators
train_generator = train_datagen.flow(x_train, y_train_onehot, batch_size=BATCH_SIZE)
test_generator = test_datagen.flow(x_test, y_test_onehot, batch_size=BATCH_SIZE)

# train model
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_data=test_generator,
    validation_steps=np.ceil(len(x_test)/BATCH_SIZE)
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30

In [0]:
# visualize training
plt.figure(figsize=(15,5))

# plot training & validation loss values
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss vs. Epochs')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')

# plot training & validation accuracy values
plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy vs. Epochs')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')

# show plot
plt.show()