**Mount Drive**

In [0]:
# imports
import numpy as np
import matplotlib.pyplot as plt
import os
import sys

In [5]:
from google.colab import drive

drive.mount('/content/gdrive')
dataset_path = 'gdrive/My Drive/MosquitoNet/featured only'
save_path = 'gdrive/My Drive/MosquitoNet/zipped_data'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# loop through each folder: zip images, extract label from text file, save in new dir
labels = set()
root, dirs, _ = next(os.walk(dataset_path))
for subdir in dirs:
  root_subdir, _, _ = next(os.walk(os.path.join(root, subdir)))
  label_fname = os.path.join(root_subdir, 'label.txt')
  with open(label_fname) as fp:
    labels.add(fp.read())
print(labels)

In [0]:
display(labels)

{'Blood Fed\n', 'Gravid\n', 'Half Gravid\n', 'Unfed\n'}

**Tensorflow**

In [65]:
# import tensorflow and check for gpu
%tensorflow_version 2.x
import tensorflow as tf

# for AlexNet
from tensorflow import keras
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPool2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [0]:
# load dataset
dataset_path = 'gdrive/My Drive/MosquitoNet/featured only'

# lists to hold all filenames
filenames_headlegs = []
filenames_palps = []
filenames_wings = []

# list to hold all labels
labels = []

# iterate through original dataset folder
root, dirs, _ = next(os.walk(dataset_path))
count = 0
for subdir in dirs:
  count += 1
  root_subdir, _, _ = next(os.walk(os.path.join(root, subdir)))

  # extract label for all images in folder
  fname = os.path.join(root_subdir, 'label.txt')
  with open(fname) as fp:
    label = fp.read().split('\n')[0]
  labels.append(label)

  # extract headlegs image
  fname = os.path.join(root_subdir, 'head-legs.bmp')
  filenames_headlegs.append(fname)
  # file_content = tf.io.read_file(fname)
  # features_headlegs.append(tf.image.decode_bmp(file_content))
  # labels_headlegs.append(label)

  # extract palps image
  fname = os.path.join(root_subdir, 'palps.bmp')
  filenames_palps.append(fname)
  # file_content = tf.io.read_file(fname)
  # features_palps.append(tf.image.decode_bmp(file_content))
  # labels_palps.append(label)

  # extract wings image
  fname = os.path.join(root_subdir, 'wings.bmp')
  filenames_wings.append(fname)
  # file_content = tf.io.read_file(fname)
  # features_wings.append(tf.image.decode_bmp(file_content))
  # labels_wings.append(label)

  # print current count
  #sys.stdout.write("\rAdded %i files" % count)

In [42]:
print(len(labels),
      len(filenames_headlegs),
      len(filenames_palps),
      len(filenames_wings))

1327 1327 1327 1327


In [0]:
# function to parse filenames and extract images
def parse_function(filename, label):
  # read the data as bytes
  image_string = tf.io.read_file(filename)
  
  # convert byte data to image data
  image = tf.image.decode_bmp(image_string, channels=3)

  # convert to float values, i.e. range [0, 1]
  image = tf.image.convert_image_dtype(image, tf.float32)
  
  return image, label

def iterate_dataset_parse(filenames, labels):
  dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
  dataset = dataset.shuffle(len(filenames))
  dataset = dataset.map(parse_function, num_parallel_calls=4)
  return dataset

In [0]:
# get the datasets
dataset_headlegs = iterate_dataset_parse(filenames_headlegs, labels)
dataset_palps = iterate_dataset_parse(filenames_palps, labels)
dataset_wings = iterate_dataset_parse(filenames_wings, labels)

**AlexNet Model**

In [0]:
# define AlexNet as keras sequential model
# ref: https://medium.com/datadriveninvestor/cnn-architecture-series-alexnet-with-implementation-part-ii-7f7afa2ac66a

n_classes = 4

model = keras.models.Sequential([
    # conv layer 1
    Conv2D(filters=96, input_shape=(224,224,3), kernel_size=(11,11), strides=(4,4), padding="valid", activation = "relu"),
    MaxPool2D(pool_size=(3,3), strides=(2,2), padding="valid"),

    # conv layer 2
    Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), padding="same", activation = "relu"),
    MaxPool2D(pool_size=(3,3), strides=(2,2), padding="valid"),

    # conv layer 3
    Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding="same", activation = "relu"),

    # conv layer 4
    Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding="same", activation = "relu"),
    
    # conv layer 5
    Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), padding="same", activation = "relu"),
    MaxPool2D(pool_size=(3,3), strides=(2,2), padding="valid"),

    # dense layers
    Flatten(),
    Dense(units = 9216, activation = "relu"),
    Dense(units = 4096, activation = "relu"),
    Dense(4096, activation = "relu"),

    # output
    Dense(n_classes, activation = "softmax")
])

In [107]:
display(model.summary())

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_40 (Conv2D)           (None, 54, 54, 96)        34944     
_________________________________________________________________
max_pooling2d_24 (MaxPooling (None, 26, 26, 96)        0         
_________________________________________________________________
conv2d_41 (Conv2D)           (None, 26, 26, 256)       614656    
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 12, 12, 256)       0         
_________________________________________________________________
conv2d_42 (Conv2D)           (None, 12, 12, 384)       885120    
_________________________________________________________________
conv2d_43 (Conv2D)           (None, 12, 12, 384)       1327488   
_________________________________________________________________
conv2d_44 (Conv2D)           (None, 12, 12, 256)      

None

In [0]:
# configure the learning process
model.compile(
    # define optimizer and set learning rate
    optimizer=keras.optimizers.SGD(learning_rate=1e-2, nesterov=True),

    # choose a loss function
    loss=keras.losses.CategoricalCrossentropy(),

    # choose a metric to report
    metrics=['accuracy']
)

In [117]:
import tensorflow_datasets as tfds
dataset_headlegs_numpy = tfds.as_numpy(dataset_headlegs, graph=None)
display(dataset_headlegs_numpy)

# # train model
# history = model.fit(
#     dataset_headlegs,
#     epochs=2
# )

<generator object _eager_dataset_iterator at 0x7f7bc5eb1468>