In [None]:
import glob
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Activation, Dropout, BatchNormalization
import numpy as np
import matplotlib.pyplot as plt
import wandb
from wandb.keras import WandbCallback

In [None]:
# Seed NumPy's and TensorFlow's global random generators with the same value.
seed = 100
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# Authenticate this session with Weights & Biases before any run or sweep is created.
wandb.login()

### Downloading and unzipping iNaturalist Dataset

In [None]:
# -nc (no-clobber): skip the download when the archive already exists, so re-running the notebook does not fetch a duplicate copy.
!wget -nc https://storage.googleapis.com/wandb_datasets/nature_12K.zip

In [None]:
# -n: never overwrite already-extracted files (avoids an interactive prompt on re-run); -q: keep the cell output short.
!unzip -n -q "./nature_12K.zip"

### Preparing data for training

In [None]:
def train_dataset(augmentation=False, batch_size=64, image_size=200):
    """Build the training, validation and test image iterators.

    The training directory is split 90/10 into a training and a validation
    subset; the test iterator reads the separate ``val`` directory.

    Args:
        augmentation: When true, random zoom / rotation / brightness / shear /
            shift / flip transforms are applied to the training subset only.
        batch_size: Number of images per batch for all three iterators.
        image_size: Side length in pixels that every image is resized to.

    Returns:
        Tuple ``(train, val, test)`` of directory iterators.
    """
    dir_train = './inaturalist_12K/train'
    dir_test = './inaturalist_12K/val'
    validation_split = 0.1

    augment_kwargs = {}
    if augmentation:
        augment_kwargs = dict(zoom_range=0.3,
                              rotation_range=50,
                              brightness_range=(0.2, 0.8),
                              shear_range=0.2,
                              width_shift_range=0.1,
                              height_shift_range=0.2,
                              horizontal_flip=True,
                              vertical_flip=True)

    train_datagen = ImageDataGenerator(rescale=1./255,
                                       validation_split=validation_split,
                                       **augment_kwargs)
    # Validation images must stay un-augmented so the validation metric measures
    # real images. A second generator with the same validation_split on the same
    # directory is used for the "validation" subset.
    val_datagen = ImageDataGenerator(rescale=1./255, validation_split=validation_split)
    test_datagen = ImageDataGenerator(rescale=1./255)

    flow_kwargs = dict(target_size=(image_size, image_size), batch_size=batch_size)
    train_iter = train_datagen.flow_from_directory(dir_train, subset="training", **flow_kwargs)
    val_iter = val_datagen.flow_from_directory(dir_train, subset="validation", **flow_kwargs)
    test_iter = test_datagen.flow_from_directory(dir_test, **flow_kwargs)

    return train_iter, val_iter, test_iter

In [None]:
def CNN(n_filters, filter_multiplier, dropout, batch_norm, dense_size, act_func= "relu", n_classes=10, image_size=200):
    """Build a five-block convolutional classifier.

    Each block is Conv2D -> (optional BatchNormalization) -> activation ->
    2x2 max pooling, with square kernels of side 10, 8, 6, 4 and 2. The
    blocks are followed by Flatten -> Dense -> activation -> Dropout and a
    softmax output layer.

    Args:
        n_filters: Number of filters in the first convolution.
        filter_multiplier: Factor applied to the filter count after every
            block (e.g. 2 doubles, 0.5 halves); the count never drops below 1.
        dropout: Dropout rate applied after the dense layer's activation.
        batch_norm: Whether to insert BatchNormalization after each convolution.
        dense_size: Number of units in the hidden dense layer.
        act_func: ``"leaky"`` for LeakyReLU(alpha=0.3), otherwise any Keras
            activation name such as ``"relu"``.
        n_classes: Number of output classes.
        image_size: Side length of the square RGB input images.

    Returns:
        The uncompiled ``Sequential`` model.
    """

    def activation_layer():
        # "leaky" is not a Keras activation string, so it needs a dedicated layer.
        if act_func == "leaky":
            return keras.layers.LeakyReLU(alpha=0.3)
        return Activation(act_func)

    model = Sequential()
    for i in range(5):
        filter_dim = 10 - 2*i
        filter_size = (filter_dim, filter_dim)
        if i == 0:
            model.add(Conv2D(n_filters, filter_size, input_shape=(image_size, image_size, 3), data_format="channels_last"))
        else:
            model.add(Conv2D(n_filters, filter_size))
        if batch_norm:
            model.add(BatchNormalization())
        model.add(activation_layer())
        model.add(MaxPooling2D(pool_size=(2,2)))
        # Update the running filter count itself so the multiplier actually
        # affects the next block.
        n_filters = max(1, int(n_filters * filter_multiplier))

    model.add(Flatten())
    model.add(Dense(dense_size))
    # Activation first, then dropout: for relu / leaky relu this matches the
    # previous dropout-then-activation order, and it stays sensible for other
    # activations.
    model.add(activation_layer())
    model.add(Dropout(dropout))
    model.add(Dense(n_classes))
    model.add(Activation("softmax"))

    return model

### Visualizing some training images

In [None]:
# Build un-augmented iterators and pull one batch to preview.
train, val, test = train_dataset(augmentation=False, batch_size=64)
# The builtin next() works on any iterator, including Keras versions whose
# directory iterator no longer exposes a `.next()` method.
img = next(train)

In [None]:
# Label index -> class name, used to caption each preview image.
index_to_class = {0: 'Amphibia', 1: 'Animalia', 2: 'Arachnida', 3: 'Aves', 4: 'Fungi',
                  5: 'Insecta', 6: 'Mammalia', 7: 'Mollusca', 8: 'Plantae', 9: 'Reptilia'}

# Show the first 64 images of the batch on an 8x8 grid, each captioned with its class.
batch_images, batch_labels = img[0], img[1]
fig, axes = plt.subplots(8, 8, figsize=(15, 15))
for position, ax in enumerate(axes.flat):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(batch_images[position])
    ax.set_xlabel(index_to_class[np.argmax(batch_labels[position])])
plt.show()

### Function to set WandB run name

In [None]:
def setWandbName(n_filters, filter_multiplier, augment, dropout, batch_norm):
    """Compose a W&B run name from the hyperparameters of a run.

    The name is a ``_``-joined sequence of tag/value pairs, e.g.
    ``setWandbName(64, 2, True, 0.3, True)`` gives
    ``"num_64_org_2_aug_Y_drop_0.3_norm_Y"``. ``augment`` and ``batch_norm``
    must be booleans; they are rendered as ``Y`` / ``N``.
    """
    yes_no = {True: "Y", False: "N"}

    tagged_values = (
        ("num", str(n_filters)),
        ("org", str(filter_multiplier)),
        ("aug", yes_no[augment]),
        ("drop", str(dropout)),
        ("norm", yes_no[batch_norm]),
    )

    return "_".join(tag + "_" + value for tag, value in tagged_values)

### Functions to train the model on the dataset

In [None]:
def train_wandb(config= None):
    """Train one CNN inside a W&B run, reading hyperparameters from ``wandb.config``.

    Meant to be the function a sweep agent calls. The run's config must provide
    ``n_filters``, ``filter_multiplier``, ``augment_data``, ``dropout``,
    ``batch_norm``, ``batch_size``, ``dense_size``, ``lr`` and ``epochs``.

    Args:
        config: Optional dict of hyperparameters handed to ``wandb.init``, so
            the function can also be run outside a sweep.
    """
    # The context manager closes the run when training finishes or raises.
    with wandb.init(config=config, project="Convolutional Neural Networks", entity="cs21s048-cs21s058"):
        config = wandb.config
        wandb.run.name = setWandbName(config.n_filters, config.filter_multiplier, config.augment_data,
                                      config.dropout, config.batch_norm)

        # The test iterator is not needed while tuning hyperparameters.
        train_data, val_data, _ = train_dataset(augmentation=config.augment_data, batch_size=config.batch_size)

        # NOTE(review): the sweep also samples an `activation` value, but it is not
        # forwarded to CNN here; pass it as `act_func` once CNN is confirmed to
        # accept every swept value.
        model = CNN(n_filters=config.n_filters, filter_multiplier=config.filter_multiplier,
                    dropout=config.dropout, batch_norm=config.batch_norm, dense_size=config.dense_size)
        # `metrics` is passed as a list, the form accepted by every Keras version.
        model.compile(optimizer=keras.optimizers.Adam(config.lr), loss="categorical_crossentropy",
                      metrics=["categorical_accuracy"])
        model.fit(train_data, epochs=config.epochs, validation_data=val_data, callbacks=[WandbCallback()])

In [None]:
def train(config= None):
    """Train the CNN with the best hyperparameters found and evaluate it on the test set.

    Args:
        config: Optional mapping that overrides any of the defaults below
            (keys: ``n_filters``, ``filter_multiplier``, ``augment_data``,
            ``dropout``, ``batch_norm``, ``batch_size``, ``epochs``,
            ``dense_size``, ``lr``). With ``None`` the defaults are used as is.
    """
    best_config = {
        "n_filters": 64,
        "filter_multiplier": 2,
        "augment_data": True,
        "dropout": 0.3,
        "batch_norm": True,
        "batch_size": 128,
        "epochs": 20,
        "dense_size": 32,
        "lr": 0.001,
    }
    settings = {**best_config, **(config or {})}

    # Local names differ from the function name so they do not shadow `train`.
    train_data, val_data, test_data = train_dataset(augmentation=settings["augment_data"],
                                                    batch_size=settings["batch_size"])

    model = CNN(n_filters=settings["n_filters"], filter_multiplier=settings["filter_multiplier"],
                dropout=settings["dropout"], batch_norm=settings["batch_norm"],
                dense_size=settings["dense_size"])
    # `metrics` is passed as a list, the form accepted by every Keras version.
    model.compile(optimizer=keras.optimizers.Adam(settings["lr"]), loss="categorical_crossentropy",
                  metrics=["categorical_accuracy"])
    model.fit(train_data, epochs=settings["epochs"], validation_data=val_data)
    print("testing Model: ")
    # The iterator already yields batches, so no batch_size is passed here.
    model.evaluate(test_data)

### Setting up wandb sweep

In [None]:
# Bayesian hyperparameter search that maximises the validation accuracy metric.
# NOTE(review): the "project" entry below differs from the project passed to
# wandb.sweep() further down - confirm which one is intended.
sweep_config = {
    "name": "Final Sweep(Bayesian)",
    "description": "Tuning hyperparameters",
    "metric": {
        "name": "val_categorical_accuracy",
        "goal": "maximize",
    },
    "method": "bayes",
    "project": "CS6910_Assignment2",
    # Every hyperparameter is a discrete choice, so each entry expands to
    # {"values": [...]}.
    "parameters": {
        param_name: {"values": choices}
        for param_name, choices in {
            "n_filters": [16, 32, 64],
            "filter_multiplier": [0.5, 1, 2],
            "augment_data": [True],
            "dropout": [0.3, 0.5],
            "batch_norm": [False, True],
            "epochs": [5, 10],
            "dense_size": [32, 64, 128],
            "lr": [0.01, 0.001],
            "batch_size": [64, 128, 256],
            "activation": ["relu", "leaky"],
        }.items()
    },
}

# Register the sweep with W&B and keep its id.
sweep_id = wandb.sweep(sweep_config, project="Convolutional Neural Networks", entity="cs21s048-cs21s058")