# Your First Image Classifier: Using CNN to Classify Images
# Train

The purpose of this dataset is to correctly classify an image as containing a dog, cat, or panda.
Containing only 3,000 images, the Animals dataset is meant to be another **introductory** dataset
on which we can quickly train a CNN model and compare its results with those of the previous KNN model.


Let's take the following steps:

1. Encoding target variable
2. Training the CNN model
3. Export the model and the encoder object

<center><img width="900" src="https://drive.google.com/uc?export=view&id=1haMB_Zt6Et9q9sPHxfuR4g3FT5QRXlTI"></center>


## Step 01: Setup

Start out by installing the experiment tracking library and setting up your free W&B account:


*   **pip install wandb** – Install the W&B library
*   **import wandb** – Import the wandb library
*   **wandb login** – Login to your W&B account so you can log all your metrics in one place

In [None]:
!pip install wandb -qU

In [None]:
# a Python package for tracking the carbon emissions produced by various
# kinds of computer programs, from straightforward algorithms to deep neural networks.
!pip install codecarbon

### Import Packages

In [None]:
import logging
import joblib
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import SGD
import matplotlib.pyplot as plt
import numpy as np
from codecarbon import EmissionsTracker
from tensorflow.keras.callbacks import Callback
from wandb.keras import WandbCallback
import os
from sklearn.metrics import classification_report
from sklearn.metrics import fbeta_score, precision_score, recall_score, accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import wandb

In [None]:
# log this session in to W&B before any run is created below
wandb.login()

In [None]:
# Configure the root logger: INFO level, messages prefixed with a timestamp.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# console handler producing lines such as "31-12-2022 23:59:59 message"
c_handler = logging.StreamHandler()
c_format = logging.Formatter(fmt="%(asctime)s %(message)s",
                             datefmt='%d-%m-%Y %H:%M:%S')
c_handler.setFormatter(c_format)

# Install the handler on the root logger.
# When a handler is already installed, the first one is replaced (as the
# original cell did; presumably to avoid printing every message twice).
# When there is none, indexing handlers[0] would raise IndexError, so the
# handler is simply registered instead.
if logger.handlers:
    logger.handlers[0] = c_handler
else:
    logger.addHandler(c_handler)

## Step 02: Basic configuration and download artifacts

In [None]:
# Notebook stand-in for command-line arguments: every value a script would
# receive through argument parsing is hard coded here instead.
args = dict(
    # W&B project that holds the runs and artifacts
    project_name="cnn_classifier",
    # input artifacts (features / targets for train and validation)
    train_feature_artifact="train_x:latest",
    train_target_artifact="train_y:latest",
    val_feature_artifact="val_x:latest",
    val_target_artifact="val_y:latest",
    # output file names for the fitted encoder and the trained model
    encoder="target_encoder",
    inference_model="model.h5",
)

In [None]:
# start a "Train" run inside the W&B project created in the Fetch step
run = wandb.init(entity="ivanovitch-silva",
                 project=args["project_name"],
                 job_type="Train")

logger.info("Downloading the train and validation data")


def _use_and_download(artifact_key):
    """Mark the artifact named by args[artifact_key] as an input of the run.

    Returns the artifact object together with whatever artifact.file()
    returns (passed to joblib.load below).
    """
    artifact = run.use_artifact(args[artifact_key])
    return artifact, artifact.file()


# one artifact at a time: train x, train y, validation x, validation y
train_x_artifact, train_x_path = _use_and_download("train_feature_artifact")
train_y_artifact, train_y_path = _use_and_download("train_target_artifact")
val_x_artifact, val_x_path = _use_and_download("val_feature_artifact")
val_y_artifact, val_y_path = _use_and_download("val_target_artifact")

# deserialize the four downloaded files, in the same order
train_x, train_y, val_x, val_y = [
    joblib.load(path)
    for path in (train_x_path, train_y_path, val_x_path, val_y_path)
]

In [None]:
# report the shape of every split that was just loaded
for template, split in (
    ("Train x: {}", train_x),
    ("Train y: {}", train_y),
    ("Validation x: {}", val_x),
    ("Validation y: {}", val_y),
):
    logger.info(template.format(split.shape))

In [None]:
# display the training features stored at index 3
train_x[3]

In [None]:
# display the (not yet encoded) training target stored at index 3
train_y[3]

## Step 03: Encode the target variable

In [None]:
# Binarize the class labels.
# The encoder is fitted on the training targets only, so nothing from the
# validation split leaks into it (no data leakage); the fitted encoder is
# then applied to both splits.
lb = LabelBinarizer().fit(train_y)
train_y, val_y = lb.transform(train_y), lb.transform(val_y)

In [None]:
# display the class labels stored on the fitted encoder
lb.classes_

In [None]:
# display the encoded training target stored at index 4
train_y[4]

In [None]:
# display the encoded validation target stored at index 3
val_y[3]

## Step 04: Model definition

Source code based on **Rosebrock, Adrian. Deep Learning For Computer vision with Python, 2019** [link](https://pyimagesearch.com/deep-learning-computer-vision-python-book/)

In [None]:
class ShallowNet:
    """Minimal CNN: one CONV => RELU stage followed by a softmax classifier."""

    @staticmethod
    def build(width, height, depth, classes):
        """Assemble and return the (uncompiled) Sequential network.

        width, height, depth: dimensions of the input image
        classes: number of output units of the softmax classifier
        """
        network = Sequential()

        # Keras may be configured as "channels first" or "channels last";
        # order the input dimensions accordingly
        if K.image_data_format() == "channels_first":
            input_dims = (depth, height, width)
        else:
            input_dims = (height, width, depth)

        stack = (
            # the first (and only) CONV => RELU stage: 32 filters of 3x3
            Conv2D(32, (3, 3), padding="same", input_shape=input_dims),
            Activation("relu"),
            # softmax classifier on top of the flattened feature maps
            Flatten(),
            Dense(classes),
            Activation("softmax"),
        )
        for layer in stack:
            network.add(layer)

        return network

In [None]:
# instantiate the network for 32x32 images with 3 channels and 3 classes
model = ShallowNet.build(width=32, height=32, depth=3, classes=3)

# print the layer-by-layer summary of the model
model.summary()

In [None]:
# parameters of the final Dense layer:
# 32768 flattened inputs * 3 classes (weights) + 3 (one bias per class)
32768*3 + 3

In [None]:
# size of the flattened conv output: 32 (height) * 32 (width) * 32 (filters);
# the model above is built for 32x32 inputs and the conv layer uses padding="same"
32*32*32

In [None]:
# A Dense layer computes Z = WX + b (weights W, input X, bias b).
# Kept as a comment on purpose: W, X and b are not defined anywhere in the
# visible notebook, so executing the formula as code raised a NameError.

In [None]:
# parameters of the Conv2D layer:
# 3 input channels * 9 (3x3 kernel) * 32 filters (weights) + 32 (one bias per filter)
3*9*32 + 32

## Step 05: Training

In [None]:
# Train the baseline ShallowNet while measuring energy use with codecarbon.
# codecarbon is very verbose by default, so only critical messages are kept;
# lower the log level to get more information.
tracker = EmissionsTracker(log_level="critical")
tracker.start()

# The whole tracked section sits in try/finally so that the tracker is always
# stopped, even when compiling or fitting the model raises.
try:
    # initialize the optimizer and model
    print("[INFO] compiling model...")
    opt = SGD(learning_rate=0.005)
    model = ShallowNet.build(width=32, height=32, depth=3, classes=3)
    model.compile(loss="categorical_crossentropy",
                  optimizer=opt,
                  metrics=["accuracy"])

    # train the network; the W&B callback reports the Keras metrics to the
    # active run (model saving by the callback is switched off)
    print("[INFO] training network...")
    history = model.fit(
        train_x, train_y,
        validation_data=(val_x, val_y),
        batch_size=32,
        epochs=100,
        verbose=0,
        callbacks=[wandb.keras.WandbCallback(save_model=False,
                                             compute_flops=True)],
    )
finally:
    # get co2 emissions from tracker
    # "CO2 emission (in Kg)"
    emissions = tracker.stop()

## Step 06: Evaluation Metrics

In [None]:
# Plot the training/validation loss and accuracy recorded during model.fit.
plt.style.use("ggplot")
fig, ax = plt.subplots(1, 1, figsize=(10, 8))

# The x axis is derived from the recorded history instead of a hard-coded
# 100, so the plot keeps working when the number of epochs changes.
epochs_axis = np.arange(0, len(history.history["loss"]))

# losses are dashed, accuracies are solid
ax.plot(epochs_axis, history.history["loss"], label="train_loss", linestyle='--')
ax.plot(epochs_axis, history.history["val_loss"], label="val_loss", linestyle='--')
ax.plot(epochs_axis, history.history["accuracy"], label="train_acc")
ax.plot(epochs_axis, history.history["val_accuracy"], label="val_acc")
ax.set_title("Training Loss and Accuracy")
ax.set_xlabel("Epoch #")
ax.set_ylabel("Loss/Accuracy")
ax.legend()
plt.show()

In [None]:
# print the energy / emission figures collected by the stopped tracker
final_data = tracker.final_emissions_data
energy_report = (
    ("[INFO] {} kWh of electricity used since the begining", final_data.energy_consumed),
    ("[INFO] Energy consumed for RAM: {} kWh", final_data.ram_energy),
    ("[INFO] Energy consumed for all GPU: {} kWh", final_data.gpu_energy),
    ("[INFO] Energy consumed for all CPU: {} kWh", final_data.cpu_energy),
    ("[INFO] CO2 emission {}(in Kg)", final_data.emissions),
)
for template, value in energy_report:
    print(template.format(value))

In [None]:
# score the validation split and print the per-class report
print("[INFO] evaluating network...")
predictions = model.predict(val_x, batch_size=32)

# argmax turns the one-column-per-class rows into class indices
report = classification_report(
    val_y.argmax(axis=1),
    predictions.argmax(axis=1),
    target_names=lb.classes_,
)
print(report)

In [None]:
# draw the confusion matrix of the validation predictions
fig_confusion_matrix, ax = plt.subplots(1, 1, figsize=(7, 4))

# NOTE: the predictions are passed first and the ground truth second, so
# (per the scikit-learn convention that the first argument indexes the rows)
# rows are predicted classes and columns are true classes.
cm_counts = confusion_matrix(predictions.argmax(axis=1),
                             val_y.argmax(axis=1))
cm_display = ConfusionMatrixDisplay(cm_counts, display_labels=lb.classes_)
cm_display.plot(values_format=".0f", ax=ax)

# axis labels are set explicitly to match the orientation described above
ax.set_xlabel("True Label")
ax.set_ylabel("Predicted Label")
ax.grid(False)
plt.show()

In [None]:
# send the figures produced above to the W&B run
logger.info("Uploading figures")

# further figures can be added as extra "name": wandb.Image(fig) entries
figures = {"confusion_matrix": wandb.Image(fig_confusion_matrix)}
run.log(figures)

In [None]:
# Compute the validation metrics, log them and store them in the run summary.
logger.info("Validation Evaluation metrics")

# class indices of the ground truth and of the predictions
y_true = val_y.argmax(axis=1)
y_pred = predictions.argmax(axis=1)

# options shared by the class-averaged scores
averaging = {"zero_division": 1, "average": "weighted"}
fbeta = fbeta_score(y_true, y_pred, beta=1, **averaging)
precision = precision_score(y_true, y_pred, **averaging)
recall = recall_score(y_true, y_pred, **averaging)
acc = accuracy_score(y_true, y_pred)

for template, score in (
    ("Validation Accuracy: {}", acc),
    ("Validation Precision: {}", precision),
    ("Validation Recall: {}", recall),
    ("Validation F1: {}", fbeta),
):
    logger.info(template.format(score))

emissions_data = tracker.final_emissions_data
summary_values = {
    "Acc": acc,
    "Precision": precision,
    "Recall": recall,
    "F1": fbeta,
    # number of parameters
    "Count_Params": model.count_params(),
    # energy unit is kWh
    "Energy_Consumed": emissions_data.energy_consumed,
    "Energy_RAM": emissions_data.ram_energy,
    "Energy_GPU": emissions_data.gpu_energy,
    "Energy_CPU": emissions_data.cpu_energy,
    # kg
    "CO2_Emissions": emissions_data.emissions,
}
for key, value in summary_values.items():
    run.summary[key] = value

In [None]:
logger.info("Dumping the model and encoder artifacts to the disk")

# the fitted label encoder is serialized with joblib under args["encoder"]
joblib.dump(lb, args["encoder"])

# the trained model is written inside the directory of the active W&B run
model_path = os.path.join(wandb.run.dir, args["inference_model"])
model.save(model_path)

In [None]:
# Encoder artifact: wraps the file written by joblib.dump(lb, args["encoder"]).
# The description used to call it "a json file", which is wrong: the file is
# a joblib dump of the fitted encoder.
artifact = wandb.Artifact(args["encoder"],
                          type="INFERENCE_MODEL",
                          description="A joblib dump of the fitted target encoder"
                          )

logger.info("Logging the target encoder artifact")
# attach the encoder file and register the artifact as an output of the run
artifact.add_file(args["encoder"])
run.log_artifact(artifact)

In [None]:
# Inference model artifact: wraps the file written by model.save(...) into
# the W&B run directory. The description used to call it "a json file",
# which is wrong: the file name configured in args ends in .h5.
artifact = wandb.Artifact(args["inference_model"],
                          type="INFERENCE_MODEL",
                          description="The trained Keras inference model saved as an .h5 file"
                          )

logger.info("Logging the inference model artifact")
# attach the saved model file and register the artifact as an output of the run
artifact.add_file(os.path.join(wandb.run.dir, args["inference_model"]))
run.log_artifact(artifact)

In [None]:
# close the "Train" run opened in Step 02
run.finish()

**How can we improve this model?**

-  Data Augmentation [Link](https://colab.research.google.com/drive/1S8SJvH4bqhPvurG4gjh3-t-XulX4S8JX#scrollTo=me4Jr5IhaT0j)
- Batch Normalization
- Dropout
- Add more CNN layers
- Add more hidden layers in the head

## Step 07: Sweep (hyperparameter tuning)

### Sweep setup

ℹ️ [Reference](https://docs.wandb.ai/guides/sweeps/define-sweep-configuration)

**Sweep configuration structure**

Sweep configurations are nested; keys can have, as their values, further keys. The top-level keys are listed and briefly described below, and then detailed in the following section.

| Top-Level Key | Description                                         |
|---------------|-----------------------------------------------------|
| **program**       | (required) Training script to run.                  |
| **method**        | (required) Specify the <br>search strategy.         |
| **parameters**    | (required) Specify <br>parameters bounds to search. |

<br>

**Search type methods**

The following list describes hyperparameter search methods. Specify the search strategy with the **method**:

- **grid**  – Iterate over every combination of hyperparameter values. Can be computationally costly.
- **random**  – Choose a random set of hyperparameter values on each iteration based on provided distributions.
- **bayes** – Create a probabilistic model of a metric score as a function of the hyperparameters, and choose parameters with high probability of improving the metric. 
<br>

**Metric**

Describes the metric to optimize. This metric should be logged **explicitly** to W&B by your training script.

| Key    | Description |
|--------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **name**   | Name of the metric to optimize.|
| **goal**   | Either minimize or maximize (default is <br>minimize).|
| **target** | Goal value for the metric you're optimizing. <br>When any run in the sweep achieves that target value,<br> the sweep's state will be set to finished. <br>This means all agents with active runs will <br>finish those jobs, but no new runs will <br>be launched in the sweep. |

In [None]:
# Sweep definition: random search that maximizes the validation accuracy.
sweep_config = dict(
    method="random",
    # metric the sweep optimizes; it has to be logged to W&B by the training
    # code (presumably the val_accuracy reported by the Keras callback)
    metric=dict(name="val_accuracy", goal="maximize"),
    parameters=dict(
        # number of CONV => RELU stages, drawn uniformly from the integers 1..3
        conv_layer=dict(max=3, min=1, distribution="int_uniform"),
        # number of hidden Dense layers placed before the classifier
        hidden_layer=dict(values=[0, 1, 2, 3]),
        # learning rate handed to the optimizer
        learn_rate=dict(values=[0.01, 0.001, 0.005]),
        # number of training epochs
        epoch=dict(values=[100, 200]),
        # mini-batch size used for training and prediction
        batch_size=dict(values=[32, 64]),
    ),
)

⚠️⚠️⚠️

> Before moving on to Step 07, please run the following steps and command again:
- **Step 02** (basic configuration and download artifacts),
- **Step 03** (encode the target variable), and
- **run.finish()**

In [None]:
# Register the sweep defined by sweep_config and keep its id for the agent.
#     – entity: user name that owns the sweep
#     – project: project the sweep belongs to
sweep_owner = {
    "entity": "ivanovitch-silva",
    "project": args["project_name"],
}
sweep_id = wandb.sweep(sweep_config, **sweep_owner)

### Adapt the model

In [None]:
class ShallowNetAdapt:
    """ShallowNet variant whose depth is driven by a sweep configuration."""

    @staticmethod
    def build(width, height, depth, classes, config):
        '''
        Assemble and return the (uncompiled) Sequential network.

          width, height, depth: dimensions of the image
          classes: number of targets
          config: variable used to configure the sweep; config.conv_layer is
                  the number of CONV => RELU stages and config.hidden_layer
                  the number of hidden Dense layers
        '''
        # initialize the model along with the input shape to be
        # "channels last"
        model = Sequential()
        inputShape = (height, width, depth)

        # if we are using "channels first", update the input shape
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)

        for layer_index in range(config.conv_layer):
            # CONV => RELU stage; only the first layer of the model needs to
            # be told the input shape, so it is no longer passed to the
            # following convolutions
            first_layer_kwargs = {"input_shape": inputShape} if layer_index == 0 else {}
            model.add(Conv2D(32, (3, 3), padding="same", **first_layer_kwargs))
            model.add(Activation("relu"))

        # add a flatten layer
        model.add(Flatten())

        # add hidden layers (10 units each) followed by a relu activation
        for _ in range(config.hidden_layer):
            model.add(Dense(10, activation="relu"))

        # softmax classifier
        model.add(Dense(classes))
        model.add(Activation("softmax"))

        # return the constructed network architecture
        return model

### Training

In [None]:
def train():
    """Run one sweep trial: build, train and evaluate a ShallowNetAdapt model.

    The hyperparameters (learn_rate, conv_layer, hidden_layer, batch_size,
    epoch) are read from the config of the W&B run opened here. The data is
    taken from the module-level train_x, train_y, val_x and val_y. The
    validation metrics, the parameter count and the codecarbon energy figures
    are written to the run summary. Nothing is returned.
    """
    with wandb.init() as run:

        # create codecarbon tracker
        # codecarbon is very verbose by default; lower the log level for more info
        tracker = EmissionsTracker(log_level="critical")
        tracker.start()

        # try/finally guarantees the tracker is stopped even when compiling
        # or fitting raises; the error itself still propagates
        try:
            # initialize the optimizer and model
            print("[INFO] compiling model...")
            opt = SGD(learning_rate=run.config.learn_rate)
            model = ShallowNetAdapt.build(width=32,
                                          height=32,
                                          depth=3,
                                          classes=3,
                                          config=run.config)
            model.compile(loss="categorical_crossentropy",
                          optimizer=opt, metrics=["accuracy"])

            # train the network
            print("[INFO] training network...")
            model.fit(train_x, train_y,
                      validation_data=(val_x, val_y),
                      batch_size=run.config.batch_size,
                      epochs=run.config.epoch,
                      verbose=0,
                      callbacks=[wandb.keras.WandbCallback(save_model=False,
                                                           compute_flops=True)]
                      )
        finally:
            # stop measuring; the figures are read from
            # tracker.final_emissions_data below
            tracker.stop()

        # make predictions
        predictions = model.predict(val_x, batch_size=run.config.batch_size)

        # class indices, computed once and shared by every metric
        y_true = val_y.argmax(axis=1)
        y_pred = predictions.argmax(axis=1)

        # Evaluation Metrics
        logger.info("Validation Evaluation metrics")
        fbeta = fbeta_score(y_true, y_pred,
                            beta=1, zero_division=1, average='weighted')
        precision = precision_score(y_true, y_pred,
                                    zero_division=1, average='weighted')
        recall = recall_score(y_true, y_pred,
                              zero_division=1, average='weighted')
        acc = accuracy_score(y_true, y_pred)

        logger.info("Validation Accuracy: {}".format(acc))
        logger.info("Validation Precision: {}".format(precision))
        logger.info("Validation Recall: {}".format(recall))
        logger.info("Validation F1: {}".format(fbeta))

        run.summary["Acc"] = acc
        run.summary["Precision"] = precision
        run.summary["Recall"] = recall
        run.summary["F1"] = fbeta
        # number of parameters
        run.summary["Count_Params"] = model.count_params()
        # energy unit is kWh
        run.summary["Energy_Consumed"] = tracker.final_emissions_data.energy_consumed
        run.summary["Energy_RAM"] = tracker.final_emissions_data.ram_energy
        run.summary["Energy_GPU"] = tracker.final_emissions_data.gpu_energy
        run.summary["Energy_CPU"] = tracker.final_emissions_data.cpu_energy
        # kg
        run.summary["CO2_Emissions"] = tracker.final_emissions_data.emissions


In [None]:
# Launch a sweep agent for the sweep created above.
#     – sweep_id: id returned by wandb.sweep()
#     – function: callable executed for every trial (builds and trains the model)
#     – count: number of trials this agent runs
wandb.agent(
    sweep_id=sweep_id,
    function=train,
    count=3,
)

In [None]:
# finish the module-level run object (the sweep trials open and close their own runs inside train())
run.finish()