In [51]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow import keras
import wandb
from wandb.integration.keras import WandbMetricsLogger
import numpy as np
import os

from sklearn.model_selection import train_test_split

In [33]:
entity = "adamata-selection"

In [34]:
images_dir = os.path.expanduser('dataset-resized')

In [35]:
config={
    "learning_rate": 0.001,
    "loss": "categorical_crossentropy",
    "metric": "accuracy",
    "epoch": 20,
    "batch_size": 32
}

In [68]:
def generate_data(images_dir):
    # Without data augmentation
    datagen = ImageDataGenerator(
        rescale=1./255, 
        validation_split=0.2
    )
    
    
    train_generator = datagen.flow_from_directory(
        images_dir,
        target_size=(128, 128),
        batch_size=config["batch_size"],
        class_mode='categorical',
        subset='training'
    )
    
    validation_generator = datagen.flow_from_directory(
        images_dir,
        target_size=(128, 128),
        batch_size=config["batch_size"],
        class_mode='categorical',
        subset='validation'
    )

    return train_generator, validation_generator

## Store Data To Artifact

In [70]:
project = "wandb-trash-classification"

model_use_case_id = "model"
job_type = "build_dataset"

train_generator, validation_generator = generate_data(images_dir)


# Initialize a W&B run
run = wandb.init(entity=entity, project=project, job_type=job_type)

# Create W&B Table for training data
train_table = wandb.Table(columns=["x_train", "y_train"])
for _ in range(len(train_generator)):
    x_batch, y_batch = next(train_generator)
    for x, y in zip(x_batch, y_batch):
        train_table.add_data(wandb.Image(x), y)

# Create W&B Table for validation data
eval_table = wandb.Table(columns=["x_eval", "y_eval"])
for _ in range(len(validation_generator)):
    x_batch, y_batch = next(validation_generator)
    for x, y in zip(x_batch, y_batch):
        eval_table.add_data(wandb.Image(x), y)

# Create an artifact object
artifact_name = "{}_dataset".format(model_use_case_id)
artifact = wandb.Artifact(name=artifact_name, type="dataset")

# Add wandb.WBValue obj to the artifact.
artifact.add(train_table, "train_table")
artifact.add(eval_table, "eval_table")

# Persist any changes made to the artifact.
artifact.save()

# Tell W&B this run is finished.
run.finish()

Found 2024 images belonging to 6 classes.
Found 503 images belonging to 6 classes.


## Train Model

In [73]:
run = wandb.init(
    project="wandb-trash-classification",
    config={
    "learning_rate": 0.001,
    "loss": "categorical_crossentropy",
    "metric": "accuracy",
    "epoch": 20,
    "batch_size": 32
    }
)

n_class = 6

# Assuming the following config values are set in wandb:
loss = run.config["loss"]
metrics = run.config["metric"]
epochs = run.config["epoch"]
learning_rate = run.config["learning_rate"]
batch_size = run.config['batch_size']

i = Input(shape=(128, 128, 3))

# Convolutional Layers {Conv --> BatchNorm --> Conv --> BatchNorm --> MaxPooling (3x)}
x = Conv2D(32, (3, 3), padding='same', activation='relu')(i)
x = BatchNormalization()(x)
x = Conv2D(32, (3, 3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Neural Networks Layer
x = Flatten()(x)
x = Dropout(0.2)(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(n_class, activation='softmax')(x)

model = Model(i, x)

model.compile(optimizer=Adam(learning_rate=learning_rate), 
              loss=loss, 
              metrics=[metrics])

model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // batch_size,
    epochs=epochs,
    callbacks=[WandbMetricsLogger()]
)

# Save model locally
path = "model.keras"
model.save(path)

# Save model to W&B
path = "./model.keras"
registered_model_name = "trash-classification-dev"

run.link_model(path=path, registered_model_name=registered_model_name)
run.finish()

Epoch 1/20


  self._warn_if_super_not_called()


63/63 ━━━━━━━━━━━━━━━━━━━━ 8:50 9s/step - accuracy: 0.3750 - loss: 3.140 ━━━━━━━━━━━━━━━━━━━━ 1:28 1s/step - accuracy: 0.2500 - loss: 14.85 ━━━━━━━━━━━━━━━━━━━━ 1:24 1s/step - accuracy: 0.2222 - loss: 19.10 ━━━━━━━━━━━━━━━━━━━━ 1:22 1s/step - accuracy: 0.2099 - loss: 21.16 ━━━━━━━━━━━━━━━━━━━━ 1:21 1s/step - accuracy: 0.2209 - loss: 21.83 ━━━━━━━━━━━━━━━━━━━━ 1:19 1s/step - accuracy: 0.2327 - loss: 22.46 ━━━━━━━━━━━━━━━━━━━━ 1:18 1s/step - accuracy: 0.2430 - loss: 22.84 ━━━━━━━━━━━━━━━━━━━━ 1:16 1s/step - accuracy: 0.2514 - loss: 22.94 ━━━━━━━━━━━━━━━━━━━━ 1:15 1s/step - accuracy: 0.2593 - loss: 22.97 ━━━━━━━━━━━━━━━━━━━━ 1:13 1s/step - accuracy: 0.2665 - loss: 22.90 ━━━━━━━━━━━━━━━━━━━━ 1:12 1s/step - accuracy: 0.2744 - loss: 22.76 ━━━━━━━━━━━━━━━━━━━━ 1:11 1s/step - accuracy: 0.2814 - loss: 22.56 ━━━━━━━━━━━━━━━━━━━━ 1:09 1s/step - accuracy: 0.2870 - loss: 22.36 ━━━━━━━━━━━━━━━━━━━━ 1:08 1s/step - accuracy: 0.2928 - loss: 22.13 ━━━━━━━━━━━━━━━━━━━━ 1:06 1s/step - accuracy: 0.2981 - l

wandb: ERROR Unable to log learning rate.


63/63 ━━━━━━━━━━━━━━━━━━━━ 98s 1s/step - accuracy: 0.4068 - loss: 14.8024 - val_accuracy: 0.2271 - val_loss: 5.7997
Epoch 2/20
 1/63 ━━━━━━━━━━━━━━━━━━━━ 1:25 1s/step - accuracy: 0.5312 - loss: 1.4262

  self.gen.throw(typ, value, traceback)


63/63 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.5312 - loss: 1.4262 - val_accuracy: 0.2174 - val_loss: 3.7785
Epoch 3/20
63/63 ━━━━━━━━━━━━━━━━━━━━ 2:01 2s/step - accuracy: 0.7188 - loss: 0.790 ━━━━━━━━━━━━━━━━━━━━ 1:16 1s/step - accuracy: 0.7031 - loss: 0.909 ━━━━━━━━━━━━━━━━━━━━ 1:17 1s/step - accuracy: 0.7083 - loss: 0.970 ━━━━━━━━━━━━━━━━━━━━ 1:15 1s/step - accuracy: 0.7129 - loss: 0.971 ━━━━━━━━━━━━━━━━━━━━ 1:15 1s/step - accuracy: 0.7141 - loss: 0.980 ━━━━━━━━━━━━━━━━━━━━ 1:12 1s/step - accuracy: 0.7114 - loss: 1.030 ━━━━━━━━━━━━━━━━━━━━ 1:10 1s/step - accuracy: 0.7073 - loss: 1.080 ━━━━━━━━━━━━━━━━━━━━ 1:09 1s/step - accuracy: 0.7044 - loss: 1.113 ━━━━━━━━━━━━━━━━━━━━ 1:08 1s/step - accuracy: 0.7017 - loss: 1.144 ━━━━━━━━━━━━━━━━━━━━ 1:06 1s/step - accuracy: 0.6984 - loss: 1.170 ━━━━━━━━━━━━━━━━━━━━ 1:05 1s/step - accuracy: 0.6943 - loss: 1.191 ━━━━━━━━━━━━━━━━━━━━ 1:04 1s/step - accuracy: 0.6916 - loss: 1.205 ━━━━━━━━━━━━━━━━━━━━ 1:02 1s/step - accuracy: 0.6885 - loss: 1.2

0,1
epoch/accuracy,▁▂▃▄▄▅▅▅▆▆▆▇▇▇▇▆████
epoch/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
epoch/loss,█▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁
epoch/val_accuracy,▃▃▂▂▁▂▃▁▃▃▃▃▄▆▇▅█▆▇▅
epoch/val_loss,▄▃▇▇█▇▄▄▅▅▄▃▂▁▁▃▁▁▂▂

0,1
epoch/accuracy,0.875
epoch/epoch,19.0
epoch/loss,0.32806
epoch/val_accuracy,0.3913
epoch/val_loss,2.9568


In [56]:
job_type = "train_model"

config={
    "learning_rate": 0.001,
    "loss": "categorical_crossentropy",
    "metric": "accuracy",
    "epoch": 20,
    "batch_size": 32
}

# Initialize a W&B run
run = wandb.init(project=project, job_type=job_type, config=config)

# Retrieve the dataset artifact
version = "latest"
name = "{}:{}".format("{}_dataset".format(model_use_case_id), version)
artifact = run.use_artifact(artifact_or_name=name)

# Get specific content from the dataframe
train_table = artifact.get("train_table")
x_train = train_table.get_column("x_train", convert_to="numpy")
y_train = train_table.get_column("y_train", convert_to="numpy")

run.finish()

wandb:   2526 of 2526 files downloaded.  


In [57]:
x_train[0].shape

(128, 128, 3)

In [67]:
wandb.init(
    project="wandb-trash-classification"
)


n_class = 6
# input_shape = (128, 128, 3)

loss = run.config["loss"]
metrics = run.config["metric"]
epochs = run.config["epoch"]
learning_rate = run.config["learning_rate"]

i = Input(shape=(128, 128, 3))

# Convolutional Layers {Conv --> BatchNorm --> Conv --> BatchNorm --> MaxPooling (3x)}
x = Conv2D(32, (3,3), padding='same', activation='relu')(i)
x = BatchNormalization()(x)
x = Conv2D(32, (3,3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2,2))(x)
x = Conv2D(64, (3,3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = Conv2D(64, (3,3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2,2))(x)
x = Conv2D(128, (3,3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3,3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2,2))(x)

# Neural Networks Layer
x = Flatten()(x)
x = Dropout(0.2)(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(n_class, activation='softmax')(x)

model = Model(i, x)

model.compile(optimizer=Adam(learning_rate=learning_rate), 
          loss=loss, 
          metrics=[run.config['metric']])

# Generate labels for training data
y_train = keras.utils.to_categorical(y_train, num_classes)

# Create training and test set
x_t, x_v, y_t, y_v = train_test_split(x_train, y_train, test_size=0.33)

model.fit(
    x=x_t,
    y=y_t,
    batch_size=run.config['batch_size'],
    epochs=epochs,
    validation_data=(x_v, y_v),
    callbacks=[WandbMetricsLogger()]
)

# Save model locally
path = "model.keras"
model.save(path)

path = "./model.h5"
registered_model_name = "trash-classification-dev"

run.link_model(path=path, registered_model_name=registered_model_name)
run.finish()

Epoch 1/20


ValueError: Arguments `target` and `output` must have the same rank (ndim). Received: target.shape=(None, 6, 6, 6, 6, 1), output.shape=(None, 6)

In [62]:
x_t.shape, x_v.shape, y_t.shape, y_v.shape

((1356, 128, 128, 3), (668, 128, 128, 3), (1356, 6, 6, 6), (668, 6, 6, 6))

In [65]:
np.expand_dims(x_t, -1).shape, np.expand_dims(x_v, -1).shape, np.expand_dims(y_t, -1).shape, np.expand_dims(y_v, -1).shape  

((1356, 128, 128, 3, 1),
 (668, 128, 128, 3, 1),
 (1356, 6, 6, 6, 1),
 (668, 6, 6, 6, 1))

In [66]:
np.expand_dims(x_t, -1)

array([[[[[238],
          [234],
          [223]],

         [[238],
          [234],
          [223]],

         [[236],
          [232],
          [221]],

         ...,

         [[218],
          [208],
          [198]],

         [[208],
          [198],
          [188]],

         [[211],
          [201],
          [191]]],


        [[[237],
          [233],
          [222]],

         [[237],
          [233],
          [222]],

         [[235],
          [231],
          [220]],

         ...,

         [[217],
          [207],
          [197]],

         [[207],
          [197],
          [187]],

         [[210],
          [200],
          [190]]],


        [[[236],
          [232],
          [221]],

         [[236],
          [232],
          [221]],

         [[235],
          [231],
          [220]],

         ...,

         [[216],
          [206],
          [196]],

         [[206],
          [196],
          [186]],

         [[209],
          [199],
          [189]]