## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os, random
from tqdm import tqdm

import tensorflow as tf
from tensorflow import keras
import keras
import cv2

## Load Data

In [2]:
# Dataset root folder and the intended square image size in pixels.
train_path, img_size = "./dataset-resized", (256, 256)

In [3]:
# Accumulators for the resized images (X) and their integer class labels (y).
X = []
y = []

dataset_path = "./dataset-resized"

# Class folder name -> integer label.
jenis_dict = {"cardboard":0, "glass":1, "metal":2, "paper":3, "plastic":4, "trash":5}

# os.listdir returns entries in arbitrary order; sorting keeps the sample order
# (and therefore the seeded train/validation split) identical across machines.
list_class = sorted(
    item for item in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, item))
)

for jenis in list_class:
    if jenis not in jenis_dict:
        # Ignore folders that are not a known class (e.g. .ipynb_checkpoints)
        # instead of failing with a KeyError.
        print(f"Skipping unknown class folder: {jenis}")
        continue
    class_dir = os.path.join(dataset_path, jenis)
    for image_name in sorted(os.listdir(class_dir)):
        image_path = os.path.join(class_dir, image_name)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None rather than raising; skip it so cv2.resize does not crash.
            print(f"Skipping unreadable file: {image_path}")
            continue
        X.append(cv2.resize(img, img_size))
        y.append(jenis_dict[jenis])
X = np.array(X)

In [4]:
# Sanity check: (number of images, height, width, channels).
print("X shape:", X.shape)

X shape: (2527, 256, 256, 3)


## Split data

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation. Stratifying on y keeps the class
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=27,
    stratify=y,
)

### Convert labels to one-hot encoding

In [6]:
# Guard against re-running this cell: the labels must still be the flat integer
# labels produced by the split. One-hot encoding an already one-hot array would
# silently produce a wrongly shaped target.
assert np.ndim(y_train) == 1 and np.ndim(y_val) == 1, (
    "Labels are already one-hot encoded; re-run the split cell first."
)

# Pass num_classes explicitly so both arrays always have one column per class,
# even if the highest-numbered class were missing from one of the parts.
y_train = keras.utils.to_categorical(y_train, num_classes=len(jenis_dict))
y_val = keras.utils.to_categorical(y_val, num_classes=len(jenis_dict))

y_train[:5]

array([[1., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0.]], dtype=float32)

## Modelling

### Define callbacks

In [7]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Multiply the learning rate by 0.4 once the monitored quantity has gone
# 2 epochs without improving.
# NOTE(review): this watches the *training* loss, not 'val_loss' — confirm
# that is intended.
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.4, patience=2, verbose=1)

In [8]:
# Weights and Biases related imports
import wandb
from wandb.integration.keras import WandbMetricsLogger

# Hyper-parameters read by the model builder and the training call;
# the same mapping is passed to W&B as the run configuration.
configs = {
    "num_classes": 6,
    "batch_size": 16,
    "image_size": 256,
    "epochs": 20,
}

# Start the W&B run that the training metrics are logged to.
run = wandb.init(project="trash_classification", config=configs)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: bryannaufal (ml_workspace). Use `wandb login --relogin` to force relogin


### Define Model 1

In [9]:
from keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Activation

In [10]:
def model_1(configs):
    """Assemble the baseline CNN classifier (not compiled).

    Layout: three conv blocks (16, 32, 64 filters), each followed by dropout,
    batch normalisation and max pooling; three plain conv + max-pooling
    blocks (64, 128, 128 filters); then a dense head of 256 and 128 units
    ending in a softmax layer.

    Args:
        configs: mapping providing ``'image_size'`` (side length of the square,
            3-channel input) and ``'num_classes'`` (width of the softmax output).

    Returns:
        The assembled ``Sequential`` model.
    """
    side = configs['image_size']

    def conv(filters, **extra):
        # Every convolution uses a 3x3 kernel, stride 1 and ReLU.
        return Conv2D(filters, (3, 3), 1, activation='relu', **extra)

    # First block carries the input shape.
    stack = [
        conv(16, input_shape=(side, side, 3)),
        Dropout(0.1),
        BatchNormalization(),
        MaxPooling2D(),
    ]
    # Remaining regularised conv blocks.
    for filters in (32, 64):
        stack += [conv(filters), Dropout(0.1), BatchNormalization(), MaxPooling2D()]
    # Plain conv + pooling blocks.
    for filters in (64, 128, 128):
        stack += [conv(filters), MaxPooling2D()]
    # Classification head.
    stack += [
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.1),
        Dense(128, activation='relu'),
        Dense(configs['num_classes'], activation='softmax'),
    ]
    return Sequential(stack)

# Build the network, print its layer table, then set the training objective.
model = model_1(configs=configs)
model.summary()
model.compile(
    optimizer="Adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Train on the training split and evaluate on the validation split after each
# epoch; per-epoch metrics are sent to W&B and the LR schedule is applied.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=configs['batch_size'],
    epochs=configs['epochs'],
    verbose=2,
    callbacks=[reduce_lr, WandbMetricsLogger()],
    validation_data=(X_val, y_val),
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 254, 254, 16)      448       
                                                                 
 dropout (Dropout)           (None, 254, 254, 16)      0         
                                                                 
 batch_normalization (BatchN  (None, 254, 254, 16)     64        
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 127, 127, 16)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 125, 125, 32)      4640      
                                                                 
 dropout_1 (Dropout)         (None, 125, 125, 32)      0

In [11]:
# Where the trained model is written, and the registry entry it is linked to.
path = "./models/model_cnn.h5"
registered_model_name = "cnn_base"

# Persist the model, link the saved file to the W&B run, then close the run.
model.save(path)
run.link_model(path=path, registered_model_name=registered_model_name)
run.finish()

0,1
epoch/accuracy,▁▃▃▄▄▄▅▅▆▆▆▆▇▇▇█████
epoch/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
epoch/learning_rate,██████████████▁▁▁▁▁▁
epoch/loss,█▇▆▆▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁
epoch/lr,███████████████▁▁▁▁▁
epoch/val_accuracy,▃▆▅▅▅▁▄▆▃▇▅▆▅▄▄▇▆▇██
epoch/val_loss,▄▂▂▂▂▅▄▂▅▁▃▂▄█▇▃▅▂▂▂

0,1
epoch/accuracy,0.98516
epoch/epoch,19.0
epoch/learning_rate,0.0004
epoch/loss,0.04622
epoch/lr,0.0004
epoch/val_accuracy,0.72332
epoch/val_loss,1.11438


## Evaluation

### Helpers

In [18]:
# Index of the first epoch drawn by plot_loss / plot_metric (0 = plot every epoch).
start = 0

import matplotlib.pyplot as plt

def plot_loss(history_dict):
    """Plot the training and validation loss curves of a training history.

    Parameters
    ----------
    history_dict : dict
        Mapping of series name to per-epoch values (e.g. ``history.history``).
        The ``"loss"`` series is plotted, together with ``"val_loss"`` when
        it is present.

    Notes
    -----
    Curves start at the module-level ``start`` epoch. After the figure is
    shown, the last recorded value of each plotted series is printed.
    """
    # Select the series by name rather than by position: the order of the keys
    # is not guaranteed (extra entries such as the learning rate, or a
    # different metric/loss ordering, would otherwise plot the wrong series).
    # Fall back to the first key only for dicts without a "loss" entry.
    key1 = "loss" if "loss" in history_dict else list(history_dict.keys())[0]
    key2 = "val_" + key1
    has_val = key2 in history_dict

    plt.plot(history_dict[key1][start:], "b-", label=key1)
    if has_val:
        plt.plot(history_dict[key2][start:], "r--", label=key2)
    plt.title("Training vs Validation Loss")
    plt.legend()
    plt.show()

    if has_val:
        print(key1, ": ", history_dict[key1][-1], key2, ": ", history_dict[key2][-1])
    else:
        # No validation data was recorded; report the training value only.
        print(key1, ": ", history_dict[key1][-1])


def plot_metric(history_dict):
    """Plot the training and validation curves of the first tracked metric.

    Parameters
    ----------
    history_dict : dict
        Mapping of series name to per-epoch values (e.g. ``history.history``).
        The first entry that is neither the loss, the learning rate, nor a
        ``val_`` series is treated as the metric; its ``val_`` counterpart is
        plotted when present.

    Raises
    ------
    ValueError
        If ``history_dict`` holds no metric series.

    Notes
    -----
    Curves start at the module-level ``start`` epoch. After the figure is
    shown, the last recorded value of each plotted series is printed.
    """
    # Select the series by name rather than by position: the order of the keys
    # is not guaranteed, and callbacks add extra entries (learning rate).
    non_metric = {"loss", "lr", "learning_rate"}
    candidates = [
        key for key in history_dict
        if key not in non_metric and not key.startswith("val_")
    ]
    if not candidates:
        raise ValueError("history_dict contains no metric besides the loss")
    key1 = candidates[0]
    key2 = "val_" + key1
    has_val = key2 in history_dict

    plt.plot(history_dict[key1][start:], "b-", label=key1)
    if has_val:
        plt.plot(history_dict[key2][start:], "r--", label=key2)
    plt.title("Training vs Validation Metric")
    plt.legend()
    plt.show()

    if has_val:
        print(key1, ": ", history_dict[key1][-1], key2, ": ", history_dict[key2][-1])
    else:
        # No validation data was recorded; report the training value only.
        print(key1, ": ", history_dict[key1][-1])

### Model 1

In [1]:
# Draw the loss and metric curves recorded in `history` by the model.fit call.
# Requires the helper cell defining plot_loss / plot_metric to have been run first.
plot_loss(history.history)
plot_metric(history.history)

NameError: name 'plot_loss' is not defined

## Submission

In [27]:
# Read the sample submission; its `id` column is used below to locate the test images.
# NOTE(review): absolute Kaggle input path — this cell only works where that path exists.
submission = pd.read_csv("/kaggle/input/final-joints-data-competition-2023/sample_submission.csv")

In [29]:
# load test data
# NOTE(review): `test_path` is not defined in any cell visible in this notebook;
# it must be set to the folder containing the test images before this cell runs.
test_img = []
for test_name in submission.id:
    test_path_fix = test_path+f"/{test_name}.jpg"
    img = cv2.imread(test_path_fix)
    if img is None:
        # cv2.imread returns None for a missing/unreadable file. Fail loudly
        # with the offending path: skipping would misalign the predictions
        # with the rows of `submission`.
        raise FileNotFoundError(f"Could not read test image: {test_path_fix}")
    # Same size as the images the models were trained on.
    resized_img = cv2.resize(img, (256, 256))
    test_img.append(resized_img)

In [30]:
# Stack the list of resized images into one float NumPy array (input to model.predict).
test_img = np.array(test_img, dtype="float")

In [None]:
# Predictions of the three ensemble members for every test image.
hasil1 = model.predict(test_img)
hasil2 = model2.predict(test_img)

# Third voter: use model3. The original called model2 a second time, which made
# hasil2 and hasil3 identical and the three-way vote meaningless.
# NOTE(review): the original comment said "model2 trained with full data" —
# confirm that model3 (loaded in the "Test for scoring" cell) is that model.
hasil3 = model3.predict(test_img)

In [None]:
# Majority vote over the three models, one label per test image.
# Each of a, b, c is one model's prediction row for the same image (assumed to
# be a vector of per-class scores, as in predict2_voting); compare the predicted
# class indices, not the raw arrays — `a == b` on arrays is element-wise and
# cannot be used as an `if` condition.
final = []
for a, b, c in zip(hasil1, hasil2, hasil3):
    vote_a, vote_b, vote_c = (int(np.argmax(scores)) for scores in (a, b, c))
    if vote_a == vote_b or vote_a == vote_c:
        final.append(vote_a)
    elif vote_b == vote_c:
        final.append(vote_b)
    else:
        # All three disagree: fall back to the class with the highest summed
        # score so that `final` always has exactly one entry per image.
        final.append(int(np.argmax(a + b + c)))

In [None]:
# Store the voted predictions in the `label` column and preview the first rows.
# `final` needs exactly one entry per row of `submission` for this assignment to succeed.
submission["label"] = final
submission.head()

In [None]:
# Write the submission file, without the index column, to the working directory.
submission.to_csv("third.csv", index=False)

# Test for scoring

In [14]:
import tensorflow as tf
import cv2
import numpy as np
# Load the three saved ensemble members used by predict2_voting.
# Note: this rebinds `model`, replacing any model trained earlier in the session.
model, model2, model3 = (
    tf.keras.models.load_model(saved_path)
    for saved_path in (
        "./models/nopal_2.h5",
        "./models/nopal_3.h5",
        "./models/nopal_4.h5",
    )
)
def predict2_voting(image_path):
    """Classify one image by majority vote of the three loaded models.

    Parameters
    ----------
    image_path : str
        Path of the image file to classify.

    Returns
    -------
    integer
        Class index predicted by at least two of ``model``, ``model2`` and
        ``model3``. If all three disagree, the class with the highest summed
        score across the three models is returned (the original returned
        ``None`` in that case).

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising for a bad path/file.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Resize to the 256x256 input used elsewhere in the notebook and wrap the
    # single image in a batch of one.
    resized_img = cv2.resize(img, (256, 256))
    data_fix = np.array([resized_img], dtype="float")

    result1 = model.predict(data_fix)
    result2 = model2.predict(data_fix)
    result3 = model3.predict(data_fix)
    final_result1 = np.argmax(result1)
    final_result2 = np.argmax(result2)
    final_result3 = np.argmax(result3)

    if final_result1 == final_result2 or final_result1 == final_result3:
        return final_result1
    if final_result2 == final_result3:
        return final_result2
    # No two models agree: break the tie with the summed scores.
    return np.argmax(result1 + result2 + result3)


    

In [15]:
# Spot-check the three-model vote on a single local image.
predict2_voting("./12.jpg")



1

In [16]:
# Spot-check the three-model vote on a second local image.
predict2_voting("./start.jpg")



0