In [17]:
import os

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [18]:
# Root folder of the image dataset that is loaded in the next cell.
# The location can be overridden with the REALWASTE_DATASET_PATH environment
# variable so the notebook also runs on machines other than the author's;
# when the variable is unset the original absolute path is used unchanged.
dataset_path = os.environ.get(
    "REALWASTE_DATASET_PATH",
    r"C:\Users\honya\Documents\GitHub\2024-25c-fai1-adsai-GergoHonyak242720\Deliverables\RealWaste",
)

In [19]:
# Load every image found under `dataset_path` as a batched tf.data.Dataset.
# Images are resized to 224x224 on load and delivered in batches of 32.
# A fixed seed makes the shuffled file order repeatable between runs, so the
# arrays built from this dataset later in the notebook are reproducible.
full_dataset = tf.keras.utils.image_dataset_from_directory(
    dataset_path,
    image_size=(224, 224),
    batch_size=32,
    shuffle=True,
    seed=42,
)

# Keep the class names now: the attribute belongs to this dataset object and
# is not carried over to datasets derived from it.
class_names = full_dataset.class_names
print("Classes found:", class_names)

Found 1867 files belonging to 7 classes.
Classes found: ['Food Organics', 'Glass', 'Metal', 'Paper_Cardboard', 'Plastic', 'Textile Trash', 'Vegetation']


In [20]:
# Random augmentation pipeline applied to image batches.
# Uses the stable `tf.keras.layers.*` classes instead of the deprecated
# `layers.experimental.preprocessing` aliases.
# RandomWidth / RandomHeight change the spatial size of their output, so
# callers must resize the result back to the model's input size.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomWidth(0.2),
    tf.keras.layers.RandomHeight(0.2),
    tf.keras.layers.RandomZoom(0.2),
    tf.keras.layers.RandomFlip("horizontal"),
])

In [21]:
def preprocess_image(image, label):
    """Build four augmented variants of ``image`` and matching labels.

    The variants are, in order: one pass through the random
    ``data_augmentation`` pipeline, then the input rotated by 90, 180 and
    270 degrees. The unmodified input itself is not part of the result.

    Args:
        image: Image tensor accepted by ``data_augmentation`` and
            ``tf.image.rot90`` (in this notebook: one batch from the loaded
            dataset).
        label: Label tensor belonging to ``image``.

    Returns:
        A tuple ``(augmented_images, labels)`` of two lists with four entries
        each. Every image entry is resized to 224x224; every label entry is
        ``label``.
    """
    # Keras random layers only transform their input in training mode. This
    # function is called outside `Model.fit`, so the mode must be requested
    # explicitly or the pipeline may pass the images through untouched.
    randomly_augmented = data_augmentation(image, training=True)

    # k counter-clockwise quarter turns of the original input.
    rotations = [tf.image.rot90(image, k=quarter_turns) for quarter_turns in (1, 2, 3)]

    # The random width/height layers can change the spatial size, so bring
    # every variant back to a common 224x224 before they are stacked.
    augmented_images = [
        tf.image.resize(img, (224, 224))
        for img in [randomly_augmented, *rotations]
    ]

    labels = [label] * len(augmented_images)

    return augmented_images, labels

def _augmented_variants_as_dataset(image, label):
    """Wrap the output of ``preprocess_image`` in a dataset, one element per variant."""
    variants = preprocess_image(image, label)
    return tf.data.Dataset.from_tensor_slices(variants)


# Replace every element of the dataset by its augmented variants, flattened
# into a single stream.
full_dataset = full_dataset.flat_map(_augmented_variants_as_dataset)

In [22]:
def extract_data_and_labels(dataset):
    """Collect a whole dataset into two NumPy arrays.

    Args:
        dataset: Iterable yielding ``(images, label)`` pairs whose members
            expose a ``.numpy()`` method.

    Returns:
        ``(data, labels)``: the per-element arrays joined along axis 0.
    """
    # Convert each element once, keeping image and label arrays paired.
    converted = [(images.numpy(), label.numpy()) for images, label in dataset]

    data = np.concatenate([pair[0] for pair in converted], axis=0)
    labels = np.concatenate([pair[1] for pair in converted], axis=0)
    return data, labels


# Pull the entire augmented dataset into memory as two NumPy arrays
# (images and labels) so it can be split with scikit-learn in the next cell.
all_data, all_labels = extract_data_and_labels(full_dataset)

In [23]:
# Step 1: keep 80 % for training and hold out 20 %, stratified by class.
# NOTE(review): `all_data` already contains the augmented variants, so
# variants of the same source image can end up in different splits — confirm
# this is acceptable for the reported validation/test metrics.
(train_data, remaining_data,
 train_labels, remaining_labels) = train_test_split(
    all_data,
    all_labels,
    test_size=0.2,
    stratify=all_labels,
    random_state=42,
)

# Step 2: halve the held-out part into validation and test sets.
(val_data, test_data,
 val_labels, test_labels) = train_test_split(
    remaining_data,
    remaining_labels,
    test_size=0.5,
    stratify=remaining_labels,
    random_state=42,
)

In [24]:
# Report the array shapes of each split, images first and labels second.
for caption, array in (
    ("Train data shape:", train_data),
    ("Train labels shape:", train_labels),
    ("Validation data shape:", val_data),
    ("Validation labels shape:", val_labels),
    ("Test data shape:", test_data),
    ("Test labels shape:", test_labels),
):
    print(caption, array.shape)

Train data shape: (5974, 224, 224, 3)
Train labels shape: (5974,)
Validation data shape: (747, 224, 224, 3)
Validation labels shape: (747,)
Test data shape: (747, 224, 224, 3)
Test labels shape: (747,)


In [25]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# One-hot encode the integer class ids; the model is compiled with
# categorical cross-entropy, which expects one column per class.
num_classes = len(np.unique(train_labels))
train_labels_cat = to_categorical(train_labels, num_classes=num_classes)
val_labels_cat = to_categorical(val_labels, num_classes=num_classes)
test_labels_cat = to_categorical(test_labels, num_classes=num_classes)

# The image arrays deliberately keep their (N, 224, 224, 3) layout.
# The convolutional model defined later takes 224x224x3 inputs, and flattening
# the arrays here would leave `test_data` in a 2-D shape that
# `model.evaluate` / `model.predict` cannot consume.

# Stop once val_loss has not improved for 3 epochs and roll back to the best
# weights; additionally persist the best model seen so far to disk.
early_stopping = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True, mode='min')

In [28]:
# Training callbacks: stop after 3 epochs without val_loss improvement and
# restore the best weights; also save the best model seen so far to disk.
early_stopping = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True, mode='min')

print("Original shape of train_data:", train_data.shape)
print("Original shape of val_data:", val_data.shape)

# Make sure every split has the (N, 224, 224, 3) layout the first Conv2D
# layer expects. The reshape is a no-op for arrays already in that layout.
train_data = train_data.reshape((train_data.shape[0], 224, 224, 3))
print("Reshaped train_data:", train_data.shape)

val_data = val_data.reshape((val_data.shape[0], 224, 224, 3))
print("Reshaped val_data:", val_data.shape)

# The test split needs the same layout, otherwise evaluating the model on it
# fails with an input-shape mismatch.
test_data = test_data.reshape((test_data.shape[0], 224, 224, 3))

model = Sequential()

# Convolutional feature extractor: three Conv -> BatchNorm -> MaxPool stages.
model.add(Conv2D(16, activation="relu", kernel_size=3, padding="same", input_shape=(224, 224, 3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(128, kernel_size=(3,3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Flatten())

# Fully connected classifier head with dropout between the dense layers.
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(256, activation='relu'))
model.add(Dropout(0.3))

# One output unit per class; `num_classes` is the width of the one-hot
# labels, so the two can never disagree.
model.add(Dense(num_classes, activation='softmax'))

optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Original shape of train_data: (5974, 224, 224, 3)
Original shape of val_data: (747, 224, 224, 3)
Reshaped train_data: (5974, 224, 224, 3)
Reshaped val_data: (747, 224, 224, 3)
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 224, 224, 16)      448       
                                                                 
 batch_normalization_3 (Bat  (None, 224, 224, 16)      64        
 chNormalization)                                                
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 112, 112, 16)      0         
 g2D)                                                            
                                                                 
 conv2d_4 (Conv2D)           (None, 110, 110, 64)      9280      
                                                                 
 batch_nor

In [None]:
# Train for at most 20 epochs in mini-batches of 32, validating after each
# epoch; the callbacks handle early stopping and checkpointing.
history = model.fit(
    x=train_data,
    y=train_labels_cat,
    batch_size=32,
    epochs=20,
    validation_data=(val_data, val_labels_cat),
    callbacks=[early_stopping, checkpoint],
)

Epoch 1/20



  saving_api.save_model(


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20


: 

In [None]:
# `plt` is used for the figure below; without this import the cell raises
# NameError on a fresh kernel.
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# An earlier cell flattens `test_data` to 2-D, but the model takes
# 224x224x3 images. Reshape into a separate name so this cell can be re-run
# safely; the reshape is a no-op if the array already has that layout.
test_images = test_data.reshape((-1, 224, 224, 3))

test_loss, test_accuracy = model.evaluate(test_images, test_labels_cat)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

# Turn predicted probabilities and one-hot targets back into class indices.
y_pred = model.predict(test_images)
y_pred_classes = np.argmax(y_pred, axis=1)

y_true_classes = np.argmax(test_labels_cat, axis=1)

# Pin the label set to every class id so the matrix always has one row and
# column per entry of `class_names`, even if a class is absent from the test
# split and from the predictions.
conf_matrix = confusion_matrix(
    y_true_classes,
    y_pred_classes,
    labels=np.arange(len(class_names)),
)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False,
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix")
plt.show()