In [21]:
import cv2
import os
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report , confusion_matrix

In [14]:
input_root = "dataset"
output_root = "augmented_dataset"

In [8]:
# Computing the minimum number of clean images per class
minCleanImages = 9999999999
for class_name in os.listdir(input_root):
    class_path = os.path.join(input_root, class_name)
    if not os.path.isdir(class_path):
        continue

    # Create class folder inside augmented_dataset
    output_class_path = os.path.join(output_root, class_name)
    os.makedirs(output_class_path, exist_ok=True)
    counter = 0
    # Checking the number of images that are clean
    for img_name in os.listdir(class_path):
        img_path = os.path.join(class_path, img_name)
        img = cv2.imread(img_path)
        if img is None:
            continue
        counter+=1
    minCleanImages = min(minCleanImages, counter)    
    print(f"Processed {counter} images in class '{class_name}'")
print(f"Minimum number of clean images across classes: {minCleanImages}")

Processed 247 images in class 'cardboard'
Processed 385 images in class 'glass'
Processed 315 images in class 'metal'
Processed 449 images in class 'paper'
Processed 363 images in class 'plastic'
Processed 106 images in class 'trash'
Minimum number of clean images across classes: 106


When augmenting the data, we want the sample size for each class to be similar, so we decided to set a limit for the augmentation size.

In [9]:
#Augmentation Techniques
def rotate_image(img, angle):
    h, w = img.shape[:2]
    M = cv2.getRotationMatrix2D((w/2, h/2), angle, 1)
    return cv2.warpAffine(img, M, (w, h))

def add_gaussian_noise(img):
    mean = 0
    std = 5      # adjust noise level if needed
    noise = np.random.normal(mean, std, img.shape).astype(np.uint8)
    noisy_img = cv2.add(img, noise)
    return noisy_img

def change_brightness(img):
    # Random brightness adjustment between -50 and +50
    value = random.randint(-50, 50)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)

    # Convert to int32 to avoid overflow
    v = v.astype(np.int32)
    v = np.clip(v + value, 0, 255).astype(np.uint8)

    final_hsv = cv2.merge((h, s, v))
    bright_img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
    return bright_img


In [10]:
numberOfAugmentationTecniques = 4 

# Loop over classes
for class_name in os.listdir(input_root):
    class_path = os.path.join(input_root, class_name)
    if not os.path.isdir(class_path):
        continue

    # Create class folder inside augmented_dataset
    output_class_path = os.path.join(output_root, class_name)
    os.makedirs(output_class_path, exist_ok=True)
    # duplicating original images and updating their name
    for img_name in os.listdir(class_path):
        img_path = os.path.join(class_path, img_name)
        img = cv2.imread(img_path)

        if img is None:
            continue
        filename = os.path.splitext(img_name)[0]
        # Save original image
        cv2.imwrite(os.path.join(output_class_path, f"{filename}_orig.png"), img)
    # Loop over images
    cnt = len(os.listdir(class_path))
    for img_name in os.listdir(class_path):
        img_path = os.path.join(class_path, img_name)
        img = cv2.imread(img_path)

        if img is None:
            continue

        filename = os.path.splitext(img_name)[0]
        # ----- Augmentations -----
        # 1. Flip
        flipped = cv2.flip(img, 1)
        cv2.imwrite(os.path.join(output_class_path, f"{filename}_flip.png"), flipped)
        cnt += 1 
        if ( cnt > minCleanImages * numberOfAugmentationTecniques ) :
            break 
        # 2. Rotation
        angle = random.randint(-30, 30)  # rotate between -30 to +30 degrees
        rotated = rotate_image(img, angle)
        cv2.imwrite(os.path.join(output_class_path, f"{filename}_rot.png"), rotated)
        cnt += 1 
        if ( cnt > minCleanImages * numberOfAugmentationTecniques ) :
            break 
        # 3. Gaussian Noise
        noisy = add_gaussian_noise(img)
        cv2.imwrite(os.path.join(output_class_path, f"{filename}_noise.png"), noisy)
        cnt += 1 
        if ( cnt > minCleanImages * numberOfAugmentationTecniques ) :
            break 
        # 4. Brightness Change
        bright = change_brightness(img)
        cv2.imwrite(os.path.join(output_class_path, f"{filename}_bright.png"), bright)
        cnt += 1 
        if ( cnt > minCleanImages * numberOfAugmentationTecniques ) :
            break 
print("Augmentation completed successfully!")

Augmentation completed successfully!


In [11]:
X = []  # features
y = []  # labels
# Loop over classes
for class_name in os.listdir(output_root):
    class_path = os.path.join(output_root, class_name)
    if not os.path.isdir(class_path):
        continue
    for img_name in os.listdir(class_path):
        img_path = os.path.join(class_path, img_name)
        img = cv2.imread(img_path)
        resized_image = cv2.resize(img,(384,384))
        img_gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
        features = img_gray.flatten()
        features = features / 255.0
        X.append(features)
        y.append(class_name)

In [28]:
X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=7, stratify=y
)

In [29]:
print("X_train:", X_train)
print("X_test:", X_test)
print("y_train:", y_train)
print("y_test:", y_test)

X_train: [[0.56470588 0.57254902 0.57254902 ... 0.90588235 0.90588235 0.90588235]
 [0.80392157 0.81960784 0.83529412 ... 0.61176471 0.61568627 0.60392157]
 [0.74901961 0.74901961 0.74901961 ... 0.06666667 0.05098039 0.05490196]
 ...
 [0.68235294 0.70196078 0.69803922 ... 0.42745098 0.41960784 0.41176471]
 [0.69803922 0.69803922 0.69803922 ... 0.56862745 0.56862745 0.56862745]
 [0.75686275 0.76078431 0.76862745 ... 0.35686275 0.36470588 0.35686275]]
X_test: [[0.58823529 0.58039216 0.58039216 ... 0.32941176 0.34901961 0.36470588]
 [0.96862745 0.98431373 0.99215686 ... 0.85490196 0.85490196 0.85098039]
 [0.98039216 0.94901961 0.98823529 ... 0.5372549  0.83529412 0.43529412]
 ...
 [0.90196078 0.90588235 0.90588235 ... 0.67058824 0.67058824 0.67058824]
 [0.92156863 0.92941176 0.93333333 ... 0.40784314 0.40784314 0.40392157]
 [0.8        0.8        0.79607843 ... 0.59215686 0.58823529 0.58823529]]
y_train: ['plastic' 'cardboard' 'cardboard' ... 'cardboard' 'metal' 'cardboard']
y_test: ['card

In [30]:
knn = KNeighborsClassifier(n_neighbors=3)  # you can choose k
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

# Optional: detailed classification report
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Optional: confusion matrix
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Test Accuracy: 0.44223107569721115

Classification Report:
               precision    recall  f1-score   support

   cardboard       0.38      0.61      0.47        83
       glass       0.44      0.40      0.42        82
       metal       0.58      0.18      0.28        83
       paper       0.50      0.36      0.42        90
     plastic       0.40      0.60      0.48        80
       trash       0.52      0.51      0.51        84

    accuracy                           0.44       502
   macro avg       0.47      0.44      0.43       502
weighted avg       0.47      0.44      0.43       502


Confusion Matrix:
 [[51  6  2  9  8  7]
 [13 33  4  7 20  5]
 [28  9 15  4 13 14]
 [18  7  1 32 23  9]
 [10  9  0  8 48  5]
 [14 11  4  4  8 43]]
