In [7]:
from numpy import array
from cv2 import imread, resize, calcHist
from os import listdir
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from kagglehub import dataset_download

## Fontes
Transformação das imagens em dados que podem ser usados pelos classificadores do sklearn (image to feature): https://pyimagesearch.com/2016/08/08/k-nn-classifier-for-image-classification/ <br>
Classificador KNN: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html <br>
Treinamento dos dados: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html <br>
Métricas do modelo: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html <br>

In [8]:
# Parametros de entrada

n = 3 # Numero de vizinhos do knn
img_size = 64 # As imagens sao padronizadas para img_size x img_size pixels para a classificacao

In [9]:
# Função para extrair features
def extract_features(img_path):
    img = imread(img_path)
    img = resize(img, (img_size,img_size))
    hist = calcHist([img], [0,1,2], None, [8,8,8], [0,256,0,256,0,256])
    return hist.flatten()

In [10]:
# Inicializacao dos parametros e processamento das imagens

# Baixar dataset com kagglehub
path = dataset_download("sumn2u/garbage-classification-v2")
print("Path to dataset files:", path)

classes = listdir(f"{path}/garbage-dataset")

data = []
labels = []

"""
    Iteramos por cada imagem de cada categoria de lixo, utilizando a tecnica de histograma de cores para
    transformar as imagens em vetores que podem ser utilizados pelos classificadores do sklearn.
"""
for class_name in classes:
    class_path = f"{path}/garbage-dataset/{class_name}"
    img_list = listdir(class_path)

    for image_name in img_list:
        image_path = f"{path}/garbage-dataset/{class_name}/{image_name}"
        image = extract_features(image_path)

        if image is not None:
            data.append(image)
            labels.append(class_name)

data = array(data)
labels = array(labels)

X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2)

Path to dataset files: /kaggle/input/garbage-classification-v2


In [11]:
# Treinamento do classificador KNN

knn = KNeighborsClassifier(n_neighbors=n)
knn.fit(X_train, y_train)

In [12]:
# Avaliacao do modelo

y_pred = knn.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

     battery       0.34      0.56      0.42       182
  biological       0.28      0.60      0.38       200
   cardboard       0.44      0.57      0.50       357
     clothes       0.71      0.70      0.70      1100
       glass       0.51      0.47      0.49       583
       metal       0.37      0.31      0.34       207
       paper       0.42      0.45      0.44       348
     plastic       0.53      0.31      0.39       404
       shoes       0.42      0.25      0.31       390
       trash       0.45      0.36      0.40       182

    accuracy                           0.50      3953
   macro avg       0.45      0.46      0.44      3953
weighted avg       0.52      0.50      0.50      3953

