# Bibliotecas

In [40]:
from pathlib import Path
import os
import pandas as pd

import pickle

from sklearn.model_selection import train_test_split

# Criar dataframe a partir do dataset

In [41]:
dataset = "/kaggle/input/garbage-classification/garbage_classification"
image_dir = Path(dataset)

# Each extension is listed in both cases because glob matching is case-sensitive on POSIX.
# (The original list repeated '**/*.png', which indexed .png files twice and skipped .PNG.)
IMAGE_PATTERNS = ('**/*.JPG', '**/*.jpg', '**/*.png', '**/*.PNG')


def build_image_df(image_dir, patterns=IMAGE_PATTERNS):
    """Index the image files found under ``image_dir``.

    Parameters
    ----------
    image_dir : str or Path
        Root folder; it is searched recursively with every glob in ``patterns``.
    patterns : iterable of str
        Glob patterns, relative to ``image_dir``, selecting the image files.

    Returns
    -------
    pandas.DataFrame
        One row per distinct file with two columns: ``Filepath`` (the path as a
        string) and ``Label`` (the name of the folder that contains the file).
    """
    root = Path(image_dir)
    # dict.fromkeys keeps first-seen order and drops a path matched by more than one
    # pattern (possible on case-insensitive filesystems).
    found = list(dict.fromkeys(
        path for pattern in patterns for path in root.glob(pattern)
    ))

    filepaths = pd.Series(found, name='Filepath').astype(str)
    labels = pd.Series([path.parent.name for path in found], name='Label')

    # Concatenate filepaths and labels side by side
    return pd.concat([filepaths, labels], axis=1)


image_df = build_image_df(image_dir)

# Kept as standalone Series for any later cell that refers to them.
filepaths = image_df['Filepath']
labels = image_df['Label']

In [42]:
# Display the assembled dataframe: one row per image, with its file path and class label.
image_df

Unnamed: 0,Filepath,Label
0,/kaggle/input/garbage-classification/garbage_c...,metal
1,/kaggle/input/garbage-classification/garbage_c...,metal
2,/kaggle/input/garbage-classification/garbage_c...,metal
3,/kaggle/input/garbage-classification/garbage_c...,metal
4,/kaggle/input/garbage-classification/garbage_c...,metal
...,...,...
15510,/kaggle/input/garbage-classification/garbage_c...,green-glass
15511,/kaggle/input/garbage-classification/garbage_c...,green-glass
15512,/kaggle/input/garbage-classification/garbage_c...,green-glass
15513,/kaggle/input/garbage-classification/garbage_c...,green-glass


# Separar em conjuntos de treino e teste

In [43]:
# Hold out 20% of the images as the test set; the remaining 80% (train_df1) is split
# again later into training and validation. random_state pins the shuffle.
train_df1, test_df = train_test_split(
    image_df,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [44]:
# (rows, columns) of the 80% portion that remains after holding out the test set.
train_df1.shape

(12412, 2)

In [45]:
# (rows, columns) of the held-out test set (test_size=0.2 of the full dataframe).
test_df.shape

(3103, 2)

# Criar o conjunto de validação a partir do conjunto de treino

In [46]:
# Carve the validation set out of the training portion: 20% of train_df1 becomes val_df,
# the rest is the final training set. random_state pins the shuffle.
train_df, val_df = train_test_split(
    train_df1,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [47]:
# (rows, columns) of the final training set.
train_df.shape

(9929, 2)

In [48]:
# (rows, columns) of the validation set.
val_df.shape

(2483, 2)

# Salvar os dataframes em arquivos binários

In [49]:
# Persist each split with pickle. Writing inside a `with` block closes (and therefore
# flushes) every file as soon as it is written, instead of leaving the handle open until
# the interpreter happens to garbage-collect it.
for split_df, pkl_path in (
    (train_df, "/kaggle/working/train_df.pkl"),
    (val_df, "/kaggle/working/val_df.pkl"),
    (test_df, "/kaggle/working/test_df.pkl"),
):
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(split_df, pkl_file)

# Recuperar os dataframes a partir dos arquivos binários

Para quem tem acesso ao dataset "garbage_classification", queremos que seja sempre possível dividi-lo nos mesmos conjuntos de treino, validação e teste que criamos acima.

Dessa forma, garantimos que nossos modelos sejam sempre treinados com o mesmo conjunto de treino, validados com o mesmo conjunto de validação e testados com o mesmo conjunto de teste.

In [50]:
def load_split(pkl_path):
    """Return the object stored in the pickle file at ``pkl_path``.

    The file is opened in a `with` block so the handle is closed as soon as the
    object has been read.
    """
    # NOTE: pickle.load can execute arbitrary code while deserializing; only load
    # files that you produced yourself or that come from a trusted source.
    with open(pkl_path, 'rb') as pkl_file:
        return pickle.load(pkl_file)


train = load_split("/kaggle/input/garbage-classification-sets/train_df.pkl")
val = load_split("/kaggle/input/garbage-classification-sets/val_df.pkl")
test = load_split("/kaggle/input/garbage-classification-sets/test_df.pkl")

In [51]:
# (rows, columns) of the reloaded training set; expected to equal train_df.shape.
train.shape

(9929, 2)

In [52]:
# (rows, columns) of the reloaded validation set; expected to equal val_df.shape.
val.shape

(2483, 2)

In [53]:
# (rows, columns) of the reloaded test set; expected to equal test_df.shape.
test.shape

(3103, 2)

# Alterar os endereços das imagens

Neste notebook, os endereços das imagens começam com:

"/kaggle/input/garbage-classification/"

Porém, pode ser que no seu projeto o dataset esteja armazenado em outro endereço.

Vamos ver como alterar os endereços no dataframe para adequá-los ao seu projeto local.

Para exemplificar, o novo endereço deverá começar com "drive/MyDrive/CPQD", mas você pode substituí-lo pelo que for necessário.

In [54]:
# Show the training set before the paths are rewritten (Filepath still has the Kaggle root).
train

Unnamed: 0,Filepath,Label
6891,/kaggle/input/garbage-classification/garbage_c...,shoes
11225,/kaggle/input/garbage-classification/garbage_c...,clothes
9241,/kaggle/input/garbage-classification/garbage_c...,clothes
13175,/kaggle/input/garbage-classification/garbage_c...,clothes
9170,/kaggle/input/garbage-classification/garbage_c...,clothes
...,...,...
4642,/kaggle/input/garbage-classification/garbage_c...,battery
7072,/kaggle/input/garbage-classification/garbage_c...,shoes
2321,/kaggle/input/garbage-classification/garbage_c...,biological
73,/kaggle/input/garbage-classification/garbage_c...,metal


In [55]:
# Swap the Kaggle dataset root for the local one in every training path.
# regex=False makes this a literal substring replacement on every pandas version
# (the default value of `regex` changed from True to False in pandas 2.0).
OLD_ROOT = "/kaggle/input/garbage-classification"
NEW_ROOT = "drive/MyDrive/CPQD"  # change this to wherever the dataset lives in your project
train['Filepath'] = train['Filepath'].str.replace(OLD_ROOT, NEW_ROOT, regex=False)

In [56]:
# Show the training set again to confirm that Filepath now starts with the new root.
train

Unnamed: 0,Filepath,Label
6891,drive/MyDrive/CPQD/garbage_classification/shoe...,shoes
11225,drive/MyDrive/CPQD/garbage_classification/clot...,clothes
9241,drive/MyDrive/CPQD/garbage_classification/clot...,clothes
13175,drive/MyDrive/CPQD/garbage_classification/clot...,clothes
9170,drive/MyDrive/CPQD/garbage_classification/clot...,clothes
...,...,...
4642,drive/MyDrive/CPQD/garbage_classification/batt...,battery
7072,drive/MyDrive/CPQD/garbage_classification/shoe...,shoes
2321,drive/MyDrive/CPQD/garbage_classification/biol...,biological
73,drive/MyDrive/CPQD/garbage_classification/meta...,metal


Agora é só repetir o mesmo processo para os conjuntos de validação e teste.

In [58]:
# Apply the same root swap to the validation and test sets.
# regex=False makes this a literal substring replacement on every pandas version
# (the default value of `regex` changed from True to False in pandas 2.0).
OLD_ROOT = "/kaggle/input/garbage-classification"
NEW_ROOT = "drive/MyDrive/CPQD"  # change this to wherever the dataset lives in your project
for split_df in (val, test):
    # Column assignment modifies the dataframe object itself, so val and test are updated.
    split_df['Filepath'] = split_df['Filepath'].str.replace(OLD_ROOT, NEW_ROOT, regex=False)

In [59]:
# Show the validation set to confirm that Filepath now starts with the new root.
val

Unnamed: 0,Filepath,Label
5677,drive/MyDrive/CPQD/garbage_classification/tras...,trash
13923,drive/MyDrive/CPQD/garbage_classification/clot...,clothes
12266,drive/MyDrive/CPQD/garbage_classification/clot...,clothes
4358,drive/MyDrive/CPQD/garbage_classification/batt...,battery
1914,drive/MyDrive/CPQD/garbage_classification/biol...,biological
...,...,...
9769,drive/MyDrive/CPQD/garbage_classification/clot...,clothes
10689,drive/MyDrive/CPQD/garbage_classification/clot...,clothes
7345,drive/MyDrive/CPQD/garbage_classification/shoe...,shoes
7483,drive/MyDrive/CPQD/garbage_classification/shoe...,shoes


In [60]:
# Show the test set to confirm that Filepath now starts with the new root.
test

Unnamed: 0,Filepath,Label
11755,drive/MyDrive/CPQD/garbage_classification/clot...,clothes
1011,drive/MyDrive/CPQD/garbage_classification/whit...,white-glass
8753,drive/MyDrive/CPQD/garbage_classification/clot...,clothes
3668,drive/MyDrive/CPQD/garbage_classification/brow...,brown-glass
12097,drive/MyDrive/CPQD/garbage_classification/clot...,clothes
...,...,...
6436,drive/MyDrive/CPQD/garbage_classification/card...,cardboard
8072,drive/MyDrive/CPQD/garbage_classification/shoe...,shoes
5312,drive/MyDrive/CPQD/garbage_classification/tras...,trash
7729,drive/MyDrive/CPQD/garbage_classification/shoe...,shoes
