This notebook has been imported from Kaggle. Please note that the dataset used in this notebook is not included here. However, you can find the dataset [here](https://www.kaggle.com/datasets/joebeachcapital/realwaste).


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and echo the full path of every file found.
input_files = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Data Loading and Preprocessing

In [None]:
import tensorflow as tf, os, subprocess, sys

print("TF:", tf.__version__)
print("Physical GPUs:", tf.config.list_physical_devices("GPU"))

# Optional, see driver details:
!nvidia-smi

In [None]:
# Enable memory growth on every GPU that TensorFlow reports.
gpus = tf.config.list_physical_devices('GPU')
enable_growth = tf.config.experimental.set_memory_growth
for device in gpus:
    enable_growth(device, True)

In [None]:
# Mixed precision for T4 Tensor Cores
from tensorflow.keras import mixed_precision

# Set the global Keras dtype policy to "mixed_float16".
mixed_precision.set_global_policy("mixed_float16")
# Create a MirroredStrategy instance. Nothing in this section uses it yet;
# presumably later cells build the model under strategy.scope() — confirm downstream.
strategy = tf.distribute.MirroredStrategy()

### Data Loading

In [None]:
# Shared configuration for the data-loading cells below.

# Seed passed as `seed=` to both image_dataset_from_directory calls.
SEED = 42
# Passed as `image_size=` when loading the images.
IMG_SIZE = (224, 224)
# Passed as `batch_size=` when loading the images.
BATCH = 64
# Alias for tf.data.AUTOTUNE (not used in the cells visible in this section).
AUTOTUNE = tf.data.AUTOTUNE
# Root directory handed to image_dataset_from_directory.
DATA_DIR = "/kaggle/input/realwaste/realwaste-main/RealWaste"

In [None]:
# Keyword arguments shared by both loader calls: the same split fraction, seed,
# image size and batch size are used for the training and the hold-out subset.
_split_options = dict(
    validation_split=0.30,
    seed=SEED,
    image_size=IMG_SIZE,
    batch_size=BATCH,
)
_load_split = tf.keras.utils.image_dataset_from_directory

# "training" subset of the 0.30 validation split.
train_raw = _load_split(DATA_DIR, subset="training", **_split_options)

# "validation" subset of the same split, loaded with shuffling requested
# explicitly; it is divided into validation and test parts in a later cell.
temp_raw = _load_split(
    DATA_DIR,
    subset="validation",
    shuffle=True,
    **_split_options,
)

# Class names as reported by the training dataset object.
class_names = train_raw.class_names
num_classes = len(class_names)
print("Classes:", class_names)

In [None]:
# Split the hold-out dataset into a validation half and a test half (by batch count).
#
# temp_raw was created with shuffle=True, so its element order is re-drawn every
# time the dataset is iterated. Calling take()/skip() directly on it therefore does
# NOT give a fixed partition: each pass over val_raw or test_raw would see a fresh
# ordering, and the same image could show up in both subsets. Caching the hold-out
# and reading it once in full freezes a single ordering, so the two halves below
# are disjoint and identical on every later iteration.
#
# Note: the in-memory cache keeps the decoded hold-out images in RAM. If that is
# too much, pass a file path to cache() to spill it to disk instead.
temp_batches = temp_raw.cardinality().numpy()
if temp_batches < 0:
    # Negative values are tf.data's markers for infinite / unknown cardinality.
    raise ValueError(
        "temp_raw has no finite, known cardinality; cannot split it in half by batch count"
    )

temp_fixed = temp_raw.cache()
for _ in temp_fixed:
    # One complete pass is required to finalise the cache (and thus the order).
    pass

val_raw  = temp_fixed.take(temp_batches // 2)
test_raw = temp_fixed.skip(temp_batches // 2)

### Data Preprocessing

In [None]:
import matplotlib.pyplot as plt

In [None]:
def count_labels(ds, num_classes):
    """Count how many examples of each class a batched dataset contains.

    Args:
        ds: Dataset of ``(features, label)`` pairs exposing ``unbatch()``.
            Each label is either a scalar class index or a vector
            (e.g. one-hot), in which case its argmax is taken as the class.
        num_classes: Length of the returned count vector.

    Returns:
        ``np.ndarray`` of shape ``(num_classes,)`` and dtype ``int64`` where
        entry ``i`` is the number of examples labelled with class ``i``.
    """
    tally = np.zeros(num_classes, dtype=np.int64)
    for _, label in ds.unbatch():
        value = label.numpy()
        is_scalar = len(label.shape) == 0
        # Scalar labels are already class indices; vectors are reduced via argmax.
        class_idx = int(value) if is_scalar else int(np.argmax(value))
        tally[class_idx] += 1
    return tally

# Per-class image counts for the train, validation and test datasets (in that order).
train_counts, val_counts, test_counts = (
    count_labels(split, len(class_names))
    for split in (train_raw, val_raw, test_raw)
)

def bar_counts(title, counts):
    """Display a bar chart of image counts, one bar per entry of ``class_names``.

    Args:
        title: Text shown as the figure title.
        counts: Sequence of counts aligned with the notebook-level ``class_names``.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.set_title(title)
    ax.bar(class_names, counts)
    # Slant the class names so long labels do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    ax.set_ylabel("images")
    fig.tight_layout()
    plt.show()

# One class-distribution chart per dataset.
for split_title, split_counts in (
    ("Train class distribution", train_counts),
    ("Validation class distribution", val_counts),
    ("Test class distribution", test_counts),
):
    bar_counts(split_title, split_counts)

In [None]:
# Sanity-check the label encoding on up to 2000 individual training examples.
sampled_labels = [label.numpy() for _, label in train_raw.unbatch().take(2000)]
ys = np.array(sampled_labels)

print("dtype:", ys.dtype, "shape:", ys.shape)
print("min/max:", ys.min(), ys.max())
print("unique:", np.unique(ys)[:20])