In [8]:
! pip install torchvision

Collecting torchvision
  Downloading torchvision-0.21.0-cp310-cp310-win_amd64.whl (1.6 MB)
     ---------------------------------------- 0.0/1.6 MB ? eta -:--:--
     --- ------------------------------------ 0.2/1.6 MB 4.5 MB/s eta 0:00:01
     ---------------- ----------------------- 0.6/1.6 MB 8.1 MB/s eta 0:00:01
     ------------------------------- -------- 1.2/1.6 MB 9.7 MB/s eta 0:00:01
     ---------------------------------------  1.6/1.6 MB 9.0 MB/s eta 0:00:01
     ---------------------------------------- 1.6/1.6 MB 8.2 MB/s eta 0:00:00
Collecting torch==2.6.0
  Downloading torch-2.6.0-cp310-cp310-win_amd64.whl (204.2 MB)
     ---------------------------------------- 0.0/204.2 MB ? eta -:--:--
     --------------------------------------- 0.2/204.2 MB 12.2 MB/s eta 0:00:17
     ---------------------------------------- 0.8/204.2 MB 9.6 MB/s eta 0:00:22
     ---------------------------------------- 1.3/204.2 MB 8.9 MB/s eta 0:00:23
     ---------------------------------------- 1.


[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: C:\Users\vidia\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [9]:
import pandas as pd
import numpy as np
from torchvision import datasets


In [24]:


# Both roots sit under the same local data directory. Each root is the dataset
# folder itself (the one holding the class subfolders), not a single class folder.
_data_dir = r"C:\Users\vidia\OneDrive\Documents\mlp_proj\mlp_assignment\data"
dataset_path = _data_dir + r"\dataset-resized"
dataset_path2 = _data_dir + r"\realwaste-main\RealWaste"

# No transform is passed here, so both datasets keep ImageFolder's default.
trashNet_dataset = datasets.ImageFolder(root=dataset_path)
realWaste_dataset = datasets.ImageFolder(root=dataset_path2)


In [38]:
# Source folder names grouped by the merged class they collapse into.
# Folder names that do not appear here have no merged class.
_sources_by_merged_class = {
    "cardboard": ("cardboard", "Cardboard"),
    "glass": ("Glass", "glass"),
    "metal": ("Metal", "metal"),
    "paper": ("paper", "Paper"),
    "plastic": ("plastic", "Plastic"),
    "trash": ("trash", "Food Organics", "Miscellaneous Trash"),
}

# Flatten to the lookup used downstream: source folder name -> merged class name.
class_mapping = {
    source_name: merged_name
    for merged_name, source_names in _sources_by_merged_class.items()
    for source_name in source_names
}
 

In [27]:
from torch.utils.data import Dataset
 
class MergedDataset(Dataset):
    """Combine several ImageFolder-style datasets under one shared label set.

    Every source dataset must expose ``samples`` (a list of ``(path, label)``
    pairs) and ``classes`` (label index -> class name). Samples whose original
    class name is not a key of ``class_mapping`` are skipped.

    Args:
        datasets: Sequence of source datasets. An empty sequence yields an
            empty merged dataset. Inside ``__init__`` this parameter shadows
            the module-level ``datasets`` import.
        class_mapping: Dict mapping original class names to merged class names.
        transform: Optional callable applied to every loaded image. When left
            as ``None``, the ``transform`` attribute of the first source
            dataset is reused (``None`` if there is no such dataset/attribute).

    Attributes:
        samples: List of ``(image_path, merged_label_index)`` pairs.
        new_classes: Sorted list of the unique merged class names.
        class_to_idx: Dict mapping merged class name -> merged label index.
        transform: The callable applied in ``__getitem__``, or ``None``.
    """

    def __init__(self, datasets, class_mapping, transform=None):
        self.class_mapping = class_mapping
        # Label indices follow the alphabetical order of the merged class names.
        self.new_classes = sorted(set(class_mapping.values()))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.new_classes)}

        self.samples = []
        for dataset in datasets:
            for img_path, label in dataset.samples:
                original_class = dataset.classes[label]
                if original_class in class_mapping:
                    new_label = self.class_to_idx[class_mapping[original_class]]
                    self.samples.append((img_path, new_label))

        # An explicit transform wins; otherwise inherit from the first source
        # dataset. Guarding on emptiness avoids an IndexError for ``[]``.
        if transform is None and datasets:
            transform = getattr(datasets[0], "transform", None)
        self.transform = transform

    def __len__(self):
        """Return the number of merged samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load the image at ``idx`` and return ``(image, merged_label_index)``."""
        img_path, label = self.samples[idx]
        # ``datasets`` here is the module-level import, not the __init__ argument.
        image = datasets.folder.default_loader(img_path)
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [None]:
from torch.utils.data import DataLoader
from torchvision import transforms

merged_dataset = MergedDataset([trashNet_dataset, realWaste_dataset], class_mapping)

# The source ImageFolder objects were created without a transform, so samples
# would come back as PIL images, which the DataLoader's default collate cannot
# stack into a batch. Fall back to a fixed-size resize plus tensor conversion;
# 256x256 mirrors the img_height/img_width used by the TensorFlow cells.
if merged_dataset.transform is None:
    merged_dataset.transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ])

# Create DataLoader
train_loader = DataLoader(merged_dataset, batch_size=32, shuffle=True)

# Print new class names
print("Merged Classes:", merged_dataset.new_classes)

In [32]:
import numpy as np
import time

import PIL.Image as Image
import matplotlib.pylab as plt

import tensorflow as tf
import tensorflow_hub as hub

In [36]:
# Input pipeline settings for the augmented image folder.
batch_size = 32
img_height, img_width = 256, 256
data_root = 'dataset_augmented/'

# Training subset of a seeded split that holds out 20% for validation.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    str(data_root),
    image_size=(img_height, img_width),
    batch_size=batch_size,
    validation_split=0.2,
    subset="training",
    seed=123,
)
 

Smallest width: 512, Smallest height: 384


In [None]:
import tensorflow as tf
import os
import shutil

# Root folders of the two source datasets
data_root1 = 'dataset_folder1/'  # First dataset
data_root2 = 'dataset_folder2/'  # Second dataset

# Entries found directly under each root, sorted, are taken as the class names
classes1 = os.listdir(data_root1)
classes1.sort()
classes2 = os.listdir(data_root2)
classes2.sort()

# Show what was detected so the manual mapping can be checked against it
print("Classes in dataset 1:", classes1)
print("Classes in dataset 2:", classes2)

# Manual mapping from each source folder name to the merged class it belongs to.
# Both capitalisations are listed because folder names are matched exactly.
class_mapping = dict([
    ("cardboard", "cardboard"),
    ("Cardboard", "cardboard"),
    ("Glass", "glass"),
    ("glass", "glass"),
    ("Metal", "metal"),
    ("metal", "metal"),
    ("paper", "paper"),
    ("Paper", "paper"),
    ("plastic", "plastic"),
    ("Plastic", "plastic"),
    ("trash", "trash"),
    ("Food Organics", "trash"),
    ("Miscellaneous Trash", "trash"),
])
 
# Destination root for the merged, relabelled copy of both datasets.
# exist_ok=True keeps this cell re-runnable once the folder is already there.
unified_dataset_path = 'dataset_merged/'
os.makedirs(name=unified_dataset_path, exist_ok=True)

def _collision_free_path(source_img_path, target_class_path, img_name):
    """Return a destination path that will not overwrite a different image."""
    import filecmp  # local import: only this helper needs it

    candidate = os.path.join(target_class_path, img_name)
    stem, ext = os.path.splitext(img_name)
    suffix = 0
    # An existing file with identical bytes is the same image (for example one
    # left by an earlier run), so its path is reused and re-runs stay idempotent.
    # Anything else at that path gets a numeric suffix instead of being replaced.
    while os.path.exists(candidate) and not filecmp.cmp(
        source_img_path, candidate, shallow=False
    ):
        suffix += 1
        candidate = os.path.join(target_class_path, f"{stem}_{suffix}{ext}")
    return candidate


# Function to copy & relabel images into a unified dataset
def copy_and_relabel_images(source_folder, target_folder, mapping):
    """Copy images from ``source_folder`` into merged class folders.

    Each class subfolder of ``source_folder`` whose name is a key of ``mapping``
    is copied into ``target_folder/<mapping[name]>``. Subfolders that are not
    in ``mapping`` are ignored, as are entries that are not regular files
    inside a class folder.

    Several source classes may map to the same target folder. When two
    different images would land on the same file name, the later one is
    written as ``<stem>_<n><ext>`` rather than overwriting the earlier one.

    Args:
        source_folder: Directory containing one subfolder per original class.
        target_folder: Root of the merged dataset; class folders are created
            on demand.
        mapping: Dict mapping original class folder names to merged names.
    """
    # Sorted traversal makes the collision suffixes deterministic across runs.
    for original_class in sorted(os.listdir(source_folder)):
        source_class_path = os.path.join(source_folder, original_class)
        if original_class not in mapping or not os.path.isdir(source_class_path):
            continue  # Only process known class folders

        target_class_path = os.path.join(target_folder, mapping[original_class])
        os.makedirs(target_class_path, exist_ok=True)

        for img_name in sorted(os.listdir(source_class_path)):
            source_img_path = os.path.join(source_class_path, img_name)
            if not os.path.isfile(source_img_path):
                continue  # nested folders cannot be copied with copy2
            target_img_path = _collision_free_path(
                source_img_path, target_class_path, img_name
            )
            shutil.copy2(source_img_path, target_img_path)

# Merge both source datasets into the unified folder, first dataset first.
for _source_root in (data_root1, data_root2):
    copy_and_relabel_images(_source_root, unified_dataset_path, class_mapping)

print("✅ Dataset merging and relabeling completed!")

# Build the training and validation pipelines from the merged dataset folder.
batch_size = 32
img_height = img_width = 256

# Options common to both subsets: same split fraction, seed, image size and
# batch size, so only the `subset` argument differs between the two calls.
_split_options = dict(
    validation_split=0.2,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    unified_dataset_path, subset="training", **_split_options
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    unified_dataset_path, subset="validation", **_split_options
)
