In [None]:
# !pip install kagglehub

In [None]:
# NOTE(review): this whole cell is disabled. It appears to be the setup step that
# fetched the Kaggle dataset and copied it into Google Drive, so that later cells
# can read it from /content/drive/MyDrive/RecycleVision/datas/ - confirm before removing.
# import kagglehub
# import shutil

# # Download dataset
# path = kagglehub.dataset_download("sumn2u/garbage-classification-v2")
# print("Path to dataset files:", path)

# # Mount Google Drive
# from google.colab import drive
# drive.mount('/content/drive')

# # Copy to your target folder in Drive
# target_path = "/content/drive/MyDrive/RecycleVision/datas/"
# shutil.copytree(path, target_path, dirs_exist_ok=True)

# print("Dataset copied to:", target_path)

In [None]:
# Attach Google Drive to the Colab filesystem; the dataset paths used in the
# following cells all live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import os

# Root folder of the copied dataset; every entry directly below it is treated
# as one class.
dataset_path = "/content/drive/MyDrive/RecycleVision/datas/garbage-dataset"

# Class names are simply the directory listing of the root (in listing order).
classes = os.listdir(dataset_path)
print("Classes:", classes)
print("Total classes:", len(classes))

# Report how many entries each class folder contains.
for class_name in classes:
    class_entries = os.listdir(os.path.join(dataset_path, class_name))
    print(f"{class_name}: {len(class_entries)} images")

Classes: ['battery', 'biological', 'cardboard', 'clothes', 'glass', 'paper', 'plastic', 'trash', 'shoes', 'metal']
Total classes: 10
battery: 944 images
biological: 997 images
cardboard: 1825 images
clothes: 5327 images
glass: 3061 images
paper: 1680 images
plastic: 1984 images
trash: 947 images
shoes: 1977 images
metal: 1020 images


In [None]:
import torch
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

# Root folder that torchvision's ImageFolder reads from in the cells below.
data_dir = "/content/drive/MyDrive/RecycleVision/datas/garbage-dataset"

# ImageFolder assigns integer labels over the *sorted* sub-directory names,
# whereas os.listdir returns entries in arbitrary order (and may include stray
# files). Sort and keep directories only, so that classes[i] is the name of
# label i and can safely be used to decode labels/predictions.
classes = sorted(
    entry
    for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
print('Classes: ', classes)
print('Total Classes: ', len(classes))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Classes:  ['battery', 'biological', 'cardboard', 'clothes', 'glass', 'paper', 'plastic', 'trash', 'shoes', 'metal']
Total Classes:  10


In [None]:
# Channel statistics of ImageNet, used to normalise inputs in both pipelines.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Seed for the train/validation split so that re-running the notebook
# reproduces the same partition.
SPLIT_SEED = 42

# Training pipeline: random crop / flip / rotation augmentation, then normalise.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Validation pipeline: deterministic resize only, then normalise.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Load the folder once per pipeline. Both subsets returned by random_split wrap
# the *same* underlying dataset object, so overriding `.dataset.transform` on
# the validation subset would also replace the training transform and silently
# disable augmentation. A second ImageFolder gives validation its own transform.
# (This scans the directory a second time.)
full_dataset = datasets.ImageFolder(data_dir, transform=train_transform)
val_source = datasets.ImageFolder(data_dir, transform=val_transform)

# The split indices are shared between the two views, so both must enumerate
# the files identically.
assert val_source.samples == full_dataset.samples, "dataset views disagree on sample order"

# 80/20 train/validation split over sample indices.
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_split = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Same held-out indices, but served through the validation transform.
val_dataset = torch.utils.data.Subset(val_source, val_split.indices)

print(f"Total images: {len(full_dataset)}")
print(f"Training: {len(train_dataset)}")
print(f"Validation: {len(val_dataset)}")

Total images: 19762
Training: 15809
Validation: 3953


In [None]:
# Mini-batch size shared by both loaders.
BATCH_SIZE = 32

# Training batches are reshuffled on every pass; validation keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# len() of a loader is its number of batches.
print("Train loader batches:", len(train_loader))
print("Validation loader batches:", len(val_loader))

Train loader batches: 495
Validation loader batches: 124


In [None]:
from collections import Counter

# Label index -> class name, as assigned by the dataset.
class_names = full_dataset.classes

# full_dataset.samples holds (image_path, label) pairs; tally the labels.
class_counts = Counter(label for _, label in full_dataset.samples)

# Print the per-class totals using the readable class name.
for label_idx, n_images in class_counts.items():
    print(f"{class_names[label_idx]}: {n_images} images")

battery: 944 images
biological: 997 images
cardboard: 1825 images
clothes: 5327 images
glass: 3061 images
metal: 1020 images
paper: 1680 images
plastic: 1984 images
shoes: 1977 images
trash: 947 images


In [None]:
from torch.utils.data import WeightedRandomSampler
import numpy as np

# Number of images per integer label across the whole dataset.
class_counts = np.bincount([label for _, label in full_dataset.samples])
# Inverse frequency: rarer classes get proportionally larger weights.
class_weights = 1. / class_counts

# One weight per image of the FULL dataset, in full_dataset.samples order.
sample_weights = [class_weights[label] for _, label in full_dataset.samples]

# train_dataset is a random subset of full_dataset, so its i-th item is
# full_dataset[train_dataset.indices[i]]. The sampler draws positions within
# train_dataset, so its weights must be looked up through those indices.
# A positional slice sample_weights[:len(train_dataset)] would attach the
# weights of unrelated images (and, because samples are ordered by class,
# would never cover the last classes at all).
train_sample_weights = [sample_weights[i] for i in train_dataset.indices]

# Sampler for the training set: draws with replacement so that minority
# classes can be over-sampled within an epoch.
train_sampler = WeightedRandomSampler(
    weights=train_sample_weights,
    num_samples=len(train_sample_weights),
    replacement=True
)

# Rebuild the training loader with the sampler (a sampler replaces shuffle=True).
train_loader = DataLoader(train_dataset, batch_size=32, sampler=train_sampler)

print("Imbalance handled: using WeightedRandomSampler for training")


Imbalance handled: using WeightedRandomSampler for training


In [None]:
# Scratch check: display the element-wise inverse of the per-class counts.
np.divide(1.0, class_counts)

array([0.00105932, 0.00100301, 0.00054795, 0.00018772, 0.00032669,
       0.00098039, 0.00059524, 0.00050403, 0.00050582, 0.00105597])

In [None]:
# Scratch check: recompute per-class counts and inverse-frequency weights
# from the labels stored in full_dataset.samples.
labels = [label for _, label in full_dataset.samples]
classCounts = np.bincount(labels)
# Use the counts computed in this cell rather than `class_counts` left over
# from an earlier cell, so the result does not depend on hidden kernel state.
classWeights = 1.0 / classCounts

# Displayed as the cell output: number of items in the training subset.
len(train_dataset)

15809

In [None]:
# Scratch check: look up the class weight of every image in the full dataset.
sample_weights = [class_weights[target] for _path, target in full_dataset.samples]

# Only the length is inspected here: the slice takes the first
# len(train_dataset) entries of the full-dataset list.
n_train = len(train_dataset)
len(sample_weights[:n_train])

15809