In [71]:
# Necessary imports
import torch
import torch.nn as nn
import torch.nn.functional as F 
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
from torchvision.models import detection
from PIL import Image
import os
import glob
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.utils.class_weight import compute_class_weight
from matplotlib.patches import Rectangle

import train

# Check for device: use MPS if available, otherwise use CPU
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

# Define dataset path
dataset_path = './dataset1'

# Collect data helper functions
def is_valid_file(file_path):
    valid_extensions = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp']
    return any(file_path.endswith(ext) for ext in valid_extensions)

def collect_data(directory):
    data = []
    labels = []
    class_to_idx = {cls: idx for idx, cls in enumerate(os.listdir(directory)) if os.path.isdir(os.path.join(directory, cls))}
    for cls, idx in class_to_idx.items():
        class_path = os.path.join(directory, cls)
        for file_path in glob.glob(os.path.join(class_path, '*')):
            if is_valid_file(file_path):
                data.append(file_path)
                labels.append(idx)
    return data, labels, class_to_idx

# Data transformations and dataset preparation
transform = transforms.Compose([
    transforms.Resize((224, 224)), 
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

# Custom Dataset Class
class CustomImageDataset(torch.utils.data.Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_path = self.data[idx]
        label = self.labels[idx]
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

# Load and split dataset
data, labels, class_to_idx = collect_data(dataset_path)
dataset = CustomImageDataset(data, labels, transform)
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Compute class weights for imbalanced datasets
class_weights = compute_class_weight('balanced', classes=np.unique(labels), y=labels)
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32, device=device)

# Waste Classifier Model
class WasteClassifier(nn.Module):
    def __init__(self, num_classes):
        super(WasteClassifier, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(64 * 56 * 56, 1024),
            nn.ReLU(),
            nn.Linear(1024, len(class_to_idx))
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# Custom ResNet Model
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = F.relu(out)

        return out

class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion)
            )

        layers = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

def resnet18(num_classes):
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes)


class_weights_dict = dict(zip(np.unique(labels), class_weights))
def run_faster_cnn(img, model, classes):
    img = np.array(Image.open(img))
    img = transforms.toTensor()(img)
    out = model([img])
    scores = out[0]['scores']
    boxes = out[0]['boxes']
    classes = out[0]['classes']
    if len(scores) > 0:
        plt.imshow(img)
        ax = plt.gca()
        for box in boxes.detach().numpy():
            xbox_min, ybox_min, xbox_max, ybox_max = box
            width = xbox_max - xbox_min
            height = ybox_max - ybox_min
            rect = Rectangle((xbox_min, ybox_min), width, height, linewidth=2, edgecolour='r', facecolor='none')
            ax.add_patch(rect)
        plt.show()
    class_labels = [classes[val.item()-1] for val in classes]
    return class_labels

def load_model(weights_dict):
    # frcnn_args_inference = {'box_score_thresh': 0.75, 'box_detections_per_img': 32}
    model = detection.fasterrcnn_resnet50_fpn(weights_dict)
    model.eval()

    return model

faster_cnn_model = load_model(class_weights_dict)
faster_cnn_model._modules.keys()
# model3 = run_faster_cnn(img, faster_cnn_model, num_classes=len(class_to_idx)).to(device)

# YOLO model
class YOLO(nn.Module):
    def __init__(self, num_classes=6):
        super(YOLO, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 48, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(48, 32, kernel_size=1)
        self.conv4 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(64, 64, kernel_size=1)
        self.conv6 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv7 = nn.Conv2d(128, 64, kernel_size=1)
        self.conv8 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv9 = nn.Conv2d(128, 64, kernel_size=1)
        self.conv10 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv11 = nn.Conv2d(128, 64, kernel_size=1)
        self.conv12 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(128, 64, kernel_size=1)
        self.conv14 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv15 = nn.Conv2d(128, 128, kernel_size=1)
        self.conv16 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv17 = nn.Conv2d(256, 128, kernel_size=1)
        self.conv18 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv19 = nn.Conv2d(256, 128, kernel_size=1)
        self.conv20 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv21 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv22 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv23 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv24 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv25 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv26 = nn.Conv2d(256, 128, kernel_size=1)
        self.conv27 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv28 = nn.Conv2d(256, 1024, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(1024, 1024)
        self.fc2 = nn.Linear(1024, 6)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.maxpool1(x)
        x = F.relu(self.conv2(x))
        x = self.maxpool2(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.maxpool3(x)
        x = F.relu(self.conv7(x))
        x = F.relu(self.conv8(x))
        x = F.relu(self.conv9(x))
        x = F.relu(self.conv10(x))
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))
        x = F.relu(self.conv14(x))
        x = F.relu(self.conv15(x))
        x = F.relu(self.conv16(x))
        x = self.maxpool4(x)
        x = F.relu(self.conv17(x))
        x = F.relu(self.conv18(x))
        x = F.relu(self.conv19(x))
        x = F.relu(self.conv20(x))
        x = F.relu(self.conv21(x))
        x = F.relu(self.conv22(x))
        x = F.relu(self.conv23(x))
        x = F.relu(self.conv24(x))
        x = F.relu(self.conv25(x))
        x = F.relu(self.conv26(x))
        x = F.relu(self.conv27(x))
        x = F.relu(self.conv28(x))
        x = F.relu(self.fc1(x.view(x.size(0), -1)))
        x = self.fc2(x)
        return F.softmax(x, dim=1)


# Instantiate both models
model1 = WasteClassifier(num_classes=len(class_to_idx)).to(device)
model2 = resnet18(num_classes=len(class_to_idx)).to(device)


# Optimizers and loss functions
optimizer1 = optim.Adam(model1.parameters(), lr=0.001)
optimizer2 = optim.Adam(model2.parameters(), lr=0.001)
criterion1 = nn.CrossEntropyLoss(weight=class_weights_tensor)
criterion2 = nn.CrossEntropyLoss(weight=class_weights_tensor)


# Unified training function for both models
import torch

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

# Train both models separately
print("\nTraining WasteClassifier...")
train.train_model(model1, optimizer1, criterion1, train_loader, val_loader)

print("\nTraining Custom ResNet Model...")
train.train_model(model2, optimizer2, criterion2, train_loader, val_loader)

# Evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    total_test = 0
    correct_test = 0
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            total_test += labels.size(0)
            correct_test += (preds == labels).sum().item()
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    accuracy = correct_test / total_test * 100
    print(f'Test Accuracy: {accuracy:.2f}%')

    # Compute confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    plt.show()

# Evaluate both models
print("\nEvaluating WasteClassifier...")
evaluate_model(model1, test_loader)

print("\nEvaluating Custom ResNet Model...")
evaluate_model(model2, test_loader)


Using device: cpu




Using device: cpu

Training WasteClassifier...
Epoch 1: Train Loss: 3.0132, Train Acc: 23.13%, Val Loss: 1.5606, Val Acc: 27.18%


RuntimeError: Parent directory checkpoints does not exist.