<a href="https://colab.research.google.com/github/adrynalean/PI_Summer_Internship/blob/prototype_v1_CNN_Module/waste_sorting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Food Waste Sorting Machine

### 1. Accumulating data

In [1]:
# # Uploading the json file from kaggle

# from google.colab import files
# files.upload()

In [2]:
# INstalling kaggle API and configuring it

!pip install kaggle
!mkdir ~/.kaggle/
!cp "/content/drive/MyDrive/Food_Waste_project/kaggle.json" ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json



In [3]:
# Downloading the dataset from kaggle
!kaggle datasets download -d joebeachcapital/realwaste

Dataset URL: https://www.kaggle.com/datasets/joebeachcapital/realwaste
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading realwaste.zip to /content
 99% 652M/657M [00:07<00:00, 85.1MB/s]
100% 657M/657M [00:07<00:00, 95.6MB/s]


In [4]:
# unzip the dataset

import zipfile
import os
zip_path = "/content/realwaste.zip"
extract_dir = "/content/realwaste"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
  zip_ref.extractall(extract_dir)

In [5]:
# Let's see the dataset directory structure

os.listdir(extract_dir)

['realwaste-main']

## Define data transformations and data loaders

In [6]:
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

# define data transformations
data_transforms = {
    'train': transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
}


# Set data directories
data_dir = '/content/realwaste/realwaste-main/RealWaste'


In [7]:
# Creating train and test split
from torch.utils.data import random_split

# Create dataset
dataset = datasets.ImageFolder(root=data_dir, transform=data_transforms['train'])

total_size = len(dataset)
train_size = int(0.8 * total_size)
test_size = total_size - train_size

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])


In [8]:
# Creating data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)



class_names = dataset.classes

print(f"Classes: {class_names}")
print(f"Dataset Sizes: Train: {train_size}, Test: {test_size}")

Classes: ['Cardboard', 'Food Organics', 'Glass', 'Metal', 'Miscellaneous Trash', 'Paper', 'Plastic', 'Textile Trash', 'Vegetation']
Dataset Sizes: Train: 3801, Test: 951




## 2. Build the Model

In [9]:
# Importing dependencies
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [10]:
# Make the model

class wasteCNN(nn.Module):
  def __init__(self, num_classes):
    super(wasteCNN, self).__init__()
    self.conv1 = nn.Conv2d(3, 32, kernel_size = 3, stride = 1, padding = 1)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding= 1)
    self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
    self.pool = nn.MaxPool2d(kernel_size=2, stride = 2, padding=0)
    self.fc1 = nn.Linear(128 * 28 * 28, 512)
    self.fc2 = nn.Linear( 512, num_classes)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = self.pool(F.relu(self.conv3(x)))
    x = x.view(-1, 128 * 28 *28) # flatten the tensor
    x = F.relu(self.fc1(x))
    x = self.fc2(x)
    return x

# Use GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

num_classes = len(class_names)
model = wasteCNN(num_classes)
model = model.to(device)


In [11]:
# Setting up loss function and optimizer

criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters(), lr = 0.001)

In [12]:
# training loop
num_epochs = 25

for epoch in range(num_epochs):
  print(f'Epoch {epoch+1}/{num_epochs}')
  print('-' * 10)

  # Each epoch has training and validaiton phase
  for phase in ['train', 'test']:
    if phase == 'train':
      model.train()
    else:
      model.eval()

    running_loss = 0.0
    running_corrects = 0


    for inputs, labels in (train_loader if phase == 'train' else test_loader):
      inputs = inputs.to(device)
      labels = labels.to(device)

      # 1. optimizer zero grad
      optimizer.zero_grad()

      # 2. Forward pass
      with torch.set_grad_enabled(phase == 'train'):
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        # Backpropogation if only in training pahse
        if phase == 'train':
          loss.backward()
          optimizer.step()

      # stats
      running_loss += loss.item() * inputs.size(0)
      running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss/ len(train_dataset if phase =='train' else test_dataset)
    epoch_acc = running_corrects.double() / len(train_dataset if phase == 'train' else test_dataset)

    print(f'{phase} Loss : {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

  print()

print('Training Complete')



Epoch 1/25
----------
train Loss : 2.0962 Acc: 0.2910
test Loss : 1.6232 Acc: 0.4143

Epoch 2/25
----------
train Loss : 1.4489 Acc: 0.4765
test Loss : 1.3821 Acc: 0.5100

Epoch 3/25
----------
train Loss : 1.3427 Acc: 0.5238
test Loss : 1.3028 Acc: 0.5247

Epoch 4/25
----------
train Loss : 1.2314 Acc: 0.5656
test Loss : 1.2210 Acc: 0.5499

Epoch 5/25
----------
train Loss : 1.1689 Acc: 0.5756
test Loss : 1.1636 Acc: 0.5773

Epoch 6/25
----------
train Loss : 1.0993 Acc: 0.5983
test Loss : 1.1161 Acc: 0.5889

Epoch 7/25
----------
train Loss : 1.1036 Acc: 0.6069
test Loss : 1.2090 Acc: 0.5626

Epoch 8/25
----------
train Loss : 1.0217 Acc: 0.6285
test Loss : 1.0222 Acc: 0.6320

Epoch 9/25
----------
train Loss : 0.9680 Acc: 0.6461
test Loss : 1.0360 Acc: 0.6246

Epoch 10/25
----------
train Loss : 0.9471 Acc: 0.6546
test Loss : 0.9626 Acc: 0.6467

Epoch 11/25
----------
train Loss : 0.9479 Acc: 0.6609
test Loss : 1.0146 Acc: 0.6372

Epoch 12/25
----------
train Loss : 0.8903 Acc: 0.67

In [13]:
# Save the model
torch.save(model.state_dict(), 'custom_cnn_waste_classifier.pth')

## 3. Evaluate model

In [14]:
# Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the model on validation images: {100 * correct / total:.2f}%')

# Inference on new images
from PIL import Image

def predict_image(image_path):
    image = Image.open(image_path)
    image = data_transforms['test'](image).unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        outputs = model(image)
        _, predicted = torch.max(outputs, 1)

    return class_names[predicted.item()]



Accuracy of the model on validation images: 69.30%
