In [None]:

import os
from google.colab import drive
import torch

drive.mount('/content/drive')

!pip install -q timm

BASE_PATH = '/content/drive/MyDrive/inDrive_hackathon'
DATA_PATH = os.path.join(BASE_PATH, 'data', 'processed', 'cleanliness_dataset')
MODELS_PATH = os.path.join(BASE_PATH, 'models')
os.makedirs(MODELS_PATH, exist_ok=True)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")


import random
import numpy as np
def set_seed(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU,
    the current CUDA device and all CUDA devices), then puts cuDNN into
    deterministic mode with benchmark autotuning switched off.

    Args:
        seed: Integer seed handed to each generator. Defaults to 42.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Fixed cuDNN settings so repeated runs are not affected by kernel autotuning.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed(42)

if not os.path.exists(DATA_PATH):
    print(f"❌ ERROR: Dataset path not found! Please check the path: {DATA_PATH}")
else:
    print("\n✅ Setup complete!")
    print(f"Cleanliness dataset found at: {DATA_PATH}")
    print("Contents of the dataset folder:")
    !ls -l {DATA_PATH}

Mounted at /content/drive
Using device: cuda

✅ Setup complete!
Cleanliness dataset found at: /content/drive/MyDrive/inDrive_hackathon/data/processed/cleanliness_dataset
Contents of the dataset folder:
total 8
drwx------ 2 root root 4096 Sep 13 10:04 clean
drwx------ 2 root root 4096 Sep 13 12:51 dirty


In [None]:

from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split


from torch.utils.data import Dataset  # base class for the per-split wrapper below

IMG_SIZE = 224
SPLIT_SEED = 42  # fixes train/val membership so re-running this cell cannot reshuffle it

# Training pipeline: resize + light augmentation + ImageNet mean/std normalisation.
train_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(), # Flips images horizontally
    transforms.RandomRotation(10),     # Rotates images by up to 10 degrees
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Validation pipeline: same preprocessing, no random augmentation.
val_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


class TransformedSubset(Dataset):
    """Apply a split-specific transform on top of a ``Subset``.

    Both subsets returned by ``random_split`` point at the *same* underlying
    dataset object, so assigning ``subset.dataset.transform`` for one split
    silently changes it for the other. Keeping the transform on this wrapper
    gives each split its own pipeline.

    Args:
        subset: Indexable dataset yielding ``(image, label)`` pairs, where the
            image has not been transformed yet.
        transform: Callable applied to the image of every sample.
    """

    def __init__(self, subset, transform):
        self.subset = subset
        self.transform = transform

    def __len__(self):
        return len(self.subset)

    def __getitem__(self, index):
        image, label = self.subset[index]
        return self.transform(image), label


# No transform here: the base dataset yields raw images and each split
# applies its own pipeline through TransformedSubset.
full_dataset = datasets.ImageFolder(DATA_PATH)

TRAIN_RATIO = 0.8
train_size = int(TRAIN_RATIO * len(full_dataset))
val_size = len(full_dataset) - train_size  # remainder, so the two sizes always sum to the total
train_split, val_split = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_dataset = TransformedSubset(train_split, train_transforms)
val_dataset = TransformedSubset(val_split, val_transforms)

BATCH_SIZE = 32
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

class_names = full_dataset.classes
print(f"Classes found: {class_names}")
print(f"Total images: {len(full_dataset)}")
print(f"Training images: {len(train_dataset)}")
print(f"Validation images: {len(val_dataset)}")

# Pull one batch to confirm the loader produces tensors of the expected shape.
images, labels = next(iter(train_loader))
print(f"\nShape of one batch of images [B, C, H, W]: {images.shape}")
print(f"Shape of one batch of labels: {labels.shape}")

Classes found: ['clean', 'dirty']
Total images: 911
Training images: 728
Validation images: 183

Shape of one batch of images [B, C, H, W]: torch.Size([32, 3, 224, 224])
Shape of one batch of labels: torch.Size([32])


In [None]:

import timm
import torch.nn as nn
import torch.optim as optim


# EfficientNet-B0 with pretrained weights; timm sizes the classifier head to
# one output per class found in the dataset. The model is moved to DEVICE
# before the optimizer is built from its parameters.
model = timm.create_model(
    model_name='efficientnet_b0',
    pretrained=True,
    num_classes=len(class_names),
).to(DEVICE)

# Cross-entropy over the raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

print("✅ Model, loss function, and optimizer are ready.")
print(f"Model: {model.default_cfg['architecture']}")
print(f"Classifier layer: {model.classifier}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

✅ Model, loss function, and optimizer are ready.
Model: efficientnet_b0
Classifier layer: Linear(in_features=1280, out_features=2, bias=True)


In [None]:

from tqdm.notebook import tqdm

NUM_EPOCHS = 10
model_save_path = os.path.join(MODELS_PATH, 'cleanliness_classifier.pt')
best_val_accuracy = 0.0  # highest validation accuracy seen so far

print("Starting training...")

for epoch in range(NUM_EPOCHS):
    # --- Training phase: one optimisation pass over the training loader ---
    model.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [Train]")
    for images, labels in train_bar:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        # The criterion averages over the batch, so weight by batch size to
        # get a true per-sample mean once divided by the dataset length.
        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(train_dataset)

    # --- Validation phase: no gradients, eval-mode layers ---
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        val_bar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [Val]")
        for images, labels in val_bar:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            outputs = model(images)
            batch_count = labels.size(0)
            val_loss += criterion(outputs, labels).item() * batch_count

            # Index of the largest logit per row is the predicted class.
            predicted = outputs.max(dim=1).indices
            correct += predicted.eq(labels).sum().item()
            total += batch_count

    epoch_val_loss = val_loss / len(val_dataset)
    epoch_val_accuracy = correct / total

    print(f"Epoch {epoch+1}/{NUM_EPOCHS} -> Train Loss: {epoch_loss:.4f}, Val Loss: {epoch_val_loss:.4f}, Val Acc: {epoch_val_accuracy:.4f}")

    # --- Checkpoint only on a strict improvement in validation accuracy ---
    if epoch_val_accuracy > best_val_accuracy:
        best_val_accuracy = epoch_val_accuracy
        torch.save(model.state_dict(), model_save_path)
        print(f"   -> New best model saved with accuracy: {best_val_accuracy:.4f}")


print("\n🎉 Training complete!")
print(f"Best validation accuracy: {best_val_accuracy:.4f}")
print(f"Best model saved to: {model_save_path}")

Starting training...


Epoch 1/10 [Train]:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch 1/10 [Val]:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 1/10 -> Train Loss: 1.0114, Val Loss: 0.5240, Val Acc: 0.9344
   -> New best model saved with accuracy: 0.9344


Epoch 2/10 [Train]:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch 2/10 [Val]:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 2/10 -> Train Loss: 0.3700, Val Loss: 0.1863, Val Acc: 0.9781
   -> New best model saved with accuracy: 0.9781


Epoch 3/10 [Train]:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch 3/10 [Val]:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 3/10 -> Train Loss: 0.1370, Val Loss: 0.1633, Val Acc: 0.9399


Epoch 4/10 [Train]:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch 4/10 [Val]:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 4/10 -> Train Loss: 0.0331, Val Loss: 0.0873, Val Acc: 0.9781


Epoch 5/10 [Train]:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch 5/10 [Val]:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 5/10 -> Train Loss: 0.0403, Val Loss: 0.1221, Val Acc: 0.9727


Epoch 6/10 [Train]:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch 6/10 [Val]:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 6/10 -> Train Loss: 0.0134, Val Loss: 0.1117, Val Acc: 0.9781


Epoch 7/10 [Train]:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch 7/10 [Val]:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 7/10 -> Train Loss: 0.0148, Val Loss: 0.0986, Val Acc: 0.9727


Epoch 8/10 [Train]:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch 8/10 [Val]:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 8/10 -> Train Loss: 0.0049, Val Loss: 0.1168, Val Acc: 0.9781


Epoch 9/10 [Train]:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch 9/10 [Val]:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 9/10 -> Train Loss: 0.0109, Val Loss: 0.1224, Val Acc: 0.9727


Epoch 10/10 [Train]:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch 10/10 [Val]:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 10/10 -> Train Loss: 0.0036, Val Loss: 0.1984, Val Acc: 0.9672

🎉 Training complete!
Best validation accuracy: 0.9781
Best model saved to: /content/drive/MyDrive/inDrive_hackathon/models/cleanliness_classifier.pt


In [None]:
from google.colab import files
from PIL import Image
import torch
import torch.nn.functional as F


# Rebuild the same architecture with pretrained=False; the weights come from
# the checkpoint written by the training cell.
model_to_test = timm.create_model('efficientnet_b0', pretrained=False, num_classes=len(class_names))
model_save_path = os.path.join(MODELS_PATH, 'cleanliness_classifier.pt')

# weights_only=True restricts unpickling to tensors and plain containers, which is
# all a state_dict holds, so a tampered file on Drive cannot run arbitrary code.
checkpoint_state = torch.load(model_save_path, map_location=DEVICE, weights_only=True)
model_to_test.load_state_dict(checkpoint_state)
model_to_test.to(DEVICE)
model_to_test.eval()  # inference mode for dropout / batch-norm layers


# Same deterministic preprocessing as the validation pipeline (no augmentation).
test_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Interactive smoke test: upload image(s), print the predicted class and the
# per-class probabilities, repeat until the user cancels or answers anything
# other than 'y'.
while True:
    print("\n----------------------------------")
    print("Please upload a car image to test (or click 'Cancel' to stop).")
    uploaded = files.upload()

    if not uploaded:
        print("No file uploaded. Exiting test loop.")
        break

    # `uploaded` is keyed by file name and may hold several files;
    # classify every one instead of silently dropping all but the first.
    for file_name in uploaded:
        try:
            # The context manager closes the file handle; convert() returns an
            # independent in-memory copy, so `img` stays usable afterwards.
            with Image.open(file_name) as raw_image:
                img = raw_image.convert('RGB')
        except Exception as e:
            # Unreadable file: report it and move on to the next upload.
            print(f"Error opening image: {e}")
            continue

        # Add the batch dimension the model expects: [C, H, W] -> [1, C, H, W].
        img_tensor = test_transforms(img).unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            outputs = model_to_test(img_tensor)
            probabilities = F.softmax(outputs, dim=1)[0]
            predicted_idx = torch.argmax(probabilities).item()

        predicted_class = class_names[predicted_idx]
        confidence = probabilities[predicted_idx].item() * 100

        print(f"\n✅ Prediction for '{file_name}':")
        print(f"   -> Predicted Class: {predicted_class.upper()}")
        print(f"   -> Confidence: {confidence:.2f}%")

        print("\n   --- Detailed Probabilities ---")
        for i, class_name in enumerate(class_names):
            print(f"      {class_name.capitalize()}: {probabilities[i].item()*100:.2f}%")

    # Asked once per upload batch, including when a file failed to open,
    # so the user always has a way to leave the loop.
    another = input("\nTest another image? (y/n): ")
    if another.lower() != 'y':
        print("Exiting test loop.")
        break


----------------------------------
Please upload a car image to test (or click 'Cancel' to stop).


Saving images.jpeg to images.jpeg

✅ Prediction for 'images.jpeg':
   -> Predicted Class: CLEAN
   -> Confidence: 99.61%

   --- Detailed Probabilities ---
      Clean: 99.61%
      Dirty: 0.39%

Test another image? (y/n): y

----------------------------------
Please upload a car image to test (or click 'Cancel' to stop).


Saving images (2).jpeg to images (2).jpeg

✅ Prediction for 'images (2).jpeg':
   -> Predicted Class: CLEAN
   -> Confidence: 98.35%

   --- Detailed Probabilities ---
      Clean: 98.35%
      Dirty: 1.65%

Test another image? (y/n): y

----------------------------------
Please upload a car image to test (or click 'Cancel' to stop).


Saving images (3).jpeg to images (3).jpeg

✅ Prediction for 'images (3).jpeg':
   -> Predicted Class: CLEAN
   -> Confidence: 99.88%

   --- Detailed Probabilities ---
      Clean: 99.88%
      Dirty: 0.12%

Test another image? (y/n): y

----------------------------------
Please upload a car image to test (or click 'Cancel' to stop).


Saving images (4).jpeg to images (4).jpeg

✅ Prediction for 'images (4).jpeg':
   -> Predicted Class: DIRTY
   -> Confidence: 99.99%

   --- Detailed Probabilities ---
      Clean: 0.01%
      Dirty: 99.99%
