In [6]:
from transformers import pipeline

classifier = pipeline("zero-shot-classification", model = "valhalla/distilbart-mnli-12-3")
labels = ["Waste", "Road Damage", "Street Lights", "Water Supply"]
text = "Water pipeline broken causing lots of leakage"

classifier(text, labels)

In [3]:
train_path = "dataset/train"
val_path = "dataset/val"

In [None]:
# No need to save the text model as it it available on the hugging space hub

In [4]:
import torch
import torch.nn as nn
import torchvision.models as models

device = torch.device("cuda")
model = models.mobilenet_v2(pretrained = True)
model = model.to(device)
num_classes = 4

In [5]:
model.classifier[1] = nn.Sequential(
    nn.Linear(model.last_channel, 128),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(128, num_classes)
)

In [6]:
for param in model.features.parameters():
    param.requires_grad = False

In [7]:
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])

In [8]:
criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(
    model.classifier.parameters(),
    lr=1e-3,
    weight_decay=1e-4  # helps regularization
)

In [9]:
model = model.to(device)

In [10]:
from torchvision import datasets
from torch.utils.data import DataLoader

train_dataset = datasets.ImageFolder(
    root = train_path,
    transform = train_transform
)

val_dataset = datasets.ImageFolder(
    root= val_path,
    transform=val_transform
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2, pin_memory=True)

class_names = train_dataset.classes
print(class_names)

['Drainage', 'Road_Damage', 'Street_Light', 'Trash']


In [11]:
print(len(train_dataset))
print(len(val_dataset))

483
99


In [12]:

num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    running_loss = 0
    correct = 0
    total = 0
    best_val_acc = 0.0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_acc = 100 * correct / total

    # Validation
    model.eval()
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            _, predicted = torch.max(outputs, 1)

            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_acc = 100 * val_correct / val_total
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "Best_image_model.pth")
        print("Model Saved.")

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Loss: {running_loss:.4f} "
          f"Train Acc: {train_acc:.2f}% "
          f"Val Acc: {val_acc:.2f}%")

Model Saved.
Epoch [1/5] Loss: 13.8135 Train Acc: 69.98% Val Acc: 63.64%
Model Saved.
Epoch [2/5] Loss: 7.3737 Train Acc: 83.64% Val Acc: 84.85%
Model Saved.
Epoch [3/5] Loss: 5.0301 Train Acc: 90.89% Val Acc: 91.92%
Model Saved.
Epoch [4/5] Loss: 3.0059 Train Acc: 93.37% Val Acc: 92.93%
Model Saved.
Epoch [5/5] Loss: 2.9378 Train Acc: 93.37% Val Acc: 91.92%


In [39]:
!ls

In [43]:
import matplotlib.pyplot as plt
from PIL import Image

model.eval()

image_path = "/content/drive/MyDrive/dataset/val/Street_Light/DYNDGQDXE8F2.jpg"

image = Image.open(image_path).convert("RGB")
image = val_transform(image)
image = image.unsqueeze(0)   # add batch dimension
image = image.to(device)

class_names = train_dataset.classes

with torch.no_grad():
    outputs = model(image)
    probs = torch.softmax(outputs, dim=1)
    confidence, predicted = torch.max(probs, 1)

print("Predicted:", class_names[predicted.item()])
print("Confidence:", confidence.item())


In [None]:
model.eval()
