In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
import numpy as np
import matplotlib.pyplot as plt
import time
import os
from PIL import Image
from torch.utils.data import DataLoader, random_split
from torchvision import transforms

from tqdm import tqdm

In [5]:
# Define data transforms
# MobileNetV2 expects 224x224 input images and specific normalization
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224),          # phóng to rồi crop ngẫu nhiên vùng 224x224
        transforms.RandomHorizontalFlip(p=0.5),     # lật ngang ảnh ngẫu nhiên
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # thay đổi màu sắc
        transforms.RandomRotation(degrees=15),      # xoay ngẫu nhiên ±15 độ
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # dịch ảnh
        transforms.RandomPerspective(distortion_scale=0.2, p=0.5), # hiệu ứng phối cảnh
        transforms.ToTensor(),                      # chuyển thành tensor
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([ # Often same as validation for consistent evaluation
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

base_dir = "/Users/anand/Desktop/1mg/repos/thesis/thesis/data/PlantDoc-Dataset"
base_dataset = datasets.ImageFolder(base_dir)

total_size = len(base_dataset)
train_size = int(0.8 * total_size)
val_size = int(0.1 * total_size)
test_size = total_size - train_size - val_size

train_data, val_data, test_data = random_split(
    base_dataset, [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(42)
)
train_data.dataset.transform = data_transforms["train"]
val_data.dataset.transform = data_transforms["val"]
test_data.dataset.transform = data_transforms["test"]

batch_size = 64
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_data, shuffle=True, pin_memory=True)


In [6]:
def loop_fn(mode, dataset, dataloader, model, criterion, optimizer, device):
  if mode == 'train':
    model.train()
  elif mode == 'val':
    model.eval()
  
  cost = correct = 0
  for feature, target in tqdm(dataloader, desc=mode.title()):
    feature, target = feature.to(device), target.to(device)
    output = model(feature)
    loss = criterion(output, target)

    if mode == 'train':
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()
    
    cost += loss.item() * feature.shape[0]
    correct += (output.argmax(1) == target).sum().item()
  cost = cost/len(dataset)
  acc = correct/len(dataset)
  return cost, acc

In [7]:
def evaluate(model, dataloader, device):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            preds = outputs.argmax(1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    acc = correct / total
    print(f"✅ Accuracy: {acc:.4f} ({correct}/{total})")
    return acc

In [10]:
def train(model_name, epoch):
    best_weight = None
    best_train = [0, 0, 0, 0, 0]
    for i in range(epoch):
        print("="*30, f"Epoch {i+1}:")
        train_cost, train_score = loop_fn('train', train_data, train_loader, model, criterion, optimizer, device)
        print(f"===Train:\t|\tAccuracy: {train_score:.4f}\t|\tLoss: {train_cost:.4f}")
        with torch.no_grad():
            test_cost, test_score = loop_fn('val', test_data, test_loader, model, criterion, optimizer, device)
            print(f"===Valid:\t|\tAccuracy: {test_score:.4f}\t|\tLoss: {test_cost:.4f}")


        if best_train[0] <= train_score:
            best_train = list([train_score, train_cost, test_score, test_cost, i+1])
            best_weight = model.state_dict()
        print("==="*30, "\n\n\n")

    print(f"Best checkpoinpt: {best_train[4]}\nTrain Accuracy: {best_train[0]}\t|\tTrain Loss: {best_train[1]}\nTest Accuracy: {best_train[2]}\t|\tTest Loss: {best_train[3]}")
    evaluate(model, test_loader, device)

    id_cp = best_train[4]
    torch.save(best_weight, f"model_{model_name}_plantvillage_{id_cp}.pth")

In [12]:
import torchvision

num_epochs = 50
num_classes = len(os.listdir(base_dir))  # số lớp
device = torch.device("cuda" if torch.cuda.is_available() else "mps")

In [13]:
model = None
model = torchvision.models.mobilenet_v2(weights="DEFAULT")
# Thay classifier cuối
in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(in_features, num_classes)
model = model.to(device)

for param in model.features.parameters():
    param.requires_grad = False

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)

model_name = "mobilenetv2"
train(model_name, num_epochs)



Train: 100%|██████████| 32/32 [00:32<00:00,  1.03s/it]


===Train:	|	Accuracy: 0.8912	|	Loss: 0.3493


Val: 100%|██████████| 256/256 [00:07<00:00, 35.36it/s]


===Valid:	|	Accuracy: 0.8594	|	Loss: 0.4041





Train: 100%|██████████| 32/32 [00:26<00:00,  1.22it/s]


===Train:	|	Accuracy: 0.9113	|	Loss: 0.2822


Val: 100%|██████████| 256/256 [00:05<00:00, 50.61it/s]


===Valid:	|	Accuracy: 0.8594	|	Loss: 0.3806





Train: 100%|██████████| 32/32 [00:25<00:00,  1.27it/s]


===Train:	|	Accuracy: 0.9118	|	Loss: 0.2726


Val: 100%|██████████| 256/256 [00:04<00:00, 51.20it/s]


===Valid:	|	Accuracy: 0.8594	|	Loss: 0.3917





Train: 100%|██████████| 32/32 [00:26<00:00,  1.22it/s]


===Train:	|	Accuracy: 0.9128	|	Loss: 0.2579


Val: 100%|██████████| 256/256 [00:05<00:00, 49.67it/s]


===Valid:	|	Accuracy: 0.8594	|	Loss: 0.4076





Train: 100%|██████████| 32/32 [00:25<00:00,  1.24it/s]


===Train:	|	Accuracy: 0.9133	|	Loss: 0.2560


Val: 100%|██████████| 256/256 [00:05<00:00, 50.12it/s]


===Valid:	|	Accuracy: 0.8594	|	Loss: 0.3830





Train: 100%|██████████| 32/32 [00:26<00:00,  1.22it/s]


===Train:	|	Accuracy: 0.9157	|	Loss: 0.2460


Val: 100%|██████████| 256/256 [00:04<00:00, 51.68it/s]


===Valid:	|	Accuracy: 0.8555	|	Loss: 0.3807





Train: 100%|██████████| 32/32 [00:26<00:00,  1.20it/s]


===Train:	|	Accuracy: 0.9133	|	Loss: 0.2397


Val: 100%|██████████| 256/256 [00:05<00:00, 49.50it/s]


===Valid:	|	Accuracy: 0.8516	|	Loss: 0.3810





Train: 100%|██████████| 32/32 [00:26<00:00,  1.22it/s]


===Train:	|	Accuracy: 0.9162	|	Loss: 0.2356


Val: 100%|██████████| 256/256 [00:05<00:00, 47.88it/s]


===Valid:	|	Accuracy: 0.8555	|	Loss: 0.4068





Train:  22%|██▏       | 7/32 [00:06<00:24,  1.03it/s]


KeyboardInterrupt: 