In [1]:
import os
from pathlib import Path
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets, models
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image

In [2]:
CLASS_NUMBER = 2
channels = 3
img_height = 64
img_width = 64
epochs = 20
batch_size = 64
datasetDir = r'/kaggle/input/trees-in-satellite-imagery'  # adjust path
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)

Device: cuda


In [3]:
train = pd.read_csv("/kaggle/input/kg-inspect-dataset-2/train_formatted (1).csv")
valid = pd.read_csv("/kaggle/input/kg-inspect-dataset-2/valid_formatted (1).csv")
test = pd.read_csv("/kaggle/input/kg-inspect-dataset-2/test_formatted (1).csv")

In [4]:
train.head()

Unnamed: 0,Image,Label
0,/kaggle/input/mvtec-ad/bottle/train/good/173.png,bottle
1,/kaggle/input/mvtec-ad/bottle/train/good/043.png,bottle
2,/kaggle/input/mvtec-ad/bottle/train/good/038.png,bottle
3,/kaggle/input/mvtec-ad/bottle/train/good/069.png,bottle
4,/kaggle/input/mvtec-ad/bottle/train/good/083.png,bottle


In [5]:
class CSVImageDataset(Dataset):
    def __init__(self, df, label2idx, transform=None):
        self.df = df.reset_index(drop=True)
        self.label2idx = label2idx
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = str(row["Image"])
        label_name = str(row["Label"])
        label = self.label2idx[label_name]
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, torch.tensor(label, dtype=torch.long)

def make_label_mapping(train_df):
    classes = sorted(train_df["Label"].astype(str).unique())
    label2idx = {c: i for i, c in enumerate(classes)}
    idx2label = {i: c for c, i in label2idx.items()}
    return label2idx, idx2label

def get_basic_loaders(train_df, val_df, test_df, img_size=224, batch_size=64, device='cuda'):
    """
    Loaders cơ bản: chỉ resize + tensor, không augment
    """
    label2idx, idx2label = make_label_mapping(train_df)

    basic_tf = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),   # không Normalize để load nhanh
    ])

    train_ds = CSVImageDataset(train_df, label2idx, transform=basic_tf)
    val_ds   = CSVImageDataset(val_df,   label2idx, transform=basic_tf)
    test_ds  = CSVImageDataset(test_df,  label2idx, transform=basic_tf)

    num_workers = min(4, os.cpu_count() or 2)
    pin = (device == "cuda")

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers, pin_memory=pin)
    val_loader   = DataLoader(val_ds, batch_size=batch_size, shuffle=False,
                              num_workers=num_workers, pin_memory=pin)
    test_loader  = DataLoader(test_ds, batch_size=batch_size, shuffle=False,
                              num_workers=num_workers, pin_memory=pin)

    return train_loader, val_loader, test_loader, label2idx, idx2label

In [6]:
train_loader, val_loader, test_loader, label2idx, idx2label = get_basic_loaders(
    train, valid, test,
    img_size=64,     # hoặc 224 nếu bạn muốn giữ chuẩn ConvNeXt
    batch_size=64,
    device=device
)

num_classes = len(label2idx)
print("Lớp:", label2idx)


Lớp: {'bottle': 0, 'cable': 1, 'candle': 2, 'capsule': 3, 'capsules': 4, 'carpet': 5, 'cashew': 6, 'chewinggum': 7, 'fryum': 8, 'grid': 9, 'hazelnut': 10, 'leather': 11, 'macaroni1': 12, 'macaroni2': 13, 'metal_nut': 14, 'pcb1': 15, 'pcb2': 16, 'pcb3': 17, 'pcb4': 18, 'pill': 19, 'pipe_fryum': 20, 'screw': 21, 'tile': 22, 'toothbrush': 23, 'transistor': 24, 'wood': 25, 'zipper': 26}


In [7]:
try:
    model = models.convnext_tiny(pretrained=True)
    print('Using torchvision convnext_tiny')
    # Freeze the backbone (features)
    for param in model.features.parameters():
        param.requires_grad = False
    # Replace classifier head with pooling + flatten + layernorm + linear
    in_features = model.classifier[-1].in_features
    model.classifier = nn.Sequential(
        nn.AdaptiveAvgPool2d((1,1)),
        nn.Flatten(1),
        nn.LayerNorm(in_features),
        nn.Linear(in_features, num_classes)
    )
except Exception as e:
    print('torchvision convnext_tiny not available:', e)
    try:
        import timm
        model = timm.create_model('convnext_tiny', pretrained=True)
        print('Using timm convnext_tiny')
        # Freeze the backbone for timm
        for name, param in model.named_parameters():
            if 'head' not in name and 'classifier' not in name:
                param.requires_grad = False
        model.reset_classifier(num_classes=num_classes)
    except ImportError:
        raise ImportError("Neither torchvision convnext_tiny nor timm is available. Install timm with: pip install timm")

model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

Downloading: "https://download.pytorch.org/models/convnext_tiny-983f1562.pth" to /root/.cache/torch/hub/checkpoints/convnext_tiny-983f1562.pth
100%|██████████| 109M/109M [00:00<00:00, 190MB/s]


Using torchvision convnext_tiny


In [8]:
# Training loop (simple)
train_losses = []
val_losses = []
train_acc = []
val_acc = []

for epoch in range(epochs):
    # train
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    epoch_loss = running_loss / total
    epoch_acc = correct / total
    train_losses.append(epoch_loss)
    train_acc.append(epoch_acc)

    # val
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    val_loss = running_loss / total if total>0 else 0
    val_accuracy = correct / total if total>0 else 0
    val_losses.append(val_loss)
    val_acc.append(val_accuracy)

    print(f'Epoch {epoch+1}/{epochs} - train_loss: {epoch_loss:.4f} train_acc: {epoch_acc:.4f} val_loss: {val_loss:.4f} val_acc: {val_accuracy:.4f}')

# Save model
torch.save(model.state_dict(), 'convnext_tiny_kg_inspect.pth')

Epoch 1/20 - train_loss: 1.3047 train_acc: 0.7934 val_loss: 0.4190 val_acc: 0.9702
Epoch 2/20 - train_loss: 0.1934 train_acc: 0.9962 val_loss: 0.1497 val_acc: 0.9903
Epoch 3/20 - train_loss: 0.0811 train_acc: 0.9984 val_loss: 0.0918 val_acc: 0.9944
Epoch 4/20 - train_loss: 0.0490 train_acc: 0.9985 val_loss: 0.0673 val_acc: 0.9951
Epoch 5/20 - train_loss: 0.0343 train_acc: 0.9983 val_loss: 0.0524 val_acc: 0.9965
Epoch 6/20 - train_loss: 0.0236 train_acc: 0.9995 val_loss: 0.0456 val_acc: 0.9965
Epoch 7/20 - train_loss: 0.0193 train_acc: 0.9989 val_loss: 0.0389 val_acc: 0.9972
Epoch 8/20 - train_loss: 0.0155 train_acc: 0.9991 val_loss: 0.0348 val_acc: 0.9972
Epoch 9/20 - train_loss: 0.0127 train_acc: 0.9995 val_loss: 0.0315 val_acc: 0.9972
Epoch 10/20 - train_loss: 0.0097 train_acc: 0.9995 val_loss: 0.0287 val_acc: 0.9972
Epoch 11/20 - train_loss: 0.0085 train_acc: 0.9995 val_loss: 0.0264 val_acc: 0.9972
Epoch 12/20 - train_loss: 0.0072 train_acc: 0.9997 val_loss: 0.0250 val_acc: 0.9972
E

In [9]:
# Hàm evaluate dùng chung cho val/test
def evaluate(model, loader, criterion, device):
    model.eval()
    running_loss, correct, total = 0.0, 0, 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs) 
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)

            correct += (preds == labels).sum().item()
            total += labels.size(0)

            all_preds.append(preds.detach().cpu().numpy())
            all_labels.append(labels.detach().cpu().numpy())

    avg_loss = running_loss / total if total > 0 else 0.0
    acc = correct / total if total > 0 else 0.0
    all_preds = np.concatenate(all_preds) if all_preds else np.array([])
    all_labels = np.concatenate(all_labels) if all_labels else np.array([])
    return avg_loss, acc, all_preds, all_labels



In [10]:
# ===== Sau khi train xong, đánh giá trên test =====
test_loss, test_acc, y_pred, y_true = evaluate(model, test_loader, criterion, device)
print(f"[TEST] loss: {test_loss:.4f}  acc: {test_acc:.4f}")

# (Tuỳ chọn) In báo cáo chi tiết nếu có sklearn
try:
    from sklearn.metrics import classification_report, confusion_matrix
    # Nếu bạn có idx2label, truyền vào đây để tên lớp đẹp hơn:
    # target_names = [idx2label[i] for i in range(len(idx2label))]
    # Nếu không có, đặt None để sklearn tự dùng chỉ số lớp
    target_names = None

    print("\nClassification report (test):")
    print(classification_report(y_true, y_pred, target_names=target_names, digits=4))

    print("Confusion matrix (test):")
    print(confusion_matrix(y_true, y_pred))
except Exception as e:
    print("Không thể in classification report/confusion matrix (thiếu sklearn?):", e)

[TEST] loss: 0.0166  acc: 0.9987

Classification report (test):
              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        42
           1     1.0000    1.0000    1.0000        76
           2     1.0000    1.0000    1.0000        50
           3     1.0000    1.0000    1.0000        68
           4     1.0000    1.0000    1.0000        50
           5     1.0000    1.0000    1.0000        61
           6     0.9804    1.0000    0.9901        50
           7     1.0000    1.0000    1.0000        50
           8     1.0000    1.0000    1.0000        50
           9     1.0000    1.0000    1.0000        41
          10     1.0000    0.9821    0.9910        56
          11     1.0000    0.9844    0.9921        64
          12     1.0000    1.0000    1.0000        50
          13     1.0000    1.0000    1.0000        50
          14     1.0000    1.0000    1.0000        59
          15     1.0000    1.0000    1.0000        50
          16     