In [56]:
from urllib.request import urlopen
import timm
import torch
import zipfile,os
from PIL import Image
from pathlib import Path
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader,Dataset
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim

In [57]:
pip install roboflow

Note: you may need to restart the kernel to use updated packages.


In [58]:
pip install scikit-learn==1.3.1

Note: you may need to restart the kernel to use updated packages.


In [59]:
from roboflow import Roboflow
rf = Roboflow(api_key="B5AgQrixtU1h66djOZMm")
project = rf.workspace("digihack").project("lung-disease-pbtdg")
version = project.version(1)
dataset = version.download("folder")

loading Roboflow workspace...
loading Roboflow project...


In [60]:
from roboflow import Roboflow
rf = Roboflow(api_key="Q1flzyg1J8Z2OMweLXf3")
project = rf.workspace("weapon-mpr3p").project("lung-disease-rybev")
version = project.version(1)
dataset2 = version.download("folder")

loading Roboflow workspace...
loading Roboflow project...


In [61]:
train_dir = dataset.location + "/train"
test_dir = dataset2.location + "/train"
val_dir = dataset.location + "/valid"

In [62]:
model = timm.create_model(
    'swin_base_patch4_window7_224',
    pretrained=True,
    num_classes=5,
)
#if torch.cuda.device_count() > 1:
#    model = nn.DataParallel(model)

model = model.eval()

# get model specific transforms (normalization, resize)
data_config = timm.data.resolve_model_data_config(model)
trans = timm.data.create_transform(**data_config, is_training=False)

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

In [63]:
class CustomDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.images = []
        self.labels = []
        self.class_names = os.listdir(data_dir)

        for label, class_name in enumerate(self.class_names):
            class_dir = os.path.join(data_dir, class_name)
            for img_name in os.listdir(class_dir):
                img_path = os.path.join(class_dir, img_name)
                self.images.append(img_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label  # Ensure this returns a tuple of (image, label)


In [64]:
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Create an instance of the CustomDataset
dataset = CustomDataset(data_dir=train_dir, transform=transform)

train_loader = DataLoader(dataset, batch_size=4, shuffle=True)
for data in train_loader:
    print(data[0].shape)  # This will show you the structure of the data being returned
    inputs, targets = data  # Unpack only if it has the correct structure
    break

torch.Size([4, 3, 224, 224])


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from sklearn.metrics import precision_recall_fscore_support
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# ==============================
# DEVICE & MODEL
# ==============================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_val_loss = float('inf') 
save_path = "./best_model.pth"
early_stop_patience = 5   # Early stopping patience
no_improve_epochs = 0     # Counter untuk early stopping

# Asumsikan model sudah ada
# model = ...
model.to(device)

# ==============================
# LOSS & OPTIMIZER
# ==============================
criterion = nn.CrossEntropyLoss().to(device)

# Weight decay sudah otomatis di AdamW
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)

# Scheduler (Reduce LR on Plateau berdasarkan val_loss)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='min',       # monitor loss -> semakin kecil semakin baik
    factor=0.5,       # turunkan LR setengah kali lipat
    patience=2,       # kalau 2 epoch berturut-turut tidak membaik
    verbose=True
)

# ==============================
# VALIDATION LOADER
# ==============================
try:
    val_dataset = ImageFolder(root=val_dir, transform=transform)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
except NameError:
    print("Pastikan variabel 'val_dir' dan 'transform' sudah didefinisikan.")
    val_loader = None

# ==============================
# TRAINING LOOP
# ==============================
num_epochs = 100
for epoch in range(num_epochs):
    # ---------------- TRAINING ----------------
    model.train()
    epoch_loss = 0
    correct_predictions = 0
    total_samples = 0
    all_targets, all_preds = [], []
    
    progress_bar = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}] Training")
    for inputs, targets in progress_bar:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # ✅ accumulate true per-sample loss
        epoch_loss += loss.item() * targets.size(0)
        total_samples += targets.size(0)

        # Accuracy
        _, predicted = torch.max(outputs.data, 1)
        correct_predictions += (predicted == targets).sum().item()
        all_preds.extend(predicted.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())
        
        progress_bar.set_postfix(loss=loss.item())

    avg_loss = epoch_loss / total_samples   # ✅ per-sample average
    accuracy = correct_predictions / total_samples * 100
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_targets, all_preds, average='weighted', zero_division=0
    )

    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {avg_loss:.4f}, "
          f"Train Acc: {accuracy:.2f}%, "
          f"Train F1: {f1:.4f}")

    # ---------------- VALIDATION ----------------
    if val_loader:
        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0
        all_val_targets, all_val_preds = [], []

        with torch.no_grad():
            val_progress_bar = tqdm(val_loader, desc=f"Epoch [{epoch+1}/{num_epochs}] Validation")
            for inputs, targets in val_progress_bar:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)

                # ✅ accumulate true per-sample loss
                val_loss += loss.item() * targets.size(0)
                val_total += targets.size(0)

                _, predicted = torch.max(outputs.data, 1)
                val_correct += (predicted == targets).sum().item()
                all_val_preds.extend(predicted.cpu().numpy())
                all_val_targets.extend(targets.cpu().numpy())

        avg_val_loss = val_loss / val_total   # ✅ per-sample average
        val_accuracy = val_correct / val_total * 100
        val_precision, val_recall, val_f1, _ = precision_recall_fscore_support(
            all_val_targets, all_val_preds, average='weighted', zero_division=0
        )

        print(f"             Val Loss: {avg_val_loss:.4f}, "
              f"Val Acc: {val_accuracy:.2f}%, "
              f"Val F1: {val_f1:.4f}\n")

        # Update scheduler
        scheduler.step(avg_val_loss)

        # Save best model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), save_path)
            print(f"✅ Best model saved at epoch {epoch+1} | Val Loss: {best_val_loss:.4f}")
            no_improve_epochs = 0
        else:
            no_improve_epochs += 1
            print(f"⚠️ No improvement for {no_improve_epochs} epoch(s).")

        # Early stopping
        if no_improve_epochs >= early_stop_patience:
            print("⏹ Early stopping triggered.")
            break

# Save last model
torch.save(model.state_dict(), "./last_model.pth")


The verbose parameter is deprecated. Please use get_last_lr() to access the learning rate.
Epoch [1/100] Training:   7%|▋         | 104/1513 [00:21<04:41,  5.01it/s, loss=1.94]

In [None]:
import csv
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import timm

# ==============================
# CUSTOM DATASET WITH PATHS
# ==============================
class ImageFolderWithPaths(ImageFolder):
    """Custom dataset that includes image file paths."""
    def __getitem__(self, index):
        # Normal ImageFolder return (img, label)
        original_tuple = super().__getitem__(index)
        path = self.samples[index][0]  # file path
        return original_tuple + (path,)  # (img, label, path)

# ==============================
# DATASET & LOADER
# ==============================
test_dataset = ImageFolderWithPaths(root=test_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# ==============================
# DEVICE
# ==============================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ==============================
# MODEL
# ==============================
model = timm.create_model(
    'mobilevitv2_100.cvnets_in1k',
    pretrained=True,
    num_classes=5,   # sesuai dataset
)

if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)

model.to(device)

# ==============================
# LOAD BEST MODEL
# ==============================
state_dict = torch.load("./best_model.pth", map_location=device)
if isinstance(model, nn.DataParallel):
    model.module.load_state_dict(state_dict)
else:
    model.load_state_dict(state_dict)

model.eval()
print("✅ Loaded best_model.pth successfully.")

# ==============================
# PREDICTION LOOP
# ==============================
results = []

with torch.no_grad():
    for inputs, _, paths in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, dim=1)

        for path, pred in zip(paths, preds.cpu().numpy()):
            filename = os.path.basename(path)

            # ✅ Clean up Roboflow suffix like ".rf.xxxxx"
            if ".rf." in filename:
                filename = filename.split(".rf.")[0]

            results.append([filename, pred])

# ==============================
# SAVE CSV
# ==============================
csv_file = "test_predictions.csv"
with open(csv_file, mode="w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["Id", "Predicted"])  # header
    writer.writerows(results)

print(f"✅ Predictions saved to {csv_file}")


In [None]:
import pandas as pd
df = pd.read_csv("/kaggle/working/test_predictions.csv")
df