# 1. Install library yang diperlukan
Menginstal library PyTorch (torch), torchvision (untuk manipulasi dataset gambar), dan matplotlib (untuk visualisasi).

In [1]:
!pip install torch torchvision matplotlib

Collecting matplotlib
  Downloading matplotlib-3.10.0-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.1-cp312-cp312-win_amd64.whl.metadata (5.4 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.55.3-cp312-cp312-win_amd64.whl.metadata (168 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.8-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Downloading pyparsing-3.2.1-py3-none-any.whl.metadata (5.0 kB)
Downloading matplotlib-3.10.0-cp312-cp312-win_amd64.whl (8.0 MB)
   ---------------------------------------- 0.0/8.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.0 MB ? eta -:--:--
   - -------------------------------------- 0.3/8.0 MB ? eta -:--:--
   --- ------------------------------------ 0.8/8.0

# 2. Periksa ketersediaan GPU

In [2]:
import torch
print("Apakah GPU tersedia:", torch.cuda.is_available())
print("GPU yang digunakan:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "Tidak ada GPU")

Apakah GPU tersedia: False
GPU yang digunakan: Tidak ada GPU


# 3. Mengimpor modul yang diperlukan untuk membangun, melatih, dan mengevaluasi model

* torch dan torch.nn: Untuk membuat dan melatih jaringan saraf.
* torch.optim: Untuk mengoptimalkan model.
* datasets dan transforms: Untuk memuat dan memproses dataset gambar.
* DataLoader: Untuk mengelola batch data.
* matplotlib.pyplot: Untuk memvisualisasikan data atau hasil.
* os: Untuk mengelola jalur file dan folder.


In [3]:
# Import Libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import os

# 4. Cek data dan ukuran data

In [7]:
import os

# Define Paths
data_dir = r'E:\Project STKI\Dataset\common dragon fruit stem disease.v2i.folder'

# Function to count files and folders recursively
def count_files_and_folders(path):
    folder_structure = {}
    total_images = 0

    for root, dirs, files in os.walk(path):
        current_level = root.replace(data_dir, "").count(os.sep)
        relative_path = root.replace(data_dir, "").strip(os.sep)
        if current_level == 0:
            folder_structure[relative_path] = {"subfolders": 0, "files": len(files)}
        elif current_level == 1:
            folder_structure[relative_path] = {"subfolders": len(dirs), "files": len(files)}
        elif current_level == 2:
            folder_structure[relative_path] = {"files": len(files)}
            total_images += len(files)
    
    return folder_structure, total_images

# Get folder structure and total images
folder_structure, total_images = count_files_and_folders(data_dir)

# Display results
for folder, details in folder_structure.items():
    if 'subfolders' in details:
        print(f"Folder: {folder}, Subfolders: {details['subfolders']}, Files: {details['files']}")
    else:
        print(f"Subfolder: {folder}, Files: {details['files']}")

print(f"\nTotal images in subfolders: {total_images}")

Folder: , Subfolders: 0, Files: 2
Folder: test, Subfolders: 4, Files: 0
Subfolder: test\anthracnose, Files: 111
Subfolder: test\cactusvirusx, Files: 160
Subfolder: test\healthy, Files: 206
Subfolder: test\stemcanker, Files: 287
Folder: train, Subfolders: 4, Files: 0
Subfolder: train\anthracnose, Files: 1502
Subfolder: train\cactusvirusx, Files: 1404
Subfolder: train\healthy, Files: 1913
Subfolder: train\stemcanker, Files: 1060
Folder: valid, Subfolders: 4, Files: 0
Subfolder: valid\anthracnose, Files: 235
Subfolder: valid\cactusvirusx, Files: 82
Subfolder: valid\healthy, Files: 109
Subfolder: valid\stemcanker, Files: 1110

Total images in subfolders: 8179


# 5. Preprocessing gambar

* Resize: Mengubah ukuran gambar menjadi 224x224 piksel (format yang diterima oleh ResNet-50).
* RandomHorizontalFlip dan RandomRotation: Augmentasi data untuk meningkatkan generalisasi model.
* ToTensor: Mengubah gambar menjadi tensor.
* Normalize: Normalisasi nilai piksel berdasarkan mean dan standar deviasi yang sama dengan dataset ImageNet.


In [8]:
# Data Transforms
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# 6. Muat dataset
**Dataset dibagi menjadi tiga bagian: train, valid, dan test.**
* ImageFolder: Memuat dataset berdasarkan struktur folder.
* DataLoader: Mengelola data dalam batch untuk efisiensi pelatihan.
* dataset_sizes: Menyimpan jumlah sampel dalam dataset.
* class_names: Menyimpan nama kelas.

In [9]:
# Load Datasets
datasets_dict = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'valid', 'test']}
dataloaders = {x: DataLoader(datasets_dict[x], batch_size=32, shuffle=True, num_workers=2) for x in ['train', 'valid', 'test']}
dataset_sizes = {x: len(datasets_dict[x]) for x in ['train', 'valid']}
class_names = datasets_dict['train'].classes

In [10]:
print(f"Loaded classes: {datasets_dict['train'].classes}")
print(f"Number of classes: {len(datasets_dict['train'].classes)}")

Loaded classes: ['anthracnose', 'cactusvirusx', 'healthy', 'stemcanker']
Number of classes: 4


# 7. Atur GPU
Mengatur perangkat untuk pelatihan (cuda jika GPU tersedia, atau cpu jika tidak).

In [11]:
# Set Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# 8. Transfer learning dan pelatihan model
* Memuat model ResNet-50 pre-trained dari ImageNet.
* Mengganti lapisan fully connected (FC) terakhir agar sesuai dengan jumlah kelas dataset.

In [12]:
# Load ResNet-50 Pre-trained Model
model = models.resnet50(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, len(class_names))
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\Aulia Diaz/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100.0%


In [13]:
# Define Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [16]:
# Training Function
def train_model(model, criterion, optimizer, scheduler, num_epochs=20):
    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        print("-" * 10)

        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

    return model

In [None]:
# Train the Model
trained_model = train_model(model, criterion, optimizer, scheduler, num_epochs=20)

Epoch 1/20
----------
train Loss: 0.2189 Acc: 0.9228
valid Loss: 0.5293 Acc: 0.8184
Epoch 2/20
----------
train Loss: 0.1210 Acc: 0.9609


In [33]:
# Save the Model
torch.save(trained_model.state_dict(), "resnet50_dragonfruit.pth")
print("Model saved successfully!")

Model saved successfully!


# 9. Evaluasi model

In [34]:
# Evaluate the Model on Test Data
model.eval()
corrects = 0

for inputs, labels in dataloaders['test']:
    inputs, labels = inputs.to(device), labels.to(device)
    outputs = model(inputs)
    _, preds = torch.max(outputs, 1)
    corrects += torch.sum(preds == labels.data)

test_acc = corrects.double() / len(datasets_dict['test'])
print(f"Test Accuracy: {test_acc:.4f}")

Test Accuracy: 0.9175
