# **ResNet-50**

In [None]:
!pip install torch torchvision
!pip install pillow
!pip install matplotlib
!pip install scikit-learn
!pip install seaborn

In [1]:
import torch

# Quick diagnostic of what the installed PyTorch build can see: whether CUDA is
# usable, how many devices there are, the first device's name and the CUDA
# version reported by the build.
cuda_ok = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if cuda_ok else 'No GPU detected'

print(f"PyTorch CUDA availability: {cuda_ok}")
print(f"Number of CUDA devices: {torch.cuda.device_count()}")
print(f"Device name: {gpu_name}")
print(f"CUDA version: {torch.version.cuda}")

PyTorch CUDA availability: False
Number of CUDA devices: 0
Device name: No GPU detected
CUDA version: None


# **Import Libraries**

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet50
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# **Data Paths**

In [None]:
# Root folder holding the pre-split 'train', 'val' and 'test' image folders.
# Set the CAPSTONE_DATA_ROOT environment variable to run the notebook on another
# machine; when it is unset the original local Windows location is used.
data_root = os.environ.get(
    'CAPSTONE_DATA_ROOT',
    r'D:\CADT\CapstoneProjectI\ml__model\data\splited_data',
)
train_dir = os.path.join(data_root, 'train')
val_dir = os.path.join(data_root, 'val')
test_dir = os.path.join(data_root, 'test')

# **Define image augmentation (training) and preprocessing (validation/test)**

In [None]:
# Tensor conversion followed by per-channel normalisation; both pipelines end
# with these same two steps.
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: random crop/flip/rotation/colour jitter for augmentation,
# producing 224-pixel crops.
_train_steps = [
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    _to_tensor,
    _normalize,
]
train_transforms = transforms.Compose(_train_steps)

# Validation/test pipeline: deterministic resize to 256 then a 224 centre crop.
_eval_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    _to_tensor,
    _normalize,
]
val_test_transforms = transforms.Compose(_eval_steps)

In [None]:
# Build one ImageFolder per split. Only the training split gets the augmenting
# pipeline; validation and test share the deterministic one.
train_dataset, val_dataset, test_dataset = (
    ImageFolder(split_dir, transform=split_transforms)
    for split_dir, split_transforms in (
        (train_dir, train_transforms),
        (val_dir, val_test_transforms),
        (test_dir, val_test_transforms),
    )
)

In [None]:
# Create data loaders
batch_size = 16

# Pinned host memory only helps host-to-GPU copies. Requesting it on a machine
# without CUDA just makes DataLoader emit a warning, so enable it conditionally.
pin_memory = torch.cuda.is_available()
loader_kwargs = dict(batch_size=batch_size, num_workers=2, pin_memory=pin_memory)

# Only the training data is shuffled; validation/test keep a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [None]:
# Pick the compute device once and report which one was chosen.
if torch.cuda.is_available():
    device = torch.device('cuda')
    print(f"GPU is available! Using device: {torch.cuda.get_device_name(0)}")
    # To restrict the run to one particular GPU on a multi-GPU machine, set
    # CUDA_VISIBLE_DEVICES, e.g.:
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
else:
    device = torch.device('cpu')
    print("GPU not available. Using CPU instead. Training may be slower.")

In [None]:
# Resolve the compute device (GPU when available, otherwise CPU) and print a
# human-readable label for it.
has_gpu = torch.cuda.is_available()
device = torch.device('cuda') if has_gpu else torch.device('cpu')
device_label = torch.cuda.get_device_name(0) if has_gpu else 'CPU'
print(f"Using device: {device_label}")

In [None]:
# Load a pretrained ResNet-50 and replace its final fully connected layer.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in favour
# of the `weights=` argument; it is kept so older installs keep working.
model = resnet50(pretrained=True)
torch.cuda.empty_cache()
num_ftrs = model.fc.in_features

# Size the new head from the dataset rather than hard-coding 7, so the model
# always matches the number of class folders found by ImageFolder.
num_classes = len(train_dataset.classes)
model.fc = nn.Linear(num_ftrs, num_classes)
model = model.to(device)

In [None]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Gradient scaling only applies to CUDA mixed precision. Constructing an enabled
# scaler on a CPU-only machine makes PyTorch warn and disable it anyway, so
# state the intent explicitly. A disabled scaler passes scale/step/update
# straight through to the loss and optimizer.
scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())

# **Training Model**

In [None]:
def _refresh_training_plots(fig, ax1, ax2, train_losses, val_losses, train_accuracies, val_accuracies):
    """Redraw the loss panel (ax1) and accuracy panel (ax2) from the per-epoch histories."""
    ax1.clear()
    ax1.plot(train_losses, label='Train Loss')
    ax1.plot(val_losses, label='Val Loss')
    ax1.set_title('Training vs Validation Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()
    ax1.grid(True)

    ax2.clear()
    ax2.plot(train_accuracies, label='Train Accuracy')
    ax2.plot(val_accuracies, label='Val Accuracy')
    ax2.set_title('Training vs Validation Accuracy')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy (%)')
    ax2.legend()
    ax2.grid(True)

    fig.canvas.draw()
    fig.canvas.flush_events()


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    """Train `model` for `num_epochs` epochs, validating after every epoch.

    Uses the notebook-level `device` (where batches are moved) and `scaler`
    (the GradScaler that drives the backward pass and optimizer step).

    Args:
        model: Network to train; expected to already live on `device`.
        train_loader: Iterable of (inputs, labels) training batches.
        val_loader: Iterable of (inputs, labels) validation batches.
        criterion: Loss function called as criterion(outputs, labels).
        optimizer: Optimizer over the model's parameters.
        num_epochs (int): Number of passes over `train_loader`.

    Returns:
        tuple: (train_losses, val_losses, train_accuracies, val_accuracies),
        four lists with one entry per epoch. Losses are the mean of the
        per-batch losses; accuracies are percentages.
    """
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []

    # Mixed precision is only switched on when the run is actually on a GPU;
    # on CPU this avoids the "CUDA is not available" autocast warning.
    use_amp = device.type == 'cuda'

    # Report progress about five times per epoch. len(train_loader) // 5 is 0
    # for loaders with fewer than five batches, which would make the modulo
    # below raise ZeroDivisionError, so clamp the interval to at least 1.
    log_every = max(1, len(train_loader) // 5)

    # Initialize plot for live updates
    plt.ion()
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item()
            _, predicted = torch.max(outputs.detach(), 1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()

            if (i + 1) % log_every == 0:
                print(f'Epoch {epoch+1}, Batch {i+1}/{len(train_loader)}')

        epoch_train_loss = train_loss / len(train_loader)
        epoch_train_acc = 100 * train_correct / train_total
        train_losses.append(epoch_train_loss)
        train_accuracies.append(epoch_train_acc)

        # Validation phase (no gradient tracking, eval-mode layers)
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                with torch.cuda.amp.autocast(enabled=use_amp):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        epoch_val_loss = val_loss / len(val_loader)
        epoch_val_acc = 100 * val_correct / val_total
        val_losses.append(epoch_val_loss)
        val_accuracies.append(epoch_val_acc)

        # Print epoch results
        print(f'\nEpoch {epoch+1}/{num_epochs} Results:')
        print(f'Train Loss: {epoch_train_loss:.4f} | Train Acc: {epoch_train_acc:.2f}%')
        print(f'Val Loss: {epoch_val_loss:.4f} | Val Acc: {epoch_val_acc:.2f}%')
        print('-' * 50)

        # Update live plots
        _refresh_training_plots(
            fig, ax1, ax2, train_losses, val_losses, train_accuracies, val_accuracies
        )

    plt.ioff()  # Turn off interactive mode after training
    plt.show()

    return train_losses, val_losses, train_accuracies, val_accuracies

In [None]:
# Train the model
num_epochs = 10

# train_model returns four per-epoch histories (train/val loss and train/val
# accuracy). All four must be unpacked: a three-name target raises ValueError,
# and the plotting cell below needs train_accuracies.
train_losses, val_losses, train_accuracies, val_accuracies = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs
)

In [None]:
# Side-by-side summary of the finished run: loss curves on the left panel,
# accuracy curves on the right panel.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Loss curves
loss_ax.plot(train_losses, label='Training Loss')
loss_ax.plot(val_losses, label='Validation Loss')
loss_ax.set(title='Training vs Validation Loss', xlabel='Epoch', ylabel='Loss')
loss_ax.legend()
loss_ax.grid(True)

# Accuracy curves
acc_ax.plot(train_accuracies, label='Training Accuracy')
acc_ax.plot(val_accuracies, label='Validation Accuracy')
acc_ax.set(title='Training vs Validation Accuracy', xlabel='Epoch', ylabel='Accuracy (%)')
acc_ax.legend()
acc_ax.grid(True)

fig.tight_layout()
plt.show()

# **Testing Model**

In [None]:
def evaluate_model(model, test_loader, class_names):
    """Run `model` over `test_loader` and report classification quality.

    Prints a classification report, shows a confusion-matrix heatmap and
    prints the overall accuracy. Uses the notebook-level `device`.

    Args:
        model: Trained network; expected to already live on `device`.
        test_loader: Iterable of (inputs, labels) batches.
        class_names: Display names for the classes. Assumes class_names[i]
            names integer label i (as with ImageFolder's `classes`) — TODO confirm
            if a different dataset type is used.

    Returns:
        None. All results are printed or plotted.
    """
    model.eval()
    all_preds = []
    all_labels = []

    # Mixed precision only when actually running on a GPU.
    use_amp = device.type == 'cuda'

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Nothing to score: avoid the division by zero in the accuracy below.
    if not all_labels:
        print("No test samples found; nothing to evaluate.")
        return

    # Pass the full label set explicitly. Without it scikit-learn infers the
    # labels from the data, so a class missing from this split would make
    # target_names / tick labels mismatch the rows of the report and matrix.
    label_ids = list(range(len(class_names)))

    # Classification Report
    print("\nClassification Report:")
    print(classification_report(all_labels, all_preds, labels=label_ids, target_names=class_names))

    # Confusion Matrix
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()

    # Overall accuracy
    correct = sum(1 for p, t in zip(all_preds, all_labels) if p == t)
    accuracy = 100 * correct / len(all_labels)
    print(f'Test Accuracy: {accuracy:.2f}%')

In [None]:
# Get class names from the training ImageFolder. They are later passed to
# evaluate_model as display names for the report and confusion matrix.
# NOTE(review): assumed to be ordered by integer label index — confirm.
class_names = train_dataset.classes

In [None]:
# Evaluate on test set: prints the classification report and overall test
# accuracy, and shows the confusion-matrix heatmap.
evaluate_model(model, test_loader, class_names)

In [None]:
# Persist only the learned parameters (the state_dict), not the module object.
# NOTE(review): the destination is under /content/drive, so it presumably
# requires Google Drive to be mounted before this cell runs — confirm cell order.
model_path = '/content/drive/MyDrive/CapstoneProjectI/model/resnet50_10epoch_model.pth'
torch.save(model.state_dict(), model_path)

# **Check Data**

In [None]:
import os
from google.colab import drive

# Mount Google Drive at /content/drive (uses the google.colab helper imported above).
# NOTE(review): the model-saving cell earlier in the notebook already writes under
# /content/drive, so this mount presumably has to run before it — confirm cell order.
drive.mount('/content/drive')

# Function to check all datasets and count images
def check_all_datasets(base_path):
    """
    Check and count images in train, val, and test datasets.

    Each split is expected at ``<base_path>/<split>`` with one sub-folder per
    class. A split whose folder is missing is reported and left out of the
    result. Entries that are not directories are reported with a warning and
    are not counted as classes.

    Args:
        base_path (str): Base directory path containing 'train', 'val', and 'test' folders

    Returns:
        dict: Maps each split that was found to a dict with keys
        'classes' (sorted class-folder names), 'class_counts'
        (class name -> number of image files) and 'total_images'.
    """
    datasets = ['train', 'val', 'test']
    # File extensions counted as images (compared case-insensitively).
    valid_extensions = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')
    summary = {}

    for dataset in datasets:
        dataset_path = os.path.join(base_path, dataset)
        print(f"\n=== Checking {dataset.upper()} Dataset ===")

        if not os.path.exists(dataset_path):
            print(f"Error: Path does not exist: {dataset_path}")
            continue

        # Only sub-directories are classes; stray files (e.g. .DS_Store) are
        # flagged but must not inflate the class count.
        classes = []
        for entry in sorted(os.listdir(dataset_path)):
            entry_path = os.path.join(dataset_path, entry)
            if os.path.isdir(entry_path):
                classes.append(entry)
            else:
                print(f"Warning: {entry_path} is not a directory")
        print(f"Found {len(classes)} classes: {classes}")

        class_counts = {}
        total_images = 0

        for class_name in classes:
            class_path = os.path.join(dataset_path, class_name)
            files_present = os.listdir(class_path)

            # Count valid image files (supported extensions)
            image_files = [f for f in files_present if f.lower().endswith(valid_extensions)]
            num_images = len(image_files)

            class_counts[class_name] = num_images
            total_images += num_images

            print(f"\nClass '{class_name}':")
            print(f"Number of valid images: {num_images}")
            if num_images > 0:
                print(f"Sample files: {image_files[:5]}")  # Show first 5 files as examples
            else:
                print("Warning: No valid images found in this class folder")
                print(f"Files present: {files_present}")  # Show all files for debugging

        summary[dataset] = {
            'classes': classes,
            'class_counts': class_counts,
            'total_images': total_images
        }
        print(f"\nTotal images in {dataset.upper()} dataset: {total_images}")
        print("-" * 50)

    # Print summary
    print("\n=== Final Summary ===")
    for dataset in datasets:
        # Splits whose folder was missing have no entry; indexing summary
        # unconditionally would raise KeyError here.
        if dataset not in summary:
            print(f"{dataset.upper()} Dataset: skipped (path not found)")
            print("-" * 50)
            continue
        print(f"{dataset.upper()} Dataset:")
        print(f"Total classes: {len(summary[dataset]['classes'])}")
        print(f"Total images: {summary[dataset]['total_images']}")
        print(f"Image count per class: {summary[dataset]['class_counts']}")
        print("-" * 50)

    return summary

# Define the base path and run the check; the returned per-split summary is kept
# in dataset_summary.
# NOTE(review): this is a Google Drive location, whereas the training cells read
# from train_dir/val_dir/test_dir — confirm both refer to the same data split.
base_path = '/content/drive/MyDrive/CapstoneProjectI/splited_data'
dataset_summary = check_all_datasets(base_path)