In [None]:
import os
import copy
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch.nn as nn
import torch.optim as optim

from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
_backend = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_backend)
print(device)


cuda


In [None]:
# Root folder of the image dataset. It is passed to datasets.ImageFolder, which
# treats every sub-directory as one class.
#
# Set the BREAKHIS_DATA_DIR environment variable to point somewhere else without
# editing this cell. Typical locations:
#   Kaggle: "/kaggle/input/breakhis"
#   Colab:  "/content/breakhis"   (default)
#   Local:  "D:/breakhis"
DATA_DIR = os.environ.get("BREAKHIS_DATA_DIR", "/content/breakhis")


In [None]:
# Spatial size every image is resized to before entering the network.
IMAGE_SIZE = (224, 224)
# Per-channel mean / std used for normalisation (the usual ImageNet statistics).
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]


def _tensor_steps():
    """Return fresh ToTensor + Normalize steps shared by both pipelines."""
    return [transforms.ToTensor(), transforms.Normalize(CHANNEL_MEAN, CHANNEL_STD)]


# Random augmentations, applied to training images only.
_augmentations = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
]

# Training: resize -> random augmentation -> tensor -> normalise.
train_transform = transforms.Compose(
    [transforms.Resize(IMAGE_SIZE), *_augmentations, *_tensor_steps()]
)

# Evaluation: deterministic resize -> tensor -> normalise.
test_transform = transforms.Compose(
    [transforms.Resize(IMAGE_SIZE), *_tensor_steps()]
)


In [None]:
# Index every image under DATA_DIR; ImageFolder maps each sub-directory to a
# class index. The training-time transform is attached to this dataset object.
full_dataset = datasets.ImageFolder(root=DATA_DIR, transform=train_transform)

num_images = len(full_dataset)
print("Classes:", full_dataset.classes)
print("Total images:", num_images)


In [None]:
# Share of the images used for training and validation; whatever is left
# (roughly 15%) becomes the held-out test set.
TRAIN_FRACTION = 0.7
VAL_FRACTION = 0.15
SPLIT_SEED = 42  # fixed seed so the same images land in each split on every run

num_samples = len(full_dataset)
train_size = int(TRAIN_FRACTION * num_samples)
val_size = int(VAL_FRACTION * num_samples)
test_size = num_samples - train_size - val_size

train_dataset, _val_part, _test_part = random_split(
    full_dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Validation / test images must not be augmented. Every Subset returned by
# random_split wraps the SAME underlying dataset object, so assigning
# `subset.dataset.transform = test_transform` would also strip the augmentation
# from the training subset. Instead, index a second ImageFolder that carries the
# deterministic transform and reuse the split indices on it.
eval_dataset = datasets.ImageFolder(DATA_DIR, transform=test_transform)
if eval_dataset.samples != full_dataset.samples:
    # The indices are only transferable if both datasets list files identically.
    raise RuntimeError(
        "Contents of DATA_DIR changed between dataset scans; re-run the data-loading cell."
    )

val_dataset = torch.utils.data.Subset(eval_dataset, _val_part.indices)
test_dataset = torch.utils.data.Subset(eval_dataset, _test_part.indices)


In [None]:
BATCH_SIZE = 32
NUM_WORKERS = 2

# Settings common to all three loaders.
_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

# Only the training stream is shuffled; validation and test keep dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, **_loader_kwargs)
test_loader = DataLoader(test_dataset, **_loader_kwargs)


In [None]:
# Load an ImageNet-pretrained ResNet-50. torchvision >= 0.13 selects weights via
# the `weights` enum (`pretrained=True` is deprecated there and maps to
# IMAGENET1K_V1); keep the legacy call as a fallback for older installations.
if hasattr(models, "ResNet50_Weights"):
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    model = models.resnet50(pretrained=True)

# Freeze the whole backbone: only the replacement head created below is trained.
for param in model.parameters():
    param.requires_grad = False

# Size the new classification head from the dataset instead of hard-coding it,
# so the number of logits always matches the label indices ImageFolder produces.
# A freshly constructed nn.Linear has requires_grad=True, so it stays trainable.
num_classes = len(full_dataset.classes)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)


In [None]:
LEARNING_RATE = 1e-4

# Cross-entropy over the raw logits produced by the model.
criterion = nn.CrossEntropyLoss()
# Adam is given only the parameters of the classification head (model.fc).
optimizer = optim.Adam(params=model.fc.parameters(), lr=LEARNING_RATE)


In [None]:
def _run_epoch(model, loader, loss_fn, dev, opt=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``opt`` is given the model is put in training mode and updated after
    every batch; otherwise it is put in eval mode and gradients are disabled.
    Both returned values are averaged over the samples actually seen.
    """
    is_training = opt is not None
    # Note: train mode also makes BatchNorm layers use batch statistics and
    # update their running averages, even if their parameters are frozen.
    model.train(is_training)

    loss_sum, n_correct, n_seen = 0.0, 0, 0
    with torch.set_grad_enabled(is_training):
        for images, labels in loader:
            images, labels = images.to(dev), labels.to(dev)

            outputs = model(images)
            loss = loss_fn(outputs, labels)

            if is_training:
                opt.zero_grad()
                loss.backward()
                opt.step()

            batch_n = labels.size(0)
            # Weight by batch size so a short final batch does not count as much
            # as a full one (assumes loss_fn returns the batch MEAN, which is the
            # default reduction of nn.CrossEntropyLoss).
            loss_sum += loss.item() * batch_n
            n_correct += (outputs.argmax(1) == labels).sum().item()
            n_seen += batch_n

    if n_seen == 0:
        raise ValueError("data loader yielded no samples")
    return loss_sum / n_seen, n_correct / n_seen


def train_model(model, epochs=10, train_data=None, val_data=None,
                loss_fn=None, opt=None, dev=None):
    """Train ``model`` and restore the weights with the best validation accuracy.

    Args:
        model: network to train (updated in place).
        epochs: number of passes over the training data.
        train_data: training DataLoader; defaults to the notebook's ``train_loader``.
        val_data: validation DataLoader; defaults to the notebook's ``val_loader``.
        loss_fn: loss callable; defaults to the notebook's ``criterion``.
        opt: optimizer; defaults to the notebook's ``optimizer``.
        dev: device batches are moved to; defaults to the notebook's ``device``.

    Returns:
        ``(model, train_losses, val_losses, train_accs, val_accs)`` where each
        list holds one per-sample average per epoch.

    Raises:
        ValueError: if the training or validation loader yields no samples.
    """
    # Fall back to the notebook-level objects when nothing is passed explicitly.
    train_data = train_loader if train_data is None else train_data
    val_data = val_loader if val_data is None else val_data
    loss_fn = criterion if loss_fn is None else loss_fn
    opt = optimizer if opt is None else opt
    dev = device if dev is None else dev

    # Snapshot of the best weights so far; starts as the untouched initial state.
    best_state = copy.deepcopy(model.state_dict())
    best_val_acc = 0

    train_losses, val_losses = [], []
    train_accs, val_accs = [], []

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")

        train_loss, train_acc = _run_epoch(model, train_data, loss_fn, dev, opt)
        val_loss, val_acc = _run_epoch(model, val_data, loss_fn, dev)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        print(f"Train Loss {train_loss:.4f} Acc {train_acc:.4f}")
        print(f"Val   Loss {val_loss:.4f} Acc {val_acc:.4f}")

        # Strict '>' keeps the earliest epoch when accuracies tie.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = copy.deepcopy(model.state_dict())

    model.load_state_dict(best_state)
    return model, train_losses, val_losses, train_accs, val_accs


In [None]:
NUM_EPOCHS = 10

# train_model returns the model (with its best weights loaded) followed by the
# per-epoch loss and accuracy histories.
history = train_model(model, epochs=NUM_EPOCHS)
model, train_losses, val_losses, train_accs, val_accs = history


In [None]:
def _plot_history(train_values, val_values, title, ylabel):
    """Plot one training-vs-validation curve per epoch on its own figure."""
    epoch_axis = range(1, len(train_values) + 1)  # 1-based, as printed during training
    fig, ax = plt.subplots()
    ax.plot(epoch_axis, train_values, label="train")
    ax.plot(epoch_axis, val_values, label="val")
    ax.set(title=title, xlabel="Epoch", ylabel=ylabel)
    ax.legend()
    plt.show()


_plot_history(train_losses, val_losses, "Loss", "Loss")
_plot_history(train_accs, val_accs, "Accuracy", "Accuracy")


In [None]:
# Gather ground-truth labels and predicted class indices over the test loader.
y_true, y_pred = [], []

model.eval()  # evaluation mode for layers such as dropout / batch-norm
with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        logits = model(batch_images.to(device))
        # Predicted class = index of the largest logit; moved to CPU for sklearn.
        batch_preds = torch.argmax(logits, dim=1).cpu().numpy()

        y_true.extend(batch_labels.numpy())
        y_pred.extend(batch_preds)


In [None]:
# Show class names (the ImageFolder sub-directory names) instead of bare indices.
# `labels` is passed explicitly so the report stays aligned with `target_names`
# even if some class never occurs in the test split.
_report_names = list(full_dataset.classes)
print(
    classification_report(
        y_true,
        y_pred,
        labels=list(range(len(_report_names))),
        target_names=_report_names,
    )
)


In [None]:
# Fix the label set so the matrix is always (n_classes x n_classes), even when a
# class is missing from the test split, and label the ticks with class names.
_cm_names = list(full_dataset.classes)
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(_cm_names))))

fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=_cm_names,
    yticklabels=_cm_names,
    ax=ax,
)
ax.set(xlabel="Predicted", ylabel="Actual", title="Confusion matrix (test set)")
plt.show()


In [None]:
CHECKPOINT_PATH = "breakhis_resnet50.pth"

# Persist only the weights (state_dict), not the whole model object.
model_weights = model.state_dict()
torch.save(model_weights, CHECKPOINT_PATH)
