In [7]:
ROOT = './ODIR-5K'

In [8]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
import torchvision

BATCH_SIZE = 32
IMG_HEIGHT = 250
IMG_WIDTH = 250
CLASS_NAMES = ['N', 'D', 'G', 'C', 'A', 'H', 'M', 'O']


In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [10]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

train_set = ImageFolder(root=ROOT+'/Training_Images/', transform=transform)
# validation_set = ImageFolder(root=ROOT+'/Validation_Images/', transform=transform)
testing_set = ImageFolder(root=ROOT+'/Testing_Images/', transform=transform)

In [11]:
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size = 32,
    shuffle = True,
    num_workers = 2
)

# validation_loader = torch.utils.data.DataLoader(
#     validation_set,
#     batch_size = 16,
#     shuffle = False,
#     num_workers = 2
# )

test_loader = torch.utils.data.DataLoader(
    testing_set,
    batch_size = 16,
    shuffle = False,
    num_workers = 2
)

In [6]:
def train_model(model, num_epochs, criterion, optimizer, results_path):
    model.train()
    train_losses = np.zeros(num_epochs)
    val_losses = np.zeros(num_epochs)
    train_accracy = np.zeros(num_epochs)
    val_accracy = np.zeros(num_epochs)

    for epoch in range(num_epochs):
        # trainning
        running_loss = 0.0
        n = 0
        total=0
        correct=0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            n += 1
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)    # add in the number of labels in this minibatch
            correct += (predicted == labels).sum().item()  # add in the number of correct labels
        train_losses[epoch] = running_loss / n
        train_accracy[epoch]=correct/total
        # validation
#         running_loss = 0.0
#         n = 0
#         total=0
#         correct=0
#         with torch.no_grad():
#             for images,labels in validation_loader:
#                 images, labels = images.to(device), labels.to(device)
#                 outputs=model(images)
#                 running_loss += criterion(outputs,labels).item()
#                 n += 1
#                 _, predicted = torch.max(outputs.data, 1)
#                 total += labels.size(0)    # add in the number of labels in this minibatch
#                 correct += (predicted == labels).sum().item()  # add in the number of correct labels
#         val_losses[epoch]=running_loss/n
#         val_accracy[epoch]=correct/total

        print(f'Epoch [{epoch + 1}/{num_epochs}], training loss: {train_losses[epoch] : .3f} training accuracy: {train_accracy[epoch]: .1%}')

    torch.save({"state_dict": model.state_dict(), "train_losses": train_losses, "train_accracy": train_accracy}, results_path)


In [5]:
class LeNet5(nn.Module):

    def __init__(self):
        super(LeNet5, self).__init__()
        # 3 input image channels, 6 output channels, 5x5 square convolution kernel
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(55696, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, len(CLASS_NAMES))

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [11]:
# 初始化模型、损失函数和优化器
lenet = LeNet5().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(lenet.parameters(), lr=0.001, weight_decay=0.0001)
results_path = ROOT + '/results/LeNet5_multiple.pt'
num_epochs = 20

train_model(lenet, num_epochs, criterion, optimizer, results_path)

Epoch [1/20], training loss:  1.519 training accuracy:  40.4%
Epoch [2/20], training loss:  1.201 training accuracy:  53.0%
Epoch [3/20], training loss:  0.961 training accuracy:  61.7%
Epoch [4/20], training loss:  0.783 training accuracy:  68.6%
Epoch [5/20], training loss:  0.646 training accuracy:  73.5%
Epoch [6/20], training loss:  0.554 training accuracy:  77.4%
Epoch [7/20], training loss:  0.517 training accuracy:  79.0%
Epoch [8/20], training loss:  0.433 training accuracy:  82.3%
Epoch [9/20], training loss:  0.360 training accuracy:  85.3%
Epoch [10/20], training loss:  0.318 training accuracy:  87.6%
Epoch [11/20], training loss:  0.278 training accuracy:  89.2%
Epoch [12/20], training loss:  0.214 training accuracy:  92.0%
Epoch [13/20], training loss:  0.196 training accuracy:  93.2%
Epoch [14/20], training loss:  0.133 training accuracy:  95.6%
Epoch [15/20], training loss:  0.125 training accuracy:  95.7%
Epoch [16/20], training loss:  0.114 training accuracy:  96.2%
E

In [9]:
model_vgg16 = torchvision.models.vgg16(pretrained=False)
in_features = model_vgg16.classifier[6].in_features
model_vgg16.classifier[6] = nn.Linear(in_features, len(CLASS_NAMES), True)
model_vgg16 = model_vgg16.to(device)

result_path = ROOT+'/results/model_vgg_multiclass.pt'    
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model_vgg16.parameters(), lr=0.01)
num_epochs = 20

train_model(model_vgg16, num_epochs, criterion, optimizer, result_path)

Epoch [1/20], training loss:  239627274.101 training accuracy:  14.1%
Epoch [2/20], training loss:  2.259 training accuracy:  15.1%
Epoch [3/20], training loss:  2.350 training accuracy:  14.6%
Epoch [4/20], training loss:  2.185 training accuracy:  15.0%
Epoch [5/20], training loss:  2.101 training accuracy:  14.8%
Epoch [6/20], training loss:  4.555 training accuracy:  15.0%
Epoch [7/20], training loss:  2.149 training accuracy:  15.1%
Epoch [8/20], training loss:  2.102 training accuracy:  15.2%
Epoch [9/20], training loss:  2.094 training accuracy:  15.2%
Epoch [10/20], training loss:  2.085 training accuracy:  15.3%
Epoch [11/20], training loss:  2.086 training accuracy:  15.4%
Epoch [12/20], training loss:  2.088 training accuracy:  15.1%
Epoch [13/20], training loss:  2.082 training accuracy:  15.2%
Epoch [14/20], training loss:  2.091 training accuracy:  15.0%
Epoch [15/20], training loss:  2.075 training accuracy:  15.3%
Epoch [16/20], training loss:  2.076 training accuracy: 

In [10]:
resnet18 = torchvision.models.resnet18(pretrained=False)
in_features = resnet18.fc.in_features
resnet18.fc = nn.Linear(in_features, len(CLASS_NAMES), True)
resnet50 = resnet18.to(device)

result_path = ROOT+'/results/model_resnet18_multiclass.pt'
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(resnet18.parameters(), lr=0.01)
num_epochs = 20

train_model(resnet18, num_epochs, criterion, optimizer, result_path)

Epoch [1/20], training loss:  1.783 training accuracy:  29.8%
Epoch [2/20], training loss:  1.487 training accuracy:  41.6%
Epoch [3/20], training loss:  1.292 training accuracy:  48.5%
Epoch [4/20], training loss:  1.155 training accuracy:  53.9%
Epoch [5/20], training loss:  1.046 training accuracy:  58.4%
Epoch [6/20], training loss:  0.939 training accuracy:  63.2%
Epoch [7/20], training loss:  0.843 training accuracy:  66.9%
Epoch [8/20], training loss:  0.725 training accuracy:  71.8%
Epoch [9/20], training loss:  0.639 training accuracy:  74.6%
Epoch [10/20], training loss:  0.545 training accuracy:  78.0%
Epoch [11/20], training loss:  0.482 training accuracy:  80.2%
Epoch [12/20], training loss:  0.441 training accuracy:  81.9%
Epoch [13/20], training loss:  0.375 training accuracy:  84.4%
Epoch [14/20], training loss:  0.331 training accuracy:  86.8%
Epoch [15/20], training loss:  0.278 training accuracy:  89.3%
Epoch [16/20], training loss:  0.200 training accuracy:  92.3%
E

In [94]:
def test_model(model, path):
    d = torch.load(path)
    model.load_state_dict(d["state_dict"]) 
    model = model.to(device)
    total = 0
    correct = 0
    true_positives = 0
    false_positives = 0
    false_negatives = 0
    prob_all = []
    label_all = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)

            # Move predicted probabilities and labels to CPU and append to lists
            prob_all.extend(outputs.cpu().detach().numpy())
            label_all.extend(labels.cpu().numpy())
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            true_positives += ((predicted == 1) & (labels == 1)).sum().item()
            false_positives += ((predicted == 1) & (labels == 0)).sum().item()
            false_negatives += ((predicted == 0) & (labels == 1)).sum().item()

    accuracy = correct / total
    if (true_positives + false_positives) != 0:
        precision = true_positives / (true_positives + false_positives)
    else:
        precision = 0.0  # or any other suitable value

    if (true_positives + false_negatives) != 0:
        recall = true_positives / (true_positives + false_negatives)
    else:
        recall = 0.0  # or any other suitable value


    # Convert the lists to numpy arrays
    label_all = np.array(label_all)
    prob_all = np.array(prob_all)
    
    prob_all = np.exp(prob_all) / np.sum(np.exp(prob_all), axis=1, keepdims=True)
    
    label_all = label_all.reshape(-1, 1)  # Reshape label_all to (1800, 1)
    prob_all = prob_all.reshape(-1, 8)    # Reshape prob_all to (1800, 8

#     AUC = roc_auc_score(label_all, prob_all, multi_class='ovr')
    
    return accuracy, precision, recall


In [91]:
results_path = ROOT + '/results/LeNet5_multiple.pt'
model = LeNet5()
accuracy, precision, recall = test_model(model, results_path)

print('leNet5:')
print('  accuracy: {accuracy:.2f}'.format(accuracy=accuracy))
print('  precision: {precision:.2f}'.format(precision=precision))
print('  recall: {recall:.2f}'.format(recall=recall))
# print('  AUC: {AUC:.2f}'.format(AUC=AUC))

leNet5:
  accuracy: 0.38
  precision: 0.98
  recall: 1.00
  AUC: 0.71


In [95]:
model_vgg16 = torchvision.models.vgg16(pretrained=False)
in_features = model_vgg16.classifier[6].in_features
model_vgg16.classifier[6] = nn.Linear(in_features, len(CLASS_NAMES), True)

results_path = ROOT + '/results/model_vgg_multiclass.pt'

accuracy, precision, recall = test_model(model_vgg16, results_path)

print('VGG16:')
print('  accuracy: {accuracy:.2f}'.format(accuracy=accuracy))
print('  precision: {precision:.2f}'.format(precision=precision))
print('  recall: {recall:.2f}'.format(recall=recall))
# print('  AUC: {AUC:.2f}'.format(AUC=AUC))

VGG16:
  accuracy: 0.46
  precision: 0.00
  recall: 0.00


In [96]:
resnet18 = torchvision.models.resnet18(pretrained=False)
in_features = resnet18.fc.in_features
resnet18.fc = nn.Linear(in_features, len(CLASS_NAMES), True)

results_path = ROOT + '/results/model_resnet18_multiclass.pt'

accuracy, precision, recall = test_model(resnet18, results_path)

print('ResNet18:')
print('  accuracy: {accuracy:.2f}'.format(accuracy=accuracy))
print('  precision: {precision:.2f}'.format(precision=precision))
print('  recall: {recall:.2f}'.format(recall=recall))
# print('  AUC: {AUC:.2f}'.format(AUC=AUC))

ResNet18:
  accuracy: 0.26
  precision: 0.96
  recall: 0.81
