In [None]:
# !pip install matplotlib
# !pip install pandas
# !pip install numpy
# !pip install tensorboard
# !pip install tqdm

# ResNet

1. Introduction

ResNet, short for Residual Network, is a convolutional neural network architecture introduced by Microsoft Research in 2015. Its main innovation is the residual connection, which makes it possible to train much deeper networks than traditional CNNs.

A residual connection is a shortcut that skips one or more layers and adds the input of a block directly to that block's output. The skipped layers therefore only need to learn the residual $F(x) = H(x) - x$ between the desired mapping $H(x)$ and the input $x$, rather than the full mapping $H(x)$ itself.

The original paper presented ResNets of several depths (ResNet-18, ResNet-34, ResNet-50, ResNet-101, and ResNet-152) trained on the ImageNet dataset, achieving state-of-the-art results at the time of release. Since then, further variants have been proposed, such as the deeper ResNet-200. This notebook uses ResNet-18, the smallest, 18-layer version.

ResNet has been widely used in many computer vision tasks, such as image classification, object detection, semantic segmentation, etc. It is also a popular choice for transfer learning, where pre-trained ResNet models can be fine-tuned for different tasks with relatively small datasets.



2.Detail

Next, I will explain in detail what the code in each cell does.

Import the required libraries and helper modules

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torchvision
import torchvision.models as models
import os
import numpy as np
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from tqdm import tqdm
from net import *
from PIL import Image


import torch.nn.functional as F
import torch.nn as nn
from util import get_transform, get_acc


The SummaryWriter class writes TensorBoard summary data (event files) to the specified directory, where it can be visualized in TensorBoard. The batch size, learning rate, weight decay, and number of epochs are hyperparameters that control the training process of the model.

In [None]:
# TensorBoard event files are written under the 'tf_logs_exp' directory.
writer = SummaryWriter('tf_logs_exp')

# Training hyperparameters.
batch_Size = 32        # samples per mini-batch
learning_rate = 3e-4   # passed to the optimizer as lr
weight_decay = 1e-3    # passed to the optimizer as weight_decay
num_epoch = 20         # number of training epochs


 Define the model to be used

In [None]:
# Network to train. Other factories that were tried here (presumably provided
# by net.py — verify): get_resnext(), get_resnet(), get_resnet18_1().
model = get_resnet18()

Define the path where the trained model weights are saved

In [None]:
# Checkpoint file for the selected network. Paths used for the other
# architectures: model_save/resnet.pth, model_save/resnext.pth.
model_path = r'model_save/resnet18.pth'

In [None]:
# Display the factory function object itself (it is not called here); useful
# only as a quick check that the name is defined in this kernel.
get_resnet18

Use the GPU if one is available; otherwise fall back to the CPU.
 Running on a CUDA-capable GPU is generally much faster for machine learning and deep learning tasks because large amounts of data can be processed in parallel. CUDA is also the most widely supported GPU platform among deep learning frameworks such as PyTorch.

In [None]:
def get_device():
    """Return the name of the device to compute on.

    Returns:
        str: 'cuda' when torch reports an available CUDA device, otherwise 'cpu'.
    """
    if torch.cuda.is_available():
        return 'cuda'
    return 'cpu'

Check which device was selected

In [None]:
# Pick the compute device once and show which one was chosen.
device = get_device()
print(device)

Move the model to the selected device and set the paths of the training and validation data

In [None]:
# Place the network on the selected device.
model = model.to(device)

# Root folders of the training and validation images.
train_path = r'dataset/train'
val_path = r'dataset/valid'

Do a transform on the data

In [None]:
# transforms_train = transforms.Compose (
#     [
#         transforms.Resize ([224, 224]),
#         transforms.ToTensor ()
#     ])

# transforms_vaild = transforms.Compose (
#     [
#         transforms.Resize ([224, 224]),
#         transforms.ToTensor ()
#     ])

# Both splits are preprocessed with the pipeline returned by util.get_transform().
train_dataset = torchvision.datasets.ImageFolder(
    root=train_path,
    transform=get_transform(),
)
val_dataset = torchvision.datasets.ImageFolder(
    root=val_path,
    transform=get_transform(),
)

# Show the class-name -> label-index mapping of the training set.
print('The dataset corresponding labels are:{}'.format(train_dataset.class_to_idx))

In [None]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_Size, shuffle=True, num_workers=2)
# Validation is evaluated in a fixed order: the training loop averages per-batch
# metrics, so shuffling would change which samples land in the (possibly
# smaller) last batch and make the reported numbers vary from run to run.
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_Size, shuffle=False, num_workers=2)
print(train_loader, val_loader)

In [None]:
# Upper bound on the number of validation images: number of batches times the
# batch size (the last batch may be smaller). Uses batch_Size rather than
# repeating the literal 32 so it stays correct if the batch size is changed.
len(val_loader) * batch_Size

In [None]:
# Draw one mini-batch from the training loader for inspection.
batch_images, batch_labels = next(iter(train_loader))

In [None]:
# Report the tensor shapes of the sampled batch.
image_shape = batch_images.shape
label_shape = batch_labels.shape
print(f"Image shape: {image_shape} -> [batch_size, color_channels, height, width]")
print(f"Label shape: {label_shape}")

In [None]:
# Class names of the training set; a label index selects its name from this list.
class_name = train_dataset.classes

In [None]:
# Display the list of class names.
class_name

In [None]:
plt.figure(figsize=(12, 8))

# Preview up to 8 images in a 2x4 grid; a batch may hold fewer than 8 samples.
num_preview = min(8, len(batch_images))
for i in range(num_preview):
    # Reorder [channels, height, width] -> [height, width, channels] for imshow.
    # permute() already returns a tensor, so no torch.tensor(...) copy is needed
    # (copy-constructing from a tensor only triggers a UserWarning).
    plot_img = batch_images[i].permute(1, 2, 0)
    plt.subplot(2, 4, i + 1)
    plt.title(class_name[batch_labels[i]])
    plt.imshow(plot_img)

Define the cross-entropy loss function, the Adam optimizer, and the cosine-annealing learning-rate schedule

In [None]:

# Loss, optimizer and learning-rate schedule.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)
# NOTE(review): T_max is counted in scheduler.step() calls; confirm that 10
# matches how often the training loop steps the scheduler.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=10,
    eta_min=0,
    last_epoch=-1,
)

n_epochs = num_epoch

# Best validation accuracy seen so far (used to decide when to save a checkpoint).
best_acc = 0.0

# Per-epoch metric history, filled in by the training loop.
train_acc_list, train_loss_list = [], []
valid_loss_list, valid_acc_list = [], []


Start training

In [None]:

# Make sure the checkpoint directory exists up front: torch.save does not create
# missing parent directories and would otherwise fail only after a full epoch.
checkpoint_dir = os.path.dirname(model_path)
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok=True)

for epoch in tqdm(range(n_epochs)):

    # ---------- Training ----------
    model.train()
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        imgs, labels = batch
        imgs = imgs.to(device)
        labels = labels.to(device)
        logits = model(imgs)
        # Compute the loss
        loss = criterion(logits, labels)

        # Update the network
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # NOTE(review): the scheduler is stepped once per batch (not per epoch),
        # so its T_max is measured in batches — confirm this is intended.
        scheduler.step()

        acc = (logits.argmax(dim=-1) == labels).float().mean()

        train_loss.append(loss.item())
        train_accs.append(acc.item())

    # Epoch metrics are the unweighted mean of the per-batch values.
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    train_acc_list.append(train_acc)
    train_loss_list.append(train_loss)

    # Log to TensorBoard (each tag now receives its matching value; the loss and
    # accuracy arguments were previously swapped).
    writer.add_scalar('Train Loss ', train_loss, epoch)
    writer.add_scalar('Train Accuracy ', train_acc, epoch)

    # ---------- Validation ----------
    model.eval()

    valid_loss = []
    valid_accs = []

    for batch in tqdm(val_loader):
        imgs, labels = batch
        imgs = imgs.to(device)
        labels = labels.to(device)

        # No gradients are needed for evaluation.
        with torch.no_grad():
            logits = model(imgs)
            loss = criterion(logits, labels)

        acc = (logits.argmax(dim=-1) == labels).float().mean()

        valid_loss.append(loss.item())
        # Kept as a 0-dim tensor (no .item()): later cells convert the entries
        # of valid_acc_list themselves.
        valid_accs.append(acc)

    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
    valid_loss_list.append(valid_loss)
    valid_acc_list.append(valid_acc)

    # Keep the model that performs best on the validation set
    if valid_acc > best_acc:
        best_acc = valid_acc
        torch.save(model.state_dict(), model_path)
        print('saving model with acc {:.3f}'.format(best_acc))

    writer.add_scalar('Valid Loss ', valid_loss, epoch)
    writer.add_scalar('Valid Accuracy ', valid_acc, epoch)

# Push any buffered events to disk so TensorBoard shows the final epoch.
writer.flush()


In [None]:
# ------ Complete subplot functions in the following lines -------
plt.figure(figsize=(15, 10))

# Validation accuracies may be stored as 0-dim tensors; convert them to plain
# Python floats so matplotlib can plot them regardless of device.
valid_accuracy_list = [float(acc) for acc in valid_acc_list]

# (title, series, legend label) for each panel. Every title now matches the
# series it shows; previously three of the four panels plotted a different
# list than their title named, and the validation curves were titled "test".
panels = [
    ('train_loss', train_loss_list, 'model_0_loss'),
    ('valid_loss', valid_loss_list, 'model_0_loss'),
    ('train_acc', train_acc_list, 'model_0_acc'),
    ('valid_acc', valid_accuracy_list, 'model_0_acc'),
]

for position, (title, values, label) in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    plt.plot(values, label=label)
    plt.title(title)
    plt.xlabel('epochs')
    plt.legend()


In [None]:
# Convert each stored validation accuracy to a plain Python number.
valid_accuracy_list = [entry.tolist() for entry in valid_acc_list]

In [None]:
# Display the converted per-epoch validation accuracies.
valid_accuracy_list

Build the model and load the trained weights. First define the model to be used, then the path of the saved model weights.

In [None]:


# Fresh network instance for evaluation, plus the checkpoint it will be loaded
# from. For the other architectures, use get_resnext() / get_resnet() together
# with model_save/resnext.pth / model_save/resnet.pth.
model = get_resnet18()
model_path = r'model_save/resnet18.pth'


In [None]:
# Root folder of the held-out test images.
test_path = r'dataset/test'

test_dataset = torchvision.datasets.ImageFolder(root=test_path, transform=get_transform())

# Evaluation does not need shuffling; a fixed order keeps results reproducible.
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [None]:
# Replace the classifier head with a 5-output linear layer before loading.
# NOTE(review): the prediction cell further down loads the same checkpoint
# without replacing model.fc — confirm whether this replacement is needed.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 5)
# map_location lets a checkpoint saved on a GPU be restored on a CPU-only
# machine (and vice versa) instead of failing while deserializing.
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Evaluation mode for testing.
model.eval()
test_loss = []
test_accs = []


In [None]:
# Evaluate on the test loader with util.get_acc and report the result.
test_acc = get_acc(model, test_loader, criterion)
print(f"Test  acc = {test_acc:.5f}")

Predict the class of a single image; a batch dimension is added because the model expects batched input

In [None]:

def model_test(model, img_path):
    """Predict the class index of a single image file.

    Args:
        model: Network used for the prediction; it is switched to eval mode.
        img_path: Path of the image file to classify.

    Returns:
        int: Index of the highest-scoring model output for the image.
    """
    # Run on whichever device the model's weights live on instead of assuming
    # CUDA, so the function also works on CPU-only machines.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # NOTE(review): this pipeline is defined inline, while the datasets are
    # built with get_transform() — confirm the two are equivalent.
    data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    # Convert to RGB so grayscale, palette or RGBA files also yield a
    # 3-channel tensor; the context manager closes the file handle.
    with Image.open(img_path) as img:
        img = data_transform(img.convert('RGB'))
    # Add the leading batch dimension: [C, H, W] -> [1, C, H, W].
    img = torch.unsqueeze(img, dim=0)

    model.eval()
    with torch.no_grad():
        output = model(img.to(device))
    return int(torch.argmax(output[0]).item())



Build the model, load the trained weights, and print the predicted name of each image as text

In [None]:
if __name__ == '__main__':
    model = get_resnet18()
    device = get_device()
    model.to(device)

    model_path = r'model_save/resnet18.pth'
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location=device))

    # Label index -> display name.
    # NOTE(review): must agree with the class_to_idx mapping of the training
    # dataset — verify against the mapping printed earlier.
    label_dict = {0:'Albedo', 1:'CC', 2:'CuChulainn', 3:'Gilgamesh', 4:'Sesshomaru'}

    # Predicted names, in the order os.listdir returns the files.
    img_pre_labels = []
    for i in os.listdir(r'prediction'):
        img_path = r'prediction' + '/' + i
        img_pre_label = label_dict[model_test(model, img_path)]
        img_pre_labels.append(img_pre_label)

        print('picture {} name is： {}'.format(i, img_pre_label))

Show each image with its predicted name as the title

In [None]:
# Files are listed the same way as when the predictions were made, so position
# k here corresponds to img_pre_labels[k].
prediction_files = os.listdir(r'prediction')

# Keep the 10x4 grid, but add rows when there are more than 40 images; a fixed
# grid would make plt.subplot raise ValueError for the 41st image.
n_cols = 4
n_rows = max(10, (len(prediction_files) + n_cols - 1) // n_cols)

plt.figure(figsize=[20, 20])
for count1, i in enumerate(prediction_files, start=1):
    custom_image = r'prediction' + '/' + i
    img = torchvision.io.read_image(custom_image)
    plt.subplot(n_rows, n_cols, count1)
    # read_image returns [channels, height, width]; imshow wants channels last.
    plt.imshow(img.permute(1, 2, 0))
    plt.title(f"Pred label: {img_pre_labels[count1 - 1]}")
    plt.axis(False)