In [1]:
import argparse
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import pandas as pd
from tqdm.notebook import tqdm, trange

# 读取数据
数据来源：scene

In [2]:
x_list = []
y_list = []
for pic in tqdm(os.listdir('scene')):
    pic_path = './scene/' + pic
    # 读取RGB三通道图像(640, 640, 3)
    pic_data = cv2.imread(pic_path, cv2.IMREAD_COLOR)
    pic_data = cv2.resize(pic_data, (224, 224))
    x_list.append(pic_data)
    y_list.append(int(pic[6:8]))
x_list = np.array(x_list)
y_list_int = np.array(y_list)

scene_label = pd.read_excel('scene_label.xlsx')
y_list = np.zeros((y_list_int.shape[0], 20))
for i in trange(y_list_int.shape[0]):
    y_list[i, scene_label[scene_label.id==y_list_int[i]].iloc[:, 1:].dropna(axis=1).astype(int).to_numpy()[0].tolist()] = 1

  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2500 [00:00<?, ?it/s]

In [3]:
from sklearn.model_selection import StratifiedShuffleSplit

# 假设标签数据保存在label_list中，其中每个标签是一个整数
X = x_list
y = y_list

# 分层抽样，其中train_size和test_size分别表示训练集和测试集的比例
# n_splits表示抽取的次数，random_state表示随机数种子
split = StratifiedShuffleSplit(n_splits=1, train_size=0.8, test_size=0.2, random_state=42)
train_index, test_index = next(split.split(X, y))

# 得到训练集和测试集
X_train, X_test = [X[i] for i in train_index], [X[i] for i in test_index]
y_train, y_test = [y[i] for i in train_index], [y[i] for i in test_index]

# 将训练集进一步划分为训练集和验证集，其中test_size表示验证集的比例
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_index, valid_index = next(split.split(X_train, y_train))

# 得到训练集、验证集和测试集的索引
train_index = [train_index[i] for i in range(len(train_index))]
valid_index = [valid_index[i] for i in range(len(valid_index))]
test_index = [test_index[i] for i in range(len(test_index))]

# 分类器

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
from tqdm.notebook import tqdm, trange

# 定义超参数
batch_size = 8
learning_rate = 0.0001
num_epochs = 100

# 设置 GPU
device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')

class MyDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        img = self.data[index]
        label = self.labels[index]
        if self.transform is not None:
            img = self.transform(img)
        return img, label

# 加载数据集
train_data = x_list[train_index]
train_labels = y_list[train_index]
valid_data = x_list[valid_index]
valid_labels = y_list[valid_index]
test_data = x_list[test_index]
test_labels = y_list[test_index]

# # 定义数据增强和标准化
# # 在scene数据集中只做了标准化
transform = transforms.Compose([
#     transforms.RandomAffine(5),
#     transforms.ColorJitter(hue=.05, saturation=.05),
#     transforms.RandomCrop((88, 88)),
#     transforms.RandomHorizontalFlip(),
#     transforms.RandomVerticalFlip(),
    transforms.ToTensor(), # 转换为张量
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # 标准化张量
])

# 加载数据集
train_dataset = MyDataset(train_data, train_labels, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_dataset = MyDataset(valid_data, valid_labels, transform=transform)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
test_dataset = MyDataset(test_data, test_labels, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [5]:
import torchvision

model = torchvision.models.densenet121(weights=False)

num_fits = model.classifier.in_features
model.classifier = nn.Linear(num_fits, 20)
model.to(device)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)



In [None]:
# 训练模型
loss_train = []
loss_valid = []
for epoch in trange(num_epochs):
    running_loss = 0.0
    model.train()
    for inputs, labels in tqdm(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(train_dataset)
    print('Epoch [%d/%d], Loss: %.4f' % (epoch + 1, num_epochs, epoch_loss))
    torch.save(model.state_dict(), "./classifier/model-DenseNet121-notrain-scene/epoch-%d.pt" % epoch)
    loss_train.append(epoch_loss)
    
    model.eval()
    with torch.no_grad():
        running_loss = 0.0
        for inputs, labels in tqdm(valid_loader):
            inputs, labels = inputs.to(device), labels
            outputs = model(inputs)
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(valid_dataset)
        print('Loss of the model on the valid images: %f' % loss)
    loss_valid.append(epoch_loss)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

Epoch [1/100], Loss: 4.8147


  0%|          | 0/50 [00:00<?, ?it/s]

Loss of the model on the valid images: 4.504932


  0%|          | 0/200 [00:00<?, ?it/s]

Epoch [2/100], Loss: 4.8178


  0%|          | 0/50 [00:00<?, ?it/s]

Loss of the model on the valid images: 4.505998


  0%|          | 0/200 [00:00<?, ?it/s]

Epoch [3/100], Loss: 4.7946


  0%|          | 0/50 [00:00<?, ?it/s]

Loss of the model on the valid images: 4.774998


  0%|          | 0/200 [00:00<?, ?it/s]

Epoch [4/100], Loss: 4.8048


  0%|          | 0/50 [00:00<?, ?it/s]

Loss of the model on the valid images: 4.462124


  0%|          | 0/200 [00:00<?, ?it/s]

Epoch [5/100], Loss: 4.7931


  0%|          | 0/50 [00:00<?, ?it/s]

Loss of the model on the valid images: 5.365833


  0%|          | 0/200 [00:00<?, ?it/s]

Epoch [6/100], Loss: 4.8021


  0%|          | 0/50 [00:00<?, ?it/s]

Loss of the model on the valid images: 5.368299


  0%|          | 0/200 [00:00<?, ?it/s]

# 评价

In [None]:
y_pred = []
y_true = []
# 测试模型（这里是针对单个杯子设计的准确率，多个杯子需修改代码）
model.eval()
with torch.no_grad():
    for inputs, labels in tqdm(test_loader):
        inputs, labels = inputs.to(device), labels
        outputs = model(inputs)
        predicted = np.int64(np.array(outputs.data.cpu()) > 0)
        y_pred.append(predicted)
        y_true.append(np.int64(np.array(labels.cpu())))

In [None]:
y_pred = np.row_stack(y_pred)
y_true = np.row_stack(y_true)

In [None]:
from sklearn.metrics import multilabel_confusion_matrix, precision_score, recall_score, f1_score

# 计算混淆矩阵
mcm = multilabel_confusion_matrix(y_true, y_pred)

# 计算精确度、召回率和F1分数
precision = precision_score(y_true, y_pred, average='micro')
recall = recall_score(y_true, y_pred, average='micro')
f1 = f1_score(y_true, y_pred, average='micro')

# 打印结果
print("Multilabel Confusion Matrix:")
print(mcm)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)