In [1]:
from google.colab import drive
try:
    from google.colab import drive
    drive.mount('/content/drive')
    workspace = '/content/drive/MyDrive/Colab Notebooks'
except:
    workspace = '.'
# !pip install torch==1.11.0 torchvision==0.12.0 torchaudio==0.12.0
# %pip install torch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0
# %pip install -U opencv-python
# %pip install -U opencv-contrib-python

Mounted at /content/drive


In [2]:
import json
import os
root = os.path.join(workspace, "clothes")
meta_path = os.path.join(root, "meta.json")
meta = json.load(open(meta_path,'r'))

classes = meta['classes']
print(f"Classes : {classes}")

Classes : ['black', 'blue', 'brown', 'green', 'white', 'red', 'dress', 'pants', 'shorts', 'shoes', 'shirt']


In [3]:
import os

def list_dir_in_directory(directory):
  dirs = os.listdir(directory)
  d_list = [d for d in dirs if os.path.isdir(os.path.join(directory, d))]
  return d_list

d_list = list_dir_in_directory(root)
img_sets = {}

for d in d_list:
  img_sets[d] = []


images_info = meta['images']
for i, img_info in enumerate(images_info):
  # path = img_info['path']
  cls = img_info['class']
  # label = img_info['label']

  cls = cls[0] + "_" + cls[1]
  img_sets[cls].append(i) # 往对应的类别list中，添加(在整个images_info素材集中的)下标


import random
# 从每个类中随机挑选20个元素作为样本(训练+测试)
for cls in img_sets.keys():
  temp = random.sample(img_sets[cls], 20)
  img_sets[cls].clear()
  img_sets[cls] = temp


In [5]:
import os
import cv2

class Sample:
  def __init__(self, img=None, classPair=None, labelList=None, feat=None) -> None:
      # self.path = path
      self.img = img
      self.classColor = classPair[0]
      self.classCos = classPair[1]

      self.labelColor = labelList[:6]
      self.labelCos = labelList[6:]
      self.feat = feat
      self.pred = None

samples = {'train': [], 'val': []}

# 从挑选好的样本集中(img_sets)，每类前17个样本用于训练，后3个用于测试
for cls in img_sets.keys():
  smp_list = img_sets[cls]
  for i, idx in enumerate(smp_list):
    img_info = images_info[idx]

    fpath = os.path.join(root, img_info['path'])
    if not os.path.isfile(fpath):
      raise ValueError('%s not found' % fpath)
    else:
      img = cv2.imread(fpath, cv2.IMREAD_COLOR)[..., ::-1] # BGR to RGB
      if len(samples['train']) == 0 and len(samples['val'])==0:
        H, W, C = img.shape
      else:
        cv2.resize(img, (W, H))

      if i < 17:
        samples['train'].append(Sample(img=img, classPair=img_info['class'], labelList=img_info['label']))
      else:
        samples['val'].append(Sample(img=img, classPair=img_info['class'], labelList=img_info['label']))

# print(samples)
# print(samples['train'][0].__dict__)

In [14]:
import torch

color_label = [sample.labelColor for sample in samples['train']]
cos_label = [sample.labelCos for sample in samples['train']]

# 将独热向量转换为类别索引
y_color_train = torch.argmax(torch.tensor(color_label), dim=1).to(torch.long)
y_cos_train = torch.argmax(torch.tensor(cos_label), dim=1).to(torch.long)

# print(y_color_train)
# print(y_cos_train)
print(y_color_train.shape)
print(y_cos_train.shape)

torch.Size([408])
torch.Size([408])


In [7]:
import os
import torch.nn as nn
import torch
import torchvision.transforms as transforms
from PIL import Image
from matplotlib import pyplot as plt
import torchvision.models as models

norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

inference_transform = transforms.Compose([
  transforms.Resize((256, 256)),
  transforms.ToTensor(),
  transforms.Normalize(norm_mean, norm_std),
])

def img_transform(img_rgb, transform=None):
  """
  transform images
  :param img_rgb: PIL Image
  :param transform: torchvision.transform
  :return: tensor
  """

  if transform is None:
    raise ValueError("there is no transform")

  img_t = transform(Image.fromarray(img_rgb))
  return img_t

# 加载数据
train_imgs = [img_transform(sample.img, inference_transform) for sample in samples['train']]
train_imgs = torch.stack(train_imgs, dim=0)


test_imgs = [img_transform(sample.img, inference_transform) for sample in samples['val']]
test_imgs = torch.stack(test_imgs, dim=0)


In [8]:
import torchvision.models as models
import torch
import torch.nn as nn
import torch.optim as optim
from collections import OrderedDict


# 设备
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# 多任务学习框架
class Net(nn.Module):
  def __init__(self, num_color_classes, num_cos_classes):
      super().__init__()
      self.net = models.resnet18(pretrained=True) # 使用在ImageNet上预训练权重的ResNet18
      self.n_features = self.net.fc.in_features  # 得到最后一层的输入特征个数
      self.net.fc = nn.Identity() # 将最后一层替换，不做任何操作

      self.net.fc1 = nn.Sequential(OrderedDict(
          [('linear', nn.Linear(self.n_features,self.n_features)),
          ('relu1', nn.ReLU()),
          ('final', nn.Linear(self.n_features, num_color_classes))])) # 根据衣物颜色进行分类

      self.net.fc2 = nn.Sequential(OrderedDict(
          [('linear', nn.Linear(self.n_features,self.n_features)),
          ('relu1', nn.ReLU()),
          ('final', nn.Linear(self.n_features, num_cos_classes))]))  # 根据衣物种类进行分类

      # 冻结 ResNet 除了最后一层的权重参数
      for param in self.net.parameters():
          param.requires_grad = False  # 先将所有参数设置为不需要梯度

      # 仅允许最后的全连接层（自定义部分）进行训练
      for param in self.net.fc1.parameters():
          param.requires_grad = True
      for param in self.net.fc2.parameters():
          param.requires_grad = True


  def forward(self, x):
      features = self.net(x)
      color_head = self.net.fc1(features)
      cos_head = self.net.fc2(features)
      return color_head, cos_head

model = Net(num_color_classes=6, num_cos_classes=5)
model.to(device)

# 损失设计
color_criterion = nn.CrossEntropyLoss()
cos_criterion = nn.CrossEntropyLoss()

# 优化器
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 85.3MB/s]


In [15]:
import torch.nn as nn
import torch
from torch.autograd import Variable

model.train()

# 训练
num_epochs = 100
for epoch in range(num_epochs):
  if torch.cuda.is_available():
    inputs = Variable(train_imgs).cuda()
    color_target = Variable(y_color_train).cuda()
    cos_target = Variable(y_cos_train).cuda()
  else:
    inputs = Variable(train_imgs)
    color_target = Variable(y_color_train)
    cos_target = Variable(y_cos_train)


  # forward
  color_out, cos_out = model(inputs)
  loss_color = color_criterion(color_out, color_target)
  loss_cos = cos_criterion(cos_out, cos_target)

  # total loss
  total_loss = loss_color + loss_cos

  # backward
  optimizer.zero_grad()
  total_loss.backward()
  optimizer.step()

  if (epoch+1) % 10 == 0:
    print(f'Epoch[{epoch+1}/{num_epochs}] Color Loss: {loss_color.item()}, Type Loss: {loss_cos.item()}, Total Loss: {total_loss.item()}')

Epoch[10/100] Color Loss: 0.6639485359191895, Type Loss: 0.3926098644733429, Total Loss: 1.05655837059021
Epoch[20/100] Color Loss: 0.21719376742839813, Type Loss: 0.11930090188980103, Total Loss: 0.33649468421936035
Epoch[30/100] Color Loss: 0.08084291219711304, Type Loss: 0.04548873007297516, Total Loss: 0.1263316422700882
Epoch[40/100] Color Loss: 0.0349988155066967, Type Loss: 0.019477052614092827, Total Loss: 0.05447586625814438
Epoch[50/100] Color Loss: 0.018284335732460022, Type Loss: 0.010516917333006859, Total Loss: 0.02880125306546688
Epoch[60/100] Color Loss: 0.011568279005587101, Type Loss: 0.006812244653701782, Total Loss: 0.01838052272796631
Epoch[70/100] Color Loss: 0.008350650779902935, Type Loss: 0.004965667612850666, Total Loss: 0.013316318392753601
Epoch[80/100] Color Loss: 0.006524668075144291, Type Loss: 0.0038963970728218555, Total Loss: 0.010421065613627434
Epoch[90/100] Color Loss: 0.005334836430847645, Type Loss: 0.0031968243420124054, Total Loss: 0.00853166077

In [22]:
import numpy as np
import torch
from sklearn.metrics import f1_score

# 测试数据集
y_color_test = np.array([sample.labelColor for sample in samples['val']])
y_color_test = np.argmax(y_color_test, axis=1)
y_cos_test = np.array([sample.labelCos for sample in samples['val']])
y_cos_test = np.argmax(y_cos_test, axis=1)

# 在训练完成后，使用模型进行预测
model.eval()
with torch.no_grad():
  outputs_color, outputs_cos = model(test_imgs)
  predicted_color = torch.sigmoid(outputs_color).cpu().numpy()
  predicted_cos = torch.sigmoid(outputs_cos).cpu().numpy()

  # 基于预测结果计算F1-score
  y_color_pred = np.argmax(predicted_color, axis=1)
  y_cos_pred = np.argmax(predicted_cos, axis=1)

  f1_color = f1_score(y_color_test, y_color_pred, average='weighted')
  f1_cos = f1_score(y_cos_test, y_cos_pred, average='weighted')

  print(f'F1-score for color classification: {f1_color}')
  print(f'F1-score for type classification: {f1_cos}')

F1-score for color classification: 0.8897808321946253
F1-score for type classification: 0.944295900178253
