In [1]:
from google.colab import drive
try:
    from google.colab import drive
    drive.mount('/content/drive')
    workspace = '/content/drive/MyDrive/Colab Notebooks'
except:
    workspace = '.'
# !pip install torch==1.11.0 torchvision==0.12.0 torchaudio==0.12.0
# %pip install torch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0
# %pip install -U opencv-python
# %pip install -U opencv-contrib-python

Mounted at /content/drive


In [None]:
# load the images
import os
import csv
import cv2
import numpy as np
from sklearn import datasets
from pprint import pprint
from collections import namedtuple
from matplotlib import pyplot as plt
from skimage.feature import hog
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler

root = os.path.join(workspace, 'Figures_T7')
label_path = os.path.join(root, 'label.csv')



def labels2strs(labels: list) -> list:
    # Max length of each column
    lengths = np.max([[len(j) for j in i] for i in labels], axis=0)
    # Output format
    format_str = ' '.join(['{:^%ds}'%length for length in lengths])
    seps = ' '.join(['-'*length for length in lengths])
    contents = [format_str.format(*row) for row in labels]
    contents.insert(1, seps)
    return contents

class Sample:
    def __init__(self, idx=0, fname='', img=None, feat=None, label=None):
        self.idx = idx
        self.fname = fname
        self.img = img
        self.feat = feat
        self.label = label
        self.pred = None

if os.path.exists(label_path):
    with open(label_path) as f:
        flabels = list(csv.reader(f))
    print(*labels2strs(flabels), sep='\n')
else:
    raise ValueError('Invalid label file path [%s]'%label_path)


samples = {'train': [], 'val': []}


# index 0: heads [ID, filename, label, split]
for idx, fname, label, split in flabels[1:]:
    idx, label = int(idx), int(label)
    if idx % 4 == 0:
        plt.figure(figsize=(16, 4))
    plt.subplot(1, 4, idx%4+1)
    plt.title(f'{fname} in G{label}({split})')

    fpath = os.path.join(root, fname)
    if not os.path.isfile(fpath):
        raise ValueError('%s not found' % fpath)
    else:
        img = cv2.imread(fpath, cv2.IMREAD_COLOR)[..., ::-1] #BGR to RGB
        if idx == 0:
          H, W, C = img.shape
        else:
          img = cv2.resize(img, (W, H))

        plt.imshow(img)

        samples[split].append(Sample(idx, fname, img, None, label))
        

In [None]:
## extract features

def get_feat(img):
      return gray_histogram(img)
    #  return color_histogram(img)

def calc_distance(x, y):
    return L2_distance(x, y)

def gray_histogram(img: np.array, norm: bool = True) -> np.array:
    if img.shape[-1] == 3:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    hist = np.array([len(img[img == i]) for i in range(256)])
    if norm:
        return hist / np.size(img)
    return hist

def color_histogram(img : np.array, norm : bool = True) -> np.array:
    return np.concatenate([gray_histogram(img[..., i], norm=norm) for i in range(3)])


def L2_distance(x, y):
    return ((x - y) ** 2).sum() ** 0.5

def L2_distance_sift(x, y):
    dist = ((x[:, None] - y[None, :])**2).sum(axis=-1).min(axis=-1)
    dist.sort()
    return dist[:15].mean()

# 以样本图像的灰度直方图对应的向量作为特征向量
for sample in samples['train']:
    sample.feat = get_feat(sample.img)

for sample in samples['val']:
    sample.feat = get_feat(sample.img)

In [None]:
print(sample.feat.shape)

(256,)


In [None]:
# use traditional classifier

from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler

train_samples = [sample.feat for sample in samples['train']]
train_labels = [sample.label for sample in samples['train']]


# StandardScaler() aims to normalize input dataset via (x-mean)/var
# SGDClassifier(): max_iter (iteration times); tol is used for early stop; when learning rate is constant, eta0 is seen as learning rate; log loss means logistic regression
# make_pipeline: conduct StandardScaler() first, then create classifier
classifier = make_pipeline(StandardScaler(), SGDClassifier(max_iter=50000, tol=1e-3, learning_rate='constant', eta0=0.1, loss='log_loss'))


# call classifier for training
classifier.fit(train_samples, train_labels)


test_samples = [sample.feat for sample in samples['val']]
test_labels = [sample.label for sample in samples['val']]

# call classifier for prediction
results = classifier.predict(test_samples)

# display the results
for sample, result in zip(samples['val'], results):
    sample.pred = result
    print(sample.fname, 'with label', sample.label, 'is predicted as', sample.pred)

2.jpg with label 0 is predicted as 0
7.jpg with label 0 is predicted as 1
10.jpg with label 1 is predicted as 1
11.jpg with label 0 is predicted as 1
13.jpg with label 1 is predicted as 1


In [None]:
from torch.functional import Tensor
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim

input_size = len(train_samples[0])
output_size = 1
learning_rate = 1


# To define a neural network class by pytorch, you have to inhert nn.Module class.
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()

        # input_size corresponds to input feature dimension.
        # output_size coressponds to class number.
        self.linear = nn.Linear(input_size, output_size)

        #Then, we use sigmoid activation function to gain the probability.
        # when probability is larger than 0.5, we treat it as positive 1. Else, we treat it as negative 0.
        self.sigmoid = nn.Sigmoid()

    # forward function is inherted from parent's class. x denotes the input feature.
    def forward(self, x):
        y_pred = self.linear(x)
        y_pred = self.sigmoid(y_pred)
        return y_pred


x_train = torch.tensor(train_samples).to(torch.float32)
y_train = torch.tensor(train_labels).reshape(-1, 1).to(torch.float32)
x_test = torch.tensor(test_samples).to(torch.float32)

# create model
model = SimpleNN()

# create loss function. BCE is binary cross entropy loss
criterion = nn.BCELoss()

# create optimizer. 1st parameter: the parameters will be optimized; 2nd: learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# training
# set training flag
model.train()
num_epochs = 50000
for epoch in range(num_epochs):
    if torch.cuda.is_available():
        inputs = Variable(x_train).cuda()
        target = Variable(y_train).cuda()
    else:
        inputs = Variable(x_train)
        target = Variable(y_train)

    # forward() function
    out = model(inputs)

    # calculate loss
    loss = criterion(out, target)

    # clear gradient
    optimizer.zero_grad()

    # backward propagation
    loss.backward()

    # Updating parameters via SGD
    optimizer.step()

    if (epoch+1) % 1000 == 0:
        print('Epoch[{}/{}], loss: {:.6f}'
              .format(epoch+1, num_epochs, loss.item()))

# testing
model.eval()
results = model(Variable(x_test))

# display the results
for sample, result in zip(samples['val'], results):
    sample.pred = 1 if result > 0.5 else 0
    print(sample.fname, 'with label', sample.label, 'is predicted as', sample.pred)

  x_train = torch.tensor(train_samples).to(torch.float32)


Epoch[1000/50000], loss: 0.521153
Epoch[2000/50000], loss: 0.430351
Epoch[3000/50000], loss: 0.373414
Epoch[4000/50000], loss: 0.333474
Epoch[5000/50000], loss: 0.303141
Epoch[6000/50000], loss: 0.278819
Epoch[7000/50000], loss: 0.258577
Epoch[8000/50000], loss: 0.241290
Epoch[9000/50000], loss: 0.226250
Epoch[10000/50000], loss: 0.212987
Epoch[11000/50000], loss: 0.201169
Epoch[12000/50000], loss: 0.190553
Epoch[13000/50000], loss: 0.180955
Epoch[14000/50000], loss: 0.172230
Epoch[15000/50000], loss: 0.164262
Epoch[16000/50000], loss: 0.156955
Epoch[17000/50000], loss: 0.150233
Epoch[18000/50000], loss: 0.144027
Epoch[19000/50000], loss: 0.138282
Epoch[20000/50000], loss: 0.132951
Epoch[21000/50000], loss: 0.127990
Epoch[22000/50000], loss: 0.123364
Epoch[23000/50000], loss: 0.119041
Epoch[24000/50000], loss: 0.114993
Epoch[25000/50000], loss: 0.111196
Epoch[26000/50000], loss: 0.107628
Epoch[27000/50000], loss: 0.104269
Epoch[28000/50000], loss: 0.101102
Epoch[29000/50000], loss: 0.0

In [None]:
hidden_size = input_size * 2

# see if a 3-layer NN helps
# display the results
class SimpleNN_3layer(nn.Module):
    def __init__(self):
        super(SimpleNN_3layer, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        y_pred = self.sigmoid(self.fc3(x))
        return y_pred

model = SimpleNN_3layer()
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1) # 原来1.0的学习率太大

# training and testing
model.train()
num_epochs = 50000
for epoch in range(num_epochs):
    if torch.cuda.is_available():
        inputs = Variable(x_train).cuda()
        target = Variable(y_train).cuda()
    else:
        inputs = Variable(x_train)
        target = Variable(y_train)

    # forward
    out = model(inputs)
    loss = criterion(out, target)

    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 1000 == 0:
        print('Epoch[{}/{}], loss: {:.6f}'
              .format(epoch+1, num_epochs, loss.item()))

model.eval()
results = model(Variable(x_test))

# display the results
for sample, result in zip(samples['val'], results):
    sample.pred = 1 if result > 0.5 else 0
    print(sample.fname, 'with label', sample.label, 'is predicted as', sample.pred)

Epoch[1000/50000], loss: 0.668511
Epoch[2000/50000], loss: 0.364247
Epoch[3000/50000], loss: 0.014159
Epoch[4000/50000], loss: 0.004353
Epoch[5000/50000], loss: 0.002321
Epoch[6000/50000], loss: 0.001524
Epoch[7000/50000], loss: 0.001114
Epoch[8000/50000], loss: 0.000867
Epoch[9000/50000], loss: 0.000704
Epoch[10000/50000], loss: 0.000589
Epoch[11000/50000], loss: 0.000504
Epoch[12000/50000], loss: 0.000439
Epoch[13000/50000], loss: 0.000388
Epoch[14000/50000], loss: 0.000347
Epoch[15000/50000], loss: 0.000313
Epoch[16000/50000], loss: 0.000285
Epoch[17000/50000], loss: 0.000261
Epoch[18000/50000], loss: 0.000240
Epoch[19000/50000], loss: 0.000222
Epoch[20000/50000], loss: 0.000207
Epoch[21000/50000], loss: 0.000193
Epoch[22000/50000], loss: 0.000181
Epoch[23000/50000], loss: 0.000171
Epoch[24000/50000], loss: 0.000161
Epoch[25000/50000], loss: 0.000152
Epoch[26000/50000], loss: 0.000145
Epoch[27000/50000], loss: 0.000138
Epoch[28000/50000], loss: 0.000131
Epoch[29000/50000], loss: 0.0

In [None]:
# load pre-trained ResNet for feature extraction
# add a few FC for traning (fix the weights of ResNet)
# test and display the results
import os
import time
import torch.nn as nn
import torch
import torchvision.transforms as transforms
from PIL import Image
from matplotlib import pyplot as plt
import torchvision.models as models
try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    from torch.utils.model_zoo import load_url as load_state_dict_from_url

# define device, gpu or cpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# dataset construction
# This time we don't have to extract the features. Deep neural networks usually take the images as the input directly.

norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

inference_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])
def img_transform(img_rgb, transform=None):
    """
    transform images
    :param img_rgb: PIL Image
    :param transform: torchvision.transform
    :return: tensor
    """

    if transform is None:
        raise ValueError("there is no transform")

    img_t = transform(Image.fromarray(img_rgb))
    return img_t

# load data
train_imgs = [img_transform(sample.img, inference_transform) for sample in samples['train']]
train_imgs = torch.stack(train_imgs, dim=0)


test_imgs = [img_transform(sample.img, inference_transform) for sample in samples['val']]
test_imgs = torch.stack(test_imgs, dim=0)


# define a classifier following the network
class classification_head(nn.Module):
	def __init__(self,in_ch,num_classes):
		super(classification_head,self).__init__()
		self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
		self.fc = nn.Linear(in_ch,num_classes)

	def forward(self, x):
		x = self.avgpool(x)
		x = torch.flatten(x, 1)
		x = self.fc(x)
		return x


# define ResNet model.
# a simple tips here: you can follow the execute sequence in forward() to understand what a network is.
# For original ResNet, its final layer will output 1000 class number. Here, we change it for our task.
class Net(nn.Module):
	def __init__(self, num_class,pretrained=True):
		super(Net,self).__init__()
		model = models.resnet50(pretrained=pretrained)
		self.backbone =  nn.Sequential(*list(model.children())[:-2]) #remove the last Avgpool and Fully Connected Layer
		self.classification_head = classification_head(2048, num_class)

	def forward(self,x):
		x = self.backbone(x)
		output = self.classification_head(x)
		return output


# creat a model
model = Net(1)

# fix the weights of ResNet except the last layer. This is because the training set is small.
for p in model.backbone.parameters():
  p.requires_grad = False

model.to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

model.train()

# training
num_epochs = 200
for epoch in range(num_epochs):
    if torch.cuda.is_available():
        inputs = Variable(train_imgs).cuda()
        target = Variable(y_train).cuda()
    else:
        inputs = Variable(train_imgs)
        target = Variable(y_train)

    # forward
    out = model(inputs)
    loss = criterion(out, target)

    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 20 == 0:
        print('Epoch[{}/{}], loss: {:.6f}'
              .format(epoch+1, num_epochs, loss.item()))

#testing
model.eval()
results = model(Variable(test_imgs))

# display the results
for sample, result in zip(samples['val'], results):
    sample.pred = 1 if result > 0.5 else 0
    print(sample.fname, 'with label', sample.label, 'is predicted as', sample.pred)

Epoch[20/200], loss: 0.025402
Epoch[40/200], loss: 0.004448
Epoch[60/200], loss: 0.001825
Epoch[80/200], loss: 0.001019
Epoch[100/200], loss: 0.000634
Epoch[120/200], loss: 0.000418
Epoch[140/200], loss: 0.000286
Epoch[160/200], loss: 0.000200
Epoch[180/200], loss: 0.000143
Epoch[200/200], loss: 0.000103
2.jpg with label 0 is predicted as 0
7.jpg with label 0 is predicted as 0
10.jpg with label 1 is predicted as 1
11.jpg with label 0 is predicted as 0
13.jpg with label 1 is predicted as 1



```
以下内容为多标签分类任务的代码
```

In [None]:
test = os.path.join(workspace, "Figures_T7/1.jpg")
import cv2
import numpy as np
from skimage.feature import hog
from matplotlib import pyplot as plt

# Extract the feature of images
def gray_hist(img : np.array, norm : bool = True) -> np.array:
  if img.shape[-1] == 3:
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

  hist = np.array([len(img[img==i]) for i in range(256)])
  if norm:
    hist = hist / np.size(img)
  return hist

def color_hist(img : np.array, norm : bool = True) -> np.array:
  return np.array([gray_hist(img[..., i], norm) for i in range(3)])


def get_feat(img):
  return hog(img, channel_axis=-1)

img = cv2.imread(test)[..., ::-1] # To BGR channel
print(get_feat(img))

[0.20252343 0.02968957 0.03978538 ... 0.04491646 0.0716226  0.03410894]


In [2]:
import json
import os
root = os.path.join(workspace, "clothes")
meta_path = os.path.join(root, "meta.json")
meta = json.load(open(meta_path,'r'))

classes = meta['classes']
print(f"Classes : {classes}")

Classes : ['black', 'blue', 'brown', 'green', 'white', 'red', 'dress', 'pants', 'shorts', 'shoes', 'shirt']


In [3]:
import os

def list_dir_in_directory(directory):
  dirs = os.listdir(directory)
  d_list = [d for d in dirs if os.path.isdir(os.path.join(directory, d))]
  return d_list

d_list = list_dir_in_directory(root)
img_sets = {}

for d in d_list:
  img_sets[d] = []


images_info = meta['images']
for i, img_info in enumerate(images_info):
  # path = img_info['path']
  cls = img_info['class']
  # label = img_info['label']

  cls = cls[0] + "_" + cls[1]
  img_sets[cls].append(i) # 往对应的类别list中，添加(在整个images_info素材集中的)下标


import random
# 从每个类中随机挑选20个元素作为样本(训练+测试)
for cls in img_sets.keys():
  temp = random.sample(img_sets[cls], 20)
  img_sets[cls].clear()
  img_sets[cls] = temp


In [4]:
# print(img_sets)
for cls in img_sets.keys():
  lens = len(img_sets[cls])
  print(f"{cls} : {lens}")

green_shirt : 20
white_pants : 20
green_shoes : 20
red_dress : 20
white_shoes : 20
white_shorts : 20
red_shoes : 20
white_dress : 20
red_pants : 20
green_shorts : 20
blue_dress : 20
green_pants : 20
blue_pants : 20
brown_pants : 20
black_shorts : 20
blue_shoes : 20
blue_shorts : 20
brown_shorts : 20
blue_shirt : 20
brown_shoes : 20
black_pants : 20
black_dress : 20
black_shoes : 20
black_shirt : 20


In [5]:
import os
import cv2

class Sample:
  def __init__(self, img=None, classPair=None, labelList=None, feat=None) -> None:
      # self.path = path
      self.img = img
      self.classColor = classPair[0]
      self.classCos = classPair[1]

      self.labelColor = labelList[:6]
      self.labelCos = labelList[6:]
      self.feat = feat
      self.pred = None

samples = {'train': [], 'val': []}

# 从挑选好的样本集中(img_sets)，每类前17个样本用于训练，后3个用于测试
for cls in img_sets.keys():
  smp_list = img_sets[cls]
  for i, idx in enumerate(smp_list):
    img_info = images_info[idx]

    fpath = os.path.join(root, img_info['path'])
    if not os.path.isfile(fpath):
      raise ValueError('%s not found' % fpath)
    else:
      img = cv2.imread(fpath, cv2.IMREAD_COLOR)[..., ::-1] # BGR to RGB
      if len(samples['train']) == 0 and len(samples['val'])==0:
        H, W, C = img.shape
      else:
        cv2.resize(img, (W, H))

      if i < 17:
        samples['train'].append(Sample(img=img, classPair=img_info['class'], labelList=img_info['label']))
      else:
        samples['val'].append(Sample(img=img, classPair=img_info['class'], labelList=img_info['label']))

# print(samples)
# print(samples['train'][0].__dict__)

In [14]:
import torch

color_label = [sample.labelColor for sample in samples['train']]
cos_label = [sample.labelCos for sample in samples['train']]

# 将独热向量转换为类别索引
y_color_train = torch.argmax(torch.tensor(color_label), dim=1).to(torch.long)
y_cos_train = torch.argmax(torch.tensor(cos_label), dim=1).to(torch.long)

# print(y_color_train)
# print(y_cos_train)
print(y_color_train.shape)
print(y_cos_train.shape)

torch.Size([408])
torch.Size([408])


In [7]:
import os
import torch.nn as nn
import torch
import torchvision.transforms as transforms
from PIL import Image
from matplotlib import pyplot as plt
import torchvision.models as models

norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

inference_transform = transforms.Compose([
  transforms.Resize((256, 256)),
  transforms.ToTensor(),
  transforms.Normalize(norm_mean, norm_std),
])

def img_transform(img_rgb, transform=None):
  """
  transform images
  :param img_rgb: PIL Image
  :param transform: torchvision.transform
  :return: tensor
  """

  if transform is None:
    raise ValueError("there is no transform")

  img_t = transform(Image.fromarray(img_rgb))
  return img_t

# 加载数据
train_imgs = [img_transform(sample.img, inference_transform) for sample in samples['train']]
train_imgs = torch.stack(train_imgs, dim=0)


test_imgs = [img_transform(sample.img, inference_transform) for sample in samples['val']]
test_imgs = torch.stack(test_imgs, dim=0)


In [8]:
import torchvision.models as models
import torch
import torch.nn as nn
import torch.optim as optim
from collections import OrderedDict


# 设备
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# 多任务学习框架
class Net(nn.Module):
  def __init__(self, num_color_classes, num_cos_classes):
      super().__init__()
      self.net = models.resnet18(pretrained=True) # 使用在ImageNet上预训练权重的ResNet18
      self.n_features = self.net.fc.in_features  # 得到最后一层的输入特征个数
      self.net.fc = nn.Identity() # 将最后一层替换，不做任何操作

      self.net.fc1 = nn.Sequential(OrderedDict(
          [('linear', nn.Linear(self.n_features,self.n_features)),
          ('relu1', nn.ReLU()),
          ('final', nn.Linear(self.n_features, num_color_classes))])) # 根据衣物颜色进行分类

      self.net.fc2 = nn.Sequential(OrderedDict(
          [('linear', nn.Linear(self.n_features,self.n_features)),
          ('relu1', nn.ReLU()),
          ('final', nn.Linear(self.n_features, num_cos_classes))]))  # 根据衣物种类进行分类

      # 冻结 ResNet 除了最后一层的权重参数
      for param in self.net.parameters():
          param.requires_grad = False  # 先将所有参数设置为不需要梯度

      # 仅允许最后的全连接层（自定义部分）进行训练
      for param in self.net.fc1.parameters():
          param.requires_grad = True
      for param in self.net.fc2.parameters():
          param.requires_grad = True


  def forward(self, x):
      features = self.net(x)
      color_head = self.net.fc1(features)
      cos_head = self.net.fc2(features)
      return color_head, cos_head

model = Net(num_color_classes=6, num_cos_classes=5)
model.to(device)

# 损失设计
color_criterion = nn.CrossEntropyLoss()
cos_criterion = nn.CrossEntropyLoss()

# 优化器
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 85.3MB/s]


In [15]:
import torch.nn as nn
import torch
from torch.autograd import Variable

model.train()

# 训练
num_epochs = 100
for epoch in range(num_epochs):
  if torch.cuda.is_available():
    inputs = Variable(train_imgs).cuda()
    color_target = Variable(y_color_train).cuda()
    cos_target = Variable(y_cos_train).cuda()
  else:
    inputs = Variable(train_imgs)
    color_target = Variable(y_color_train)
    cos_target = Variable(y_cos_train)


  # forward
  color_out, cos_out = model(inputs)
  loss_color = color_criterion(color_out, color_target)
  loss_cos = cos_criterion(cos_out, cos_target)

  # total loss
  total_loss = loss_color + loss_cos

  # backward
  optimizer.zero_grad()
  total_loss.backward()
  optimizer.step()

  if (epoch+1) % 10 == 0:
    print(f'Epoch[{epoch+1}/{num_epochs}] Color Loss: {loss_color.item()}, Type Loss: {loss_cos.item()}, Total Loss: {total_loss.item()}')

Epoch[10/100] Color Loss: 0.6639485359191895, Type Loss: 0.3926098644733429, Total Loss: 1.05655837059021
Epoch[20/100] Color Loss: 0.21719376742839813, Type Loss: 0.11930090188980103, Total Loss: 0.33649468421936035
Epoch[30/100] Color Loss: 0.08084291219711304, Type Loss: 0.04548873007297516, Total Loss: 0.1263316422700882
Epoch[40/100] Color Loss: 0.0349988155066967, Type Loss: 0.019477052614092827, Total Loss: 0.05447586625814438
Epoch[50/100] Color Loss: 0.018284335732460022, Type Loss: 0.010516917333006859, Total Loss: 0.02880125306546688
Epoch[60/100] Color Loss: 0.011568279005587101, Type Loss: 0.006812244653701782, Total Loss: 0.01838052272796631
Epoch[70/100] Color Loss: 0.008350650779902935, Type Loss: 0.004965667612850666, Total Loss: 0.013316318392753601
Epoch[80/100] Color Loss: 0.006524668075144291, Type Loss: 0.0038963970728218555, Total Loss: 0.010421065613627434
Epoch[90/100] Color Loss: 0.005334836430847645, Type Loss: 0.0031968243420124054, Total Loss: 0.00853166077

In [22]:
import numpy as np
import torch
from sklearn.metrics import f1_score

# 测试数据集
y_color_test = np.array([sample.labelColor for sample in samples['val']])
y_color_test = np.argmax(y_color_test, axis=1)
y_cos_test = np.array([sample.labelCos for sample in samples['val']])
y_cos_test = np.argmax(y_cos_test, axis=1)

# 在训练完成后，使用模型进行预测
model.eval()
with torch.no_grad():
  outputs_color, outputs_cos = model(test_imgs)
  predicted_color = torch.sigmoid(outputs_color).cpu().numpy()
  predicted_cos = torch.sigmoid(outputs_cos).cpu().numpy()

  # 基于预测结果计算F1-score
  y_color_pred = np.argmax(predicted_color, axis=1)
  y_cos_pred = np.argmax(predicted_cos, axis=1)

  f1_color = f1_score(y_color_test, y_color_pred, average='weighted')
  f1_cos = f1_score(y_cos_test, y_cos_pred, average='weighted')

  print(f'F1-score for color classification: {f1_color}')
  print(f'F1-score for type classification: {f1_cos}')

F1-score for color classification: 0.8897808321946253
F1-score for type classification: 0.944295900178253
