## 导入包

In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision
from copy import deepcopy
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torch.utils.data.sampler import RandomSampler
import matplotlib.pyplot as plt
import cv2

## 配置

In [2]:
# 需要修改成的数据大小
dsize = 128
# 最大学习率(优化器)
max_lr = 0.012 
# 正则项权值的衰减(优化器)
weight_decay = 1e-4 
# 一般0.9 (优化器)
momentum = 0.9 
# 最小学习率(退火学习)
min_lr = 0.00001
# 设置GPU运行
device = torch.device('cuda')
# 退火学习，下降次数
scheduler_step = 40
# 打包个数
batch_size = 5
# 运行批次
epoch_Num = 100

## 文件路径

In [3]:
src = './competition_data'
save_weight_path = src + '/weight'
train_image_dir = src + '/train/images'
train_mask_dir = src + '/train/masks'
test_image_dir = src + '/test/images'

## 获取数据id

In [4]:
depths = pd.read_csv(src + '/train.csv')
fold = (list(range(5))*1000)[:len(depths)] # [0,1,2,3,4,0,1,2...]
depths['fold'] = fold # 将数据标记为五份
all_ids = depths['id'].values # 取出所有id


## 图片id分为五类

In [5]:
fold = []
for i in range(5):
  tem = depths.loc[depths['fold']==i,'id'].values
  fold.append(tem)

## 获取图片（输入，输出）

In [6]:
def get_train_images(ids):
  images = []
  masks = []
  for id in ids:
    image = plt.imread(train_image_dir+'/'+id+'.png')[:,:,0:3]
    mask = plt.imread(train_mask_dir+'/'+id+'.png')
    masks.append(mask)
    images.append(image)
  return images,masks

## 创建数据集类型
数据集类型有三个常用魔法方法
1. 初始化（获取参数）
2. 获取数据（数据处理，返回数据）
3. 获取数据集长度（返回数据集长度）

In [7]:
# 训练数据集
class TensorDataset(Dataset):
  def __init__(self, data, target):
    self.data = data
    self.target = target

  def __getitem__(self, index):
    # 改变尺寸，并且变为张量
    resolved_data = torch.Tensor(
      cv2.resize(self.data[index], dsize=(dsize,dsize))
    ).reshape(3,dsize,dsize)
    # 改变尺寸，并且变为张量
    resolved_target = torch.Tensor(
      cv2.resize(self.target[index], dsize=(dsize,dsize))
    ).reshape(1,dsize,dsize)
    # 返回
    # (3,128,128),(1,128,128)
    return resolved_data,resolved_target

  def __len__(self):
    return len(self.data)

## 我的模型(全连接)

In [8]:
class encode_model(nn.Module):
  def __init__(self,in_channels,out_channels,kernel_size,stride=4,padding=0) -> None:
    super().__init__()

    self.layer = nn.Sequential(
      nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=kernel_size,stride=stride,padding=padding),
      nn.BatchNorm2d(num_features=out_channels),
      nn.ReLU(inplace=True)
    )
  def forward(self,input):
    return self.layer(input)

class decode_model(nn.Module):
  def __init__(self,in_channels,out_channels,scale) -> None:
    super().__init__()

    self.layer = nn.Sequential(
      nn.ConvTranspose2d(in_channels=in_channels,out_channels=out_channels,kernel_size=scale,stride=scale,padding=0),
      nn.ReLU(inplace=True)
    )
  def forward(self,input):
    return self.layer(input)


class test_model(nn.Module):
  def __init__(self) -> None:
    super().__init__()
    # 编码
    self.layer_0 = encode_model(in_channels=3,out_channels=8,kernel_size=4,stride=4)
    self.layer_1 = encode_model(in_channels=8,out_channels=64,kernel_size=3,stride=4)
    self.layer_2 = encode_model(in_channels=64,out_channels=128,kernel_size=3,stride=4)
    # 全连接
    self.layer_3 = nn.Sequential(
      nn.Linear(in_features=128*2*2,out_features=256),
      nn.ReLU(inplace=True),
      nn.Linear(in_features=256,out_features=512),
      nn.ReLU(inplace=True),
      nn.Linear(in_features=512,out_features=2048),
      nn.ReLU(inplace=True)
      )
    # 解码
    self.layer_4 = decode_model(in_channels=512,out_channels=256,scale=4)
    self.layer_5 = decode_model(in_channels=256,out_channels=128,scale=4)
    self.layer_6 = decode_model(in_channels=128,out_channels=64,scale=4)
    # 最后一层
    self.layer_7 = nn.Sequential(
      nn.Conv2d(64, 32, kernel_size=3, padding=1, bias=False),
      nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
      nn.Conv2d(in_channels=64,out_channels=1,stride=1,kernel_size=1)
    )

  def forward(self,input):
    x0 = self.layer_0(input) # torch.Size([12, 8, 32, 32])
    x1 = self.layer_1(x0) # torch.Size([12, 64, 8, 8])
    x2 = self.layer_2(x1) # torch.Size([12, 128, 2, 2])

    x3 = self.layer_3(x2.reshape(-1,128*2*2)).reshape(-1,512,2,2) # torch.Size([12, 256, 8, 8])

    x4 = self.layer_4(x3) # torch.Size([12, 256, 8, 8])
    x5 = self.layer_5(x4) # torch.Size([12, 128, 32, 32])
    x6 = self.layer_6(x5) # torch.Size([12, 64, 128, 128])
    output = self.layer_7(x6)# torch.Size([12, 1, 128, 128])

    return output

In [9]:
# inp = torch.Tensor(12,3,128,128)
# models = test_model()
# outp = models(inp)
# outp.size()

## 获取模型

In [10]:
def get_model(key):
  model = test_model()
  model.load_state_dict(torch.load("./weight/" + key+ ".pth"))
  return model

## 创建模型对象

In [11]:
salt = test_model()
# salt = get_model("epoch_best")

# GPU 运算
salt.to(device=device)

test_model(
  (layer_0): encode_model(
    (layer): Sequential(
      (0): Conv2d(3, 8, kernel_size=(4, 4), stride=(4, 4))
      (1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (layer_1): encode_model(
    (layer): Sequential(
      (0): Conv2d(8, 64, kernel_size=(3, 3), stride=(4, 4))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (layer_2): encode_model(
    (layer): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(4, 4))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (layer_3): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=512, bias=True)
    (3): ReLU(inplace=True)
    (4): Linear(in_features=512, out_features=2048, b

## 进行一次训练

In [12]:

optimizer = torch.optim.SGD(salt.parameters(), lr=max_lr, momentum=momentum, weight_decay=weight_decay)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, scheduler_step, min_lr)

def train(loader_data,model):
  running_loss = 0
  model.train()
  for input,mask in loader_data:
    input, mask = input.to(device), mask.to(device)
    
    optimizer.zero_grad()# 梯度初始化为零
    # 使用with，会自动关闭梯度计算
    # 设置梯度可算
    with torch.set_grad_enabled(True):
      logit = model(input)# 进行一次计算
      loss = nn.BCEWithLogitsLoss()(logit.squeeze(),mask.squeeze())# 计算误差
      loss.backward()# 反馈
      optimizer.step()# 进行一次参数更新
    running_loss += loss.item()*input.size()[0]# 累计平均误差
  epoch_loss = running_loss / len(loader_data)# 计算平均误差
  return epoch_loss

## 进行一次测试

In [13]:
def test(loader_test,model):
  running_loss = 0.0
  data_size = len(loader_test)
  # 测试
  model.eval()
  for input, mask in loader_test:
    input, mask = input.to(device), mask.to(device)
    with torch.set_grad_enabled(False):
      output = model(input)
      loss = nn.BCEWithLogitsLoss()(output.squeeze(), mask.squeeze())
    running_loss += loss.item() * input.size(0)
  return running_loss/data_size

## 主函数部分

In [14]:
for idx in range(5):
  if idx == 1:
    break

  optimizer = torch.optim.SGD(salt.parameters(), lr=max_lr, momentum=momentum, weight_decay=weight_decay)
  lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, scheduler_step, min_lr)

  # setdiff1d 取不同的元素
  train_id = np.setdiff1d(all_ids, fold[idx])
  val_id = fold[idx]
  # 取出数据
  X_train, y_train = get_train_images(train_id)
  X_val, y_val = get_train_images(val_id)
  # 制作数据集
  train_data = TensorDataset(X_train, y_train)
  val_data = TensorDataset(X_val, y_val)
  # 打乱，制作可迭代数据集
  train_loader = DataLoader(train_data,shuffle=True,batch_size=batch_size) 
  val_loader = DataLoader(val_data,shuffle=False,batch_size=batch_size) 

  # num_snapshot = 0
  lowest_loss = 10000
  # last_train_loss = 0.0
# 训练
  for epoch_ in range(epoch_Num): # 100
    train_loss = train(train_loader, salt)
    last_train_loss = train_loss
    val_loss = test(val_loader, salt)
    # 每训练一次调整学习率（退火学习）
    if (epoch_ < scheduler_step-1):
      lr_scheduler.step()

    #
    if epoch_ % 10 == 0:
      torch.save(salt.state_dict(), "./test_weight/"+ "epoch_" + str(epoch_) + '.pth')

    if lowest_loss > val_loss:
      lowest_loss = val_loss
      best_param = salt.state_dict()
      torch.save(salt.state_dict(), "./test_weight/"+ "epoch_best" + '.pth')

    # 调节一个
    # if (epoch_ + 1) % scheduler_step == 0:
      # torch.save(best_param, "./weight/" + str(idx) +"_"+ str(num_snapshot) + '.pth')
      # 重置优化器，以及退火学习
      # optimizer = torch.optim.SGD(salt.parameters(), lr=max_lr, momentum=momentum, weight_decay=weight_decay)
      # lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, scheduler_step, min_lr)
      # num_snapshot += 1
      # lowest_loss = 10000

    print('epoch: {} train_loss: {:.4f} val_loss: {:.4f}  lr: {:.6f}'.format(epoch_ + 1, train_loss*100, val_loss*100, lr_scheduler.get_last_lr()[0]))

epoch: 1 train_loss: 283.9057 val_loss: 275.2072  lr: 0.011982
epoch: 2 train_loss: 281.6325 val_loss: 275.1507  lr: 0.011926
epoch: 3 train_loss: 281.4827 val_loss: 275.8826  lr: 0.011834
epoch: 4 train_loss: 281.5008 val_loss: 275.0802  lr: 0.011707
epoch: 5 train_loss: 281.6345 val_loss: 275.2994  lr: 0.011544
epoch: 6 train_loss: 281.4724 val_loss: 275.5171  lr: 0.011347
epoch: 7 train_loss: 281.5463 val_loss: 275.2286  lr: 0.011117
epoch: 8 train_loss: 281.4675 val_loss: 275.0744  lr: 0.010855
epoch: 9 train_loss: 281.5360 val_loss: 275.4124  lr: 0.010564
epoch: 10 train_loss: 281.6531 val_loss: 275.3764  lr: 0.010244


In [None]:
# img = plt.imread(train_image_dir+"/" +all_ids[2] + ".png")

# plt.imshow(img,"./sdfds.png") 3.9110 3.7660
# plt.show()

## torch 保存参数

|操作|函数|
|-|-|
|保存|torch.save(model.state_dict(),path)|
|读取|model.load_state_dict(torch.load(path))|

## torch 保存模型

|操作|函数|
|-|-|
|保存|torch.save(model,path)|
|读取|model = torch.load(path)|