In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision
from copy import deepcopy
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torch.utils.data.sampler import RandomSampler
import matplotlib.pyplot as plt

### GPU运行工具，后面会用

In [2]:
# Compute device used by later cells: the GPU when CUDA is usable, otherwise the
# CPU, so the notebook still runs end-to-end on a machine without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### 定义根目录，利用根目录找到其他目录

In [5]:
# Root of the competition data; every other location is derived from it.
src = './competition_data'
# Directory for model weights.
save_weight_path = f'{src}/weight'
# Training images and their masks.
train_image_dir, train_mask_dir = f'{src}/train/images', f'{src}/train/masks'
# Test images.
test_image_dir = f'{src}/test/images'

1. 读取depth数据
2. 按z的大小排序，替换
3. 删除z列，替换
4. 按排序后的顺序循环分配fold编号（0~4），把数据分成5组


In [6]:
# Load the depth table, order its rows by the `z` column, then discard `z` itself.
depths = (
    pd.read_csv(f'{src}/depths.csv')
    .sort_values('z')
    .drop(columns='z')
)
# Deal the z-sorted rows round-robin into folds 0..4 (the k-th row gets fold k % 5).
depths['fold'] = [row_pos % 5 for row_pos in range(len(depths))]

1. 读取train的数据
2. 合并数据
3. 创建空列表dist
4. 读取图片，灰度图（101，101）
5. 计算图片不同像素值个数
6. 存入dist

In [7]:
# Load the training table and join it with `depths`; with no `on=` argument,
# pandas merges on the columns the two frames have in common.
train_df = pd.read_csv(src + '/train.csv')
train_df = train_df.merge(depths)

# For every training image, count how many distinct pixel values it contains.
# The f-string substitutes the image id into the file name. The loop variable
# lives inside the comprehension and is not called `id`, so the built-in `id()`
# is not shadowed for the rest of the notebook.
dist = [
  np.unique(plt.imread(f'{train_image_dir}/{image_id}.png')).shape[0]
  for image_id in train_df['id'].values
]
train_df['unique_pixels'] = dist

1. 获取id列表
2. 根据fold对数据进行分组（5组）

In [8]:
# All training ids, plus one array of ids for each fold number 0..4.
all_id = train_df['id'].values
fold = [
  train_df.loc[train_df['fold'] == fold_no, 'id'].values
  for fold_no in range(5)
]

## 扩张
|函数|作用|描述|
|-|-|-|
|ConvTranspose2d|扩张，倍数与stride有关|输入有限制|

In [9]:
# Demo: a transposed convolution with kernel 2 / stride 2 doubles height and width
# while mapping 64 channels to 32.
conv_transpose = nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=2, stride=2)
# torch.randn gives initialised values; torch.Tensor(*sizes) returns uninitialised
# memory. The name also avoids shadowing the built-in `input`.
demo_input = torch.randn(64, 64, 32, 32)
print('input', demo_input.shape)
output = conv_transpose(demo_input)
print('output', output.shape)

input torch.Size([64, 64, 32, 32])
output torch.Size([64, 32, 64, 64])


## 归一化
|函数|作用|描述|
|-|-|-|
|BatchNorm2d|归一化，防止梯度消失或者梯度过大|输入有限制（通道数必须等于num_features），且不改变输出维度|

In [10]:
# Demo: batch normalisation over 64 channels leaves the tensor shape unchanged.
batch_norm = nn.BatchNorm2d(num_features=64)
# torch.randn gives initialised values; torch.Tensor(*sizes) returns uninitialised
# memory, which would also feed arbitrary values into the layer's running statistics.
# The name also avoids shadowing the built-in `input`.
demo_input = torch.randn(64, 64, 32, 32)
print('input', demo_input.shape)
output = batch_norm(demo_input)
print('output', output.shape)

input torch.Size([64, 64, 32, 32])
output torch.Size([64, 64, 32, 32])


## 最大池化
|函数|作用|描述|
|-|-|-|
|MaxPool2d|最大池化，缩小倍数与stride有关，防止过拟合|输入无限制|

In [11]:
# Demo: 2x2 max pooling with stride 2 halves height and width; channels are untouched.
# The layer is named for what it is (it was previously bound to the name `BatchNorm2d`).
max_pool = nn.MaxPool2d(kernel_size=2, stride=2, dilation=1)
# torch.randn gives initialised values; torch.Tensor(*sizes) returns uninitialised
# memory. The name also avoids shadowing the built-in `input`.
demo_input = torch.randn(64, 64, 32, 32)
print('input', demo_input.shape)
output = max_pool(demo_input)
print('output', output.shape)

input torch.Size([64, 64, 32, 32])
output torch.Size([64, 64, 16, 16])


## 卷积
|函数|作用|描述|
|-|-|-|
|Conv2d|卷积层，增加数据深度，<br>相比于全连接参数更少，适合更深的模型|输入有限制，<br>参数调节输出深度，以及长度宽度|

In [12]:
# Demo: a 7x7 convolution with stride 2 and padding 2 maps 1 channel to 8 and
# shrinks a 32x32 input to 15x15: floor((32 + 2*2 - 7) / 2) + 1 = 15.
conv = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=7, stride=2, padding=2)
# torch.randn gives initialised values; torch.Tensor(*sizes) returns uninitialised
# memory. The name also avoids shadowing the built-in `input`.
demo_input = torch.randn(64, 1, 32, 32)
print('input', demo_input.shape)
output = conv(demo_input)
print('output', output.shape)

input torch.Size([64, 1, 32, 32])
output torch.Size([64, 8, 15, 15])


## 全连接
|函数|作用|描述|
|-|-|-|
|Linear|全连接层，所有变量都与参数相连接，<br>拟合能力强，但是运算量大|输入的最后一维必须等于in_features，<br>调节输出长度|

In [18]:
# Demo: a fully connected layer mapping a 65536-long vector to 3 outputs.
linear = nn.Linear(in_features=64*32*32, out_features=3)
# reshape(-1) flattens the whole (64, 1, 32, 32) tensor, batch dimension included,
# into a single vector of 64*1*32*32 = 65536 values, matching in_features.
# torch.randn gives initialised values; torch.Tensor(*sizes) returns uninitialised
# memory. The name also avoids shadowing the built-in `input`.
demo_input = torch.randn(64, 1, 32, 32).reshape(-1)
print('input', demo_input.shape)
output = linear(demo_input)
print('output', output.shape)

input torch.Size([65536])
output torch.Size([3])


## 激活函数
|函数|作用|描述|
|-|-|-|
|ReLU|激活函数<br>是神经网络能够拟合非线性的关键<br>不改变数据维度|输入无限制<br>输出无限制|

In [31]:
# Demo: ReLU is applied element-wise, so the output has the same shape as the input.
relu = nn.ReLU()
# torch.randn gives initialised values; torch.Tensor(*sizes) returns uninitialised
# memory. The name also avoids shadowing the built-in `input`.
demo_input = torch.randn(64, 1, 32, 32)
print('input', demo_input.shape)
output = relu(demo_input)
print('output', output.shape)

input torch.Size([64, 1, 32, 32])
output torch.Size([64, 1, 32, 32])


## 上采样
|函数|作用|描述|
|-|-|-|
|Upsample|上采样层<br>和转置卷积类似<br>用于扩张|scale_factor决定扩大倍数|
|UpsamplingBilinear2d|双线性上采样||
|UpsamplingNearest2d|最近邻上采样||

In [4]:

# Demo: bilinear upsampling with scale_factor=2 doubles height and width; the
# channel count is unchanged.
upsample = nn.UpsamplingBilinear2d(scale_factor=2)
# torch.randn gives initialised values; torch.Tensor(*sizes) returns uninitialised
# memory. The name also avoids shadowing the built-in `input`.
demo_input = torch.randn(64, 1, 32, 32)
print('input', demo_input.shape)
output = upsample(demo_input)
print('output', output.shape)

input torch.Size([64, 1, 32, 32])
output torch.Size([64, 1, 64, 64])


## UNet网络设计思路
#### 1. 我们有两个输出<br>
+ output_1 (3,64,64,64)<br>
+ output_2 (3,128,32,32)<br>
+ 可以看到output_2通道更深，但是切面更小，是更后面层的输出

#### 2. 如何将两个输出一起放入卷积层处理？
+ 用ConvTranspose2d对output_2进行扩张，
+ 让其切面大小与output_1一致，再进行拼接

#### 3. 再对拼接后的数据进行卷积


In [30]:
# Two example feature maps in (batch, channels, height, width) layout: output_2 has
# twice the channels and half the spatial size of output_1.
# torch.randn gives initialised values; torch.Tensor(*sizes) returns uninitialised memory.
output_1 = torch.randn(3,64,64,64)
output_2 = torch.randn(3,128,32,32)

# Define the network model
class Unet(nn.Module):
  """Minimal U-Net style fusion step.

  Upsamples the deeper feature map with a transposed convolution, concatenates it
  with the shallower feature map along the channel axis, then convolves the result.
  """

  def __init__(self) -> None:
    super().__init__()

    # Transposed conv, 128 -> 64 channels; kernel 4 / stride 2 / padding 1 doubles H and W.
    self.layer1 = nn.ConvTranspose2d(in_channels=128,out_channels=64,kernel_size=4,stride=2,padding=1)
    # Conv over the 128 concatenated channels -> 256 channels, followed by ReLU;
    # kernel 5 / stride 2 / padding 2 halves an even H and W.
    self.layer2 = nn.Sequential(
      nn.Conv2d(in_channels=128,out_channels=256,kernel_size=5,stride=2,padding=2),
      nn.ReLU()
    )

  def forward(self,in_1,in_2):
    """Fuse a shallow and a deep feature map.

    Args:
      in_1: shallower feature map, shape (N, 64, 2H, 2W).
      in_2: deeper feature map, shape (N, 128, H, W).

    Returns:
      Tensor of shape (N, 256, H, W).
    """
    x1 = self.layer1(in_2) # e.g. (3, 128, 32, 32) -> (3, 64, 64, 64)
    # Concatenate with the in_1 argument (not a notebook-level global) along channels.
    x1 = torch.cat((x1,in_1),dim=1) # e.g. -> (3, 128, 64, 64)
    x2 = self.layer2(x1) # e.g. -> (3, 256, 32, 32)
    return x2

# Instantiate the model, pass both example feature maps through it, and report
# the shape of the result.
my_unet = Unet()
output = my_unet(in_1=output_1, in_2=output_2)
print('output', output.shape)

output torch.Size([3, 256, 32, 32])
