In [2]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision
from copy import deepcopy
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torch.utils.data.sampler import RandomSampler
import matplotlib.pyplot as plt
import cv2

In [29]:
# Side length, in pixels, of the square that every image and mask is resized to
dsize = 128

In [3]:
# Dataset root and the directories derived from it
src = './competition_data'
save_weight_path = f'{src}/weight'
train_image_dir = f'{src}/train/images'
train_mask_dir = f'{src}/train/masks'
test_image_dir = f'{src}/test/images'

# Order the rows by z, discard the z column, then label the rows
# 0, 1, 2, 3, 4, 0, 1, ... so consecutive rows in z order land in different folds
depths = pd.read_csv(f'{src}/depths.csv').sort_values('z').drop(columns='z')
depths['fold'] = [row % 5 for row in range(depths.shape[0])]

In [4]:
# Training table: train.csv joined with the fold assignment stored in `depths`
train_df = pd.read_csv(src + '/train.csv')
train_df = train_df.merge(depths)

# Count the distinct pixel values of every training image.
# The loop variable is deliberately not named `id`: a module-level `for id in ...`
# would rebind the builtin id() for every later cell of the notebook.
dist = []
for image_id in train_df.id.values:
  # the f-string interpolates the variables in braces into the path
  img = plt.imread(f'{train_image_dir}/{image_id}.png')
  dist.append(np.unique(img).shape[0])
train_df['unique_pixels'] = dist

# Ids of all training images
all_id = train_df['id'].values
# Ids split into the 5 folds: fold[i] holds the ids whose 'fold' column equals i
fold = [train_df.loc[train_df['fold'] == i, 'id'].values for i in range(5)]

In [5]:
# The ids were split into five folds; any one fold can serve as the held-out set
# while the remaining ids are used for training. Here fold 0 is held out.
val_id = fold[0]

# Training ids = every id that is not in the held-out fold
train_id = np.setdiff1d(all_id, val_id)

In [25]:
# Read one image file as a 2-D float32 array scaled to [0, 1]
def _read_unit_gray(path):
  pixels = plt.imread(path)
  # plt.imread yields (H, W) for grayscale and (H, W, C) for colour files.
  # Keep channel 0 only when a channel axis exists; plain `[0]` would select
  # the first pixel ROW instead.
  # NOTE(review): assumes all channels carry the same grayscale data - confirm.
  if pixels.ndim == 3:
    pixels = pixels[:, :, 0]
  # PNG data is returned as floats already in [0, 1]; only integer data
  # (other file formats) still has to be rescaled.
  if np.issubdtype(pixels.dtype, np.integer):
    pixels = pixels / np.iinfo(pixels.dtype).max
  return pixels.astype(np.float32)


# Load the image / mask pair for every id
def get_train_images(ids):
  """Read the training image and its mask for each id.

  Args:
    ids: iterable of id strings; an id selects
      ``<train_image_dir>/<id>.png`` and ``<train_mask_dir>/<id>.png``.

  Returns:
    ``(images, masks)``: two lists in the order of ``ids``, each holding
    2-D float32 arrays with values in [0, 1].
  """
  images = []
  masks = []
  for image_id in ids:
    images.append(_read_unit_gray(train_image_dir+'/'+image_id+'.png'))
    masks.append(_read_unit_gray(train_mask_dir+'/'+image_id+'.png'))
  return images,masks
# Load the samples of the training split, then those of the validation split
train_images, train_masks = get_train_images(ids=train_id)
val_images, val_masks = get_train_images(ids=val_id)



## 创建数据集类型
数据集类通常需要实现三个魔法方法：
1. `__init__`：初始化（接收并保存参数）
2. `__getitem__`：获取数据（按索引处理并返回一条样本）
3. `__len__`：获取数据集长度（返回样本数量）

In [69]:
# Dataset of (image, mask) pairs used for training and validation
class TensorDataset(Dataset):
  """Map-style dataset that resizes each image / mask pair to a square tensor.

  Args:
    data: sequence of 2-D image arrays.
    target: sequence of 2-D mask arrays, aligned index by index with ``data``.
    size: side length of the square output; ``None`` (default) uses the
      module-level ``dsize`` at the time a sample is fetched.

  Raises:
    ValueError: if ``data`` and ``target`` differ in length.
  """

  def __init__(self, data, target, size=None):
    if len(data) != len(target):
      raise ValueError(
        f'data and target must have the same length, got {len(data)} and {len(target)}'
      )
    self.data = data
    self.target = target
    self.size = size

  def _to_tensor(self, array, interpolation):
    """Resize ``array`` to the output square and return it as a (1, side, side) tensor."""
    side = dsize if self.size is None else self.size
    resized = cv2.resize(array, dsize=(side, side), interpolation=interpolation)
    # the reshape adds the channel axis and rejects anything that is not single-channel
    return torch.Tensor(resized).reshape(1, side, side)

  def __getitem__(self, index):
    """Return ``(image, mask)`` for ``index``, each of shape (1, side, side)."""
    # images: bilinear interpolation (the cv2.resize default)
    resolved_data = self._to_tensor(self.data[index], cv2.INTER_LINEAR)
    # masks: nearest-neighbour, so resizing never invents values that are
    # not present in the original mask
    resolved_target = self._to_tensor(self.target[index], cv2.INTER_NEAREST)
    return resolved_data,resolved_target

  def __len__(self):
    """Number of samples."""
    return len(self.data)

## 创建数据集对象

In [70]:
# Wrap the loaded samples of each split in a dataset object
train_data_set = TensorDataset(data=train_images, target=train_masks)
val_data_set = TensorDataset(data=val_images, target=val_masks)

# Training batches are reshuffled on every pass; with dsize = 128 a full
# batch of images (and of masks) has shape (18, 1, 128, 128)
train_loader = DataLoader(train_data_set, batch_size=18, shuffle=True)

# Validation batches keep the dataset order; same batch shape as above
val_loader = DataLoader(val_data_set, batch_size=18, shuffle=False)

torch.Size([1, 128, 128])