In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from fastai import *
from fastai.vision import *

## Getting the data

In [None]:
# Planet dataset directory, located under fastai's configured data path.
path = Config.data_path()/'planet'
path.mkdir(exist_ok=True)  # create the directory; an existing one is not an error
path

## Multi-label classification 多标签分类

### 查看标签文件

In [None]:
# Read the label file of the training set and preview its first rows.
labels_csv = path/'train_v2.csv'
df = pd.read_csv(labels_csv)
df.head()

### data block API

In [None]:
np.random.seed(42)  # fixed NumPy seed, presumably so the random split below is repeatable
# File list from `path`, labelled from the space-separated 'train_v2.csv'
# (images live in 'train-jpg' and carry a '.jpg' suffix).
src = (ImageFileList.from_folder(path)
       .label_from_csv('train_v2.csv', sep=' ', folder='train-jpg', suffix='.jpg')
       .random_split_by_pct(0.2))  # randomly hold out 20% as the validation set

In [None]:
# Augmentations: vertical flips allowed (flip_vert=True), lighting change capped at 0.1,
# zoom capped at 1.05, and perspective warping switched off (max_warp=0.).
tfms = get_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)
# Build the datasets from `src` at size 128, wrap them in a DataBunch and
# normalize with `imagenet_stats`.
data = (src.datasets().transform(tfms, size=128).databunch().normalize(imagenet_stats))
# Preview 3 rows of samples; the row-count argument of `show_batch` is named `rows`.
data.show_batch(rows=3, figsize=(10, 9))

In [None]:
def acc_02(inp, targ):
    """Return `accuracy_thresh(inp, targ)` evaluated with the threshold fixed at 0.2."""
    threshold = 0.2
    return accuracy_thresh(inp, targ, thresh=threshold)

In [None]:
arch = models.resnet50
# F-beta metric with the probability threshold fixed at 0.2: a class counts as
# predicted once its probability exceeds the threshold.
f_score = partial(fbeta, thresh=0.2)
# `acc_02` is the thresholded-accuracy function defined in the previous cell, so it is
# not redefined here. `create_cnn` is the learner factory used by the rest of this notebook.
learn = create_cnn(data, arch, metrics=[acc_02, f_score])
# Run the learning-rate finder and plot its result to choose `lr` for the next cell.
learn.lr_find()
learn.recorder.plot()

In [None]:
lr = 0.01    # chosen where the loss falls fastest in the plot above
# Train for 5 epochs with the one-cycle policy, then save the weights.
learn.fit_one_cycle(5, slice(lr))
learn.save('stage-1-rn50')

In [None]:
# Unfreeze the model, then rerun the learning-rate finder and plot its result.
learn.unfreeze()
learn.lr_find()
learn.recorder.plot()

In [None]:
learn.fit_one_cycle(5, slice(1e-5, lr/5))  # lower bound: about 10x before the point where the plot above suddenly shoots up (author's note: "3.5", 8 min 20 s)
learn.save('stage-2-rn50')

### 利用迁移学习：让上面已擅长识别 128×128 卫星图片的模型，进一步学会识别 256×256 的图片

In [None]:
# Rebuild the DataBunch from the same split (`src`) at size 256, reusing the
# `tfms` augmentations defined for the 128px run.
data = (src.datasets(ImageMultiDataset)
       .transform(tfms, size=256)
       .databunch().normalize(imagenet_stats))
learn.data = data          # swap the learner's data for the new 256x256 DataBunch
data.train_ds[0][0].shape

In [None]:
learn.freeze()             # per the author's note: after freezing, only the last few layers are trained on top of the earlier weights
# Rerun the learning-rate finder on the existing learner and plot its result.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Author's note on the plot above: the model is already trained (it works well on
# 128x128 images), so the curve is not as steep as before; pick a value just before
# the curve starts to climb, then shrink it by 10x.
lr = 1e-2/2
learn.fit_one_cycle(5, slice(lr))
learn.save('stage-1-256-rn50')

### 也可以不用迁移学习，用之前的方法，重新训练

In [None]:
# Unfreeze, train 5 more epochs with learning rates from 1e-5 to lr/5,
# plot the recorded losses and save the weights.
learn.unfreeze()
learn.fit_one_cycle(5, slice(1e-5, lr/5))
learn.recorder.plot_losses()
learn.save('stage-2-256-rn50')

## Image segmentation with CamVid  图像分割数据集

In [None]:
# Fetch the CamVid dataset via `untar_data` and list the contents of the returned path.
path = untar_data(URLs.CAMVID)
path.ls()

In [None]:
path_lb1 = path/'labels'  # directory of the label (mask) files
path_img = path/'images'  # directory of the input images

### Data

In [None]:
# Collect the image files under `path_img` and preview the first three.
fnames = get_image_files(path_img)
fnames[:3]

In [None]:
# Collect the label files under `path_lb1` and preview the first three.
lb1_names = get_image_files(path_lb1)
lb1_names[:3]

In [None]:
img_f = fnames[0]
img = open_image(img_f)                                # regular image files are opened with open_image()
img.show(figsize=(5,5))

In [None]:
def get_y_fn(x):
    """Return the label path for image path `x`: `<stem>_P<suffix>` inside `path_lb1`."""
    return path_lb1/f'{x.stem}_P{x.suffix}'

# Annotated (label) images are opened with open_mask(); display the mask fully opaque.
mask_path = get_y_fn(img_f)
mask = open_mask(mask_path)
mask.show(figsize=(5,5), alpha=1)

In [None]:
# Source size = the mask's shape with its first dimension dropped.
mask_dims = mask.shape[1:]
src_size = np.array(mask_dims)
src_size, mask.data

In [None]:
# 查看 codes.txt 文件内容
codes = np.loadtxt(path/'codes.txt', dtype=str)
codes

### Datasets 通过 dataset api 创建 Databunch

In [None]:
size = src_size // 2                                      # train at half the source size first
bs = 8                                                    # mini-batch size; adjust to what the GPU memory allows
src = (ImageFileList.from_folder(path_img)                # load the list of image files
       .label_from_func(get_y_fn)                         # derive each label from its image path
       .split_by_fnames_file('../valid.txt'))             # fixed train/valid split: the data are video frames, so a random split could put two adjacent frames on opposite sides
data = (src.datasets(SegmentationDataset, classes=codes)  # `codes` tells which class each integer stands for
       .transform(get_transforms(), size=size, tfm_y=True)
       .databunch(bs=bs)
       .normalize(imagenet_stats))
# Preview 2 rows of training samples.
data.show_batch(2, figsize=(10,7))

In [None]:
# Preview 2 rows of samples taken from the validation set.
data.show_batch(2, figsize=(10,7), ds_type=DatasetType.Valid)

### Model

In [None]:
# Map every class name in `codes` to its integer position, then look up 'Void'.
name2id = {class_name: idx for idx, class_name in enumerate(codes)}
void_code = name2id['Void']

def acc_camvid(input, target):
    """Accuracy over the positions whose target is not `void_code`.

    `target` has its dimension 1 squeezed out, predictions are the argmax of
    `input` along dim 1, and the mean of the element-wise matches is returned.
    """
    labels = target.squeeze(1)
    keep = labels != void_code
    predictions = input.argmax(dim=1)
    hits = predictions[keep] == labels[keep]
    return hits.float().mean()

metrics = acc_camvid
# metrics = accuracy

# If GPU memory is severely limited, append `.to_fp16()` to train in mixed precision,
# which yields a model trained with 16-bit precision (requires a recent CUDA driver).
learn = Learner.create_unet(data, models.resnet34, metrics=metrics)#.to_fp16()
# Run the learning-rate finder and plot its result from the learner's recorder.
lr_find(learn)
learn.recorder.plot()

In [None]:
# Train for 10 epochs with the one-cycle policy at lr=1e-2, then save the weights.
lr = 1e-2
learn.fit_one_cycle(10, slice(lr))
learn.save('stage-1')

#### 解冻，再训练

In [None]:
# Reload the 'stage-1' weights, unfreeze, and rerun the learning-rate finder.
learn.load('stage-1')
learn.unfreeze()
lr_find(learn)
learn.recorder.plot()

In [None]:
lrs = slice(1e-5, lr/5)       # learning-rate range from 1e-5 up to lr/5
learn.fit_one_cycle(12, lrs)
learn.recorder.plot_losses()  # plot the recorded losses

In [None]:
learn.recorder.plot_lr()      # plot the recorded learning rate

In [None]:
# Save the weights obtained after the unfrozen training run.
learn.save('stage-2')

#### Go big

In [None]:
size = src_size         # use the full image size this time
bs = 4
data = (src.datasets(SegmentationDataset, classes=codes)
       .transform(get_transforms(), size=size, tfm_y=True)
       .databunch(bs=bs)
       .normalize(imagenet_stats))

# GPU memory runs out at this point, so restart the kernel, create a new learner and load the previously saved weights
learn = Learner.create_unet(data, models.resnet34, metrics=metrics)
learn.load('stage-2')
lr_find(learn)
learn.recorder.plot()

In [None]:
# Train for 10 epochs with the one-cycle policy at lr=1e-3, then save the weights.
lr = 1e-3
learn.fit_one_cycle(10, slice(lr))
learn.save('stage-1-big')

In [None]:
# Reload 'stage-1-big', unfreeze, and train 10 more epochs with learning rates
# from 1e-6 to lr and weight decay 1e-3.
learn.load('stage-1-big')
learn.unfreeze()
lrs = slice(1e-6, lr)
learn.fit_one_cycle(10, lrs, wd=1e-3)
learn.save('stage-2-big')              # alternative name noted by the author: 'stage-2-big-fp16'

In [None]:
learn.load('stage-2-big')
learn.show_results()       # compare the predictions with the ground truth

## Regression with BIWI head pose dataset (用BIWI头部姿势数据集做图像回归)

### Getting and converting the data

In [None]:
path = Path('data/biwi_head_pose')
# Read the numeric contents of subject 01's 'rgb.cal' file, ignoring its last 6 lines.
cal = np.genfromtxt(path/'01'/'rgb.cal', skip_footer=6)
cal

In [None]:
# Example frame used in the cells below.
fname = path/'09'/'frame_00667_rgb.jpg'
def img2txt_name(f):
    """Return the name of the pose file that belongs to image `f`.

    The last 7 characters of `str(f)` (e.g. 'rgb.jpg') are dropped and
    'pose.txt' is appended in their place.
    """
    prefix = str(f)[:-7]
    return prefix + 'pose.txt'
# Open the example frame and display it.
img = open_image(fname)
img.show()

In [None]:
# Read the example frame's pose file, skipping its first 3 lines.
ctr = np.genfromtxt(img2txt_name(fname), skip_header=3)
ctr

In [None]:
def convert_biwi(coords):
    """Convert a 3-element `coords` into a 2-element tensor using the calibration array `cal`.

    Each of the first two coordinates is multiplied by the matching diagonal entry
    of `cal`, divided by `coords[2]`, and shifted by the entry in column 2 of the
    same row. The result is returned as `tensor([c2, c1])`, i.e. second value first.
    NOTE(review): this looks like a camera projection to pixel coordinates — confirm.
    """
    divisor = coords[2]
    scale_1, shift_1 = cal[0][0], cal[0][2]
    scale_2, shift_2 = cal[1][1], cal[1][2]
    c1 = coords[0] * scale_1/divisor + shift_1
    c2 = coords[1] * scale_2/divisor + shift_2
    return tensor([c2, c1])

def get_ctr(f):
    """Load the pose file belonging to image `f` and convert it with `convert_biwi`.

    The pose file name comes from `img2txt_name(f)`; its first 3 lines are skipped.
    """
    pose_file = img2txt_name(f)
    raw_coords = np.genfromtxt(pose_file, skip_header=3)
    return convert_biwi(raw_coords)

def get_ip(img, ptf):
    """Wrap the point(s) `ptf` in an `ImagePoints` object sized like `img`.

    Args:
        img: image whose `size` is given to the `FlowField`.
        ptf: point coordinates to attach to the image (e.g. the tensor from `get_ctr`).

    Returns:
        `ImagePoints(FlowField(img.size, ptf), scale=True)`.
    """
    # Use the `ptf` argument itself rather than an undefined global.
    return ImagePoints(FlowField(img.size, ptf), scale=True)

# Compute the converted point for the example frame and draw it on top of the image.
ctr = get_ctr(fname)
img.show(y=get_ip(img, ctr), figsize=(6, 6))

### Creating a dataset

In [None]:
# File list from `path`, labelled with `get_ctr`; items whose parent folder is named
# '13' form the validation set. Transforms are also applied to the targets (tfm_y=True).
data = (ImageFileList.from_folder(path)
       .label_from_func(get_ctr)
       .split_by_valid_func(lambda o: o[0].parent.name == '13')
       .datasets(PointsDataset)
       .transform(get_transforms(), tfm_y=True, size=(120, 160))
       .databunch().normalize(imagenet_stats)
       )
data.show_batch(3, figsize=(9, 6))

### Train model

In [None]:
learn = create_cnn(data, models.resnet34)
learn.loss_func = MSELossFlat()    # mean-squared-error loss
# Run the learning-rate finder and plot its result.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Train for 5 epochs with the one-cycle policy at lr=2e-2, save the weights and show results.
lr = 2e-2
learn.fit_one_cycle(5, slice(lr))
learn.save('stage-1')
learn.show_results()