### 1. Install dependent libraries

In [None]:
# Uncomment to install PaddleSeg into the environment of the running kernel.
# %pip install -q paddleseg

### 2. Unzip datasets
**Data organization**
```
dataset
  ├ img
  │  ├ build0.jpg
  │  └ ....jpg
  └ gt
     ├ build0.png
     └ ....png
```
**Label pixel values**
```
0 background
1 building
```
**Image size**
```
512x512
```

In [None]:
# ! mkdir -p dataset  # create a folder to hold the dataset
# ! unzip -oq data.zip -d dataset  # extract data.zip into the dataset folder

In [None]:
import os


# Use the notebook's current working directory as the project root; the later
# cells build the dataset, weight and output paths relative to it.
PATH = os.getcwd()
print(PATH)  # show the resolved root so a wrong working directory is noticed early

### 3. Split datasets and create data_list

In [None]:
import os
import os.path as osp
import random


def create_list(data_path: str, val_num: int=2000) -> None:
    """Shuffle the images under ``<data_path>/img`` and write the split lists.

    Two text files are written into ``data_path``: ``train_list.txt`` and
    ``val_list.txt``. Every line has the form ``<image> <label>`` (single
    space separator), with both paths relative to ``data_path``. The label of
    an image is the file in ``gt`` with the same stem and a ``.png`` extension.

    Args:
        data_path (str): dataset folder containing the ``img`` sub-folder.
        val_num (int, optional): number of shuffled samples written to the
            validation list; all remaining samples go to the training list.
            Defaults to 2000.
    """
    image_path = osp.join(data_path, "img")
    # Keep regular, non-hidden files only (skips e.g. checkpoint folders and
    # OS metadata files) and sort them so that the shuffle below is
    # reproducible under a fixed random seed regardless of listing order.
    data_names = sorted(
        name for name in os.listdir(image_path)
        if not name.startswith(".") and osp.isfile(osp.join(image_path, name))
    )
    random.shuffle(data_names)  # scramble data
    train_list = osp.join(data_path, "train_list.txt")
    val_list = osp.join(data_path, "val_list.txt")
    with open(train_list, "w") as tf, open(val_list, "w") as vf:
        for idx, data_name in enumerate(data_names):
            img = osp.join("img", data_name)
            # Swap only the extension: str.replace("jpg", "png") would also
            # rewrite "jpg" inside the stem and would miss ".JPG" / ".jpeg".
            lab = osp.join("gt", osp.splitext(data_name)[0] + ".png")
            # The first `val_num` shuffled samples form the validation split.
            target = vf if idx < val_num else tf
            target.write(img + " " + lab + "\n")
    print("Data list generation completed")


# Generate the split files for the dataset folder under PATH, keeping a
# single sample for validation.
create_list(data_path=osp.join(PATH, "dataset"), val_num=1)

### 4. Create PaddlePaddle Dataset

In [None]:
import paddleseg.transforms as T
from paddleseg.datasets import Dataset


# Training pipeline: random flips, rotation, scale/aspect jitter and blur,
# followed by a resize to 512x512 and normalization.
train_transforms = [
    T.RandomHorizontalFlip(),
    T.RandomVerticalFlip(),
    T.RandomRotation(),
    T.RandomScaleAspect(),
    T.RandomBlur(),
    T.Resize(target_size=(512, 512)),
    T.Normalize(),
]
# The list file uses a single space between image and label path, hence
# separator=" ".
train_dataset = Dataset(
    mode="train",
    dataset_root=osp.join(PATH, "dataset"),
    train_path=osp.join(PATH, "dataset/train_list.txt"),
    separator=" ",
    num_classes=2,
    transforms=train_transforms,
)

# Validation pipeline: no random augmentation, only resize and normalization.
val_transforms = [
    T.Resize(target_size=(512, 512)),
    T.Normalize(),
]
val_dataset = Dataset(
    mode="val",
    dataset_root=osp.join(PATH, "dataset"),
    val_path=osp.join(PATH, "dataset/val_list.txt"),
    separator=" ",
    num_classes=2,
    transforms=val_transforms,
)

### 5. Select model

In [None]:
import paddle
from paddleseg.models import OCRNet, HRNet_W18


# Two-class OCRNet (0 = background, 1 = building) on top of an HRNet-W18
# backbone; `pretrained` points at a weight file in the `weight` folder under PATH.
model = OCRNet(
    backbone=HRNet_W18(),
    backbone_indices=[0],
    num_classes=2,
    pretrained=osp.join(PATH, "weight/ocrnet_hrnet_w18_512x512_rs_building.pdparams"),
)

### 6. Set hyperparameters

In [None]:
from paddleseg.models.losses import MixedLoss, BCELoss, DiceLoss


# Tunable training settings.
epochs = 5
batch_size = 1
base_lr = 3e-5

# Total number of optimisation steps: `epochs` passes over the training set
# at `batch_size` samples per step.
iters = epochs * len(train_dataset) // batch_size

# Polynomial decay from base_lr down to base_lr / 10 over `iters // epochs`
# steps, i.e. the step count of one epoch.
# NOTE(review): with the scheduler's default non-cycling mode the rate
# presumably stays at end_lr afterwards - confirm this is intended.
lr = paddle.optimizer.lr.PolynomialDecay(
    learning_rate=base_lr,
    decay_steps=iters // epochs,
    end_lr=base_lr / 10,
)
optimizer = paddle.optimizer.Adam(learning_rate=lr, parameters=model.parameters())

# Two equally weighted loss entries, each an equal mix of BCE and Dice loss.
# NOTE(review): presumably one entry per OCRNet output - confirm.
losses = {
    "types": [MixedLoss([BCELoss(), DiceLoss()], [1, 1])] * 2,
    "coef": [1] * 2,
}

### 7. Train

In [None]:
from paddleseg.core import train


# Run training with the model, datasets, optimizer and losses configured in
# the previous cells; results are written to the `output` folder under PATH.
train(model=model,
      train_dataset=train_dataset,
      val_dataset=val_dataset,
      optimizer=optimizer,
      save_dir=osp.join(PATH, "output"),
      iters=iters,
      batch_size=batch_size,
      # Aim for five save points over the run, but never let the interval
      # drop to 0: when iters < 5 the integer division yields 0, which the
      # training loop cannot use as a modulus.
      save_interval=max(1, iters // 5),
      log_iters=10,
      num_workers=0,
      losses=losses,
      use_vdl=True)