# Auto Tetris
## Detecting roof surfaces from satellite images with a U-Net architecture

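This notebook checks the available GPUs, splits the satellite images and their masks into train, validation and test sets, and sets up the U-Net model, data loaders, optimizer and learning-rate scheduler for training.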

In [1]:
# Check for Nvidia gpus
! nvidia-smi

Sun Dec  4 13:15:13 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 520.61.05    Driver Version: 520.61.05    CUDA Version: 11.8     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:15:00.0 Off |                    0 |
| N/A   30C    P0    43W / 300W |      0MiB / 32768MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2...  On   | 00000000:16:00.0 Off |                    0 |
| N/A   32C    P0    54W / 300W |   7985MiB / 32768MiB |      0%      Default |
|       

### Data Augmentation

In [6]:
from data_augmentation import load_data, create_new_data_dirs, augment_data
from utils import del_new_data
# https://dev.to/rohitfarmer/how-to-run-jupyter-notebook-in-an-interactive-node-on-a-high-performance-computer-hpc-27mg
from pathlib import Path
import torch

In [7]:
# Project root: the notebook is assumed to run from a folder one level below it,
# so the parent of the working directory is the base for every other path.
notebook_dir = Path.cwd()
base_path = notebook_dir.parent
base_path

PosixPath('/zhome/52/3/174111/auto_tetris')

In [11]:
# Split the raw image/mask paths into train / validation / test subsets.
# NOTE(review): shuffle=True is used here, but seeding(42) is only called later in
# the "Model setup" section — confirm load_data seeds its own RNG, otherwise the
# split may differ between runs.
SPLIT = (70, 15, 15)  # presumably train/val/test percentages (200 items -> 140/30/30)
MAX_ITEMS = 200  # how many images to create dataset from, default is all images

splits = load_data(
    base_path,
    split=SPLIT,
    shuffle=True,
    max_items=MAX_ITEMS,
)
(train_x, train_y), (val_x, val_y), (test_x, test_y) = splits

# Sanity check: every subset should list as many images (x) as masks (y).
print(f"{len(train_x)=} - {len(train_y)=}")
print(f"{len(val_x)=} - {len(val_y)=}")
print(f"{len(test_x)=} - {len(test_y)=}")

len(train_x)=140 - len(train_y)=140
len(val_x)=30 - len(val_y)=30
len(test_x)=30 - len(test_y)=30


In [31]:
# Reset the generated-data folders in the project root.
# del_new_data presumably removes output from an earlier run — confirm in utils.
del_new_data()
create_new_data_dirs(base_path)

# Write every split to its own folder under new_data/.
# augment=False: presumably the files are processed without creating extra
# augmented variants — confirm in data_augmentation.augment_data.
split_outputs = (
    (train_x, train_y, "new_data/train"),
    (val_x, val_y, "new_data/val"),
    (test_x, test_y, "new_data/test"),
)
for image_paths, mask_paths, out_dir in split_outputs:
    augment_data(
        image_paths,
        mask_paths,
        base_path / "data",
        base_path / out_dir,
        augment=False,
    )

100%|█████████████████████████████████████████████████████████████████████████████| 140/140 [00:02<00:00, 50.13it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 58.67it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 64.03it/s]


### Model setup

In [2]:
from utils import seeding, create_dir, epoch_time, sort_path_list
from Hyperparams import Hyperparams
from data import DriveDataset
from unet import build_unet
from torch.utils.data import DataLoader

In [3]:
# Fixed seed passed to the project's seeding helper (from utils);
# presumably seeds the random number generators used during training.
SEED = 42
seeding(SEED)

# Project root: parent of the current working directory.
base_path = Path.cwd().parent
base_path

In [8]:
def _sorted_jpegs(folder):
    """Return every ``*.jpeg`` file in ``folder`` as a list ordered by ``sort_path_list``.

    ``Path.glob`` yields entries in arbitrary order, so images and masks are
    sorted with the same key to make the two lists line up.
    """
    return sorted(folder.glob("*.jpeg"), key=sort_path_list)


# Load train dataset (sorted so images and masks match)
train_x = _sorted_jpegs(base_path / "new_data/train/images/")
train_y = _sorted_jpegs(base_path / "new_data/train/masks/")

# Load validation dataset (sorted so images and masks match)
val_x = _sorted_jpegs(base_path / "new_data/val/images/")
val_y = _sorted_jpegs(base_path / "new_data/val/masks/")

# Images and masks are matched only by their position in the sorted lists,
# so a missing file on either side would misalign every following pair.
# Fail early instead of training on mismatched data.
assert len(train_x) == len(train_y), (
    f"train split: {len(train_x)} images but {len(train_y)} masks"
)
assert len(val_x) == len(val_y), (
    f"val split: {len(val_x)} images but {len(val_y)} masks"
)

data_str = f"Dataset size:\nTrain: {len(train_x)} - Valid: {len(val_x)}"
print(data_str)

Dataset size:
Train: 140 - Valid: 30


In [9]:
# Checkpoint folder in the project root
# (create_dir is a project helper; presumably creates the folder if it is missing).
checkpoint_dir = base_path / "checkpoints"
create_dir(checkpoint_dir)

# Path of the checkpoint file inside that folder
checkpoint_path = base_path / "checkpoints/checkpoint.pth"

In [10]:
# --- Hyperparameters, loaded from the training config file ---
hyperparams = Hyperparams(base_path / "train_conf.toml")

# Height and width both come from the single image_size setting (square input).
H = hyperparams.image_size
W = hyperparams.image_size
size = (H, W)

batch_size = hyperparams.batch_size
num_epochs = hyperparams.epochs
lr = hyperparams.lr

# --- Datasets and loaders ---
train_dataset = DriveDataset(train_x, train_y)
val_dataset = DriveDataset(val_x, val_y)

# Options shared by both loaders; only shuffling differs (on for train, off for val).
loader_options = {"batch_size": batch_size, "num_workers": 2}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

# --- Model: use the GPU when one is available, otherwise fall back to the CPU ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = build_unet().to(device)

# --- Optimisation ---
# TODO: take the optimizer from the config (hyperparams.optimizer) instead of
# hard-coding Adam.
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Lower the learning rate once the monitored value has stopped decreasing
# ("min" mode) for 5 consecutive scheduler steps.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm
# against the installed version before upgrading.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", patience=5, verbose=True
)
loss_fn = hyperparams.loss_fn

128
getter method called
