In [1]:
# basics
import utils
import numpy as np
from tqdm.notebook import tqdm 


# torch
import torch
from torch.utils.data import Dataset
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter



# custom modules
from data_acquisition import DataHandler
from data_preparation import apply_preprocessing_pipeline


# Configure logging for the pipeline. The logger is created once here and is
# handed to DataHandler in a later cell.
logger = utils.setup_logger(level='INFO')

In [2]:
# Cities processed by this notebook; every loop below iterates over this list.
cities = [
    'Aachen',
    'CapeTown',
    'Hamburg',
    'Johannesburg',
    'London',
    'Montreal',
    'Paris',
    'Seoul',
    'Singapore',
    'Sydney',
]

# One shared handler for all satellite-image / building queries; it receives
# the logger configured in the first cell.
datahandler = DataHandler(logger)


2024-06-28 12:16:09,353 - root - INFO - __init__ - Data directory already exists


In [4]:
# Load the satellite image plus the sparse and dense building masks for all
# specified cities. Results are collected in three lists that share the order
# of `cities`.

import os

images, sparse_masks, dense_masks = [], [], []

for city in tqdm(cities):
    # Building geometries are only fetched when the dense mask file is not
    # present on disk yet; otherwise None is passed to get_building_mask.
    dense_mask_on_disk = os.path.exists(f'data/{city}/building_mask_dense.tif')
    if dense_mask_on_disk:
        buildings = None
    else:
        print("loading local buildings")
        buildings = datahandler.get_buildings(city)

    images.append(datahandler.get_satellite_image(city))

    # Two masks per city, differing only in the `all_touched` flag.
    sparse_masks.append(
        datahandler.get_building_mask(city, all_touched=False, loaded_buildings=buildings)
    )
    dense_masks.append(
        datahandler.get_building_mask(city, all_touched=True, loaded_buildings=buildings)
    )

  0%|          | 0/10 [00:00<?, ?it/s]

2024-06-28 12:16:16,269 - root - INFO - get_satellite_image - Aachen: Using local satellite image
2024-06-28 12:16:16,481 - root - INFO - get_building_mask - Aachen: Using local building mask
2024-06-28 12:16:16,491 - root - INFO - get_building_mask - Aachen: Using local building mask
2024-06-28 12:16:16,500 - root - INFO - get_satellite_image - CapeTown: Using local satellite image
2024-06-28 12:16:17,238 - root - INFO - get_building_mask - CapeTown: Using local building mask
2024-06-28 12:16:17,263 - root - INFO - get_building_mask - CapeTown: Using local building mask
2024-06-28 12:16:17,289 - root - INFO - get_satellite_image - Hamburg: Using local satellite image
2024-06-28 12:16:17,996 - root - INFO - get_building_mask - Hamburg: Using local building mask
2024-06-28 12:16:18,018 - root - INFO - get_building_mask - Hamburg: Using local building mask
2024-06-28 12:16:18,044 - root - INFO - get_satellite_image - Johannesburg: Using local satellite image
2024-06-28 12:16:19,956 - roo

loading local buildings


2024-06-28 12:23:10,501 - root - INFO - get_satellite_image - London: Using local satellite image
2024-06-28 12:23:12,426 - root - INFO - get_building_mask - London: Using local building mask
2024-06-28 12:23:12,485 - root - INFO - get_satellite_image - London: Using local satellite image
2024-06-28 12:24:21,988 - root - INFO - create_directory - Montreal: Directory available
2024-06-28 12:24:21,998 - root - INFO - get_buildings - Montreal: Using local building data


loading local buildings


2024-06-28 12:27:19,326 - root - INFO - get_satellite_image - Montreal: Using local satellite image
2024-06-28 12:27:20,900 - root - INFO - get_satellite_image - Montreal: Using local satellite image
2024-06-28 12:27:51,381 - root - INFO - get_satellite_image - Montreal: Using local satellite image
2024-06-28 12:28:18,980 - root - INFO - create_directory - Paris: Directory available
2024-06-28 12:28:18,982 - root - INFO - get_buildings - Paris: Using local building data


loading local buildings


2024-06-28 12:29:35,451 - root - INFO - get_satellite_image - Paris: Using local satellite image
2024-06-28 12:29:35,571 - root - INFO - get_satellite_image - Paris: Using local satellite image
2024-06-28 12:29:48,297 - root - INFO - get_satellite_image - Paris: Using local satellite image
2024-06-28 12:30:00,384 - root - INFO - create_directory - Seoul: Directory available
2024-06-28 12:30:00,385 - root - INFO - get_buildings - Seoul: Using local building data


loading local buildings


2024-06-28 12:31:31,315 - root - INFO - get_satellite_image - Seoul: Using local satellite image
2024-06-28 12:31:32,580 - root - INFO - get_satellite_image - Seoul: Using local satellite image
2024-06-28 12:31:47,112 - root - INFO - get_satellite_image - Seoul: Using local satellite image
2024-06-28 12:32:01,506 - root - INFO - create_directory - Singapore: Directory available
2024-06-28 12:32:01,509 - root - INFO - get_buildings - Singapore: Using local building data


loading local buildings


2024-06-28 12:32:46,212 - root - INFO - get_satellite_image - Singapore: Using local satellite image
2024-06-28 12:32:46,646 - root - INFO - get_satellite_image - Singapore: Using local satellite image
2024-06-28 12:32:53,794 - root - INFO - get_satellite_image - Singapore: Using local satellite image
2024-06-28 12:33:01,005 - root - INFO - create_directory - Sydney: Directory available
2024-06-28 12:33:01,008 - root - INFO - get_buildings - Sydney: Using local building data


loading local buildings


2024-06-28 12:33:54,850 - root - INFO - get_satellite_image - Sydney: Using local satellite image
2024-06-28 12:33:55,648 - root - INFO - get_satellite_image - Sydney: Using local satellite image
2024-06-28 12:34:04,747 - root - INFO - get_satellite_image - Sydney: Using local satellite image


In [17]:
# apply training pipeline
# TODO make train test split consistent so we can train with multiple sizes, dont know if there is an advantage though

# The loading cell only creates `sparse_masks` and `dense_masks`; a bare
# `masks` name does not exist after a fresh kernel start, so the mask variant
# is selected explicitly here.
# NOTE(review): the dense (all_touched=True) masks are assumed to be the
# intended training target; switch to `sparse_masks` if that is wrong.
masks = dense_masks

train_loader, test_loader = apply_preprocessing_pipeline(
    images,
    masks,
    patch_size=128,
    train_ratio=0.8,
    batch_size=64,
)

In [5]:
# Initialize the model (architecture taken from the exercise PDF).
# Three 3x3 convolutions with padding=1 (spatial size is preserved), each
# followed by ReLU, then a 1x1 convolution down to a single channel and a
# Sigmoid so that every output pixel lies in (0, 1).
channel_plan = [(6, 32), (32, 64), (64, 128)]

layers = []
for in_ch, out_ch in channel_plan:
    layers.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1))
    layers.append(nn.ReLU())

# Per-pixel head: collapse the 128 feature channels into one value.
layers.append(nn.Conv2d(128, 1, kernel_size=1, padding=0))
layers.append(nn.Sigmoid())

model = nn.Sequential(*layers)

# initialize tensorboard writer
# Created without arguments, so the library's default log location is used.
writer = SummaryWriter()

In [7]:
# Instantiate the loss function and optimizer.
# BCELoss expects probabilities, which matches the Sigmoid at the end of the
# model defined above.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop
num_epochs = 50

# Running count of processed batches. It is used as the TensorBoard step so
# that every batch gets its own x-position; logging with `epoch` as the step
# would stack all batches of one epoch onto the same point.
global_step = 0

model.train()
for epoch in tqdm(range(num_epochs)):
    for batch in train_loader:
        # split into inputs (all channels but the last) and labels (last
        # channel, kept as a singleton channel dimension)
        inputs = batch[:, :-1].to(torch.float32)
        labels = batch[:, -1, np.newaxis].to(torch.float32)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward pass
        outputs = model(inputs)

        # calculate loss
        loss = criterion(outputs, labels)

        # write to tensorboard (as a plain Python float, one point per batch)
        writer.add_scalar("Loss/train", loss.item(), global_step)
        global_step += 1

        # backward pass
        loss.backward()

        # optimizer step
        optimizer.step()
    


  0%|          | 0/50 [00:00<?, ?it/s]

KeyboardInterrupt: 

## Save Model

In [9]:
import os

# Persist only the learned parameters (the state_dict), not the module object.
checkpoint_dir = "saved_models"
os.makedirs(checkpoint_dir, exist_ok=True)

model_weights = model.state_dict()
torch.save(model_weights, "saved_models/model1")

# Evaluation

In [None]:
# Materialize the whole test set as a single tensor.
# NOTE(review): assumes test_loader.dataset is array-like with samples stacked
# along the first axis and channels along the second — confirm against
# apply_preprocessing_pipeline.
t = torch.Tensor(test_loader.dataset)

# split into inputs (all channels but the last) and labels (last channel)
test_inputs = t[:, :-1]
test_labels = t[:, -1, np.newaxis]

# Inference only: switch the model to eval mode and disable autograd so no
# computation graph is built for the full test set.
model.eval()
with torch.no_grad():
    test_results = model(test_inputs)

# Fraction of pixels predicted correctly at the given probability threshold.
threshold = 0.5
((test_results > threshold) == test_labels).float().mean()



In [None]:
from sklearn.metrics import RocCurveDisplay

# ROC curve over all test pixels: flatten labels and predicted probabilities
# to 1-D before handing them to scikit-learn.
roc_true = test_labels.flatten()
roc_score = test_results.flatten()
RocCurveDisplay.from_predictions(roc_true, roc_score)

In [None]:
# Make sure all pending TensorBoard events have been written out.
writer.flush()

# Download

In [None]:


# Fetch buildings, satellite image and building mask for every city and keep
# them in three lists that share the order of `cities`.
buildings = []
sat_images = []
building_masks = []

for city in cities:
    buildings.append(datahandler.get_buildings(city))
    sat_images.append(datahandler.get_satellite_image(city))
    building_masks.append(datahandler.get_building_mask(city))

# Plot the expected results for the first city.
# `cities[0]` is the first city name; `city[0]` would be the first character
# of the last city visited by the loop above.
datahandler.plot(cities[0])

In [None]:
import data_preparation

# Call data_preparation.create_tensor once per city.
# NOTE(review): the return value is discarded, so this presumably stores its
# result as a side effect — confirm in data_preparation.
for city in cities:
    data_preparation.create_tensor(city)

# Download

In [None]:
# Download
# Request the satellite image and building mask for every city. The results
# are not collected; each iteration overwrites `sat_image` and `mask`.
for city in cities:
    sat_image = datahandler.get_satellite_image(city)
    mask = datahandler.get_building_mask(city)

# Plot the expected results for the first city.
# `cities[0]` is the first city name; `city[0]` would be the first character
# of the last city visited by the loop above.
datahandler.plot(cities[0])