In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [2]:
from eolearn.core import (
    AddFeatureTask,
    EONode,
    EOPatch,
    EOTask,
    EOWorkflow,
    FeatureType,
    LoadTask,
    OverwritePermission,
    SaveTask,
)

In [3]:
import datetime
import os
import matplotlib.pyplot as plt
import numpy as np
from shapely.geometry import Polygon

In [4]:
from pathlib import Path
import sys

# Put both project source trees on the import path, in this order.
# SCRIPT_DIR is left pointing at the last root (the DL-pipeline sources).
# NOTE(review): presumably `datamodule_eolearn` and `module` are imported from these folders — confirm.
for SCRIPT_DIR in map(
    Path,
    (
        '/home/alina.smolina/eolearn-pipeline/src',
        '/home/alina.smolina/DL-pipeline/src',
    ),
):
    sys.path.append(str(SCRIPT_DIR))

In [5]:
# Folder holding the training EOPatches; individual patches are addressed as
# f'{eopatches_dir}eopatch_<id>', hence the trailing slash.
eopatches_dir = '/beegfs/home/alina.smolina/data/sakhalin/images/EOPatches/train_2560_K_N_v2/'
# Number of training patches; ids are generated as range(num_ids_train).
num_ids_train = 598


# Folder holding the EOPatches passed to the datamodule as both the test and the predict source.
eopatches_dir_test = '/beegfs/home/alina.smolina/data/sakhalin/images/EOPatches/test_2560_Nevelsk_v2/'
# Number of test patches; ids are generated as range(num_ids_test).
num_ids_test = 44

## Preparing dataloader

In [9]:
# Lazily load one sample training patch (id 20) to inspect which features it stores.
eopatch = EOPatch.load(f'{eopatches_dir}eopatch_20', lazy_loading=True)

In [10]:
eopatch

EOPatch(
  bbox=BBox(((637440.0, 5178880.0), (640000.0, 5181440.0)), crs=CRS('32654'))
  timestamps=[2018-03-14 01:26:50, ...]<length=8>
  data={
    10BANDS: FeatureIONumpy(/data/10BANDS.npy.gz)
    BANDS: FeatureIONumpy(/data/BANDS.npy.gz)
    CLP: FeatureIONumpy(/data/CLP.npy.gz)
  }
  mask_timeless={
    2_GROUPS: FeatureIONumpy(/mask_timeless/2_GROUPS.npy.gz)
  }
  mask={
    CLM: FeatureIONumpy(/mask/CLM.npy.gz)
    IS_DATA: FeatureIONumpy(/mask/IS_DATA.npy.gz)
    SCL: FeatureIONumpy(/mask/SCL.npy.gz)
  }
  vector_timeless={
    5-2groups-32654: FeatureIOGeoDf(/vector_timeless/5-2groups-32654.gpkg.gz)
  }
  scalar={
    COVERAGE: FeatureIONumpy(/scalar/COVERAGE.npy.gz)
  }
)

## Model

In [6]:
from datamodule_eolearn import GeoEOModule

import torch
# Select the 'high' float32 matmul precision mode globally for this kernel.
# NOTE(review): per the PyTorch docs this permits faster reduced-precision kernels
# (e.g. TF32) where the hardware supports them — confirm the accuracy trade-off is acceptable.
torch.set_float32_matmul_precision('high')

In [7]:
import albumentations as A
import albumentations.pytorch as AP

# Training pipeline: random flip (p=0.3), then a random shift of up to 6.25 %
# combined with a rotation of up to +/-90 degrees and no rescaling (p=0.5),
# and finally conversion to torch tensors.
transforms = A.Compose(
    [
        A.Flip(p=0.3),
        A.ShiftScaleRotate(
            p=0.5,
            shift_limit=(-0.0625, 0.0625),
            scale_limit=0,  # scaling disabled
            rotate_limit=(-90, 90),
        ),
        AP.ToTensorV2(transpose_mask=True),
    ]
)

# Evaluation pipeline: tensor conversion only, no augmentation.
test_transform = A.Compose([AP.ToTensorV2(transpose_mask=True)])

In [8]:
# Datamodule over every acquisition date of 2018.
# Test and predict both read the same test folder and the same patch ids.
all_dates = GeoEOModule(
    transform=transforms,
    test_transform=test_transform,
    target_mask_name='age_group_code',
    # Where the patches live.
    train_eopatches_dir=eopatches_dir,
    test_eopatches_dir=eopatches_dir_test,
    predict_eopatches_dir=eopatches_dir_test,
    # Which patch ids to use: 0 .. N-1 for each split.
    train_eopatches_ids=list(range(num_ids_train)),
    test_eopatches_ids=list(range(num_ids_test)),
    predict_eopatches_ids=list(range(num_ids_test)),
    # Acquisition window for each split.
    train_date_range=['2018-01-01', '2018-12-31'],
    test_date_range=['2018-01-01', '2018-12-31'],
    predict_date_range=['2018-01-01', '2018-12-31'],
    # Loader settings.
    batch_size=128,
    num_workers=24,
)

In [9]:
# Sanity check: the largest training patch id that will be requested.
max(range(num_ids_train))

597

In [12]:
import os

import wandb
import pytorch_lightning as pl
from module import SegModule
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import LearningRateMonitor

# wandb reads its service start-up timeout (in seconds) from the process
# environment. A plain Python variable is invisible to it, so the value has to
# be exported before the run is created.
os.environ['WANDB__SERVICE_WAIT'] = '300'

wandb_logger = WandbLogger(project='dl-pipeline-sakhalin', log_model=True)
# WandbLogger creates the run lazily, so `wandb.run` may still be None here on a
# fresh kernel. Going through `.experiment` creates the run on first access
# (or reuses an already active one).
wandb_run = wandb_logger.experiment
print(f'🤖: Look at {wandb_run.url}')


# Log the learning rate at every optimisation step.
lr_monitor_callback = LearningRateMonitor(logging_interval='step')


# Keep only the single best checkpoint according to 'val/mean_acc' (higher is better),
# in a folder named after the wandb run.
# NOTE(review): the file name template refers to `val_loss` while the monitored
# metric is `val/mean_acc` — confirm that SegModule really logs a metric called `val_loss`.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath=f'../weights/group-agegroup-sakhalin/{wandb_run.name}/',
    filename='{epoch}-{val_loss:.4f}',
    monitor='val/mean_acc',
    mode='max',
    save_top_k=1
)


# Validation (and therefore checkpoint selection) only happens every 10th epoch.
trainer = pl.Trainer(
    max_epochs=300,
    benchmark=True,
    check_val_every_n_epoch=10,
    logger=wandb_logger,
    callbacks=[
        checkpoint_callback,
        lr_monitor_callback,
    ],
)

# Segmentation model for 5 stand age groups; label 255 is excluded via ignore_index.
# English gloss of the Russian class names (the runtime strings are kept as they are):
#   молодняки = young stands, приспевающие = maturing, средневозрастные = middle-aged,
#   спелые = mature, перестойные = overmature, нет_данных = no data.
# NOTE(review): code 1 is "maturing" and code 2 is "middle-aged", which is not in
# chronological order — confirm this matches the encoding of the target mask.
model = SegModule(
    optimizer='Adam',
    scheduler='StepLR',
    step_size=50,
    gamma=0.1,
    lr=6e-3,
    in_channels=10,
    ignore_index=255,
    min_channels=16,
    max_channels=512,
    num_down_blocks=5,
    # Alternative 2-class setup (deciduous vs. coniferous), kept for reference:
    # num_classes=2,
    # class_labels_dict={0: 'лиственные', 1: 'хвойные', 255:'нет_данных'},
    # labels_to_calc_metric=['лиственные', 'хвойные'],
    # possible_classes=[0, 1]
    num_classes=5,
    class_labels_dict={0: 'молодняки',
                       1: 'приспевающие',
                       2: 'средневозрастные',
                       3: 'спелые',
                       4: 'перестойные',
                       255: 'нет_данных'},
    labels_to_calc_metric=['молодняки', 'приспевающие', 'средневозрастные', 'спелые', 'перестойные'],
    possible_classes=[0, 1, 2, 3, 4]
)

trainer.fit(model, all_dates)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


🤖: Look at https://wandb.ai/nali/dl-pipeline-sakhalin/runs/f44x5t0c
🤖: Setup data...
🤖: Fit stage.
🤖: Collecting all the time indices... 

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [None]:
# Run the test loop of the datamodule with the model object currently in memory.
# NOTE(review): this presumably evaluates the weights from the end of the last fit, not the
# best checkpoint kept by checkpoint_callback — pass ckpt_path explicitly if that is intended.
trainer.test(model, all_dates)

  rank_zero_warn(


🤖: Setup data...
🤖: Collecting all the time indices... Done.
🤖: Generating mapping: idx -> (patch_id, time_id)... Elapsed time: 0.67 min
Done.
🤖: Loading all the patches and time frames into the memory... 

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Done.
Elapsed time: 4.51 min 🤖: Test stage.
🤖: Test frames >>> 787
🤖: #iterations in test dataloader: 7


Testing: 0it [00:00, ?it/s]

In [None]:
[0, 2, 8, 12]

In [9]:
import bisect

# Cumulative frame counts: cumsum[i] is the flat index of the first frame of
# patch i. This toy example describes three patches with 2, 6 and 4 frames.
cumsum = [0, 2, 8, 12]


def get_patch_index(idx: int) -> int:
    """Map a flat frame index to the index of the patch that contains it.

    Returns the right-most position ``i`` with ``cumsum[i] <= idx``, so the
    frame offset inside the patch is ``idx - cumsum[i]``.

    Out-of-range input is clamped rather than rejected: an ``idx`` below
    ``cumsum[0]`` (or an empty ``cumsum``) yields 0, and an ``idx`` at or past
    the last entry yields ``len(cumsum) - 1``.

    Args:
        idx: Flat index over all frames of all patches.

    Returns:
        Position in the module-level ``cumsum`` list, which must be sorted in
        ascending order.
    """
    # bisect_right counts the entries that are <= idx; the last of them is the
    # patch start we are looking for. max() keeps the lower clamp at 0.
    return max(bisect.bisect_right(cumsum, idx) - 1, 0)

In [None]:
# Frame layout behind cumsum = [0, 2, 8, 12]:
# patch_0 : [date_0, date_1]
# patch_1 : [date_0, date_1, date_2, ..., date_5]
# patch_2 : [date_0, date_1, date_2, date_3]

In [42]:
# Flat frame index to decode. With cumsum = [0, 2, 8, 12], the value 8 is the
# only one that yields the recorded result (2, 0): first frame of patch 2.
idx = 8
patch_id = get_patch_index(idx)
# Displayed pair: (patch index, frame offset inside that patch).
patch_id, idx - cumsum[patch_id]

(2, 0)

# debug

In [55]:
%%time
# Smoke test of the training dataloader: pull a single batch, show how many
# elements it has and the shapes of the first two, and record the distinct
# values found in its last element.
uniq_vals = []
all_dates.setup(stage='fit')
for item in all_dates.train_dataloader():
    print(len(item), item[0].shape, item[1].shape, sep='\n')
    uniq_vals.append(torch.unique(item[-1]).numpy())
    break  # one batch is enough

🤖: Setup data...
🤖: Fit stage.
🤖: Collecting all time indices... Done.
🤖: Generating mapping: idx -> (patch_id, time_id)... Done.
🤖: Fit stage.
🤖: Fit frames >>> 1485
🤖: Val frames >>> 371
🤖: Total >>>>>>>> 1856
🤖: #iterations in train dataloader: 62
3
torch.Size([24, 10, 256, 256])
torch.Size([24, 256, 256])
CPU times: user 22.7 s, sys: 1.93 s, total: 24.7 s
Wall time: 4min 34s


[rank: 0] Received SIGTERM: 15
[rank: 0] Received SIGTERM: 15
[rank: 0] Received SIGTERM: 15
[rank: 0] Received SIGTERM: 15
[rank: 0] Received SIGTERM: 15
[rank: 0] Received SIGTERM: 15


In [53]:
%%time
# Same single-batch check of the training dataloader, run again to compare
# wall-clock time: print the batch length and the first two shapes, and record
# the distinct values of the last batch element.
uniq_vals = []
all_dates.setup(stage='fit')
for item in all_dates.train_dataloader():
    print(len(item), item[0].shape, item[1].shape, sep='\n')
    uniq_vals.append(torch.unique(item[-1]).numpy())
    break  # one batch is enough

🤖: Setup data...
🤖: Fit stage.
🤖: Collecting all time indices... Done.
🤖: Generating mapping: idx -> (patch_id, time_id)... Done.
🤖: Fit stage.
🤖: Fit frames >>> 1485
🤖: Val frames >>> 371
🤖: Total >>>>>>>> 1856
🤖: #iterations in train dataloader: 62
3
torch.Size([24, 10, 256, 256])
torch.Size([24, 256, 256])
CPU times: user 22.7 s, sys: 1.03 s, total: 23.8 s
Wall time: 37.3 s


In [None]:
uniq_vals

In [20]:
from datamodule_eolearn import EOLearnDataset

# Stand-alone dataset over training patches 69..105 for all of 2018, using the
# training augmentations and requiring the 'PREVAIL_GROUP_2' target mask.
eo_ds = EOLearnDataset(
    eopatches_dir=eopatches_dir,
    eopatches_ids=list(range(69, 106)),
    date_range=['2018-01-01', '2018-12-31'],
    target_mask_name='PREVAIL_GROUP_2',
    demand_target=True,
    transform=transforms,
)

🤖: Collecting all time indices...
🤖: Done.


In [25]:
# Inspect the first dataset sample only: its number of elements and the shapes
# of the first two.
for item in eo_ds:
    print(len(item), item[0].shape, item[1].shape, sep='\n')
    break

3
torch.Size([10, 256, 256])
torch.Size([256, 256])
