Check how to get labels from the DFC data loader

In [63]:
import os
# os.chdir('/Users/aradinka/Documents/GitHub/koltiva/SSLTransformerRS')

from tqdm import tqdm
import torch
import torchvision.models as models

from dfc_dataset import DFCDataset
from metrics import ClasswiseAccuracy

# Dataset settings; the values are read when the DFCDataset objects are constructed.
data_config = {
    'train_dir': '../data/data_disini',
    'val_dir': '../data/data_disini',
    'train_mode': 'test', # split passed as `mode` for the training dataset: 'test' or 'validation'
    'val_mode': 'validation', # split passed as `mode` for the validation dataset: 'test' or 'validation'
    'num_classes': 9, # in use
    'clip_sample_values': True, # clip (limit) values
    'train_used_data_fraction': 1, # fraction of data to use, should be in the range [0, 1]
    'val_used_data_fraction': 1, # same meaning as above, for the validation dataset
    'image_px_size': 224,
    'cover_all_parts_train': True, # if True, if image_px_size is not 224 during training, we use a random crop of the image
    'cover_all_parts_validation': True, # if True, if image_px_size is not 224 during validation, we use a non-overlapping sliding window to cover the entire image
    'seed': 42,
}

# Training hyper-parameters. Entries marked "in use" are the ones the author
# flagged as actually being read.
train_config = {
    's1_input_channels': 2,
    's2_input_channels': 13,
    'finetuning': True, # If false, backbone layers is frozen and only the head is trained
    'classifier_lr': 3e-6,
    'learning_rate': 0.00001,
    'adam_betas': (0.9, 0.999), 
    'weight_decay': 0.001,
    'dataloader_workers': 4, # in use
    'batch_size': 16, # in use
    'epochs': 5, # in use
    'target': 'dfc_label' # in use
}

# Keyword arguments that are identical for the training and validation datasets.
_shared_dataset_kwargs = {
    'clip_sample_values': data_config['clip_sample_values'],
    'image_px_size': data_config['image_px_size'],
    'seed': data_config['seed'],
    'add_cacao': True,
}

# Training dataset: directory, split, data fraction and coverage flag come
# from the train_* entries of data_config.
train_dataset = DFCDataset(
    data_config['train_dir'],
    mode=data_config['train_mode'],
    used_data_fraction=data_config['train_used_data_fraction'],
    cover_all_parts=data_config['cover_all_parts_train'],
    **_shared_dataset_kwargs,
)

# Validation dataset: same construction, driven by the val_* entries.
val_dataset = DFCDataset(
    data_config['val_dir'],
    mode=data_config['val_mode'],
    used_data_fraction=data_config['val_used_data_fraction'],
    cover_all_parts=data_config['cover_all_parts_validation'],
    **_shared_dataset_kwargs,
)

In [64]:
# Number of samples in the training dataset.
len(train_dataset)

5132

In [65]:
# Number of samples in the validation dataset.
len(val_dataset)

995

In [66]:
# Fetch the sample at index 5131, i.e. the last one given the recorded length of 5132.
train_dataset[5131]

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


{'s2': tensor([[[0.9253, 0.9253, 0.9253,  ..., 0.9445, 0.9445, 0.9445],
          [0.9253, 0.9253, 0.9253,  ..., 0.9445, 0.9445, 0.9445],
          [0.9253, 0.9253, 0.9253,  ..., 0.9445, 0.9445, 0.9445],
          ...,
          [0.9146, 0.9146, 0.9146,  ..., 0.9304, 0.9304, 0.9304],
          [0.9146, 0.9146, 0.9146,  ..., 0.9304, 0.9304, 0.9304],
          [0.9146, 0.9146, 0.9146,  ..., 0.9304, 0.9304, 0.9304]],
 
         [[0.7335, 0.7320, 0.7229,  ..., 0.7248, 0.7297, 0.7267],
          [0.7392, 0.7297, 0.7236,  ..., 0.7263, 0.7267, 0.7248],
          [0.7278, 0.7233, 0.7267,  ..., 0.7297, 0.7183, 0.7175],
          ...,
          [0.7012, 0.7145, 0.7145,  ..., 0.7377, 0.7248, 0.7290],
          [0.7054, 0.7198, 0.7054,  ..., 0.7214, 0.7175, 0.7263],
          [0.7107, 0.7141, 0.7149,  ..., 0.7195, 0.7111, 0.7191]],
 
         [[0.7310, 0.7137, 0.7036,  ..., 0.6542, 0.6639, 0.6527],
          [0.7295, 0.7129, 0.6996,  ..., 0.6635, 0.6560, 0.6527],
          [0.7176, 0.7021, 0.6982,

Load a DFC label patch

In [1]:
import numpy as np
from enum import Enum
import rasterio
from rasterio.windows import Window


class S1Bands(Enum):
    """Band selectors for "s1" patches.

    ``custom_get_patch`` forwards the member values to ``patch.read`` as the
    band indexes to load.
    """

    VV = 1
    VH = 2
    # Inside the class body VV and VH are still plain ints, so the value is [1, 2].
    ALL = [VV, VH]
    NONE = None

class Sensor(Enum):
    """String identifiers for the kinds of patch handled in this notebook."""

    s1 = "s1"
    s2 = "s2"
    lc = "lc"
    dfc = "dfc"

class S2Bands(Enum):
    """Band selectors for "s2" patches.

    Every band has a canonical ``Bxx`` member followed by a descriptive alias
    that resolves to the same member. ``ALL`` and ``RGB`` hold lists of band
    numbers, and ``NONE`` holds ``None``.
    """

    # Inside the class body the names below are still plain ints, which is
    # what lets an alias (and the lists further down) reuse them.
    B01 = 1
    aerosol = B01
    B02 = 2
    blue = B02
    B03 = 3
    green = B03
    B04 = 4
    red = B04
    B05 = 5
    re1 = B05
    B06 = 6
    re2 = B06
    B07 = 7
    re3 = B07
    B08 = 8
    nir1 = B08
    B08A = 9
    nir2 = B08A
    B09 = 10
    vapor = B09
    B10 = 11
    cirrus = B10
    B11 = 12
    swir1 = B11
    B12 = 13
    swir2 = B12

    # Multi-band selections, stored as lists of band numbers.
    ALL = [
        B01, B02, B03, B04, B05, B06, B07,
        B08, B08A, B09, B10, B11, B12,
    ]
    RGB = [B04, B03, B02]
    NONE = None

class LCBands(Enum):
    """Selectors for label rasters.

    ``LC``/``lc`` share value 0 and ``DFC``/``dfc`` share value 1; the
    lower-case names are aliases of the upper-case members. ``ALL`` holds the
    list ``[1]`` and ``NONE`` holds ``None``.
    """

    LC = 0
    lc = LC
    DFC = 1
    dfc = DFC
    # DFC is still the plain int 1 at this point, so the value is [1].
    ALL = [DFC]
    NONE = None

def custom_get_patch(patch_path, bands=LCBands.DFC, window=None):
    """Read the requested bands of one raster patch.

    :param patch_path: Path of the raster file; opened with ``rasterio.open``.
    :param bands: A single ``S1Bands`` / ``S2Bands`` / ``LCBands`` member, or
        a list/tuple of members. The enum type of the first element decides
        how the selection is interpreted. For ``LCBands`` the selection itself
        is ignored and band 1 (``LCBands.DFC``) is always read.
    :param window: Optional ``rasterio.windows.Window`` restricting the read;
        ``None`` reads the full raster.
    :return: ``(data, bounds)``, where ``data`` is the array returned by
        ``patch.read`` (a 2-D result gets a leading axis of length 1) and
        ``bounds`` is ``patch.bounds``. Returns ``(None, None)`` when
        ``bands`` is ``None`` or an empty list/tuple.
    :raises ValueError: If ``bands`` is not built from one of the three band
        enums.
    """
    # Only None and empty sequences take this exit. Plain Enum members are
    # always truthy, so e.g. S2Bands.NONE continues and its value (None) is
    # handed to patch.read unchanged (presumably "all bands" - confirm
    # against the rasterio documentation).
    if not bands:
        return None, None

    is_sequence = isinstance(bands, (list, tuple))
    first = bands[0] if is_sequence else bands

    if isinstance(first, LCBands):
        # Label rasters: always read band 1. The previous implementation
        # evaluated LCBands(bands) on the whole argument, which raised
        # ValueError for a list/tuple of LCBands before anything was read.
        band_indexes = LCBands.DFC.value
    elif isinstance(first, (S1Bands, S2Bands)):
        if is_sequence:
            band_indexes = [band.value for band in bands]
        else:
            band_indexes = bands.value
    else:
        raise ValueError("Invalid bands specified")

    with rasterio.open(patch_path) as patch:
        # window=None is the default of read(), so one call covers both the
        # windowed and the full-raster case.
        data = patch.read(band_indexes, window=window)
        bounds = patch.bounds

    # NOTE: no IGBP -> DFC remapping is applied; values are returned as stored.

    # A single band index yields a 2-D array; add a leading band axis.
    if data.ndim == 2:
        data = np.expand_dims(data, axis=0)

    return data, bounds

In [53]:
from dfc_sen12ms_dataset import DFCSEN12MSDataset


base_dir = "../data/data_disini"
# NOTE: the name `dfc` is rebound to the label array a few lines below, so this
# dataset object is not used afterwards in this cell.
dfc = DFCSEN12MSDataset(base_dir)
image_px_size = 224
if image_px_size != 256:
    # Random top-left corner of an image_px_size x image_px_size crop.
    # The upper bound of randint is exclusive, so each offset lies in
    # [0, 256 - image_px_size - 1]. Assumes 256 x 256 patches - TODO confirm.
    x_offset, y_offset = np.random.randint(0, 256 - image_px_size, 2)
    window = Window(x_offset, y_offset, image_px_size, image_px_size)
else:
    window = None

# `bands` is left at its default (LCBands.DFC), so band 1 of the file is read.
dfc, bounds4 = custom_get_patch(
    patch_path="../data/data_disini/ROIs0000_test/dfc_0/ROIs0000_test_dfc_0_p2.tif",
    window=window
)
# In-place label remapping; the statements depend on their order.
# Net effect for signed integer data with labels 0..11:
#   1 -> 0, 2 -> 1, 4..7 -> 2..5, 9..11 -> 6..8, and 0, 3, 8 -> 255.
# NOTE(review): with an unsigned dtype `dfc -= 1` would wrap instead of
# producing -1 - confirm the dtype of the rasters.
dfc[dfc == 3] = 0
dfc[dfc == 8] = 0
dfc[dfc >= 3] -= 1
dfc[dfc >= 8] -= 1
dfc -= 1
dfc[dfc == -1] = 255

# The patch-level label is the most frequent remapped value.
dfc_unique, dfc_counts = np.unique(dfc, return_counts=True)
dfc_label = dfc_unique[
    dfc_counts.argmax()
]

dfc_label

4

Create a DFC label patch for cacao

In [1]:
from PIL import Image
import numpy as np

def create_array(value, shape=(1, 224, 224)):
    """
    Build a constant-valued ``int32`` array.

    :param value: Value written to every element.
    :param shape: Shape of the result; (1, 224, 224) unless overridden.
    :return: numpy.ndarray of dtype int32 whose elements all equal ``value``.
    """
    filled = np.full(shape=shape, fill_value=value, dtype=np.int32)
    return filled

def save_array_as_tif(array, filename):
    """
    Write the first slice of an array to disk as a TIFF image.

    :param array: Array whose ``array[0]`` slice becomes the image.
    :param filename: Destination path of the TIFF file.
    """
    first_slice = array[0]
    Image.fromarray(first_slice).save(filename, format="TIFF")



# Label patch filled with class id 11, the id used here for cacao.
array = create_array(11)

# Files for the "test" split: patch ids 5128 through 5131, both inclusive.
for index in range(5128, 5132):
    save_array_as_tif(
        array,
        f"cacao training image dfc/ROIs0000_test_dfc_0_p{index}.tif",
    )

# Files for the "validation" split: patch ids 986 through 994, both inclusive.
for index in range(986, 995):
    save_array_as_tif(
        array,
        f"cacao training image dfc/ROIs0000_validation_dfc_0_p{index}.tif",
    )

In [11]:
import numpy as np
import tifffile
import rasterio
from rasterio.transform import from_origin

def create_array(value, shape=(1, 256, 256)):
    """Return an ``int32`` array of the given shape with every element set to ``value``.

    The default shape is (1, 256, 256).
    """
    filled = np.full(shape=shape, fill_value=value, dtype=np.int32)
    return filled

def save_array_as_geotiff(array, filename):
    """Write ``array[0]`` to ``filename`` as a TIFF, then attach georeferencing.

    The file is written in two steps: ``tifffile`` stores the pixel data, and
    the file is then reopened with ``rasterio`` in update mode to set the
    affine transform and the CRS.

    :param array: Array whose ``array[0]`` slice is written.
    :param filename: Destination path of the TIFF file.
    """
    # Step 1: plain TIFF holding the pixel values.
    tifffile.imwrite(filename, array[0], dtype=np.int32)

    # Step 2: reopen in place and add the georeferencing. Origin (0, 0) with
    # a pixel size of 1 x 1 is a placeholder taken from an example; replace
    # it with real values when they are known.
    with rasterio.open(filename, 'r+') as dataset:
        dataset.transform = from_origin(0, 0, 1, 1)
        dataset.crs = "EPSG:4326"

# Label patch filled with class id 11, the id used here for cacao.
array = create_array(11)

# Files for the "test" split: patch ids 5128 through 5131, both inclusive.
for index in range(5128, 5132):
    save_array_as_geotiff(
        array,
        f"cacao training image dfc/ROIs0000_test_dfc_0_p{index}.tif",
    )

# Files for the "validation" split: patch ids 986 through 994, both inclusive.
for index in range(986, 995):
    save_array_as_geotiff(
        array,
        f"cacao training image dfc/ROIs0000_validation_dfc_0_p{index}.tif",
    )


  dataset.transform = transform


In [11]:
from dfc_sen12ms_dataset import DFCSEN12MSDataset
import numpy as np
from enum import Enum
import rasterio
from rasterio.windows import Window

base_dir = "../data/data_disini"
# NOTE: this dataset object is not used again in this cell.
dfc = DFCSEN12MSDataset(base_dir)
image_px_size = 224
if image_px_size != 256:
    # Random top-left corner of an image_px_size x image_px_size crop
    # (the upper bound of randint is exclusive).
    x_offset, y_offset = np.random.randint(0, 256 - image_px_size, 2)
    window = Window(x_offset, y_offset, image_px_size, image_px_size)
else:
    window = None


# NOTE: the call passes window=None, so the window computed above is ignored
# and the whole raster is read. `bands` is left at its default
# (LCBands.DFC), which means only band 1 of the file is read.
s2, bounds4 = custom_get_patch(
    patch_path="cacao training image tif/ROIs0000_validation_s2_0_p987.tif",
    window=None
)
# Label remapping and majority-label computation, disabled in this cell:
# dfc[dfc == 3] = 0
# dfc[dfc == 8] = 0
# dfc[dfc >= 3] -= 1
# dfc[dfc >= 8] -= 1
# dfc -= 1
# dfc[dfc == -1] = 255

# dfc_unique, dfc_counts = np.unique(dfc, return_counts=True)
# dfc_label = dfc_unique[
#     dfc_counts.argmax()
# ]

# dfc_label
# Shape of the array that was read.
s2.shape

(1, 223, 223)

In [6]:
# Display the current crop window.
window

Window(col_off=17, row_off=24, width=224, height=224)

Load the new cacao training data

In [13]:
# Candidate files tried during exploration. Each assignment overwrites the
# previous one, so only the last path is the one actually opened below.
# NOTE(review): `dir` shadows the built-in of the same name.
dir = "data 02/tif/ROIs0000_validation_s2_0_p986.tif"
dir = "data 03/ROIs0000_validation_s2_0_p986.tif"
# dir = "data 03/ROIs0000_validation_s2_0_p986.tif"
dir = "../data/data_disini/ROIs0000_test/s2_0/ROIs0000_test_s2_0_p0.tif"

dir = "../data/data_disini/ROIs0000_validation/dfc_0/ROIs0000_validation_dfc_0_p996.tif"
dir = "../data/data_disini/ROIs0000_test/dfc_0/ROIs0000_test_dfc_0_p5132.tif"
dir = "cacao training image dfc/ROIs0000_test_dfc_0_p5128.tif"

import os
import numpy as np
import rasterio
from rasterio.windows import Window

image_px_size = 224

# Default to reading the whole raster; switch to a random crop of
# image_px_size x image_px_size (e.g. 128x128) when the size is not 256.
window = None
if image_px_size != 256:
    x_offset, y_offset = np.random.randint(0, 256 - image_px_size, 2)
    window = Window(x_offset, y_offset, image_px_size, image_px_size)

# Band 1 only (dfc label raster). For a 13-band read use list(range(1, 14)).
bands = 1

with rasterio.open(dir) as patch:
    # window=None is the default of read(), i.e. a full read.
    data = patch.read(bands, window=window)
    if window is not None:
        print(patch.crs)
    bounds = patch.bounds

EPSG:4326


In [3]:
# Show the working directory that the relative paths are resolved against.
os.getcwd()

'/Users/aradinka/Documents/GitHub/koltiva/SSLTransformerRS/finetune_cacao'

In [5]:
# List the contents of data_dir; the recorded run failed because 'data 05' did not exist.
os.listdir(data_dir)

FileNotFoundError: [Errno 2] No such file or directory: 'data 05'

In [13]:
import os
import numpy as np
import rasterio
from rasterio.windows import Window

# dir = "data 02/tif/ROIs0000_validation_s2_0_p986.tif"
# dir = "data 03/ROIs0000_validation_s2_0_p986.tif"
# # dir = "data 03/ROIs0000_validation_s2_0_p986.tif"
# # dir = "../data/data_disini/ROIs0000_test/s2_0/ROIs0000_test_s2_0_p0.tif"
# dir = "../data/data_disini/ROIs0000_validation/dfc_0/ROIs0000_validation_dfc_0_p996.tif"

data_dir = "data 04"

# Every entry of data_dir whose last dot-separated component is "tif".
imgs = [
    os.path.join(data_dir, name)
    for name in os.listdir(data_dir)
    if name.split(".")[-1] == "tif"
]

image_px_size = 224

# Default to reading the whole raster; switch to a random crop of
# image_px_size x image_px_size (e.g. 128x128) when the size is not 256.
# The crop position is drawn once and reused for every file.
window = None
if image_px_size != 256:
    x_offset, y_offset = np.random.randint(0, 256 - image_px_size, 2)
    window = Window(x_offset, y_offset, image_px_size, image_px_size)

# Band indexes 1 through 13.
bands = list(range(1, 14))

for dir in imgs:
    with rasterio.open(dir) as patch:
        # window=None is the default of read(), i.e. a full read.
        data = patch.read(bands, window=window)
        bounds = patch.bounds
    # Report every read whose height or width came back smaller than 224.
    if min(data.shape[1], data.shape[2]) < 224:
        print(data.shape)

['data 04/ebm_192.tif', 'data 04/ebm_804.tif', 'data 04/ebm_637.tif', 'data 04/ebm_145.tif', 'data 04/ebm_179.tif', 'data 04/ebm_958.tif', 'data 04/ebm_780.tif', 'data 04/ebm_757.tif', 'data 04/ebm_756.tif', 'data 04/ebm_971.tif', 'data 04/ebm_781.tif', 'data 04/ebm_959.tif', 'data 04/ebm_178.tif', 'data 04/ebm_144.tif', 'data 04/ebm_636.tif', 'data 04/ebm_811.tif', 'data 04/ebm_193.tif', 'data 04/ebm_807.tif', 'data 04/ebm_813.tif', 'data 04/ebm_146.tif', 'data 04/ebm_634.tif', 'data 04/ebm_783.tif', 'data 04/ebm_973.tif', 'data 04/ebm_754.tif', 'data 04/ebm_998.tif', 'data 04/ebm_768.tif', 'data 04/ebm_769.tif', 'data 04/ebm_999.tif', 'data 04/ebm_755.tif', 'data 04/ebm_972.tif', 'data 04/ebm_782.tif', 'data 04/ebm_635.tif', 'data 04/ebm_153.tif', 'data 04/ebm_147.tif', 'data 04/ebm_812.tif', 'data 04/ebm_33.tif', 'data 04/ebm_27.tif', 'data 04/ebm_802.tif', 'data 04/ebm_816.tif', 'data 04/ebm_180.tif', 'data 04/ebm_194.tif', 'data 04/ebm_143.tif', 'data 04/ebm_631.tif', 'data 04/ebm