In [1]:
import datetime
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from pathlib import Path
from framework.dataset import LandCoverData as LCD, parse_image, load_image_train, load_image_test
from framework.model import UNet
from framework.tensorflow_utils import plot_predictions
from framework.utils import YamlNamespace
from framework.train import PlotCallback

In [2]:
# Configuration
# Every tunable of the experiment lives in this one namespace so later cells
# read settings from `config` rather than hard-coding them.
config = YamlNamespace({
    'xp_rootdir': Path('./experiments').expanduser(),
    'dataset_folder': Path('./data').expanduser(),
    'batch_size': 8,
    'epochs': 5,
    'lr': 1e-4,
    'seed': 42,
    'val_samples_csv': None
})

# Create the experiment root and the dataset folder when they are missing.
# NOTE(review): creating an absent dataset folder yields an empty directory,
# so a missing dataset only shows up later when no files are found.
for folder in (config.xp_rootdir, config.dataset_folder):
    folder.mkdir(parents=True, exist_ok=True)

# Set random seed for reproducibility (Python, NumPy and TensorFlow, in that order)
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(config.seed)

In [3]:
# Sample data
# Sort the glob results before truncating: directory listing order depends on
# the filesystem, so an unsorted `[:N]` slice could select a different subset
# from one machine or run to the next even though the random seeds are fixed.
# NOTE(review): validation samples are taken from `test/images` — confirm this
# is the intended split.
train_files = sorted(map(str, config.dataset_folder.glob('train/images/*.tif')))[:100]
val_files = sorted(map(str, config.dataset_folder.glob('test/images/*.tif')))[:20]

# Fail early with an explicit message instead of an obscure tf.data error
# (or a zero-step training run) further down the notebook.
if not train_files or not val_files:
    raise FileNotFoundError(
        f'No .tif images found under {config.dataset_folder}: '
        f'{len(train_files)} in train/images, {len(val_files)} in test/images'
    )

# Create datasets
# `parse_image`, `load_image_train` and `load_image_test` come from
# framework.dataset; presumably they read each file and prepare the
# image/mask tensors — verify against that module.
# Training pipeline: parse -> train-time loading -> shuffle -> repeat -> batch.
# `.repeat()` makes the dataset infinite, so `fit` must be given
# `steps_per_epoch`.
train_dataset = (
    tf.data.Dataset.from_tensor_slices(train_files)
    .map(parse_image, num_parallel_calls=tf.data.AUTOTUNE)
    .map(load_image_train, num_parallel_calls=tf.data.AUTOTUNE)
    .shuffle(buffer_size=1024, seed=config.seed)
    .repeat()
    .batch(config.batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Validation pipeline: same structure without shuffling. It also repeats
# indefinitely, so consumers must bound it with an explicit step count.
val_dataset = (
    tf.data.Dataset.from_tensor_slices(val_files)
    .map(parse_image, num_parallel_calls=tf.data.AUTOTUNE)
    .map(load_image_test, num_parallel_calls=tf.data.AUTOTUNE)
    .repeat()
    .batch(config.batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

In [4]:
# Create U-Net model
# The constructor arguments are kept in `unet_kwargs` so the exact model
# configuration stays available as a plain dict after the model is built.
unet_kwargs = {
    'input_shape': (LCD.IMG_SIZE, LCD.IMG_SIZE, LCD.N_CHANNELS),
    'num_classes': LCD.N_CLASSES,
    'num_layers': 2,
}
model = UNet(**unet_kwargs)

In [5]:
# Compile model
optimizer = tf.keras.optimizers.Adam(learning_rate=config.lr)
# from_logits=False: the loss treats model outputs as probabilities.
# assumes UNet ends in a softmax activation — TODO confirm
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
model.compile(loss=loss, optimizer=optimizer, metrics=[])

# Callbacks
# Each run gets its own timestamped directory under the experiment root.
# `mkdir` is called without exist_ok, so an already-existing run directory
# raises instead of being silently reused.
run_stamp = datetime.datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
xp_dir = config.xp_rootdir / run_stamp
xp_dir.mkdir(parents=True)

keras_cb = tf.keras.callbacks
callbacks = [
    # Project callback; writes into the run's `plots` folder.
    PlotCallback(save_folder=xp_dir/'plots', num=5),
    keras_cb.TensorBoard(log_dir=xp_dir/'tensorboard', update_freq='epoch'),
    # save_best_only=False: a checkpoint file is written for every epoch.
    keras_cb.ModelCheckpoint(
        filepath=xp_dir/'checkpoints/epoch{epoch}.keras',
        save_best_only=False,
        verbose=1,
    ),
    keras_cb.CSVLogger(filename=(xp_dir/'fit_logs.csv')),
    # NOTE(review): patience=20 is larger than the 5 epochs configured in this
    # notebook, so with the current settings this reduction cannot trigger.
    keras_cb.ReduceLROnPlateau(patience=20, factor=0.5, verbose=1),
]

In [6]:
# Display the configured callback list (inspection only; nothing is modified).
callbacks

[<framework.train.PlotCallback at 0x22895800ef0>,
 <keras.src.callbacks.tensorboard.TensorBoard at 0x22895801460>,
 <keras.src.callbacks.model_checkpoint.ModelCheckpoint at 0x2289525b0e0>,
 <keras.src.callbacks.csv_logger.CSVLogger at 0x2289585b1d0>,
 <keras.src.callbacks.reduce_lr_on_plateau.ReduceLROnPlateau at 0x228952df110>]

In [None]:
# Train model
# Both datasets repeat indefinitely, so each pass is bounded by an explicit
# step count: the number of whole batches in the file lists (a trailing
# partial batch is not counted).
train_steps = len(train_files) // config.batch_size
val_steps = len(val_files) // config.batch_size

# NOTE(review): the `callbacks` list built earlier is not passed to `fit`, so
# this run writes no checkpoints, TensorBoard logs, CSV log or plots. Add
# `callbacks=callbacks` to the call to enable them.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=config.epochs,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
)

Epoch 1/5


Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(None, 256, 256, 4))


[1m 7/12[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m2:40[0m 32s/step - loss: 2.6261

In [None]:
# Evaluate model
# `val_files` holds plain strings (the paths were converted with `str` when
# the file lists were built), and `str` has no `.stem`; wrap each entry in
# `Path` to extract the file name without its extension.
# assumes every file stem is an integer sample id — TODO confirm
val_samples_s = pd.Series(
    [int(Path(f).stem) for f in val_files], name='sample_id', dtype='uint32'
)
# Record which samples were used for validation next to the other run outputs.
val_samples_s.to_csv(xp_dir/'val_samples.csv', index=False)
# The validation dataset repeats indefinitely, so evaluation is bounded to the
# number of whole batches in `val_files`.
model.evaluate(val_dataset, steps=len(val_files) // config.batch_size)