# Use Temporal Sentinel-2 Data to Train a Stacked ResNet Model

In [3]:
from time import time

In [4]:
from biomasstry.datasets import TemporalSentinel2Dataset
from biomasstry.models import TemporalSentinel2Model
from biomasstry.models.utils import run_training
import torch
import torch.nn as nn
from torch.utils.data import random_split, DataLoader

cpu


In [5]:
%load_ext autoreload
%autoreload 2

## Dataset

In [6]:
ds = TemporalSentinel2Dataset(data_url="s3://drivendata-competition-biomassters-public-us")

In [7]:
start = time()
sample = ds[0]
end = time()

In [8]:
print(f"Time: {end - start}")

Time: 2.7208352088928223


In [9]:
print(f"No. of temporal samples: {len(sample['image'])}")
print(f"Shape: {sample['image'][0].shape}")

No. of temporal samples: 5
Shape: torch.Size([10, 256, 256])


In [10]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

Device: cpu


In [11]:
torch.manual_seed(0)
train_size = int(0.8*len(ds))
valid_size = len(ds) - train_size
train_set, val_set = random_split(ds, [train_size, valid_size])
print(f"Train samples: {len(train_set)} "
      f"Val. samples: {len(val_set)}")

Train samples: 6951 Val. samples: 1738


## Model

In [12]:
model = TemporalSentinel2Model(
    n_samples=5, 
    output_nc=1,
)

In [None]:
initial_output = []
for each in sample['image']:
    initial_output.append(model.model_initial(each))

In [None]:
x = torch.stack(initial_output, dim=1)

In [None]:
x.size()

In [None]:
print(initial_output[0].shape)
print(x.shape)

In [13]:
yh = model(sample['image'])

AssertionError: x: torch.Size([16, 5, 256, 256]) is not 1 x C x D x H x W

In [14]:
loss_module = nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=0.02)

## DataLoaders

In [15]:
batch_size = 4
num_workers = 1

train_dataloader = DataLoader(train_set,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=num_workers,
                            pin_memory=True
                            )

val_dataloader = DataLoader(val_set,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True
                        )

In [16]:
b = next(iter(train_dataloader))

TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/collate.py", line 128, in collate
    return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/collate.py", line 128, in <dictcomp>
    return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/collate.py", line 151, in collate
    raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/fetch.py", line 61, in fetch
    return self.collate_fn(data)
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/collate.py", line 265, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/collate.py", line 131, in collate
    return {key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem}
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/collate.py", line 131, in <dictcomp>
    return {key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem}
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/collate.py", line 151, in collate
    raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>


## Model Training

In [None]:
artifacts_dir = "/project/artifacts"
model_name = "TemporalS2"
n_epochs = 1
date = "20221220"
save_path = artifacts_dir + f"/{date}_{model_name}_B{batch_size}_E{n_epochs}.pt"

In [None]:
metrics = run_training(model=model,
                    loss_module=loss_module,
                    optimizer=optimizer,
                    train_dataloader=train_dataloader,
                    val_dataloader=val_dataloader,
                    save_path=save_path,
                    n_epochs=n_epochs)

In [None]:
# Save the metrics to a file
train_metrics_df = pd.DataFrame(metrics['training'], columns=["step", "score"])
val_metrics_df = pd.DataFrame(metrics["validation"], columns=["step", "score"])
train_metrics_df.to_csv(artifacts_dir + "/train_metrics.csv")
val_metrics_df.to_csv(artifacts_dir + "/val_metrics.csv")