# Use Temporal Sentinel-2 Data to Train a Stacked ResNet Model

In [1]:
# Load IPython's autoreload extension so edited modules can be re-imported
# without restarting the kernel.
%load_ext autoreload

In [2]:
# Register the project's dataset and model modules for automatic reloading.
%aimport biomasstry.datasets, biomasstry.models

In [3]:
# With no arguments, %aimport lists which modules are set to reload or skip.
%aimport

Modules to reload:
biomasstry.datasets biomasstry.models

Modules to skip:



In [4]:
# Mode 1: before executing code, reload only the modules registered via %aimport.
%autoreload 1

In [5]:
from time import time

In [6]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import random_split, DataLoader

from biomasstry.datasets import TemporalSentinel2Dataset
from biomasstry.models import TemporalSentinel2Model
from biomasstry.models.utils import run_training

cuda


## Dataset

In [7]:
# Select the data location: the public S3 bucket when reading directly,
# or an empty URL when the data is reached as a mounted data source.
S3_DIRECT = False
data_url = "s3://drivendata-competition-biomassters-public-us" if S3_DIRECT else ""

# NOTE(review): how the dataset interprets an empty data_url is defined in
# biomasstry.datasets — confirm it resolves to the mounted location.
ds = TemporalSentinel2Dataset(data_url=data_url)

In [8]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

Device: cuda


In [9]:
# Seed the global RNG before splitting so the 80/20 partition is repeatable.
torch.manual_seed(0)

n_total = len(ds)
train_size = int(0.8 * n_total)
valid_size = n_total - train_size  # whatever is left goes to validation
train_set, val_set = random_split(ds, [train_size, valid_size])

print(f"Train samples: {len(train_set)} Val. samples: {len(val_set)}")

Train samples: 6951 Val. samples: 1738


## Model

In [10]:
# Collect the constructor settings in one place, then build the network.
# NOTE(review): n_samples presumably is the number of temporal images per
# chip and output_nc the number of output channels — confirm against
# TemporalSentinel2Model.
model_kwargs = dict(n_samples=5, output_nc=1)
model = TemporalSentinel2Model(**model_kwargs)

In [11]:
# Objective: mean squared error, averaged over all elements.
loss_module = nn.MSELoss(reduction='mean')

# Adam over every model parameter with a fixed learning rate.
learning_rate = 0.02
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

## DataLoaders

In [12]:
batch_size = 4
num_workers = 2

# Options shared by both loaders; only the dataset and shuffling differ.
loader_kwargs = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=True,
)

# Training samples are reshuffled; validation order stays fixed.
train_dataloader = DataLoader(train_set, shuffle=True, **loader_kwargs)
val_dataloader = DataLoader(val_set, shuffle=False, **loader_kwargs)

In [13]:
# Measure the wall-clock time needed to obtain one batch from the training
# loader. The measured span includes creating the iterator as well as
# fetching the first batch.
start = time()
b = next(iter(train_dataloader))  # `b` is reused by later cells (indexed with 'image')
end = time()
print(f"Time to read one batch of size {batch_size} = {end - start} seconds.")

Time to read one batch of size 4 = 5.993261814117432 seconds.


In [14]:
# Length of the first entry of the batch's 'image' field
# (the recorded output is 4, which matches batch_size).
len(b['image'][0])

4

In [15]:
# Smoke-test the full forward pass on the sample batch. Only the output is
# inspected afterwards, so gradient tracking is disabled; otherwise `yb`
# would keep the whole autograd graph alive in the kernel.
with torch.no_grad():
    yb = model(b['image'])

In [None]:
# Display the shape of the model output for the sample batch.
yb.shape

In [21]:
# Apply only the model's initial stage to each entry of the batch's 'image'
# field. These tensors are used for shape inspection only, so gradient
# tracking is disabled to avoid retaining an autograd graph per entry.
with torch.no_grad():
    initial_output = [model.model_initial(image) for image in b['image']]

In [25]:
# Stack the per-entry outputs along a new axis inserted at position 2
# (recorded shapes: [4, 10, 256, 256] each -> [4, 10, 5, 256, 256]).
x = torch.stack(initial_output, dim=2)

In [26]:
# Display the size of the stacked tensor.
x.size()

torch.Size([4, 10, 5, 256, 256])

In [27]:
# Show the shape of one per-entry output next to the stacked tensor's shape.
for tensor in (initial_output[0], x):
    print(tensor.shape)

torch.Size([4, 10, 256, 256])
torch.Size([4, 10, 5, 256, 256])


## Model Training

In [None]:
# Run labels and output location.
artifacts_dir = "/project/artifacts"
model_name = "TemporalS2"
n_epochs = 1
date = "20221220"

# The file name encodes the date, model name, batch size and epoch count;
# the resulting path is later handed to run_training as `save_path`.
save_path = f"{artifacts_dir}/{date}_{model_name}_B{batch_size}_E{n_epochs}.pt"

In [None]:
# Gather everything the training routine needs, then launch it.
training_kwargs = dict(
    model=model,
    loss_module=loss_module,
    optimizer=optimizer,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    save_path=save_path,
    n_epochs=n_epochs,
)

# The returned `metrics` is later read under its 'training' and 'validation' keys.
metrics = run_training(**training_kwargs)

In [None]:
# Save the metrics to CSV files in the artifacts directory.
# Requires pandas (`pd`), which the notebook's import cell must provide.
# Each entry under 'training' / 'validation' is loaded as a (step, score) row.
metric_columns = ["step", "score"]
train_metrics_df = pd.DataFrame(metrics['training'], columns=metric_columns)
val_metrics_df = pd.DataFrame(metrics["validation"], columns=metric_columns)

# File names are fixed, so a new run overwrites the previous run's metrics.
train_metrics_df.to_csv(artifacts_dir + "/train_metrics.csv")
val_metrics_df.to_csv(artifacts_dir + "/val_metrics.csv")