In [6]:
import os
import sys

# Make the directory three levels up importable (presumably the repository
# root, so the local package is picked up -- verify against the repo layout).
parent_dir = os.path.abspath("../../../")
sys.path.append(parent_dir)

# Tutorial: Quickstart

Here we demonstrate a simple example of training a convolutional conditional neural process (ConvCNP) to spatially interpolate NCEP reanalysis air temperature data (the `air_temperature` tutorial dataset that ships with `xarray`).

We can go from imports to predictions with a trained model in fewer than 30 lines of code!

In [7]:
import deepsensor.torch
from deepsensor.data.loader import TaskLoader
from deepsensor.data.processor import DataProcessor
from deepsensor.model.convnp import ConvNP
from deepsensor.train.train import train_epoch

import xarray as xr
import pandas as pd
import numpy as np

# Fetch the raw air temperature tutorial dataset through xarray
ds_raw = xr.tutorial.open_dataset("air_temperature")

# Normalise the data: "lat" and "lon" are registered as the x1/x2 coordinates
# together with the ranges used to map them
data_processor = DataProcessor(
    x1_name="lat",
    x1_map=(15, 75),
    x2_name="lon",
    x2_map=(200, 330),
)
ds = data_processor(ds_raw)

# The same normalised dataset is used for both context and target
task_loader = TaskLoader(context=ds, target=ds)

# Build the model from the data processor and task loader
model = ConvNP(data_processor, task_loader)

# One training task every 7th day: a random fraction (between 0% and 10%) of
# grid cells is passed as context and all grid cells are passed as targets
train_tasks = [
    task_loader(
        day,
        context_sampling=np.random.uniform(0.0, 0.1),
        target_sampling="all",
    )
    for day in pd.date_range("2013-01-01", "2014-11-30")[::7]
]

# Run 10 training epochs over the generated tasks
for epoch in range(10):
    train_epoch(model, train_tasks, progress_bar=True)

# Build a test task for a date outside the training range with 10% context
# sampling, then predict at the coordinates of the raw dataset
test_task = task_loader("2014-12-31", 0.1)
mean_ds, std_ds = model.predict(test_task, X_t=ds_raw)

dim_yc inferred from TaskLoader: (1,)
dim_yt inferred from TaskLoader: 1
dim_aux_t inferred from TaskLoader: 0
points_per_unit inferred from TaskLoader: 52
encoder_scales inferred from TaskLoader: [0.009615384042263031]
decoder_scale inferred from TaskLoader: 0.019230769230769232


100%|██████████| 100/100 [00:09<00:00, 10.68it/s]
100%|██████████| 100/100 [00:09<00:00, 10.90it/s]
100%|██████████| 100/100 [00:09<00:00, 11.08it/s]
100%|██████████| 100/100 [00:08<00:00, 11.16it/s]
100%|██████████| 100/100 [00:08<00:00, 11.14it/s]
100%|██████████| 100/100 [00:08<00:00, 11.18it/s]
100%|██████████| 100/100 [00:08<00:00, 11.16it/s]
100%|██████████| 100/100 [00:08<00:00, 11.17it/s]
100%|██████████| 100/100 [00:08<00:00, 11.21it/s]
100%|██████████| 100/100 [00:09<00:00, 10.93it/s]


After training, the model can predict directly to `xarray` in your data's original units and coordinate system:

In [8]:
# Display the first object returned by `model.predict` for the 2014-12-31 test task
mean_ds

We can also predict directly to a `pandas` DataFrame containing a timeseries of predictions at off-grid locations
by passing a `numpy` array of target locations to the `X_t` argument of `.predict`:

In [9]:
# Predict at two off-grid locations for every day in December 2014
# (the date range below spans 2014-12-01 to 2014-12-31 inclusive)
december_dates = pd.date_range("2014-12-01", "2014-12-31")
test_tasks = task_loader(december_dates, 0.1)

# Each inner pair is one (lat, lon) location; the transpose puts both lats in
# the first row and both lons in the second row of the array passed as X_t
off_grid_X_t = np.array([[50, 280], [40, 250]]).T
mean_df, std_df = model.predict(test_tasks, X_t=off_grid_X_t)

In [10]:
# Display the first object returned by `model.predict` for the December 2014
# tasks at the two off-grid locations
mean_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,air
time,lat,lon,Unnamed: 3_level_1
2014-12-01,50,280,266.371704
2014-12-01,40,250,278.317871
2014-12-02,50,280,253.647766
2014-12-02,40,250,281.236542
2014-12-03,50,280,261.98349
...,...,...,...
2014-12-29,40,250,270.200928
2014-12-30,50,280,253.476517
2014-12-30,40,250,269.566864
2014-12-31,50,280,250.828751
