# Getting Started

In [1]:
# Install torchgeo
%pip install torchgeo

Collecting torchgeo
  Downloading torchgeo-0.5.2-py3-none-any.whl (381 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 381.1/381.1 kB 6.6 MB/s eta 0:00:00
Collecting einops>=0.3 (from torchgeo)
  Downloading einops-0.8.0-py3-none-any.whl (43 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 43.2/43.2 kB 4.2 MB/s eta 0:00:00
Collecting kornia>=0.6.9 (from torchgeo)
  Downloading kornia-0.7.2-py2.py3-none-any.whl (825 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 825.4/825.4 kB 32.5 MB/s eta 0:00:00
Collecting lightly!=1.4.26,>=1.4.4 (from torchgeo)
  Downloading lightly-1.5.4-py3-none-any.whl (745 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 745.1/745.1 kB 34.0 MB/s eta 0:00:00
Collecting lightning[pytorch-extra]>=2 (from torchgeo)
  Downloading lightning-2.2.4-py3-none-any.whl (2.0 MB)

In [1]:
# Import packages
import os
import tempfile

from torch.utils.data import DataLoader

from torchgeo.datasets import NAIP, ChesapeakeDE, stack_samples
from torchgeo.datasets.utils import download_url
from torchgeo.samplers import RandomGeoSampler

## Datasets

For this tutorial, we'll be using imagery from the [National Agriculture Imagery Program (NAIP)](https://catalog.data.gov/dataset/national-agriculture-imagery-program-naip) and labels from the [Chesapeake Bay High-Resolution Land Cover Project](https://www.chesapeakeconservancy.org/conservation-innovation-center/high-resolution-data/land-cover-data-project/). First, we manually download a few NAIP tiles and create a PyTorch Dataset.

In [2]:
# The tiles are stored in a "naip" folder under the system temp directory.
naip_root = os.path.join(tempfile.gettempdir(), "naip")
# Common URL prefix; each tile filename below is appended to it.
naip_url = (
    "https://naipeuwest.blob.core.windows.net/naip/v002/de/2018/de_060cm_2018/38075/"
)
tiles = [
    "m_3807511_ne_18_060_20181104.tif",
    "m_3807511_se_18_060_20181104.tif",
    "m_3807512_nw_18_060_20180815.tif",
    "m_3807512_sw_18_060_20180815.tif",
]
# Fetch the tiles one at a time, in the order listed above.
for tile_name in tiles:
    download_url(f"{naip_url}{tile_name}", naip_root)

# Build the NAIP dataset from the directory the tiles were downloaded into.
naip = NAIP(naip_root)

Downloading https://naipeuwest.blob.core.windows.net/naip/v002/de/2018/de_060cm_2018/38075/m_3807511_ne_18_060_20181104.tif to /tmp/naip/m_3807511_ne_18_060_20181104.tif


100%|██████████| 513332284/513332284 [00:39<00:00, 12958977.98it/s]


Downloading https://naipeuwest.blob.core.windows.net/naip/v002/de/2018/de_060cm_2018/38075/m_3807511_se_18_060_20181104.tif to /tmp/naip/m_3807511_se_18_060_20181104.tif


100%|██████████| 521985441/521985441 [00:37<00:00, 13798210.95it/s]


Downloading https://naipeuwest.blob.core.windows.net/naip/v002/de/2018/de_060cm_2018/38075/m_3807512_nw_18_060_20180815.tif to /tmp/naip/m_3807512_nw_18_060_20180815.tif


100%|██████████| 489865657/489865657 [00:33<00:00, 14567041.39it/s]


Downloading https://naipeuwest.blob.core.windows.net/naip/v002/de/2018/de_060cm_2018/38075/m_3807512_sw_18_060_20180815.tif to /tmp/naip/m_3807512_sw_18_060_20180815.tif


100%|██████████| 484476647/484476647 [00:35<00:00, 13474705.28it/s]


Next, we tell TorchGeo to automatically download the corresponding Chesapeake labels.

In [3]:
# The labels get their own "chesapeake" folder under the system temp directory.
chesapeake_root = os.path.join(tempfile.gettempdir(), "chesapeake")
os.makedirs(chesapeake_root, exist_ok=True)
# Request the labels in the NAIP dataset's CRS and resolution so both layers
# line up; download=True lets TorchGeo download the labels automatically.
chesapeake = ChesapeakeDE(
    chesapeake_root,
    crs=naip.crs,
    res=naip.res,
    download=True,
)

Downloading https://cicwebresources.blob.core.windows.net/chesapeakebaylandcover/DE/_DE_STATEWIDE.zip to /tmp/chesapeake/_DE_STATEWIDE.zip


100%|██████████| 287350495/287350495 [00:05<00:00, 54458246.50it/s]


Finally, we create an IntersectionDataset so that we can automatically sample from both GeoDatasets simultaneously.

In [4]:
# Combine imagery and labels with `&`; per this tutorial's text the result is an
# IntersectionDataset that samples from both GeoDatasets simultaneously.
dataset = naip & chesapeake

## Sampler

Unlike typical PyTorch Datasets, TorchGeo GeoDatasets are indexed using lat/long/time bounding boxes. This requires us to use a custom GeoSampler instead of the default sampler/batch_sampler that comes with PyTorch.

In [6]:
# Random sampler over the combined dataset.
# NOTE(review): per the TorchGeo docs, `size` is the patch size (pixel units by
# default) and `length` the number of samples drawn per epoch -- confirm against
# the installed version.
sampler = RandomGeoSampler(
    dataset,
    size=10000,
    length=10,
)

## DataLoader

Now that we have a Dataset and Sampler, we can combine these into a single DataLoader.

In [7]:
# Wire the dataset and the geo-aware sampler into a standard PyTorch DataLoader;
# TorchGeo's stack_samples is supplied as the collate function.
dataloader = DataLoader(
    dataset,
    sampler=sampler,
    collate_fn=stack_samples,
)

## Training

Other than the custom sampler and collate function used above, the rest of the training pipeline is the same as it is for torchvision.

In [8]:
for sample in dataloader:
    # Each sample is indexed by key: "image" holds the imagery and "mask" the
    # labels. A real training step would consume `image` and `target` here.
    image, target = sample["image"], sample["mask"]