In [None]:
import sys

sys.path.append("..")

import os

import numpy as np
import torch
from landsatbench.datamodule import LandsatDataModule
from landsatbench.embed import ImageStatisticsModel, extract_features
from landsatbench.eval import eval_knn, eval_linear_probe

# Notebook-wide configuration. Paths are relative to the notebook's directory.
root = "../data"  # dataset root handed to LandsatDataModule
output_dir = "../embeddings"  # extracted features are cached here as .npz files
os.makedirs(output_dir, exist_ok=True)

# Neighbour count for the KNN evaluations.
k = 5

# Pick the best available accelerator instead of assuming Apple MPS:
# MPS is still preferred (the original setting), then CUDA, then CPU, so the
# notebook also runs on machines without an Apple GPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

### EuroSAT-L

In [None]:
# EuroSAT data module; the archive is expected to be present already (download=False).
dm = LandsatDataModule(
    name="eurosat",
    root=root,
    batch_size=16,
    num_workers=8,
    download=False,
)
dm.prepare_data()
dm.setup("fit")

# Image-statistics feature extractor, placed on the configured device.
model = ImageStatisticsModel().to(device)

In [None]:
# Features and labels for the training split.
dm.setup("fit")
x_train, y_train = extract_features(model, dm.train_dataloader(), device)

# Features and labels for the test split.
dm.setup("test")
x_test, y_test = extract_features(model, dm.test_dataloader(), device)

# Cache everything in one .npz archive; labels are stored as int16.
filename = os.path.join(output_dir, "eurosat-imagestats.npz")
arrays = {
    "x_train": x_train,
    "y_train": y_train.astype(np.int16),
    "x_test": x_test,
    "y_test": y_test.astype(np.int16),
}
np.savez(filename, **arrays)

100%|██████████| 1013/1013 [00:19<00:00, 51.03it/s] 
100%|██████████| 338/338 [00:14<00:00, 23.09it/s] 


In [None]:
# KNN eval
# Read the cached features back from disk. np.load returns a lazy NpzFile that
# keeps the archive open, so use it as a context manager: the arrays are
# materialised inside the block and the file handle is closed afterwards.
filename = os.path.join(output_dir, "eurosat-imagestats.npz")
with np.load(filename) as embeddings:
    x_train, y_train, x_test, y_test = (
        embeddings["x_train"],
        embeddings["y_train"],
        embeddings["x_test"],
        embeddings["y_test"],
    )

# Use the notebook-level `k` from the configuration cell rather than repeating the literal.
metrics = eval_knn(x_train, y_train, x_test, y_test, k=k, scale=True)

+--------------------+---------------------+---------------------+------------------+------------------+--------------+--------------+
|   overall_accuracy |   overall_precision |   average_precision |   overall_recall |   average_recall |   overall_f1 |   average_f1 |
|            0.78963 |             0.78963 |            0.784608 |          0.78963 |         0.782498 |      0.78963 |      0.78098 |
+--------------------+---------------------+---------------------+------------------+------------------+--------------+--------------+


In [None]:
# LP eval
# Read the cached features back from disk. np.load returns a lazy NpzFile that
# keeps the archive open, so use it as a context manager: the arrays are
# materialised inside the block and the file handle is closed afterwards.
filename = os.path.join(output_dir, "eurosat-imagestats.npz")
with np.load(filename) as embeddings:
    x_train, y_train, x_test, y_test = (
        embeddings["x_train"],
        embeddings["y_train"],
        embeddings["x_test"],
        embeddings["y_test"],
    )

# Linear-probe evaluation on the cached features.
metrics = eval_linear_probe(x_train, y_train, x_test, y_test, scale=True)

+--------------------+---------------------+---------------------+------------------+------------------+--------------+--------------+
|   overall_accuracy |   overall_precision |   average_precision |   overall_recall |   average_recall |   overall_f1 |   average_f1 |
|           0.740741 |            0.740741 |            0.733893 |         0.740741 |         0.731843 |     0.740741 |     0.728071 |
+--------------------+---------------------+---------------------+------------------+------------------+--------------+--------------+


### BigEarthNet-L

In [7]:
# BigEarthNet data module; the archive is expected to be present already (download=False).
dm = LandsatDataModule(
    name="bigearthnet",
    root=root,
    batch_size=128,
    num_workers=8,
    download=False,
)
dm.prepare_data()
dm.setup("fit")

# Image-statistics feature extractor, placed on the configured device.
model = ImageStatisticsModel().to(device)

Archive ../data/bigearthnet-l.tar.gz already exists. Skipping download.
Archive ../data/bigearthnet-l.tar.gz already exists. Skipping download.


In [None]:
# Features and labels for the training split.
dm.setup("fit")
x_train, y_train = extract_features(model, dm.train_dataloader(), device)

# Features and labels for the test split.
dm.setup("test")
x_test, y_test = extract_features(model, dm.test_dataloader(), device)

# Cache everything in one .npz archive; labels are stored as int16.
filename = os.path.join(output_dir, "bigearthnet-imagestats.npz")
arrays = {
    "x_train": x_train,
    "y_train": y_train.astype(np.int16),
    "x_test": x_test,
    "y_test": y_test.astype(np.int16),
}
np.savez(filename, **arrays)

In [None]:
# KNN eval
# Read the cached features back from disk. np.load returns a lazy NpzFile that
# keeps the archive open, so use it as a context manager: the arrays are
# materialised inside the block and the file handle is closed afterwards.
filename = os.path.join(output_dir, "bigearthnet-imagestats.npz")
with np.load(filename) as embeddings:
    x_train, y_train, x_test, y_test = (
        embeddings["x_train"],
        embeddings["y_train"],
        embeddings["x_test"],
        embeddings["y_test"],
    )

# Use the notebook-level `k` from the configuration cell rather than repeating the literal.
# multilabel=True is forwarded unchanged (presumably labels are multi-hot here; confirm in eval_knn).
metrics = eval_knn(x_train, y_train, x_test, y_test, k=k, scale=True, multilabel=True)

In [None]:
# LP eval
# Read the cached features back from disk. np.load returns a lazy NpzFile that
# keeps the archive open, so use it as a context manager: the arrays are
# materialised inside the block and the file handle is closed afterwards.
filename = os.path.join(output_dir, "bigearthnet-imagestats.npz")
with np.load(filename) as embeddings:
    x_train, y_train, x_test, y_test = (
        embeddings["x_train"],
        embeddings["y_train"],
        embeddings["x_test"],
        embeddings["y_test"],
    )

# Linear-probe evaluation on the cached features.
# multilabel=True is forwarded unchanged (presumably labels are multi-hot here; confirm in eval_linear_probe).
metrics = eval_linear_probe(x_train, y_train, x_test, y_test, scale=True, multilabel=True)

### LC100-L

In [2]:
# LC100 data module; the archive is expected to be present already (download=False).
dm = LandsatDataModule(
    name="lc100",
    root=root,
    batch_size=1,
    num_workers=8,
    download=False,
)
dm.prepare_data()
dm.setup("fit")

# Image-statistics feature extractor, placed on the configured device.
model = ImageStatisticsModel().to(device)

In [3]:
# Features and labels for the training split.
dm.setup("fit")
x_train, y_train = extract_features(model, dm.train_dataloader(), device)

# Features and labels for the test split.
dm.setup("test")
x_test, y_test = extract_features(model, dm.test_dataloader(), device)

# Cache everything in one .npz archive; labels are stored as int16.
filename = os.path.join(output_dir, "lc100-imagestats.npz")
arrays = {
    "x_train": x_train,
    "y_train": y_train.astype(np.int16),
    "x_test": x_test,
    "y_test": y_test.astype(np.int16),
}
np.savez(filename, **arrays)

100%|██████████| 5181/5181 [09:48<00:00,  8.80it/s]
100%|██████████| 1726/1726 [03:22<00:00,  8.53it/s]


In [4]:
# KNN eval
# Read the cached features back from disk. np.load returns a lazy NpzFile that
# keeps the archive open, so use it as a context manager: the arrays are
# materialised inside the block and the file handle is closed afterwards.
filename = os.path.join(output_dir, "lc100-imagestats.npz")
with np.load(filename) as embeddings:
    x_train, y_train, x_test, y_test = (
        embeddings["x_train"],
        embeddings["y_train"],
        embeddings["x_test"],
        embeddings["y_test"],
    )

# Use the notebook-level `k` from the configuration cell rather than repeating the literal.
# multilabel=True is forwarded unchanged (presumably labels are multi-hot here; confirm in eval_knn).
metrics = eval_knn(x_train, y_train, x_test, y_test, k=k, scale=True, multilabel=True)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


+--------------------+---------------------+---------------------+------------------+------------------+--------------+--------------+---------------+---------------+
|   overall_accuracy |   overall_precision |   average_precision |   overall_recall |   average_recall |   overall_f1 |   average_f1 |   overall_map |   average_map |
|         0.00637312 |            0.593792 |            0.365575 |         0.576391 |         0.355167 |     0.584962 |     0.357161 |      0.562911 |      0.366211 |
+--------------------+---------------------+---------------------+------------------+------------------+--------------+--------------+---------------+---------------+


In [5]:
# LP eval
# Read the cached features back from disk. np.load returns a lazy NpzFile that
# keeps the archive open, so use it as a context manager: the arrays are
# materialised inside the block and the file handle is closed afterwards.
filename = os.path.join(output_dir, "lc100-imagestats.npz")
with np.load(filename) as embeddings:
    x_train, y_train, x_test, y_test = (
        embeddings["x_train"],
        embeddings["y_train"],
        embeddings["x_test"],
        embeddings["y_test"],
    )

# Linear-probe evaluation on the cached features.
# multilabel=True is forwarded unchanged (presumably labels are multi-hot here; confirm in eval_linear_probe).
metrics = eval_linear_probe(x_train, y_train, x_test, y_test, scale=True, multilabel=True)

+--------------------+---------------------+---------------------+------------------+------------------+--------------+--------------+---------------+---------------+
|   overall_accuracy |   overall_precision |   average_precision |   overall_recall |   average_recall |   overall_f1 |   average_f1 |   overall_map |   average_map |
|         0.00521437 |            0.622087 |            0.367012 |         0.561877 |           0.3292 |     0.590451 |     0.335824 |      0.622671 |      0.368448 |
+--------------------+---------------------+---------------------+------------------+------------------+--------------+--------------+---------------+---------------+


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
