In [1]:
import os

# Move the working directory one level up so that the later `src...` imports
# and the relative `data/...` paths in this notebook resolve.
# A bare relative chdir is not idempotent: re-running this cell in a live
# kernel would climb one more directory each time. Remember the directory the
# kernel started in (only on the first run) and always move relative to it.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))

In [2]:
from src.datasets import MARIDA, MADOS, ShipS2AIS, SWED, EarthSurfaceWater
from src.datasets.stats import RunningStatsButFast
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch

In [3]:
# Helper originally used to find which samples in the MARIDA and MADOS
# datasets have partial NaNs.
def check_nans(ds):
    """Return the indices of the samples in ``ds`` whose image contains a NaN.

    ``ds`` must support ``len()`` and integer indexing, and every sample must
    expose its image tensor under the ``"image"`` key. Indices are returned in
    ascending order; a progress bar is shown while scanning.
    """
    return [
        idx
        for idx in tqdm(range(len(ds)))
        if torch.isnan(ds[idx]["image"]).any()
    ]

In [3]:
# Running mean / std of the "image" tensors over the MARIDA train split.
# RunningStatsButFast receives 11 and (0, 2, 3) -- presumably the channel count
# and the dimensions reduced over (TODO confirm against src.datasets.stats).
ds = MARIDA(split="train", root="data/benchmark-datasets/marida/")
dl = DataLoader(dataset=ds, num_workers=8, batch_size=16)
rs = RunningStatsButFast(11, (0, 2, 3))
for batch in tqdm(dl):
    # Feed every batch of images into the accumulator.
    rs(batch["image"])
print(rs.mean, rs.std, sep="\n")

  0%|          | 0/44 [00:00<?, ?it/s]

100%|██████████| 44/44 [00:01<00:00, 26.19it/s]

tensor([0.0520, 0.0478, 0.0406, 0.0316, 0.0297, 0.0346, 0.0388, 0.0344, 0.0392,
        0.0236, 0.0159])
tensor([0.0473, 0.0474, 0.0470, 0.0497, 0.0495, 0.0646, 0.0759, 0.0712, 0.0825,
        0.0511, 0.0352])





In [4]:
# Running mean / std of the "image" tensors over the MADOS train split.
# RunningStatsButFast receives 11 and (0, 2, 3) -- presumably the channel count
# and the dimensions reduced over (TODO confirm against src.datasets.stats).
ds = MADOS(split="train", root="data/benchmark-datasets/mados/")
dl = DataLoader(dataset=ds, num_workers=8, batch_size=16)
rs = RunningStatsButFast(11, (0, 2, 3))
for batch in tqdm(dl):
    # Feed every batch of images into the accumulator.
    rs(batch["image"])
print(rs.mean, rs.std, sep="\n")

100%|██████████| 90/90 [00:03<00:00, 22.88it/s]

tensor([0.0581, 0.0521, 0.0437, 0.0356, 0.0340, 0.0367, 0.0399, 0.0356, 0.0396,
        0.0267, 0.0197])
tensor([0.0323, 0.0344, 0.0355, 0.0376, 0.0377, 0.0497, 0.0586, 0.0554, 0.0640,
        0.0419, 0.0300])





In [5]:
# Running mean / std of the "image" tensors over the ShipS2AIS train split,
# loaded with bands="all".
# RunningStatsButFast receives 5 and (0, 2, 3) -- presumably the channel count
# and the dimensions reduced over (TODO confirm against src.datasets.stats).
ds = ShipS2AIS(
    bands="all",
    split="train",
    root="data/benchmark-datasets/ship-s2-ais/",
)
dl = DataLoader(dataset=ds, num_workers=8, batch_size=16)
rs = RunningStatsButFast(5, (0, 2, 3))
for batch in tqdm(dl):
    # Feed every batch of images into the accumulator.
    rs(batch["image"])
print(rs.mean, rs.std, sep="\n")

  0%|          | 1/716 [00:00<03:53,  3.06it/s]

100%|██████████| 716/716 [00:07<00:00, 92.39it/s] 


tensor([1570.0336, 1171.0029,  909.0641,  684.6077,  575.1781])
tensor([746.2094, 672.5553, 673.0739, 815.1528, 666.2916])


In [7]:
# Running mean / std of the "image" tensors over the SWED train split.
# RunningStatsButFast receives 12 and (0, 2, 3) -- presumably the channel count
# and the dimensions reduced over (TODO confirm against src.datasets.stats).
ds = SWED(split="train", root="data/benchmark-datasets/SWED/")
dl = DataLoader(dataset=ds, num_workers=8, batch_size=16)
rs = RunningStatsButFast(12, (0, 2, 3))
for batch in tqdm(dl):
    # Feed every batch of images into the accumulator.
    rs(batch["image"])
print(rs.mean, rs.std, sep="\n")

100%|██████████| 1673/1673 [01:46<00:00, 15.66it/s]

tensor([ 560.0963,  669.8031,  938.8026, 1104.3842, 1374.6296, 1826.4390,
        2012.0269, 2095.9023, 2159.6445, 2191.1631, 2105.7415, 1568.9823])
tensor([ 678.8931,  748.4851,  918.1321, 1278.0764, 1362.1965, 1479.4902,
        1598.6714, 1661.6722, 1692.9138, 1803.0081, 1924.1908, 1635.6689])





In [8]:
# Running mean / std of the "image" tensors over the EarthSurfaceWater train
# split, loaded with pad_sizes=False and iterated one sample per batch
# (presumably because unpadded images differ in size -- TODO confirm).
# RunningStatsButFast receives 6 and (0, 2, 3) -- presumably the channel count
# and the dimensions reduced over (TODO confirm against src.datasets.stats).
ds = EarthSurfaceWater(
    pad_sizes=False,
    split="train",
    root="data/benchmark-datasets/earth_surface_water",
)
dl = DataLoader(dataset=ds, num_workers=8, batch_size=1)
rs = RunningStatsButFast(6, (0, 2, 3))
for batch in tqdm(dl):
    # Feed every single-sample batch into the accumulator.
    rs(batch["image"])
print(rs.mean, rs.std, sep="\n")

100%|██████████| 64/64 [00:01<00:00, 43.29it/s]

tensor([ 771.4490,  989.0422,  975.8994, 2221.6182, 1854.8079, 1328.8887])
tensor([ 738.8903,  812.4620, 1000.6935, 1314.1964, 1384.8275, 1225.1549])



