data/olmoearth_evals/tasks/lfmc_base.yaml (2 changes: 1 addition & 1 deletion)
@@ -46,7 +46,7 @@ data:
 input_mapping:
 eval_task:
 label: "targets"
-batch_size: 16
+batch_size: 8
 num_workers: 32
 default_config:
 transforms:
data/olmoearth_evals/tasks/mangrove_base.yaml (2 changes: 1 addition & 1 deletion)
@@ -47,7 +47,7 @@ data:
 input_mapping:
 eval_task:
 label: "targets"
-batch_size: 16
+batch_size: 8
 num_workers: 32
 default_config:
 transforms:
data/olmoearth_evals/tasks/sentinel2_vessel_length.yaml (2 changes: 1 addition & 1 deletion)
@@ -44,7 +44,7 @@ data:
 input_mapping:
 eval_task:
 info: "targets"
-batch_size: 32
+batch_size: 16
 num_workers: 16
 default_config:
 transforms:
data/olmoearth_evals/tasks/sentinel2_vessel_type.yaml (2 changes: 1 addition & 1 deletion)
@@ -45,7 +45,7 @@ data:
 input_mapping:
 eval_task:
 info: "targets"
-batch_size: 32
+batch_size: 16
 num_workers: 16
 default_config:
 transforms:
rslp/olmoearth_evals/clay.py (8 changes: 6 additions & 2 deletions)
@@ -27,11 +27,13 @@ def get_model(
     task_timesteps: int = 1,
 ) -> torch.nn.Module:
     """Get appropriate Clay model."""
+    # Clay resizes to 128x128 and always has 16x16 output feature map.
+    downsample_factor = input_size // 16
     if task_type == "segment":
         decoders = dict(
             eval_task=[
                 UNetDecoder(
-                    in_channels=[[8, 1024]],
+                    in_channels=[[downsample_factor, 1024]],
                     out_channels=task_channels,
                     conv_layers_per_resolution=2,
                     num_channels={8: 512, 4: 512, 2: 256, 1: 128},
@@ -53,7 +55,7 @@ def get_model(
         decoders = dict(
             eval_task=[
                 FasterRCNN(
-                    downsample_factors=[8],
+                    downsample_factors=[downsample_factor],
                     num_channels=1024,
                     num_classes=task_channels,
                     anchor_sizes=[[32]],
@@ -110,6 +112,7 @@ def get_model(
                 encoder=Clay(
                     model_size=ClaySize.LARGE,
                     modality=clay_modality,
+                    do_resizing=True,
                 ),
                 image_keys=image_keys,
             ),
@@ -137,6 +140,7 @@ def get_model(
                 encoder=Clay(
                     model_size=ClaySize.LARGE,
                     modality=clay_modality,
+                    do_resizing=True,
                 ),
                 image_keys=image_keys,
             ),
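A quick sketch of the arithmetic behind the new downsample_factor in clay.py, based only on the comment in the hunk above (the helper name below is made up for illustration):

```python
def clay_downsample_factor(input_size: int) -> int:
    """Factor between the task's input resolution and Clay's feature map.

    Per the comment in the diff, Clay resizes inputs to 128x128 and always
    produces a 16x16 feature map, so relative to the original input the
    features are input_size // 16 times coarser.
    """
    return input_size // 16


# The previously hard-coded values only matched 128x128 inputs:
assert clay_downsample_factor(128) == 8   # old in_channels=[[8, 1024]], downsample_factors=[8]
assert clay_downsample_factor(256) == 16  # would have been inconsistent before this change
```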
rslp/olmoearth_evals/croma.py (8 changes: 7 additions & 1 deletion)
@@ -7,6 +7,7 @@
 from rslearn.models.faster_rcnn import FasterRCNN
 from rslearn.models.multitask import MultiTaskModel
 from rslearn.models.pooling_decoder import PoolingDecoder
+from rslearn.models.resize_features import ResizeFeatures
 from rslearn.models.simple_time_series import SimpleTimeSeries
 from rslearn.models.unet import UNetDecoder
 from rslearn.train.tasks.classification import ClassificationHead
@@ -47,6 +48,7 @@ def get_model(
                     out_channels=task_channels,
                     conv_layers_per_resolution=2,
                     num_channels={8: 512, 4: 512, 2: 256, 1: 128},
+                    original_size_to_interpolate=[input_size, input_size],
                 ),
                 SegmentationHead(),
             ]
@@ -64,12 +66,14 @@
     elif task_type == "detect":
         decoders = dict(
             eval_task=[
+                # CROMA patch_size = 8
+                ResizeFeatures(out_sizes=[(input_size // 8, input_size // 8)]),
                 FasterRCNN(
                     downsample_factors=[8],
                     num_channels=embedding_size,
                     num_classes=task_channels,
                     anchor_sizes=[[32]],
-                )
+                ),
             ]
         )
     elif task_type == "classify":
@@ -123,6 +127,7 @@ def get_model(
                     size=croma_size,
                     modality=modality,
                     image_resolution=input_size,
+                    do_resizing=True,
                 ),
                 image_keys=image_keys,
             ),
@@ -151,6 +156,7 @@ def get_model(
                     size=croma_size,
                     modality=modality,
                     image_resolution=input_size,
+                    do_resizing=True,
                 ),
                 image_keys=image_keys,
             ),
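For context, a minimal sketch of what the added ResizeFeatures step presumably does before FasterRCNN: interpolate each feature map to a fixed spatial size so the detection head sees features at the downsample factor it was configured for. This is only an illustrative stand-in, not rslearn's actual implementation, and the tensor shapes are hypothetical.

```python
import torch
import torch.nn.functional as F


def resize_features(
    features: list[torch.Tensor], out_sizes: list[tuple[int, int]]
) -> list[torch.Tensor]:
    """Illustrative stand-in for rslearn's ResizeFeatures: interpolate each
    (N, C, H, W) feature map to the requested spatial size."""
    return [
        F.interpolate(feat, size=size, mode="bilinear", align_corners=False)
        for feat, size in zip(features, out_sizes)
    ]


# CROMA uses a patch size of 8, so for an input_size x input_size image the
# features are resized to (input_size // 8, input_size // 8), which keeps them
# consistent with FasterRCNN's downsample_factors=[8].
input_size = 128  # hypothetical task input size
feat = torch.randn(2, 768, 15, 15)  # hypothetical encoder output after internal resizing
(out,) = resize_features([feat], [(input_size // 8, input_size // 8)])
assert out.shape[-2:] == (16, 16)
```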
rslp/olmoearth_evals/launch.py (2 changes: 1 addition & 1 deletion)
@@ -48,7 +48,7 @@ def launch(
     image_name: str,
     project: str,
     priority: str = "high",
-    clusters: list[str] = ["ai2/jupiter", "ai2/ceres", "ai2/titan"],
+    clusters: list[str] = ["ai2/jupiter", "ai2/ceres"],
     test: bool = False,
 ) -> None:
     """Launch OlmoEarth fine-tuning evaluation.
rslp/olmoearth_evals/terramind.py (8 changes: 7 additions & 1 deletion)
@@ -6,6 +6,7 @@
 from rslearn.models.faster_rcnn import FasterRCNN
 from rslearn.models.multitask import MultiTaskModel
 from rslearn.models.pooling_decoder import PoolingDecoder
+from rslearn.models.resize_features import ResizeFeatures
 from rslearn.models.simple_time_series import SimpleTimeSeries
 from rslearn.models.terramind import Terramind, TerramindNormalize, TerramindSize
 from rslearn.models.unet import UNetDecoder
@@ -47,6 +48,7 @@ def get_model(
                     out_channels=task_channels,
                     conv_layers_per_resolution=2,
                     num_channels={16: 512, 8: 512, 4: 512, 2: 256, 1: 128},
+                    original_size_to_interpolate=[input_size, input_size],
                 ),
                 SegmentationHead(),
             ]
@@ -64,12 +66,14 @@
     elif task_type == "detect":
         decoders = dict(
             eval_task=[
+                # TerraMind patch_size = 16
+                ResizeFeatures(out_sizes=[(input_size // 16, input_size // 16)]),
                 FasterRCNN(
                     downsample_factors=[16],
                     num_channels=embedding_size,
                     num_classes=task_channels,
                     anchor_sizes=[[32]],
-                )
+                ),
             ]
         )
     elif task_type == "classify":
@@ -116,6 +120,7 @@ def get_model(
                 encoder=Terramind(
                     model_size=terramind_size,
                     modalities=modalities,
+                    do_resizing=True,
                 ),
                 image_keys=image_keys,
             ),
@@ -143,6 +148,7 @@ def get_model(
                 encoder=Terramind(
                     model_size=terramind_size,
                     modalities=modalities,
+                    do_resizing=True,
                 ),
                 image_keys=image_keys,
             ),
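The segment-branch additions follow the same motivation: with do_resizing=True the encoder works at its own preferred resolution, and original_size_to_interpolate (the parameter name used in the diff) tells the UNetDecoder what spatial size the predictions ultimately need to match. A toy sketch of that final step, under the assumption that it amounts to a bilinear resize of the decoder output back to the input size:

```python
import torch
import torch.nn.functional as F

# Toy illustration only: upsample decoder logits back to the original input
# size; the class count and intermediate grid size here are hypothetical.
input_size = 128
logits = torch.randn(2, 14, 64, 64)  # hypothetical decoder output at a coarser grid
resized = F.interpolate(logits, size=(input_size, input_size), mode="bilinear", align_corners=False)
assert resized.shape == (2, 14, 128, 128)
```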