In [1]:
from huggingface_hub import notebook_login, upload_folder, HfApi, upload_large_folder


In [2]:
notebook_login()

In [3]:
ORG = "e4e-mangrove-monitoring"  # or your HF org name

# Each dataset gets its own repo
REPOS = {
    "mangrove_05m":  f"{ORG}/E4E_Mangrove_05m",
    "landcover":     f"{ORG}/E4E_Landcover_ai_v1",
    "ensemble_cvat": f"{ORG}/E4E_Mangrove_Ensemble_CVAT",
    "weights":       f"{ORG}/E4E_Mangrove_Weights",
}

# Create all repos (skip if they already exist)
api = HfApi()
for name, repo_id in REPOS.items():
    repo_type = "model" if name == "weights" else "dataset"
    api.create_repo(repo_id, repo_type=repo_type, private=True, exist_ok=True)
    print(f"Ready: {repo_id} ({repo_type})")

print(f"\nAll repos created under {ORG}")

Ready: e4e-mangrove-monitoring/E4E_Mangrove_05m (dataset)
Ready: e4e-mangrove-monitoring/E4E_Landcover_ai_v1 (dataset)
Ready: e4e-mangrove-monitoring/E4E_Mangrove_Ensemble_CVAT (dataset)
Ready: e4e-mangrove-monitoring/E4E_Mangrove_Weights (model)

All repos created under e4e-mangrove-monitoring


## 1. Mangrove 0.5m Dataset (~574 MB)

Raw 0.5m drone imagery + binary mangrove labels.

| File | Description |
|------|-------------|
| `512dataset_images.npy` | 573 tiles, 512x512, 3-channel |
| `512dataset_labels.npy` | 573 tiles, 512x512, values 0/1/255 |
| `class_weights.json` | Class balancing weights |
| `train_indices.npy` | Training split indices |
| `val_indices.npy` | Validation split indices |

In [9]:
upload_folder(
    folder_path="../data/0_5m",
    path_in_repo=".",
    repo_id=REPOS["mangrove_05m"],
    repo_type="dataset",
    ignore_patterns=["cvat_export/**"],  # exclude CVAT export (separate repo)
)
print(f"Uploaded: {REPOS['mangrove_05m']}")

Processing Files (2 / 4):  99%|█████████▊|  593MB /  601MB, 3.07MB/s  
New Data Upload: 100%|██████████|  587MB /  587MB, 3.03MB/s  


Uploaded: e4e-mangrove-monitoring/E4E_Mangrove_05m


## 2. Landcover.ai v1 (~2.5 GB)

Landcover.ai dataset tiled to 512x512 for training.

| Contents | Description |
|----------|-------------|
| `images/` | 41 raw GeoTIFF images |
| `masks/` | 41 corresponding mask TIFFs |
| `output/` | 21,348 tiled 512x512 JPG/PNG pairs |
| `train.txt`, `val.txt`, `test.txt` | Split definitions |
| `class_weights.json` | Class balancing weights |

In [4]:
import shutil
from pathlib import Path

output_dir = Path("../data/landcover.ai.v1/output")
zip_path = Path("../data/landcover.ai.v1/output_tiles")

if not zip_path.with_suffix(".zip").exists():
    print("Zipping output/ folder (21k tiles)...")
    shutil.make_archive(str(zip_path), "zip", str(output_dir))
    zip_size = zip_path.with_suffix(".zip").stat().st_size / (1024**2)
    print(f"Created: output_tiles.zip ({zip_size:.0f} MB)")
else:
    zip_size = zip_path.with_suffix(".zip").stat().st_size / (1024**2)
    print(f"Already exists: output_tiles.zip ({zip_size:.0f} MB)")

# Upload everything EXCEPT the raw output/ folder (replaced by zip)
upload_folder(
    folder_path="../data/landcover.ai.v1",
    path_in_repo=".",
    repo_id=REPOS["landcover"],
    repo_type="dataset",
    ignore_patterns=["output/**"],
)
print(f"Uploaded: {REPOS['landcover']}")

Zipping output/ folder (21k tiles)...
Created: output_tiles.zip (972 MB)


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

Uploaded: e4e-mangrove-monitoring/E4E_Landcover_ai_v1


## 3. Ensemble CVAT Export (~220 MB)

6-class gated ensemble predictions, ready for CVAT import.

| Contents | Description |
|----------|-------------|
| `images/` | 573 tile PNGs (for CVAT task upload) |
| `cvat_ensemble_masks.zip` | Segmentation Mask 1.1 annotation ZIP |

In [5]:
upload_folder(
    folder_path="../data/cvat_export",
    path_in_repo=".",
    repo_id=REPOS["ensemble_cvat"],
    repo_type="dataset",
)
print(f"Uploaded: {REPOS['ensemble_cvat']}")

It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`hf upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

Uploaded: e4e-mangrove-monitoring/E4E_Mangrove_Ensemble_CVAT


## 4. Model Weights (~255 MB)

Uploaded as a HF **Model** repo (not dataset).

| File | Description |
|------|-------------|
| `human_segformer.pth` | SegFormer trained on Landcover.ai (5-class) |
| `mangrove_segformer.pth` | SegFormer trained on mangrove data (binary) |

In [6]:
upload_folder(
    folder_path="../weights",
    path_in_repo=".",
    repo_id=REPOS["weights"],
    repo_type="model",
)
print(f"Uploaded: {REPOS['weights']}")

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

Uploaded: e4e-mangrove-monitoring/E4E_Mangrove_Weights
