# Join the extractions DataFrames

In [4]:
from pathlib import Path
from tqdm import tqdm
import geopandas as gpd
import pandas as pd
import warnings

# Silence every warning for the rest of the session (applies globally).
warnings.simplefilter('ignore')

# Columns kept from each dataset; everything else is dropped before grouping.
required_columns = ["sample_id", "ref_id", "ewoc_code", "valid_time", "extract", "h3_l3_cell", "geometry"]
dataset_folder = Path("/vitodata/worldcereal_data/EXTRACTIONS/all_datasets/")
target_folder = dataset_folder / "grouped_datasets"
# Writing a parquet file does not create missing parent directories, so make
# sure the output folder exists before the first group is flushed.
target_folder.mkdir(parents=True, exist_ok=True)

# Process the datasets in a stable, name-based order.
all_datasets = sorted(dataset_folder.glob("*.geoparquet"), key=lambda x: x.stem)

# Soft cap (in MB of in-memory size) per group: a group is written out as soon
# as its accumulated size exceeds this value, so written groups can end up
# somewhat larger than the cap.
maximum_size_mb = 500

dataframe_groups = []
group_memory_usage_mb = 0
group = []
group_idx = 0

for f_path in tqdm(all_datasets):
    # Make sure all the datasets are in the same CRS
    df = gpd.read_parquet(f_path).to_crs(epsg=4326)
    # Only keep the relevant columns necessary for extractions
    df = df[required_columns]
    # Keep only the rows whose ewoc_code lies strictly between 1100000000 and
    # 1200000000 (per the original note: crop-type rather than land-cover).
    df = df[(df.ewoc_code > 1100000000) & (df.ewoc_code < 1200000000)]

    group.append(df)
    group_memory_usage_mb += df.memory_usage(deep=True).sum() / 1e6
    print('After adding dataset: {} group size is: {:.1f} MB'.format(f_path.stem, group_memory_usage_mb))

    # Flush when the cap is exceeded, and always on the last file so that the
    # trailing (possibly small) group is not lost.
    if (group_memory_usage_mb > maximum_size_mb) or (f_path == all_datasets[-1]):
        print('Writing group with size: ', group_memory_usage_mb, 'MB to disk with ', len(group), 'datasets')
        # Use a separate name for the merged frame so `group` always stays a
        # list of per-dataset frames.
        merged_group = pd.concat(group, axis=0)
        merged_group.to_parquet(
            target_folder / f"group_{group_idx}.geoparquet"
        )
        # Release the merged frame and reset the accumulators for the next group
        del merged_group
        group = []
        group_memory_usage_mb = 0
        group_idx += 1

  0%|          | 0/132 [00:00<?, ?it/s]

  2%|▏         | 3/132 [00:00<00:10, 12.87it/s]

After adding dataset: 00_2017_AF_One-Acre-Fund-MEL_POINT_110 group size is: 0.3 MB
After adding dataset: 00_2019_KEN_NHI-CROP-HARVEST_POINT_100 group size is: 0.3 MB
After adding dataset: 01_2017_KEN_IIASA_POINT_100 group size is: 0.3 MB
After adding dataset: 01_2018_AF_One-Acre-Fund-MEL_POINT_110 group size is: 0.9 MB


  6%|▌         | 8/132 [00:00<00:07, 17.15it/s]

After adding dataset: 01_2019_KEN_FAO-WAPOR-1_POLY_111 group size is: 1.0 MB
After adding dataset: 01_2019_KEN_RadiantEarth-01_POLY_101 group size is: 1.0 MB
After adding dataset: 01_2019_KEN_RadiantEarth-01_POLY_111 group size is: 1.0 MB
After adding dataset: 01_2021_RWA_FAO-WAPOR-Muvu_POLY_111 group size is: 1.2 MB
After adding dataset: 02_2017_KEN_IIASA_POLY_100 group size is: 1.3 MB


  9%|▉         | 12/132 [00:00<00:06, 17.80it/s]

After adding dataset: 02_2018_KEN_IIASA_POLY_100 group size is: 1.7 MB
After adding dataset: 02_2018_UGA_WFP-field-survey_POLY_110 group size is: 1.9 MB
After adding dataset: 02_2020_GO_NHI-CROP-HARVEST_POINT_100 group size is: 1.9 MB
After adding dataset: 02_2020_RWA_FAO-WAPOR-2_POINT_111 group size is: 1.9 MB


 13%|█▎        | 17/132 [00:00<00:06, 18.05it/s]

After adding dataset: 02_2020_RWA_FAO-WAPOR-Akagera_POINT_111 group size is: 2.0 MB
After adding dataset: 02_2021_RWA_FAO-WAPOR-Akagera_POLY_111 group size is: 2.0 MB
After adding dataset: 02_2021_RWA_FAO-WAPOR-Yan_POLY_111 group size is: 2.0 MB
After adding dataset: 03_2017_UGA_RadiantEarth-01_POLY_110 group size is: 2.1 MB


 17%|█▋        | 22/132 [00:01<00:05, 19.07it/s]

After adding dataset: 03_2017_UGA_WFP-field-survey_POLY_111 group size is: 2.4 MB
After adding dataset: 03_2018_TZA_RadiantEarth-01_POLY_110 group size is: 2.5 MB
After adding dataset: 03_2019_TZA_CIMMYT-DM1_POINT_110 group size is: 2.5 MB
After adding dataset: 03_2019_TZA_CIMMYT-DM2_POINT_110 group size is: 2.5 MB
After adding dataset: 05_2017_CMR_CGIAR-GARDIAN_POINT_110 group size is: 2.6 MB


 20%|█▉        | 26/132 [00:01<00:05, 18.47it/s]

After adding dataset: 06_2017_SSD_ESA-project-Sen2Agri_POLY_111 group size is: 2.9 MB
After adding dataset: 06_2017_SSD_WFP-field-survey_POLY_111 group size is: 3.2 MB
After adding dataset: 07_2023_IDN_vito-campaign_POLY_110 group size is: 3.3 MB
After adding dataset: 07_2023_IDN_vito-manual-points_POINT_100 group size is: 3.3 MB


 23%|██▎       | 30/132 [00:01<00:05, 17.85it/s]

After adding dataset: 08_2017_SSD_ESA-project-Sen2Agri_POINT_100 group size is: 3.3 MB
After adding dataset: 08_2017_SSD_ESA-project-Sen2Agri_POLY_100 group size is: 3.3 MB
After adding dataset: 08_2018_ETH_FAO-WAPOR-2_POLY_111 group size is: 3.3 MB
After adding dataset: 08_2018_SSD_WFP-field-survey_POLY_110 group size is: 3.7 MB


 26%|██▌       | 34/132 [00:01<00:05, 16.82it/s]

After adding dataset: 08_2019_AF_DE-WA-VAL1_POINT_100 group size is: 3.7 MB
After adding dataset: 08_2019_AF_NHI-CROP-HARVEST_POLY_100 group size is: 3.7 MB
After adding dataset: 08_2021_AF_DE-WA-TRAIN1_POLY_100 group size is: 3.7 MB
After adding dataset: 08_2021_LKA_FAO-WAPOR-1_POLY_111 group size is: 3.8 MB


 27%|██▋       | 36/132 [00:02<00:06, 15.46it/s]

After adding dataset: 08_2021_LKA_FAO-WAPOR-2_POLY_111 group size is: 4.0 MB
After adding dataset: 08_2022_AF_DE-WA-TRAIN1_POLY_100 group size is: 4.0 MB
After adding dataset: 09_2019_AF_DE-WA-TRAIN1_POLY_100 group size is: 4.0 MB


 30%|███       | 40/132 [00:02<00:06, 14.18it/s]

After adding dataset: 09_2019_AF_DE-WA-TRAIN2_POLY_100 group size is: 4.0 MB
After adding dataset: 10_2017_NGA_CGIAR-GARDIAN_POINT_110 group size is: 4.2 MB
After adding dataset: 11_2017_BFA_JECAM-CIRAD_POLY_111 group size is: 4.5 MB


 33%|███▎      | 43/132 [00:02<00:05, 16.23it/s]

After adding dataset: 11_2018_BFA_JECAM-CIRAD_POLY_111 group size is: 4.7 MB
After adding dataset: 11_2018_ETH_FAO-WAPOR-1_POLY_111 group size is: 4.7 MB
After adding dataset: 11_2019_AF_DE-WA-VAL2_POINT_100 group size is: 4.7 MB
After adding dataset: 11_2020_ETH_NHI-CROP-HARVEST_POLY_100 group size is: 4.7 MB


 34%|███▍      | 45/132 [00:02<00:05, 15.89it/s]

After adding dataset: 11_2020_NGA_WFP-field-survey_POLY_111 group size is: 5.5 MB
After adding dataset: 12_2020_BRA_INPE-LEM-AUG_POLY_110 group size is: 5.9 MB


 37%|███▋      | 49/132 [00:03<00:05, 14.30it/s]

After adding dataset: 12_2020_BRA_INPE-LEM-FEB_POLY_110 group size is: 6.5 MB
After adding dataset: 12_2020_BRA_INPE-LEM-MAR_POLY_110 group size is: 6.9 MB
After adding dataset: 12_2021_AF_DE-WA-VAL1_POINT_100 group size is: 6.9 MB
After adding dataset: 12_2022_AF_DE-WA-VAL1_POINT_100 group size is: 6.9 MB


 41%|████      | 54/132 [00:03<00:04, 16.67it/s]

After adding dataset: 13_2018_MLI_NHI-CROP-HARVEST_POLY_110 group size is: 7.0 MB
After adding dataset: 13_2018_MWI_WFP-field-survey_POLY_110 group size is: 7.3 MB
After adding dataset: 13_2019_MLI_NHI-CROP-HARVEST_POLY_110 group size is: 7.3 MB
After adding dataset: 14_2018_NER_FAO-WAPOR-1_POLY_111 group size is: 7.3 MB


 42%|████▏     | 56/132 [00:03<00:04, 16.37it/s]

After adding dataset: 14_2018_SEN_JECAM-CIRAD_POLY_111 group size is: 7.5 MB
After adding dataset: 14_2020_SDN_FAO-WAPOR-1_POLY_110 group size is: 7.6 MB
After adding dataset: 14_2020_SDN_FAO-WAPOR-2_POLY_111 group size is: 7.7 MB


 45%|████▌     | 60/132 [00:03<00:04, 15.35it/s]

After adding dataset: 15_2019_SEN_JECAM_CIRAD_POLY_111 group size is: 7.8 MB
After adding dataset: 16_2020_MOZ_WFP-field-survey_POLY_111 group size is: 7.9 MB
After adding dataset: 16_2021_SEN_FAO-WAPOR-1_POLY_111 group size is: 8.0 MB
After adding dataset: 16_2021_SEN_FAO-WAPOR-2_POLY_111 group size is: 8.0 MB


 49%|████▉     | 65/132 [00:03<00:03, 16.94it/s]

After adding dataset: 19_2020_ZWE_NHI-CROP-HARVEST_POINT_110 group size is: 8.0 MB
After adding dataset: 20_2017_MDG_JECAM-CIRAD_POLY_111 group size is: 8.1 MB
After adding dataset: 20_2018_MDG_JECAM-CIRAD_POLY_111 group size is: 8.5 MB
After adding dataset: 20_2019_MDG_JECAM-CIRAD_POLY_111 group size is: 8.9 MB
After adding dataset: 23_2017_BRA_JECAM-CIRAD_POLY_111 group size is: 9.1 MB


 52%|█████▏    | 69/132 [00:04<00:04, 15.27it/s]

After adding dataset: 24_2021_MOZ_WFP-field-survey_POLY_111 group size is: 10.1 MB
After adding dataset: 25_2017_ZAF_JECAM-CIRAD_POLY_111 group size is: 10.2 MB
After adding dataset: 30_2018_IND_CGIAR-GARDIAN_POINT_110 group size is: 10.5 MB


 54%|█████▍    | 71/132 [00:04<00:05, 10.45it/s]

After adding dataset: 31_2018_GLO_EWOCO_POINT_100 group size is: 10.5 MB
After adding dataset: 31_2019_EGY_FAO-WAPOR-1_POLY_111 group size is: 10.6 MB
After adding dataset: 31_2019_EGY_FAO-WAPOR-2_POLY_111 group size is: 10.7 MB


 55%|█████▌    | 73/132 [00:04<00:06,  8.67it/s]

After adding dataset: 31_2019_GLO_EWOCO_POINT_100 group size is: 10.7 MB


 57%|█████▋    | 75/132 [00:05<00:07,  8.05it/s]

After adding dataset: 31_2020_GLO_EWOCO_POINT_100 group size is: 10.7 MB
After adding dataset: 32_2019_IRQ_WFP-field-survey_POLY_111 group size is: 10.8 MB
After adding dataset: 32_2019_MAR_FAO-WAPOR_POLY_111 group size is: 10.8 MB


 61%|██████    | 80/132 [00:05<00:04, 11.17it/s]

After adding dataset: 33_2019_ARG_BAGE-01_POLY_110 group size is: 11.6 MB
After adding dataset: 34_2017_LBN_FAO-WAPOR-1_POLY_111 group size is: 11.7 MB
After adding dataset: 34_2017_LBN_FAO-WAPOR-2_POLY_111 group size is: 11.7 MB
After adding dataset: 34_2017_LBN_FAO-WAPOR-3_POLY_111 group size is: 11.7 MB
After adding dataset: 34_2017_LBN_FAO-WAPOR-4_POLY_111 group size is: 11.7 MB


 64%|██████▎   | 84/132 [00:05<00:03, 13.49it/s]

After adding dataset: 34_2018_ARG_BAGE-01_POLY_110 group size is: 12.9 MB
After adding dataset: 34_2020_ARG_BAGE-01_POLY_110 group size is: 13.1 MB
After adding dataset: 36_2019_ARG_INTA-BA_POLY_110 group size is: 13.2 MB
After adding dataset: 37_2017_ARG_LISTA-field-data_POLY_110 group size is: 13.2 MB


 65%|██████▌   | 86/132 [00:05<00:03, 14.67it/s]

After adding dataset: 39_2018_AS_CAWA-project_POLY_111 group size is: 14.0 MB
After adding dataset: 40_2019_ESP_ESYRCE_POLY_111 group size is: 15.4 MB


 67%|██████▋   | 88/132 [00:13<00:52,  1.18s/it]

After adding dataset: 40_2019_USA_USDA2019cdls_POINT_110 group size is: 119.6 MB
After adding dataset: 40_2020_ESP_ESYRCE_POLY_111 group size is: 149.9 MB


 68%|██████▊   | 90/132 [00:17<00:58,  1.39s/it]

After adding dataset: 40_2021_ESP_ESYRCE_POLY_111 group size is: 180.9 MB
After adding dataset: 41_2017_AS_CAWA-project_POLY_111 group size is: 181.1 MB


 70%|██████▉   | 92/132 [00:41<02:58,  4.47s/it]

After adding dataset: 42_2019_ESP_SIGPAC-Catalunya_POLY_111 group size is: 306.7 MB


 70%|███████   | 93/132 [01:11<05:49,  8.95s/it]

After adding dataset: 43_2020_ESP_Eurocrops-Navarre_POLY_110 group size is: 308.4 MB


 71%|███████   | 94/132 [01:12<04:38,  7.32s/it]

After adding dataset: 45_2017_CAN_AAFC-Crop-Inventory_POINT_110 group size is: 333.9 MB


 72%|███████▏  | 95/132 [01:12<03:34,  5.79s/it]

After adding dataset: 45_2018_CAN_AAFC-Crop-Inventory_POINT_110 group size is: 356.6 MB


 73%|███████▎  | 96/132 [01:13<02:47,  4.65s/it]

After adding dataset: 45_2019_CAN_AAFC-Crop-Inventory_POINT_110 group size is: 377.2 MB


 73%|███████▎  | 97/132 [01:13<02:03,  3.53s/it]

After adding dataset: 45_2021_CAN_AAFC-Crop-Inventory_POINT_110 group size is: 388.7 MB


 74%|███████▍  | 98/132 [01:14<01:30,  2.66s/it]

After adding dataset: 46_2020_CAN_AAFC-Crop-Inventory_POINT_110 group size is: 399.4 MB
After adding dataset: 46_2020_SVN_LPIS_POLY_110 group size is: 507.4 MB
Writing group with size:  507.3810709999999 MB to disk with  99 datasets


 76%|███████▌  | 100/132 [03:07<16:32, 31.01s/it]

After adding dataset: 48_2017_AUT_LPIS_POLY_110 group size is: 324.4 MB
After adding dataset: 48_2018_AUT_LPIS_POLY_110 group size is: 646.6 MB
Writing group with size:  646.614464 MB to disk with  2 datasets


 77%|███████▋  | 102/132 [04:10<14:21, 28.72s/it]

After adding dataset: 48_2018_EU_LUCAS_POINT_110 group size is: 6.2 MB


 78%|███████▊  | 103/132 [04:47<15:00, 31.07s/it]

After adding dataset: 48_2019_AUT_LPIS_POLY_110 group size is: 315.8 MB
After adding dataset: 48_2020_AUT_LPIS_POLY_110 group size is: 622.7 MB
Writing group with size:  622.706756 MB to disk with  3 datasets


 80%|████████  | 106/132 [05:33<07:39, 17.67s/it]

After adding dataset: 49_2018_UKR_NHI-01_POINT_110 group size is: 1.3 MB
After adding dataset: 49_2019_UKR_NHI-01_POINT_110 group size is: 2.5 MB


 81%|████████  | 107/132 [05:44<06:26, 15.45s/it]

After adding dataset: 49_2021_SVK_Eurocrops_POLY_110 group size is: 43.8 MB


 82%|████████▏ | 108/132 [05:45<04:26, 11.12s/it]

After adding dataset: 50_2018_LUX_LPIS_POLY_110 group size is: 52.3 MB


 83%|████████▎ | 110/132 [05:46<02:05,  5.68s/it]

After adding dataset: 50_2019_LUX_LPIS_POLY_110 group size is: 60.6 MB
After adding dataset: 50_2019_UKR_JECAM-1_POLY_110 group size is: 60.8 MB
After adding dataset: 50_2019_UKR_JECAM-2_POLY_100 group size is: 60.8 MB


 85%|████████▍ | 112/132 [05:47<01:07,  3.37s/it]

After adding dataset: 50_2020_LUX_LPIS_POLY_110 group size is: 68.8 MB


 86%|████████▌ | 113/132 [05:48<00:52,  2.77s/it]

After adding dataset: 50_2021_LUX_LPIS_POLY_110 group size is: 76.6 MB


 86%|████████▋ | 114/132 [05:54<01:07,  3.73s/it]

After adding dataset: 51_2017_BEL_LPIS-Flanders_POLY_110 group size is: 161.1 MB


 87%|████████▋ | 115/132 [06:01<01:15,  4.46s/it]

After adding dataset: 51_2018_BEL_LPIS-Flanders_POLY_110 group size is: 246.0 MB


 88%|████████▊ | 116/132 [06:07<01:19,  4.94s/it]

After adding dataset: 51_2019_BEL_LPIS-Flanders_POLY_110 group size is: 331.6 MB


 89%|████████▊ | 117/132 [06:13<01:17,  5.15s/it]

After adding dataset: 51_2020_BEL_LPIS-Flanders_POLY_110 group size is: 418.6 MB
After adding dataset: 51_2021_BEL_LPIS-Flanders_POLY_110 group size is: 504.0 MB
Writing group with size:  504.039996 MB to disk with  14 datasets


 90%|█████████ | 119/132 [06:48<02:18, 10.66s/it]

After adding dataset: 52_2018_NLD_LPIS_POLY_110 group size is: 74.6 MB


 91%|█████████ | 120/132 [06:58<02:04, 10.34s/it]

After adding dataset: 52_2019_NLD_LPIS_POLY_110 group size is: 149.7 MB


 92%|█████████▏| 121/132 [07:08<01:52, 10.23s/it]

After adding dataset: 52_2020_NLD_LPIS_POLY_110 group size is: 225.8 MB


 92%|█████████▏| 122/132 [07:18<01:41, 10.13s/it]

After adding dataset: 52_2021_DEU_Eurocrops-NRW_POLY_110 group size is: 351.0 MB


 93%|█████████▎| 123/132 [07:27<01:28,  9.89s/it]

After adding dataset: 52_2021_NLD_LPIS_POLY_110 group size is: 426.5 MB
After adding dataset: 53_2021_DEU_Eurocrops-LS_POLY_110 group size is: 585.5 MB
Writing group with size:  585.496101 MB to disk with  6 datasets


 95%|█████████▍| 125/132 [08:28<02:22, 20.30s/it]

After adding dataset: 55_2021_LTU_Eurocrops_POLY_110 group size is: 177.9 MB


 95%|█████████▌| 126/132 [08:38<01:41, 16.99s/it]

After adding dataset: 56_2019_DNK_Eurocrops_POLY_110 group size is: 288.9 MB


 96%|█████████▌| 127/132 [08:53<01:22, 16.55s/it]

After adding dataset: 57_2019_LVA_LPIS_POLY_110 group size is: 364.4 MB


 97%|█████████▋| 128/132 [09:07<01:03, 15.82s/it]

After adding dataset: 57_2021_LVA_LPIS_POLY_110 group size is: 432.2 MB
After adding dataset: 58_2021_SWE_Eurocrops_POLY_110 group size is: 550.5 MB
Writing group with size:  550.504233 MB to disk with  5 datasets


 98%|█████████▊| 130/132 [10:40<00:58, 29.35s/it]

After adding dataset: 59_2021_EST_Eurocrops_POLY_110 group size is: 30.7 MB


 99%|█████████▉| 131/132 [11:09<00:29, 29.07s/it]

After adding dataset: 62_2020_FIN_LPIS_POLY_110 group size is: 197.6 MB
After adding dataset: 62_2021_FIN_LPIS_POLY_110 group size is: 362.7 MB
Writing group with size:  362.720546 MB to disk with  3 datasets


100%|██████████| 132/132 [12:09<00:00,  5.52s/it]


In [2]:
# Merge the per-dataset frames still held in `group` into a single frame and
# store it next to the numbered group files.
# NOTE(review): the grouping loop resets `group` to an empty list after its
# final flush; with an empty list pd.concat raises ValueError — confirm this
# cell is still meant to be run after that loop.
group = pd.concat(group, axis=0)
grouped_path = target_folder / "grouped.geoparquet"
group.to_parquet(grouped_path)

In [3]:
# Display the concatenated frame (rendered as the cell's output) for a quick visual check.
group

Unnamed: 0,sample_id,ref_id,ewoc_code,valid_time,extract,h3_l3_cell,geometry
3,2021_EST_Eurocrops_POLY_110_19996106,2021_EST_Eurocrops_POLY_110,1101010011,2021-05-02,0,831136fffffffff,"POLYGON ((27.42837 58.11975, 27.42839 58.11972..."
4,2021_EST_Eurocrops_POLY_110_19990620,2021_EST_Eurocrops_POLY_110,1111020030,2021-07-05,0,831136fffffffff,"POLYGON ((26.66816 57.82049, 26.66815 57.82050..."
7,2021_EST_Eurocrops_POLY_110_19990637,2021_EST_Eurocrops_POLY_110,1111020030,2021-07-05,0,831136fffffffff,"POLYGON ((26.30346 58.16094, 26.30393 58.16091..."
8,2021_EST_Eurocrops_POLY_110_19990646,2021_EST_Eurocrops_POLY_110,1111020030,2021-07-05,0,831136fffffffff,"POLYGON ((26.27513 58.16598, 26.27507 58.16599..."
9,2021_EST_Eurocrops_POLY_110_19990599,2021_EST_Eurocrops_POLY_110,1111020030,2021-07-05,0,831136fffffffff,"POLYGON ((26.50085 58.16810, 26.50086 58.16809..."
...,...,...,...,...,...,...,...
1078439,2021_FIN_LPIS_POLY_110_1086350,2021_FIN_LPIS_POLY_110,1101020003,2021-06-01,0,831122fffffffff,"POLYGON ((28.05545 60.75891, 28.05598 60.75839..."
1078442,2021_FIN_LPIS_POLY_110_1086353,2021_FIN_LPIS_POLY_110,1101020003,2021-06-01,0,831122fffffffff,"POLYGON ((27.97927 60.84420, 27.97937 60.84399..."
1078449,2021_FIN_LPIS_POLY_110_1086360,2021_FIN_LPIS_POLY_110,1111010000,2021-06-01,0,831122fffffffff,"POLYGON ((28.04229 60.85389, 28.04227 60.85386..."
1078452,2021_FIN_LPIS_POLY_110_1086363,2021_FIN_LPIS_POLY_110,1101040000,2021-06-01,0,831122fffffffff,"POLYGON ((27.94013 60.76693, 27.94053 60.76623..."


In [1]:
from tqdm import tqdm
import geopandas as gpd
import pandas as pd
from pathlib import Path

dataset_folder = Path("/vitodata/worldcereal_data/EXTRACTIONS/all_datasets/grouped_datasets/")

# Read only the numbered group files (group_<idx>.geoparquet). Iterating the
# whole directory would also pick up any other entry stored there (for example
# a "grouped.geoparquet" file, which does not match this pattern) and would do
# so in arbitrary order; sorting makes the row order of the output reproducible.
group_files = sorted(dataset_folder.glob("group_*.geoparquet"))

datasets = []
for dataset in tqdm(group_files):
    print(dataset.stem)
    gdf = gpd.read_parquet(dataset)
    print('dataset read')
    # Keep only the rows flagged with extract == 1
    datasets.append(gdf[gdf.extract == 1])
    # Drop the reference to the full frame before loading the next file
    del gdf

# Stack the selected rows of every group and write them out as a single file
datasets = pd.concat(datasets, axis=0)
datasets.to_parquet('/data/users/Public/couchard/wc_phase2_samplings.parquet')

  0%|          | 0/7 [00:00<?, ?it/s]

group_0
dataset read


 14%|█▍        | 1/7 [00:10<01:04, 10.77s/it]

group_1
dataset read


 29%|██▊       | 2/7 [00:27<01:12, 14.56s/it]

group_2
dataset read


 43%|████▎     | 3/7 [00:42<00:57, 14.45s/it]

group_3
dataset read


 57%|█████▋    | 4/7 [00:58<00:45, 15.09s/it]

group_4
dataset read


 71%|███████▏  | 5/7 [01:16<00:32, 16.31s/it]

group_5
dataset read


 86%|████████▌ | 6/7 [01:45<00:20, 20.40s/it]

group_6
dataset read


100%|██████████| 7/7 [02:02<00:00, 17.46s/it]
