In [1]:
# %pip install netcdf4 pydap

In [4]:
import pandas as pd
from pathlib import Path

arquivo = Path("/content/Tuinenberg-etal_2020.tab")

# Find where the metadata header ends: the header is closed by a line that
# contains only "*/", and the tabular data starts on the following line.
# `skip` starts at 0 so that a file without that marker is read from its
# first line; otherwise `skip` would be undefined (NameError) on a fresh
# kernel, or silently reuse a stale value left over from a previous run.
skip = 0
with arquivo.open(encoding="utf-8") as f:
    for i, linha in enumerate(f):
        if linha.strip() == "*/":          # last line of the header
            skip = i + 1                   # first line of the table
            break

df = pd.read_csv(arquivo,
                 sep="\t",                # tab-delimited
                 skiprows=skip,           # skip the metadata header
                 comment="#",             # ignore anything after a '#'
                 na_values=["nan", "NaN"])  # treat these as missing values

df


Unnamed: 0,Content,File name,File format,File size [kByte],URL file
0,"January, 0.5° spatial resolution",utrack_climatology_0.5_01,netCDF,10297880.0,https://hs.pangaea.de/Maps/Tuinenburg-etal_202...
1,"February, 0.5° spatial resolution",utrack_climatology_0.5_02,netCDF,10277300.0,https://hs.pangaea.de/Maps/Tuinenburg-etal_202...
2,"March, 0.5° spatial resolution",utrack_climatology_0.5_03,netCDF,9748976.0,https://hs.pangaea.de/Maps/Tuinenburg-etal_202...
3,"April, 0.5° spatial resolution",utrack_climatology_0.5_04,netCDF,9553866.0,https://hs.pangaea.de/Maps/Tuinenburg-etal_202...
4,"May, 0.5° spatial resolution",utrack_climatology_0.5_05,netCDF,9616981.0,https://hs.pangaea.de/Maps/Tuinenburg-etal_202...
5,"June, 0.5° spatial resolution",utrack_climatology_0.5_06,netCDF,9488671.0,https://hs.pangaea.de/Maps/Tuinenburg-etal_202...
6,"July, 0.5° spatial resolution",utrack_climatology_0.5_07,netCDF,9669828.0,https://hs.pangaea.de/Maps/Tuinenburg-etal_202...
7,"August, 0.5° spatial resolution",utrack_climatology_0.5_08,netCDF,9541763.0,https://hs.pangaea.de/Maps/Tuinenburg-etal_202...
8,"September, 0.5° spatial resolution",utrack_climatology_0.5_09,netCDF,9436493.0,https://hs.pangaea.de/Maps/Tuinenburg-etal_202...
9,"October, 0.5° spatial resolution",utrack_climatology_0.5_10,netCDF,9531932.0,https://hs.pangaea.de/Maps/Tuinenburg-etal_202...


In [None]:
import requests, os
from tqdm import tqdm     # só para barra de progresso

pasta = "utrack_nc"       # directory where the downloaded files are saved
os.makedirs(pasta, exist_ok=True)

# Download every file listed in the table, skipping the ones already present.
for url, nome in zip(df["URL file"], df["File name"]):
    destino = os.path.join(pasta, f"{nome}.nc")
    if os.path.exists(destino):
        print(f"😎 {destino} já existe, pulando.")
        continue
    print(f"⬇️  Baixando {destino}...")

    # Stream into a temporary ".part" file and only rename it to the final
    # name once the transfer is complete. Writing directly to `destino` would
    # leave a truncated file behind if the download is interrupted, and the
    # existence check above would then skip it forever.
    parcial = destino + ".part"

    # Using the response as a context manager releases the connection even
    # if an exception is raised mid-stream.
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        total = int(r.headers.get("content-length", 0))
        # With a Content-Encoding, the decoded byte count may legitimately
        # differ from Content-Length, so the size check is skipped then.
        codificado = bool(r.headers.get("content-encoding"))
        with open(parcial, "wb") as f, tqdm(
            total=total, unit="B", unit_scale=True, desc=nome
        ) as pbar:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
                pbar.update(len(chunk))

    # Reject short transfers when the server announced the expected size.
    baixado = os.path.getsize(parcial)
    if total and not codificado and baixado != total:
        os.remove(parcial)
        raise IOError(
            f"Download incompleto de {url}: {baixado} de {total} bytes."
        )

    # Atomic rename: `destino` only ever exists as a complete file.
    os.replace(parcial, destino)


⬇️  Baixando utrack_nc/utrack_climatology_0.5_01.nc...


utrack_climatology_0.5_01: 100%|██████████| 10.5G/10.5G [06:36<00:00, 26.6MB/s]


⬇️  Baixando utrack_nc/utrack_climatology_0.5_02.nc...


utrack_climatology_0.5_02: 100%|██████████| 10.5G/10.5G [06:40<00:00, 26.2MB/s]


⬇️  Baixando utrack_nc/utrack_climatology_0.5_03.nc...


utrack_climatology_0.5_03: 100%|██████████| 9.98G/9.98G [05:45<00:00, 28.9MB/s]


⬇️  Baixando utrack_nc/utrack_climatology_0.5_04.nc...


utrack_climatology_0.5_04:   6%|▌         | 587M/9.78G [00:21<05:41, 26.9MB/s]