# Download monthly GPCP data
Download page: [link](https://downloads.psl.noaa.gov/Datasets/gpcp/)

Description page: [link](https://psl.noaa.gov/data/gridded/data.gpcp.html)

GPCP monthly data, preprocessed into filesystem standards (variable name changed to "pr", longitudes to -180:180, filename to CMIP-like standards)

In [1]:
import os
import re
import tarfile
import tempfile
import requests
import xarray as xr
import xagg as xa
from datetime import datetime
from tqdm import tqdm
from funcs_support import get_params, get_filepaths, utility_save
dir_list = get_params()

df = get_filepaths()



In [2]:
# ------------------------------------------------------------------------------
# Configuration
# ------------------------------------------------------------------------------
source_url = 'https://downloads.psl.noaa.gov/Datasets/gpcp/precip.mon.mean.nc'
output_dir = dir_list['raw']+'GPCP/'

if len(df.query('model == "GPCP" and freq == "Amon"')) == 0:
    os.makedirs(output_dir, exist_ok=True)
    
    # ------------------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------------------
    
    def download_file(url, dest):
        """Download URL to dest path."""
        if not os.path.exists(dest):
            with requests.get(url, stream=True) as r:
                r.raise_for_status()
                with open(dest, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
        return dest
    
    # ------------------------------------------------------------------------------
    # Download netcdfs for all years
    # ------------------------------------------------------------------------------
    fn = download_file(source_url,output_dir+'precip.mon.mean.nc')
    
    # ------------------------------------------------------------------------------
    # Open, save as single file
    # ------------------------------------------------------------------------------
    ds = xr.open_dataset(fn)
    ds = xa.fix_ds(ds)
    ds = ds.rename({'precip':'pr'})
    ds.attrs['DESCRIPTION'] = 'GPCP, preprocesed into filesystem (CMIP-like) conventions'
    ds.attrs['SOURCE'] = 'preprocess_GPCP.ipynb'

    timestr = (re.sub(r'\-','',str(ds.time.min().values)[0:8])+'0101-'+
               re.sub(r'\-','',str(ds.time.max().values)[0:8])+
               str(ds.time.max().dt.daysinmonth.values))

    output_fn = output_dir+'pr_Amon_GPCP_historical_obs_'+timestr+'.nc'
    
    utility_save(ds,output_fn)
    
    # ------------------------------------------------------------------------------
    # Remove temp file
    # ------------------------------------------------------------------------------
    os.system('rm -rf '+fn)
else:
    print('GPCP monthly data exists!')

/dx06/data/climate_raw/GPCP/pr_Amon_GPCP_historical_obs_1979010101-20250930.nc saved!
