In [18]:
import xarray as xr
import pandas as pd
import numpy as np

import cartopy
import cartopy.crs as ccrs
import matplotlib.pyplot as plt

import glob
import sys
import os
import datetime
import calendar
import rasterio
import pickle


In [2]:
chirtsVar = 'Tmax'

dirChirts = '/dartfs-hpc/rc/lab/C/CMIG/CHIRTS'
urlChirts = 'http://data.chc.ucsb.edu/products/CHIRTSdaily/v1.0/global_tifs_p05/%s'%chirtsVar

In [3]:
years = range(2010,2017)
months = range(1,13)

In [4]:
for year in years:
    for month in months:
        for day in range(1, calendar.monthrange(year, month)[1]+1):
            
            filename = '%s.%d.%02d.%02d.tif'%(chirtsVar, year, month, day)
            remote_filepath = '%s/%d/%s'%(urlChirts, year, filename)
            local_dir = '%s/%s/%d/'%(dirChirts, chirtsVar, year)
            local_filepath = '%s/%s/%d/%s'%(dirChirts, chirtsVar, year, filename)
            
            if not os.path.isfile(local_filepath):
                cmd = 'wget -P %s %s'%(local_dir, remote_filepath)
                print(cmd)
                os.system(cmd)
            

wget -P /dartfs-hpc/rc/lab/C/CMIG/CHIRTS/Tmax/2010/ http://data.chc.ucsb.edu/products/CHIRTSdaily/v1.0/global_tifs_p05/Tmax/2010/Tmax.2010.01.01.tif
wget -P /dartfs-hpc/rc/lab/C/CMIG/CHIRTS/Tmax/2010/ http://data.chc.ucsb.edu/products/CHIRTSdaily/v1.0/global_tifs_p05/Tmax/2010/Tmax.2010.01.02.tif
wget -P /dartfs-hpc/rc/lab/C/CMIG/CHIRTS/Tmax/2010/ http://data.chc.ucsb.edu/products/CHIRTSdaily/v1.0/global_tifs_p05/Tmax/2010/Tmax.2010.01.03.tif
wget -P /dartfs-hpc/rc/lab/C/CMIG/CHIRTS/Tmax/2010/ http://data.chc.ucsb.edu/products/CHIRTSdaily/v1.0/global_tifs_p05/Tmax/2010/Tmax.2010.01.04.tif
wget -P /dartfs-hpc/rc/lab/C/CMIG/CHIRTS/Tmax/2010/ http://data.chc.ucsb.edu/products/CHIRTSdaily/v1.0/global_tifs_p05/Tmax/2010/Tmax.2010.01.05.tif
wget -P /dartfs-hpc/rc/lab/C/CMIG/CHIRTS/Tmax/2010/ http://data.chc.ucsb.edu/products/CHIRTSdaily/v1.0/global_tifs_p05/Tmax/2010/Tmax.2010.01.06.tif
wget -P /dartfs-hpc/rc/lab/C/CMIG/CHIRTS/Tmax/2010/ http://data.chc.ucsb.edu/products/CHIRTSdaily/v1.0/glo

In [None]:
# to monthly netcdf

chirts_lat = np.linspace(70, -60, 2600)
chirts_lon = np.linspace(0, 360, 7200)

for year in years:
    
    local_dir = '%s/%s/%d/'%(dirChirts, chirtsVar, year)
    output_dir = '%s/%s/netcdf/'%(dirChirts, chirtsVar)
    
    for month in months:
        
        output_netcdf_file = '%s/tmax_%d_%02d.nc'%(output_dir, year, month)
        if os.path.isfile(output_netcdf_file):
            continue
        
        cur_month_len = calendar.monthrange(year, month)[1]
        
        tmax_cur_month = np.full([chirts_lat.shape[0], chirts_lon.shape[0], cur_month_len], np.nan)
        
        error = False
        
        print('loading %d/%d...'%(year, month))
        for d, day in enumerate(range(1, cur_month_len+1)):
            
            filename = '%s.%d.%02d.%02d.tif'%(chirtsVar, year, month, day)
            remote_filepath = '%s/%d/%s'%(urlChirts, year, filename)
            local_filepath = '%s/%s/%d/%s'%(dirChirts, chirtsVar, year, filename)
            
            if not os.path.isfile(local_filepath):
                print('ERROR: skipping %d/%d, data not complete'%(year, month))
                error = True
                break
            
            tmax_tif = rasterio.open(local_filepath)
            tmax_tif_data = tmax_tif.read(1)
            tmax_tif_data[tmax_tif_data<-1000] = np.nan
            tmax_tif_data = np.roll(tmax_tif_data, -int(tmax_tif_data.shape[1]/2), axis=1)
            
            tmax_cur_month[:, :, d] = tmax_tif_data
        
        if not error:
            tmax_cur_month_ds = xr.Dataset(
            {
                "tmax": (["lat", "lon", "time"], tmax_cur_month),
            },
            coords={
                "lat":chirts_lat,
                "lon":chirts_lon,
                "time": pd.date_range(start="%d-%02d-01"%(year, month), periods=cur_month_len)
            },)

            tmax_cur_month_ds.attrs["units"] = "degC"
            tmax_cur_month_ds.attrs["name"] = "CHIRTS-TMAX"

            print('writing netcdf for %d/%d...'%(year, month))
            tmax_cur_month_ds.to_netcdf(output_netcdf_file)
        
            
            
            

loading 2010/2...
writing netcdf for 2010/2...
loading 2010/3...
writing netcdf for 2010/3...
loading 2010/4...
writing netcdf for 2010/4...
loading 2010/5...
writing netcdf for 2010/5...
loading 2010/6...
writing netcdf for 2010/6...
loading 2010/7...
writing netcdf for 2010/7...
loading 2010/8...
writing netcdf for 2010/8...
loading 2010/9...
writing netcdf for 2010/9...
loading 2010/10...
writing netcdf for 2010/10...
loading 2010/11...
writing netcdf for 2010/11...
loading 2010/12...
writing netcdf for 2010/12...
loading 2011/1...
writing netcdf for 2011/1...
loading 2011/2...
writing netcdf for 2011/2...
loading 2011/3...
writing netcdf for 2011/3...
loading 2011/4...
writing netcdf for 2011/4...
loading 2011/5...
writing netcdf for 2011/5...
loading 2011/6...
writing netcdf for 2011/6...
loading 2011/7...
writing netcdf for 2011/7...
loading 2011/8...
writing netcdf for 2011/8...
loading 2011/9...
writing netcdf for 2011/9...
loading 2011/10...
writing netcdf for 2011/10...
loadi