In [1]:
from pathlib import Path  #Object-oriented filesystem paths
import pandas as pd       #Python Data Analysis Library
import wget               #Package to retrieve content from web servers
import time               #Time access and conversions module 

### Example 1: One year of data per file
This code is part of 'ESM2.3 CMIP6 data download using python for a single-model'<br>
https://youtu.be/jf1-AXhFSkU<br>

##### Learning objectives:<br> 
* Data source (i.e., ESGF, CEDA)
* Make a directory if it does not exist
* for loop
* passing variables to string
* download file with url address using wget

In [2]:
#Download from: ESGF data node plus the dataset directory on that node
node='http://esgf-data1.llnl.gov/thredds/fileServer/css03_data/'
mdir='CMIP6/HighResMIP/EC-Earth-Consortium/EC-Earth3P/hist-1950/r2i1p2f1/Omon/zos/gn/v20190812/'

#Download to: local target directory, created (with parents) if missing
path=r'\\wsl$/Ubuntu/home/aelshall/NCO/Gmap/zos/EC-Earth3P/hist-1950/'
Path(path).mkdir(parents=True, exist_ok=True)


#One file per year: the same year fills both ends of the YYYY01-YYYY12 range
for year in range(1990,1991):
    year_tag=str(year)
    file='zos_Omon_EC-Earth3P_hist-1950_r2i1p2f1_gn_{}01-{}12.nc'.format(year_tag,year_tag)

    #Source URL and local destination of this file
    dwf=''.join([node,mdir,file])
    dwt=''.join([path,file])
    print(dwf,dwt,sep='\n')

    #Time the download of this single file
    start = time.time()
    wget.download(dwf,dwt)
    elapsed = time.time() - start

    #Leading newline moves past the wget progress bar
    print('\n'+'Download time: {:.1f} sec'.format(elapsed))

http://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/HighResMIP/EC-Earth-Consortium/EC-Earth3P/hist-1950/r2i1p2f1/Omon/zos/gn/v20190812/zos_Omon_EC-Earth3P_hist-1950_r2i1p2f1_gn_199001-199012.nc
\\wsl$/Ubuntu/home/aelshall/NCO/Gmap/zos/EC-Earth3P/hist-1950/zos_Omon_EC-Earth3P_hist-1950_r2i1p2f1_gn_199001-199012.nc
100% [..........................................................................] 3685062 / 3685062
Download time: 5.9 sec


### Example 2: Dynamic time range
This code is part of 'ESM2.3 CMIP6 data download using python for a single-model'<br>
https://youtu.be/jf1-AXhFSkU<br>

##### Learning objectives:<br> 
* iterate over two variables simultaneously (or their combination) using zip
* for loop with step 
* if condition 
* if condition for the last iteration 

In [3]:
#Download from (not used in this cell: only the file names are printed)
serv='http://esgf-data3.ceda.ac.uk/thredds/fileServer/esg_cmip6/'
mdir='CMIP6/HighResMIP/NERC/HadGEM3-GC31-HH/hist-1950/r1i1p1f1/Omon/zos/gn/v20200514/'

#Download to
path=r'\\wsl$/Ubuntu/home/aelshall/NCO/Gmap/zos/HadGEM3-GC31-HH/hist-1950/'
#Path(path).mkdir(parents=True, exist_ok=True)

#Time layout of the files
year_step=1    #years covered by one file, e.g., 1,5,10,25,65
month_flag=2   #1: one file per year step (01-12), 2: four quarterly files
if month_flag==2:
    MM1=['01','04','07','10']   #first month of each window
    MM2=['03','06','09','12']   #last month of each window
elif month_flag==1:
    MM1=['01']
    MM2=['12']


for yyyy in range(1950,1952,year_step):
    #Last year covered by this step, capped at 2014
    yyyy2=min(yyyy+year_step-1,2014)
    for mm1, mm2 in zip(MM1,MM2):
        #e.g. zos_Omon_HadGEM3-GC31-HH_hist-1950_r1i1p1f1_gn_199001-199003.nc
        file='zos_Omon_HadGEM3-GC31-HH_hist-1950_r1i1p1f1_gn_{}{}-{}{}.nc'.format(str(yyyy),mm1,str(yyyy2),mm2)
        print(file)

zos_Omon_HadGEM3-GC31-HH_hist-1950_r1i1p1f1_gn_195001-195003.nc
zos_Omon_HadGEM3-GC31-HH_hist-1950_r1i1p1f1_gn_195004-195006.nc
zos_Omon_HadGEM3-GC31-HH_hist-1950_r1i1p1f1_gn_195007-195009.nc
zos_Omon_HadGEM3-GC31-HH_hist-1950_r1i1p1f1_gn_195010-195012.nc
zos_Omon_HadGEM3-GC31-HH_hist-1950_r1i1p1f1_gn_195101-195103.nc
zos_Omon_HadGEM3-GC31-HH_hist-1950_r1i1p1f1_gn_195104-195106.nc
zos_Omon_HadGEM3-GC31-HH_hist-1950_r1i1p1f1_gn_195107-195109.nc
zos_Omon_HadGEM3-GC31-HH_hist-1950_r1i1p1f1_gn_195110-195112.nc


### Example 3: Single-model ensemble 
This code is part of 'ESM2.4 CMIP6 data download using python for a single-model ensemble'<br>
https://youtu.be/QS_RwPOz6Jg<br>

##### Learning objectives:<br> 
* functions

In [5]:
def download(institution_ID,source_ID,experiment_ID,member_IDs,versions,period,year_step,month_flag,disp_flag):
    """List the CMIP6 HighResMIP 'zos' files of a single-model ensemble.

    For every (member_ID, version) pair and every time window, the file name,
    its source URL and its local target path are built and printed according
    to disp_flag. The wget call at the end of the loop is commented out, so
    nothing is downloaded; the only side effect is that the local target
    directory is created.

    NOTE: the Example 4 cell of this notebook defines another function that is
    also called `download`; whichever cell ran last is the one in effect.

    Parameters
    ----------
    institution_ID, source_ID, experiment_ID : str
        Inserted into the remote directory. source_ID and experiment_ID are
        also used in the file name and in the local directory.
    member_IDs, versions : iterables of str
        Paired element by element (member_IDs[i] with versions[i]); they must
        have the same length.
    period : sequence of two ints
        Used as range(period[0], period[1], year_step), so period[1] is
        exclusive.
    year_step : int
        Number of years covered by one file.
    month_flag : int
        1 -> one window per year step ('01'-'12');
        2 -> four windows ('01'-'03', '04'-'06', '07'-'09', '10'-'12').
    disp_flag : int
        >0 prints the file name, >1 also the URL, >2 also the local path.

    Raises
    ------
    ValueError
        If month_flag is not 1 or 2, or if member_IDs and versions differ in
        length.
    """
    #Monthly steps: validate before touching the file system so that a bad
    #flag fails with a clear message instead of an unbound MM1/MM2 later on
    month_windows={1:(['01'],['12']),
                   2:(['01','04','07','10'],['03','06','09','12'])}
    if month_flag not in month_windows:
        raise ValueError('month_flag must be 1 or 2, got {!r}'.format(month_flag))
    MM1,MM2=month_windows[month_flag]

    #zip() would silently drop the unmatched tail, i.e. silently skip members
    member_IDs=list(member_IDs)
    versions=list(versions)
    if len(member_IDs)!=len(versions):
        raise ValueError('member_IDs and versions must have the same length, got {} and {}'.format(len(member_IDs),len(versions)))

    #Download from (same node for every member)
    node='http://esgf-data3.ceda.ac.uk/thredds/fileServer/esg_cmip6/'

    #Download to (same directory for every member)
    path=r'\\wsl$/Ubuntu/home/aelshall/NCO/Gmap/zos/{}/{}/'.format(source_ID,experiment_ID)

    for member_ID, version in zip(member_IDs,versions):
        #Remote directory of this member/version
        mdir='CMIP6/HighResMIP/{}/{}/{}/{}/Omon/zos/gn/{}/'.format(institution_ID,source_ID,experiment_ID,member_ID,version)

        #exist_ok=True makes the repeated call harmless
        Path(path).mkdir(parents=True, exist_ok=True)

        #File download
        for yyyy1 in range(period[0],period[1],year_step):
            #Correct for last year of historical simulations
            yyyy2=min(yyyy1+year_step-1,2014)

            for mm1, mm2 in zip(MM1,MM2):
                #Create file name
                file='zos_Omon_{}_{}_{}_gn_{}{}-{}{}.nc'.format(source_ID,experiment_ID,member_ID,str(yyyy1),mm1,str(yyyy2),mm2)
                if disp_flag>0:
                    print(file)

                #Source URL and local destination
                dwf=node+mdir+file
                if disp_flag>1:
                    print(dwf)
                dwt=path+file
                if disp_flag>2:
                    print(dwt)
                #Download is disabled in this example; uncomment to fetch the file
                #wget.download(dwf,dwt)

In [7]:
#ECMWF-IFS-HR_hist-1950
#What to fetch
institution_ID='ECMWF'
source_ID='ECMWF-IFS-HR'
experiment_ID='hist-1950'
member_IDs=['r1i1p1f1','r2i1p1f1','r3i1p1f1']
versions=['v20170101','v20180101','v20190101']   #versions[i] belongs to member_IDs[i]

#Time layout and verbosity
period=[1950,1952]
year_step=1
month_flag=1
disp_flag=2

download(institution_ID=institution_ID,
         source_ID=source_ID,
         experiment_ID=experiment_ID,
         member_IDs=member_IDs,
         versions=versions,
         period=period,
         year_step=year_step,
         month_flag=month_flag,
         disp_flag=disp_flag)

zos_Omon_ECMWF-IFS-HR_hist-1950_r1i1p1f1_gn_195001-195012.nc
http://esgf-data3.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/HighResMIP/ECMWF/ECMWF-IFS-HR/hist-1950/r1i1p1f1/Omon/zos/gn/v20170101/zos_Omon_ECMWF-IFS-HR_hist-1950_r1i1p1f1_gn_195001-195012.nc
zos_Omon_ECMWF-IFS-HR_hist-1950_r1i1p1f1_gn_195101-195112.nc
http://esgf-data3.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/HighResMIP/ECMWF/ECMWF-IFS-HR/hist-1950/r1i1p1f1/Omon/zos/gn/v20170101/zos_Omon_ECMWF-IFS-HR_hist-1950_r1i1p1f1_gn_195101-195112.nc
zos_Omon_ECMWF-IFS-HR_hist-1950_r2i1p1f1_gn_195001-195012.nc
http://esgf-data3.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/HighResMIP/ECMWF/ECMWF-IFS-HR/hist-1950/r2i1p1f1/Omon/zos/gn/v20180101/zos_Omon_ECMWF-IFS-HR_hist-1950_r2i1p1f1_gn_195001-195012.nc
zos_Omon_ECMWF-IFS-HR_hist-1950_r2i1p1f1_gn_195101-195112.nc
http://esgf-data3.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/HighResMIP/ECMWF/ECMWF-IFS-HR/hist-1950/r2i1p1f1/Omon/zos/gn/v20180101/zos_Omon_ECMWF-IFS-HR_hist-1950_r

### Example 4: Multi-model ensemble 
This code is part of 'ESM2.5 CMIP6 data download using python for multi-model ensemble'<br>
https://youtu.be/xpFkw5ba_Ew<br>

##### Learning objectives:<br> 
* Read a CSV file into a pandas DataFrame
* Selecting data by label/position 
* Multiple if conditions 

In [8]:
def download(SI,period,disp_flag,dw_flag,df):
    """Print and optionally download the 'zos' files of one simulation listed in df.

    The simulation is the row of df with index label SI. The remote directory,
    the file names and the local target paths are built from that row; each
    file is printed according to disp_flag and fetched with wget when
    dw_flag==1.

    NOTE: the Example 3 cell of this notebook defines another function that is
    also called `download`; whichever cell ran last is the one in effect.

    Parameters
    ----------
    SI : index label
        Row of df to process (used as df.loc[SI, column]).
    period : sequence of two ints
        Used as range(period[0], period[1], year_step), so period[1] is
        exclusive.
    disp_flag : int
        >0 prints file name and average file size, >1 also the URL,
        >2 also the local path.
    dw_flag : int
        1 downloads each file with wget.download; any other value only prints.
    df : pandas.DataFrame
        Must provide the columns 'activity', 'institution_ID', 'source_ID',
        'experiment_ID', 'member_ID', 'grid_label', 'version', 'url',
        'start_count', 'year_step', 'number_of_files' and 'size'.

    Returns
    -------
    None. The function returns early for AWI hist-1950 once the end year
    exceeds 2010, and for HadGEM3-GC31-MM historical after its irregular files
    have been handled.
    """
    #Simulation information
    activity=df.loc[SI, 'activity']
    institution_ID=df.loc[SI, 'institution_ID']
    source_ID=df.loc[SI, 'source_ID']
    experiment_ID=df.loc[SI, 'experiment_ID']
    member_ID=df.loc[SI, 'member_ID']
    grid_label=df.loc[SI, 'grid_label']
    version=df.loc[SI, 'version']
    serv=df.loc[SI, 'url']
    start_count=df.loc[SI, 'start_count']
    year_step=df.loc[SI, 'year_step']
    num_files=df.loc[SI, 'number_of_files']
    size_files=df.loc[SI, 'size']

    #Simulation information
    print('Downloading data of {}_{}_{} that has {} files of size {:.0f} MB'
          .format(source_ID,experiment_ID,member_ID,num_files,size_files/1e6))

    #Download from
    mdir='CMIP6/{}/{}/{}/{}/{}/Omon/zos/{}/{}/'.format(activity,institution_ID,source_ID,experiment_ID,member_ID,grid_label,version)

    #Download to (the directory is not created here; it must already exist when dw_flag==1)
    path=r'\\wsl$/Ubuntu/home/aelshall/NCO/Gmap/zos/{}/{}/'.format(source_ID,experiment_ID)
    #Path(path).mkdir(parents=True, exist_ok=True)

    #Monthly steps
    if source_ID=='HadGEM3-GC31-HH':
        MM1=['01','04','07','10']
        MM2=['03','06','09','12']
    else:
        MM1=['01']
        MM2=['12']

    #File name template. The grid label comes from the same column that is
    #used for the directory, so both always agree (it was hard-coded as 'gn').
    name_fmt='zos_Omon_{}_{}_{}_{}_{}.nc'

    #File download
    for year in range(period[0],period[1],year_step):

        #Account for model group counting from one instead of zero.
        #Applied once per year step: doing it inside the month loop would add
        #start_count again for every month window and let the year drift.
        yyyy1=year+start_count

        #Correct for last year of historical simulations
        yyyy2=yyyy1+year_step-1
        if yyyy2>2014:
            yyyy2=2014

        #Correct for single file
        if year_step==65:
            yyyy1=1950
            yyyy2=2014
        elif year_step==165:
            yyyy1=1850
            yyyy2=2014

        ##Special Case(1): AWI hist-1950 ends at 201012
        if (institution_ID=='AWI' and experiment_ID=='hist-1950' and yyyy2>2010):
            print('AWI-CM-1-1-HR_hist-1950 ends at 201012')
            return

        for mm1, mm2 in zip(MM1,MM2):

            ##Special Case(2): AWI historical has extra file for year 185001-185012 (hist-1950 ignored this year!)
            if (institution_ID=='AWI' and experiment_ID=='historical' and yyyy1==1851):
                file=name_fmt.format(source_ID,experiment_ID,member_ID,grid_label,'{}{}-{}{}'.format(1850,mm1,1850,mm2))
                dwf=serv+mdir+file
                #Destination of the extra file itself (was unset or stale before)
                dwt=path+file
                print(dwf)
                if dw_flag ==1:
                    wget.download(dwf,dwt)

            #Create file name
            file=name_fmt.format(source_ID,experiment_ID,member_ID,grid_label,'{}{}-{}{}'.format(yyyy1,mm1,yyyy2,mm2))
            if disp_flag>0:
                size_file=size_files/num_files/1e6
                print('{} ({:.0f} MB)'.format(file,size_file))

            ##Special Case(3): HadGEM3-GC31-MM_historical has irregular date range starting from 1970
            #NOTE(review): when this branch is entered the regular file name built
            #above (e.g. 195001-196912) has been printed but is never downloaded,
            #because the function returns before the regular download - confirm
            #whether that file should be fetched as well.
            if (source_ID=='HadGEM3-GC31-MM' and experiment_ID=='historical' and yyyy1>1930):
                print('HadGEM3-GC31-MM_historical has irregular date range starting from 1970')
                #zos_Omon_HadGEM3-GC31-MM_historical_r1i1p1f3_gn_195001-196912.nc
                #zos_Omon_HadGEM3-GC31-MM_historical_r1i1p1f3_gn_197001-198712.nc
                #zos_Omon_HadGEM3-GC31-MM_historical_r1i1p1f3_gn_198801-198912.nc
                #zos_Omon_HadGEM3-GC31-MM_historical_r1i1p1f3_gn_199001-200912.nc
                #zos_Omon_HadGEM3-GC31-MM_historical_r1i1p1f3_gn_201001-201412.nc
                dates=['197001-198712', '198801-198912','199001-200912','201001-201412']
                for date in dates:
                    file=name_fmt.format(source_ID,experiment_ID,member_ID,grid_label,date)
                    if disp_flag>0:
                        size_file=size_files/num_files/1e6
                        print('{} ({:.0f} MB)'.format(file,size_file))
                    dwf=serv+mdir+file
                    if disp_flag>1:
                        print(dwf)
                    #Each irregular file gets its own destination (was unset or stale before)
                    dwt=path+file
                    if dw_flag ==1:
                        wget.download(dwf,dwt)
                return

            ##download file
            dwf=serv+mdir+file
            if disp_flag>1:
                print(dwf)
            dwt=path+file
            if disp_flag>2:
                print(dwt)
            if dw_flag ==1:
                wget.download(dwf,dwt)
            

In [14]:
#Catalogue of simulations, one row per simulation
df=pd.read_csv('zos_data.csv')

#Settings shared by all simulations in this run
period=[1950, 1952]  #[first year, stop year) passed to download
disp_flag=1          #[0] no display, [1] simple display, [2] full display
dw_flag=0            #[0] print file name and do not download file, [1] download file

#SI range for different models is the "sim_idx" column in zos_data.csv
for SI in range(27,30):
    download(SI=SI, period=period, disp_flag=disp_flag, dw_flag=dw_flag, df=df)

Downloading data of EC-Earth3P-HR_hist-1950_r2i1p2f1 that has 65 files of size 3605 MB
zos_Omon_EC-Earth3P-HR_hist-1950_r2i1p2f1_gn_195001-195012.nc (55 MB)
zos_Omon_EC-Earth3P-HR_hist-1950_r2i1p2f1_gn_195101-195112.nc (55 MB)
Downloading data of EC-Earth3P-HR_hist-1950_r3i1p2f1 that has 65 files of size 3574 MB
zos_Omon_EC-Earth3P-HR_hist-1950_r3i1p2f1_gn_195001-195012.nc (55 MB)
zos_Omon_EC-Earth3P-HR_hist-1950_r3i1p2f1_gn_195101-195112.nc (55 MB)
Downloading data of ECMWF-IFS-HR_hist-1950_r1i1p1f1 that has 65 files of size 3439 MB
zos_Omon_ECMWF-IFS-HR_hist-1950_r1i1p1f1_gn_195001-195012.nc (53 MB)
zos_Omon_ECMWF-IFS-HR_hist-1950_r1i1p1f1_gn_195101-195112.nc (53 MB)
