### Downloading CMIP6 "global_average_thermosteric_sea_level_change (zostoga)" data using Python

* Search for the required data on the CMIP6 search page: https://esgf-node.llnl.gov/search/cmip6/
* Get the file information from the search results as a JSON file and convert it to an Excel file.
* Format the converted Excel file like `data.csv`.

In [1]:
import numpy as np
import pandas as pd
import os
import wget
import time

In [2]:
# Load the table of file records; each row describes one file to download.
file_info = pd.read_csv('data.csv')

In [3]:
# Display the loaded table for inspection before download URLs are built from it.
file_info

Unnamed: 0,MIP_Era,Activity,Institution_ID,Source_ID,Experiment_ID,Member_ID,Table_ID,Variable,Grid_Label,Version,size,datetime_start,datetime_stop
0,CMIP6,ScenarioMIP,CCCma,CanESM5,ssp245,r13i1p2f1,Omon,zostoga,gn,v20190429,207798,2015-01-16T12:00:00Z,2100-12-16T12:00:00Z
1,CMIP6,ScenarioMIP,CCCma,CanESM5,ssp245,r11i1p2f1,Omon,zostoga,gn,v20190429,207798,2015-01-16T12:00:00Z,2100-12-16T12:00:00Z
2,CMIP6,ScenarioMIP,CCCma,CanESM5,ssp245,r24i1p2f1,Omon,zostoga,gn,v20190429,207798,2015-01-16T12:00:00Z,2100-12-16T12:00:00Z
3,CMIP6,ScenarioMIP,CCCma,CanESM5,ssp245,r14i1p2f1,Omon,zostoga,gn,v20190429,207798,2015-01-16T12:00:00Z,2100-12-16T12:00:00Z
4,CMIP6,ScenarioMIP,CCCma,CanESM5,ssp245,r16i1p2f1,Omon,zostoga,gn,v20190429,207798,2015-01-16T12:00:00Z,2100-12-16T12:00:00Z
5,CMIP6,ScenarioMIP,CCCma,CanESM5,ssp245,r25i1p2f1,Omon,zostoga,gn,v20190429,207798,2015-01-16T12:00:00Z,2100-12-16T12:00:00Z
6,CMIP6,ScenarioMIP,CCCma,CanESM5,ssp245,r12i1p2f1,Omon,zostoga,gn,v20190429,207798,2015-01-16T12:00:00Z,2100-12-16T12:00:00Z
7,CMIP6,ScenarioMIP,CCCma,CanESM5,ssp245,r18i1p2f1,Omon,zostoga,gn,v20190429,207798,2015-01-16T12:00:00Z,2100-12-16T12:00:00Z
8,CMIP6,ScenarioMIP,CCCma,CanESM5,ssp245,r19i1p2f1,Omon,zostoga,gn,v20190429,207798,2015-01-16T12:00:00Z,2100-12-16T12:00:00Z
9,CMIP6,ScenarioMIP,CCCma,CanESM5,ssp245,r22i1p2f1,Omon,zostoga,gn,v20190429,207798,2015-01-16T12:00:00Z,2100-12-16T12:00:00Z


In [4]:
# Build the 'YYYYMM' part of a file's date range from a start/stop datetime column
def date_id(idx,df,col):
    """Return the year and month of ``df.loc[idx, col]`` joined into one string.

    The cell value is split on '-' and its first two fields are concatenated,
    e.g. '2015-01-16T12:00:00Z' -> '201501'.

    Parameters
    ----------
    idx : row label passed to ``df.loc``.
    df : pandas.DataFrame holding the datetime strings.
    col : name of the column to read.
    """
    fields = df.loc[idx, col].split('-')
    return fields[0] + fields[1]

In [5]:
def data_download(idx,df,flag):
    """Build the archive URL for one row of ``df`` and preview or download the file.

    Parameters
    ----------
    idx : row label passed to ``df.loc``.
    df : pandas.DataFrame
        Must provide the columns 'MIP_Era', 'Activity', 'Institution_ID',
        'Source_ID', 'Experiment_ID', 'Member_ID', 'Table_ID', 'Variable',
        'Grid_Label', 'Version', 'size', 'datetime_start' and 'datetime_stop'.
    flag : int
        0 -> only print the file name, size and URL (nothing is written to disk).
        1 -> download the file. Any other value does nothing.

    Notes
    -----
    Downloads are saved under
    ``<cwd>/<MIP_Era>/<Variable>/<Source_ID>/<Experiment_ID>/<filename>``.
    A file that already exists at that path is not downloaded again.
    """
    mip_era = df.loc[idx, 'MIP_Era']
    activity_drs = df.loc[idx, 'Activity']
    institution_id = df.loc[idx, 'Institution_ID']
    source_id = df.loc[idx, 'Source_ID']
    experiment_id = df.loc[idx, 'Experiment_ID']
    member_id = df.loc[idx, 'Member_ID']
    table_id = df.loc[idx, 'Table_ID']
    variable_id = df.loc[idx, 'Variable']
    grid_label = df.loc[idx, 'Grid_Label']
    version = df.loc[idx, 'Version']
    start = date_id(idx, df, 'datetime_start')
    end = date_id(idx, df, 'datetime_stop')
    # reported as MB; assumes the 'size' column is in bytes -- TODO confirm
    size = df.loc[idx, 'size'] / 1e6

    # local directory the downloaded file is saved in
    root = os.getcwd()
    output_folder = os.path.join(root, mip_era, variable_id, source_id, experiment_id)

    # server address
    server = 'http://esgf-data1.llnl.gov/thredds/fileServer/css03_data/'

    # online CMIP6 data archive directory, e.g.
    # CMIP6/ScenarioMIP/CCCma/CanESM5/ssp245/r11i1p2f1/Omon/zostoga/gn/v20190429/
    directory = (
        f'{mip_era}/{activity_drs}/{institution_id}/{source_id}/{experiment_id}/'
        f'{member_id}/{table_id}/{variable_id}/{grid_label}/{version}/'
    )

    # e.g. zostoga_Omon_CanESM5_ssp245_r13i1p2f1_gn_201501-210012.nc
    filename = f'{variable_id}_{table_id}_{source_id}_{experiment_id}_{member_id}_{grid_label}_{start}-{end}.nc'

    # url link
    url = server + directory + filename

    # output path of downloaded file
    output_filename = os.path.join(output_folder, filename)

    if flag == 0:
        # preview only: report what would be fetched, leave the disk untouched
        print (filename,'size: {}MB'.format(size))
        print(url)
    elif flag == 1:
        # create the folder only when a file is really going to be written;
        # exist_ok avoids the check-then-create race of exists() + makedirs()
        os.makedirs(output_folder, exist_ok=True)

        # wget.download does not overwrite an existing target: it saves the new
        # copy under a ' (1)'-suffixed name, so re-running would pile up duplicates
        if os.path.exists(output_filename):
            print(f'{filename} already exists | skipping download')
            return

        # download
        wget.download(url, output_filename)
        print(f'\n{filename} is downloaded | size: {size}MB')

In [6]:
# Download every file listed in file_info and report the time taken per file.
# The frame's own row labels are iterated because data_download looks rows up with .loc.
for i in file_info.index:
    start = time.perf_counter()
    data_download(i, file_info, 1)
    elapsed = time.perf_counter() - start
    print (f'time took to download: {round(elapsed,2)}s')


100% [............................................................................] 207798 / 207798
zostoga_Omon_CanESM5_ssp245_r13i1p2f1_gn_201501-210012.nc is downloaded | size: 0.207798MB
time took to download: 3.39s
100% [............................................................................] 207798 / 207798
zostoga_Omon_CanESM5_ssp245_r11i1p2f1_gn_201501-210012.nc is downloaded | size: 0.207798MB
time took to download: 2.88s
100% [............................................................................] 207798 / 207798
zostoga_Omon_CanESM5_ssp245_r24i1p2f1_gn_201501-210012.nc is downloaded | size: 0.207798MB
time took to download: 2.94s
100% [............................................................................] 207798 / 207798
zostoga_Omon_CanESM5_ssp245_r14i1p2f1_gn_201501-210012.nc is downloaded | size: 0.207798MB
time took to download: 2.96s
100% [............................................................................] 207798 / 207798
zostoga_Omon_CanESM5