In [None]:
# -*- coding: utf-8 -*-
#Created on Mon Aug 23 16:00:04 2021

#author: Alex Schud.

import numpy as np
import os
import xarray as xr
import time
from pathlib import Path
import sys
import cftime
from pyesgf.search import SearchConnection

####################################################################################################################
#Run Parameters - Change based on machine/ what you want to download

# Root folder for downloaded data: one location on Windows ('nt'), another
# on every other operating system.
base_directory = (
    Path('C:/Users/asc182/CMIP6/Cf')
    if os.name == 'nt'
    else Path("/home/asc182/env/asc182/CMIP6/Rad")
)

# Search facets handed to the ESGF query further down the script.
variable_name = 'dmsos'
exp_name = 'historical'
Table_name = 'day'
variant_name = 'r1i1p1f1'  # only referenced by a commented-out search facet
source_names = ['UKESM1-0-LL']

# First and last calendar year to keep, written as strings.
start_year = '1940'
end_year = '2014'

###################################################################################################################

# Every year from start_year to end_year, both ends included.
first_year_number = int(start_year)
last_year_number = int(end_year)
year_list = range(first_year_number, last_year_number + 1)

# ESGF search endpoints; the download loop opens a connection to each in turn.
Connection_List = [
    'https://esgf-node.llnl.gov/esg-search',
    'https://esgf-data.dkrz.de/esg-search',
    'http://esgf-index1.ceda.ac.uk/esg-search',
]


def _strip_dataset_version(dataset_id):
    """Return *dataset_id* without its data-node suffix and trailing version.

    The id is cut at the first ``|`` (if any) and a final ``.v<digits>``
    component is removed, e.g. ``a.b.vas.gn.v20190627|node`` -> ``a.b.vas.gn``.
    Searching from the right keeps facets that start with ``v`` (such as a
    variable named ``vas``) intact, and an id without a version component is
    returned whole instead of losing its last character.
    """
    name = dataset_id.split('|')[0]
    head, sep, version = name.rpartition('.v')
    if sep and version.isdigit():
        return head
    return name


def _parse_remote_filename(url):
    """Split the file name at the end of *url* into a prefix and a year span.

    Returns ``(prefix, first_year, last_year)``: *prefix* is the text before
    the first underscore of the file name; the years are the four characters
    at the start of the last underscore-separated field and the four
    characters following its first hyphen.

    Raises:
        ValueError: if that field has no hyphen or the year text is not an
            integer.
    """
    fields = url.split('/')[-1].split('_')
    period = fields[-1]
    hyphen = period.index('-')
    return fields[0], int(period[0:4]), int(period[hyphen + 1:hyphen + 5])


def _days_since(date, units):
    """Convert *date* to a number expressed in *units* with cftime.

    The first attempt leaves the calendar choice to ``cftime.date2num``; if
    that raises, the conversion is retried with the ``noleap`` calendar.
    """
    try:
        return cftime.date2num(date, units)
    except Exception:
        # NOTE(review): 'noleap' fallback kept as-is; confirm it is the
        # intended calendar for every model that reaches this branch.
        return cftime.date2num(date, units, calendar='noleap')


# For every search endpoint: find the matching datasets, open each remote file
# over OPeNDAP and write one NetCDF file per calendar year of year_list.
for server in Connection_List:
    conn = SearchConnection(server, distrib=True)

    #search parameters
    ctx = conn.new_context(
        experiment_id=exp_name,
        variable=variable_name,
        table_id=Table_name,
        source_id=source_names,
        #variant_label=variant_name,
        # Every host needs its own comma: adjacent string literals are
        # silently concatenated into a single (non-existent) host name.
        data_node=[
            'aims3.llnl.gov', 'cmip.bcc.cma.cn', 'cmip.dess.tsinghua.edu.cn',
            'cmip.fio.org.cn', 'crd-esgf-drc.ec.gc.ca', 'dist.nmlab.snu.ac.kr',
            'dpesgf03.nccs.nasa.gov', 'esg-cccr.tropmet.res.in', 'esg-dn1.nsc.liu.se',
            'esg-dn2.nsc.liu.se', 'esg.camscma.cn', 'esg.lasg.ac.cn', 'esg1.umr-cnrm.fr',
            'esgdata.gfdl.noaa.gov', 'esgf-cnr.hpc.cineca.it', 'esgf-data.csc.fi',
            'esgf-data.ucar.edu', 'esgf-data2.diasjp.net', 'esgf-data2.llnl.gov',
            'noresg.nird.sigma2.no', 'esgf-data3.ceda.ac.uk', 'esgf-nimscmip6.apcc21.org',
            'esgf-node2.cmcc.it', 'esgf.bsc.es', 'esgf.ichec.ie', 'esgf.nci.org.au',
            'esgf.rcec.sinica.edu.tw', 'esgf3.dkrz.de', 'polaris.pknu.ac.kr',
            'vesg.ipsl.upmc.fr',
        ])

    for result in ctx.search():
        files = result.file_context().search()

        # One output folder per dataset, named after the id without version.
        directory_string = Path(base_directory) / _strip_dataset_version(result.dataset_id)
        if directory_string.is_dir():
            print('directory already created')
        else:
            # parents=True so a missing base_directory does not abort the run.
            directory_string.mkdir(parents=True, exist_ok=True)

        string_list = []
        file_list = []
        for f in files:
            string_list.append(f.opendap_url)
            file_list.append(f.filename)

        count = 0
        for opendap_url in string_list:
            print()
            print(opendap_url)
            if opendap_url is None:
                continue

            try:
                file_prefix, first_year, second_year = _parse_remote_filename(opendap_url)
            except ValueError:
                # A name without a parsable year range skips this file only.
                print('Could not parse time range from file name')
                continue

            if first_year > int(end_year) or second_year < int(start_year):
                print('Outside time period')
                continue

            try:
                # clisccp is opened with one time step per chunk.
                open_kwargs = {'chunks': {'time': 1}} if variable_name == 'clisccp' else {}
                # The context manager closes the remote dataset even when a
                # year fails part-way through.
                with xr.open_dataset(opendap_url, engine='pydap', use_cftime=True,
                                     decode_times=True, **open_kwargs) as dataset:
                    attributes = dataset.attrs

                    # A 360-day calendar has no 31 December, so the yearly
                    # slice must end on the 30th. Check both the calendar
                    # attribute and the calendar-specific class.
                    first_time = dataset['time'].data[0]
                    is_360_day = (getattr(first_time, 'calendar', None) == '360_day'
                                  or isinstance(first_time, cftime.Datetime360Day))
                    year_end_suffix = '-12-30' if is_360_day else '-12-31'

                    ##iterate through the years and save yearly
                    for tmp_year in year_list:
                        tmp_start_time = str(tmp_year) + '-01-01'
                        tmp_end_time = str(tmp_year) + year_end_suffix

                        variable_xarray = dataset[variable_name].sel(
                            time=slice(tmp_start_time, tmp_end_time))
                        # The dataset-level attributes replace the variable's own.
                        variable_xarray.attrs = attributes

                        if variable_xarray.size == 0:
                            continue

                        # Offsets of the first and last time step, measured
                        # from 1 January of this year, go into the file name.
                        tmp_start_string = 'days since ' + str(tmp_year) + '-01-01'
                        start_index = _days_since(variable_xarray['time'].data[0], tmp_start_string)
                        end_index = _days_since(variable_xarray['time'].data[-1], tmp_start_string)
                        tmp_starting_day = int(np.ceil(start_index))
                        tmp_ending_day = int(np.ceil(end_index))

                        tmp_file_name = (file_prefix + '_' + str(tmp_year) + '_'
                                         + str(tmp_starting_day) + '-' + str(tmp_ending_day) + '.nc')
                        tmp_full_name = directory_string / tmp_file_name

                        # An existing file is kept only if it is larger than
                        # 1,000,000 bytes; anything else is written again.
                        if tmp_full_name.is_file() and tmp_full_name.stat().st_size > 1000000:
                            print('File Already downloaded')
                        else:
                            variable_xarray.to_netcdf(tmp_full_name)
                            print(tmp_file_name)

                time.sleep(0.22)

                count += 1
            except FileNotFoundError:
                print('File not found')
            except Exception as exc:
                # Best effort: report the failure and move on to the next
                # file. Ctrl-C is no longer swallowed.
                print('Exception in download:', repr(exc))

                time.sleep(0.1)