# Intake to load CMIP data

## Using intake-esm on mistral
- install intake-esm: https://intake-esm.readthedocs.io/en/latest/installation.html
- check the already built catalogs: /home/mpim/m300524/.intake_esm/collections and copy to skip long catalog building process

In [14]:
import intake
import xarray as xr
import numpy as np
import pandas as pd

# CMIP5

In [66]:
collection_input_definition = '/home/mpim/m300524/pymistral/intake/cmip6_collection_mistral.yml'

In [67]:
col = intake.open_esm_metadatastore(collection_input_definition=collection_input_definition, overwrite_existing=False)

In [68]:
col.df.head()

Unnamed: 0,activity_id,experiment_id,file_basename,file_dirname,file_fullpath,grid_label,institution_id,member_id,mip_era,source_id,table_id,time_range,variable_id,version
0,CNRM-CERFACS,r99i1p1f2,tos_Omon_CNRM-CM6-1_1pctCO2_r99i1p1f2_gn_18500...,/work/kd0956/CMIP6/CMIP/CNRM-CERFACS/CNRM-CM6-...,/work/kd0956/CMIP6/CMIP/CNRM-CERFACS/CNRM-CM6-...,v20180424,CNRM-CM6-1,Omon,CMIP6,1pctCO2,tos,185001-185001,gn,v20180424


In [8]:
cmip5_variables = col.df['variable'].unique()

In [9]:
for v in ['institute','model','experiment','frequency','mip_table','modeling_realm','product']:
    print(v,col.df[v].unique(),'\n')

institute ['LASG-CESS' 'LASG-IAP' 'CCCma' 'CSIRO-QCCCE' 'BCC' 'ICHEC' 'CSIRO-BOM'
 'NSF-DOE-NCAR' 'MOHC' 'NOAA-GFDL' 'INM' 'IPSL' 'CNRM-CERFACS' 'MIROC'
 'MPI-M' 'MRI' 'NCC' 'NCAR' 'CMCC' 'BNU' 'NASA-GMAO' 'NASA-GISS'
 'NIMR-KMA' 'FIO' 'NICAM' 'NOAA-NCEP' 'COLA-CFS' 'INPE'] 

model ['FGOALS-g2' 'FGOALS-s2' 'CanESM2' 'CanAM4' 'FGOALS-gl' 'CSIRO-Mk3-6-0'
 'bcc-csm1-1' 'EC-EARTH' 'ACCESS1-0' 'ACCESS1-3' 'CESM1-WACCM'
 'HadGEM2-ES' 'GFDL-ESM2M' 'inmcm4' 'IPSL-CM5A-LR' 'HadGEM2-A' 'GFDL-CM3'
 'GFDL-HIRAM-C360' 'GFDL-CM2p1' 'GFDL-ESM2G' 'GFDL-HIRAM-C180' 'CNRM-CM5'
 'HadCM3' 'MIROC4h' 'MIROC5' 'MPI-ESM-LR' 'MRI-CGCM3' 'NorESM1-M'
 'MRI-AGCM3-2H' 'MRI-AGCM3-2S' 'HadGEM2-CC' 'MIROC-ESM' 'MIROC-ESM-CHEM'
 'MPI-ESM-P' 'IPSL-CM5A-MR' 'IPSL-CM5B-LR' 'CCSM4' 'NorESM1-ME'
 'MPI-ESM-MR' 'CMCC-CM' 'CESM1-CAM5-1-FV2' 'BNU-ESM' 'GEOS-5' 'GISS-E2-R'
 'GISS-E2-H' 'HadGEM2-AO' 'CESM1-CAM5' 'FIO-ESM' 'CESM1-FASTCHEM'
 'fio-esm' 'CESM1-BGC' 'CanCM4' 'MRI-ESM1' 'NICAM-09' 'bcc-csm1-1-m'
 'CMCC-CMS' 'GISS-E2-R

In [37]:
cat = col.search(experiment='piControl',variable=['tos'],mip_table='Omon',ensemble_member='r1i1p1')

In [38]:
qr=cat.query_results
qr.head()

Unnamed: 0,activity,ensemble_member,experiment,file_basename,file_dirname,file_fullpath,frequency,institute,mip_table,model,modeling_realm,product,temporal_subset,variable,version
556618,CMIP5,r1i1p1,piControl,tos_Omon_bcc-csm1-1-m_piControl_r1i1p1_000101-...,/work/kd0956/CMIP5/data/cmip5/output1/BCC/bcc-...,/work/kd0956/CMIP5/data/cmip5/output1/BCC/bcc-...,mon,BCC,Omon,bcc-csm1-1-m,ocean,output1,000101-040012,tos,v20120705
200327,CMIP5,r1i1p1,piControl,tos_Omon_bcc-csm1-1_piControl_r1i1p1_000101-05...,/work/kd0956/CMIP5/data/cmip5/output1/BCC/bcc-...,/work/kd0956/CMIP5/data/cmip5/output1/BCC/bcc-...,mon,BCC,Omon,bcc-csm1-1,ocean,output1,000101-050012,tos,v20120202
290287,CMIP5,r1i1p1,piControl,tos_Omon_BNU-ESM_piControl_r1i1p1_145001-20081...,/work/kd0956/CMIP5/data/cmip5/output1/BNU/BNU-...,/work/kd0956/CMIP5/data/cmip5/output1/BNU/BNU-...,mon,BNU,Omon,BNU-ESM,ocean,output1,145001-200812,tos,v20120504
241348,CMIP5,r1i1p1,piControl,tos_Omon_CanESM2_piControl_r1i1p1_291101-30101...,/work/kd0956/CMIP5/data/cmip5/output1/CCCma/Ca...,/work/kd0956/CMIP5/data/cmip5/output1/CCCma/Ca...,mon,CCCma,Omon,CanESM2,ocean,output1,291101-301012,tos,v20120410
769735,CMIP5,r1i1p1,piControl,tos_Omon_CMCC-CESM_piControl_r1i1p1_459401-460...,/work/kd0956/CMIP5/data/cmip5/output1/CMCC/CMC...,/work/kd0956/CMIP5/data/cmip5/output1/CMCC/CMC...,mon,CMCC,Omon,CMCC-CESM,ocean,output1,459401-460012,tos,v20130417


In [42]:
qr[['institute','model']]

Unnamed: 0,institute,model
556618,BCC,bcc-csm1-1-m
200327,BCC,bcc-csm1-1
290287,BNU,BNU-ESM
241348,CCCma,CanESM2
769735,CMCC,CMCC-CESM
698368,CMCC,CMCC-CM
609711,CMCC,CMCC-CMS
768191,CNRM-CERFACS,CNRM-CM5-2
650469,CNRM-CERFACS,CNRM-CM5
786473,CSIRO-BOM,ACCESS1-0


In [None]:
ds = cat.to_xarray()

In [None]:
ds.keys()

# CMIP6

- wrong ordering now

In [69]:
collection_input_definition = '/home/mpim/m300524/pymistral/intake/cmip6_collection_mistral.yml'

In [71]:
col = intake.open_esm_metadatastore(collection_input_definition=collection_input_definition, overwrite_existing=True)

Getting list of directories


HBox(children=(IntProgress(value=0, description='directories', max=10, style=ProgressStyle(description_width='…

Found 2 directories
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 14 columns):
activity_id       1 non-null object
experiment_id     1 non-null object
file_basename     1 non-null object
file_dirname      1 non-null object
file_fullpath     1 non-null object
grid_label        1 non-null object
institution_id    1 non-null object
member_id         1 non-null object
mip_era           1 non-null object
source_id         1 non-null object
table_id          1 non-null object
time_range        1 non-null object
variable_id       1 non-null object
version           1 non-null object
dtypes: object(14)
memory usage: 192.0+ bytes
None
Persisting cmip6_collection_mistral at : /home/mpim/m300524/.intake_esm/collections/cmip6/cmip6_collection_mistral.cmip6.csv


In [74]:
col.df.head()

Unnamed: 0,activity_id,experiment_id,file_basename,file_dirname,file_fullpath,grid_label,institution_id,member_id,mip_era,source_id,table_id,time_range,variable_id,version
0,CNRM-CERFACS,r99i1p1f2,tos_Omon_CNRM-CM6-1_1pctCO2_r99i1p1f2_gn_18500...,/work/kd0956/CMIP6/CMIP/CNRM-CERFACS/CNRM-CM6-...,/work/kd0956/CMIP6/CMIP/CNRM-CERFACS/CNRM-CM6-...,v20180424,CNRM-CM6-1,Omon,CMIP6,1pctCO2,tos,185001-185001,gn,v20180424


In [72]:
# mistral CMIP6
!ls /work/kd0956/CMIP6/CMIP/CNRM-CERFACS/CNRM-CM6-1/1pctCO2/r99i1p1f2/Omon/tos/gn/v20180424/

/bin/sh: module: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `BASH_FUNC_module'
tos_Omon_CNRM-CM6-1_1pctCO2_r99i1p1f2_gn_185001-185001.nc


In [73]:
# intake-esm sample CMIP6 files
!ls /home/mpim/m300524/intake-esm/tests/sample_data/cmip/CMIP6/CMIP/CNRM-CERFACS/CNRM-CM6-1/1pctCO2/r1i1p1f2/Lmon/gpp/gr/v20180626/gpp

/bin/sh: module: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `BASH_FUNC_module'
gpp_Lmon_CNRM-CM6-1_1pctCO2_r1i1p1f2_gr_185001-199912.nc
