In [5]:
import os
import xarray as xr
import pandas as pd
%matplotlib inline

In [6]:
# For every experiment_id, the per-activity tag that the conversion loop looks
# for (as a substring) in the netCDF file names. An activity that has no entry
# under an experiment is skipped for that experiment.
experiment_id_dict = {
    "1pctCO2": {
        "FAR": "1P",
        "SAR": "GG",
    },
    "historical": {
        "FAR": "1P",
        "SAR": "GS",
        "TAR": "SRES-A2",
    },
    "piControl": {
        "FAR": "CI",
        "SAR": "CI",
    },
}

# Per activity, the name of the dataset global attribute (ds.attrs[...]) that
# is read to obtain the institution id.
source_id_attrs = {
    "FAR": "institution",
    "SAR": "institution",
    "TAR": "institution",
}

In [7]:
# Activities to process; each one is read from ../data/interim/<activity_id>/.
activity_ids = [
    "FAR",
    "SAR",
    "TAR",
]

# Variables to convert; a file is selected when its name contains the id.
variable_ids = [
    "tas",
    "psl",
    "pr",
    "rsds",
    "sn",
    "tasmax",
    "tasmin",
    "sfcWind",
]

# Single table id used in every zarr store path and catalog row.
table_id = "Amon"

In [8]:
# When True, the conversion cell copies each activity's ../data/zarr/<activity_id>/
# tree to the gs://ipcc-<activity_id>/ bucket with gsutil after writing it locally.
push_to_cloud = True

In [9]:
stop = 0  # NOTE(review): never read in this cell; kept in case a later cell uses it
for activity_id in activity_ids:
    # Convert every matching netCDF file of this activity to a zarr store,
    # build a column-oriented catalog of the stores (one list per CSV column),
    # write the catalog as CSV and optionally upload everything with gsutil.
    fs_dict = {
        "activity_id": [],
        "institution_id": [],
        "source_id": [],
        "experiment_id": [],
        "member_id": [],
        "table_id": [],
        "variable_id": [],
        "grid_label": [],
        "zstore": [],
        "dcpp_init_year": []
    }

    path_to_nc = f"../data/interim/{activity_id}/"
    # List the directory once per activity instead of once per
    # (experiment, variable) pair; sorted so the processing order -- and hence
    # which file wins when two files map to the same store -- is reproducible.
    nc_files = sorted(os.listdir(path_to_nc))
    # Store names already written for this activity, used to detect collisions.
    written_stores = set()

    for experiment_id in experiment_id_dict.keys():
        # Experiment doesn't exist for this activity (loop-invariant, so it is
        # checked here rather than once per file).
        if activity_id not in experiment_id_dict[experiment_id]:
            continue
        experiment_tag = experiment_id_dict[experiment_id][activity_id]

        for variable_id in variable_ids:
            for ncfile in nc_files:
                # NOTE(review): both tests are plain substring matches on the
                # file name, so e.g. "tas" also selects files containing
                # "tasmax"/"tasmin". Confirm against the file naming convention
                # and tighten the match if those files should be excluded.
                if variable_id not in ncfile: continue # wrong variable
                if experiment_tag not in ncfile: continue # wrong experiment

                # Context manager closes the underlying file handle once the
                # store is written (the dataset was previously left open).
                with xr.open_dataset(path_to_nc+ncfile, decode_cf=False) as ds:
                    institution_id = ds.attrs[source_id_attrs[activity_id]]

                    # If different source_id and member_id for a single institution (as in SAR)
                    if activity_id == 'SAR':
                        # Both ids are taken from fixed character positions of the file name.
                        source_id = institution_id+'-'+str(ncfile[2:4])
                        member_id = f"r{ncfile[7:8]}i1p1f1"
                    else:
                        source_id = institution_id
                        member_id = "r1i1p1f1"

                    zarr_name = f"{activity_id}/{institution_id}/{source_id}/{experiment_id}/{member_id}/{table_id}/{variable_id}"
                    path_to_zarr = "../data/zarr/"+zarr_name

                    is_duplicate = zarr_name in written_stores
                    if is_duplicate:
                        # mode='w' replaces the store written earlier in this
                        # run; make the otherwise silent overwrite visible.
                        print(f"WARNING: {ncfile} overwrites already written store {zarr_name}")

                    # Write to zarr
                    ds.to_zarr(path_to_zarr, mode='w', consolidated=True)

                written_stores.add(zarr_name)
                print(zarr_name)

                # One catalog row per store: do not list the same zstore twice.
                if is_duplicate:
                    continue

                fs_dict["activity_id"].append(activity_id)
                fs_dict["institution_id"].append(institution_id)
                fs_dict["source_id"].append(source_id)
                fs_dict["experiment_id"].append(experiment_id)
                fs_dict["member_id"].append(member_id)
                fs_dict["table_id"].append(table_id)
                fs_dict["variable_id"].append(variable_id)
                fs_dict["grid_label"].append("gn")
                fs_dict["zstore"].append(f"gs://ipcc-{activity_id.lower()}/"+zarr_name)
                fs_dict["dcpp_init_year"].append("NaN")

    # Write the csv catalog next to the zarr stores so it is uploaded with them
    df = pd.DataFrame.from_dict(fs_dict)
    path_to_csv = f"../data/zarr/{activity_id}/pangeo-{activity_id.lower()}.csv"
    # The directory does not exist yet when no store was written for this activity.
    os.makedirs(os.path.dirname(path_to_csv), exist_ok=True)
    df.to_csv(path_to_csv)

    if push_to_cloud:
        print(f"\nPush {activity_id} data to Google Cloud storage:")
        transfer_command = f"../data/zarr/{activity_id}/* gs://ipcc-{activity_id.lower()}/"
        gsutil_command = f"gsutil -m cp -r {transfer_command}"
        print(gsutil_command+"\n\n")
        exit_status = os.system(gsutil_command)
        if exit_status != 0:
            # Surface a failed upload instead of ignoring it.
            print(f"WARNING: gsutil exited with status {exit_status} for {activity_id}")

FAR/UKTR/UKTR/1pctCO2/r1i1p1f1/Amon/tas
FAR/GFDL/GFDL/1pctCO2/r1i1p1f1/Amon/tas
FAR/GISS/GISS/1pctCO2/r1i1p1f1/Amon/tas
FAR/GFDL/GFDL/1pctCO2/r1i1p1f1/Amon/pr
FAR/GISS/GISS/1pctCO2/r1i1p1f1/Amon/pr
FAR/UKTR/UKTR/1pctCO2/r1i1p1f1/Amon/pr
FAR/GFDL/GFDL/1pctCO2/r1i1p1f1/Amon/sn
FAR/GISS/GISS/1pctCO2/r1i1p1f1/Amon/sn
FAR/UKTR/UKTR/historical/r1i1p1f1/Amon/tas
FAR/GFDL/GFDL/historical/r1i1p1f1/Amon/tas
FAR/GISS/GISS/historical/r1i1p1f1/Amon/tas
FAR/GFDL/GFDL/historical/r1i1p1f1/Amon/pr
FAR/GISS/GISS/historical/r1i1p1f1/Amon/pr
FAR/UKTR/UKTR/historical/r1i1p1f1/Amon/pr
FAR/GFDL/GFDL/historical/r1i1p1f1/Amon/sn
FAR/GISS/GISS/historical/r1i1p1f1/Amon/sn

Push FAR data to Google Cloud storage:
gsutil -m cp -r ../data/zarr/FAR/* gs://ipcc-far/


SAR/HCCPR/HCCPR-01/1pctCO2/r1i1p1f1/Amon/tas
SAR/CSIRO/CSIRO-01/1pctCO2/r1i1p1f1/Amon/tas
SAR/CCSR-NIES/CCSR-NIES-01/1pctCO2/r1i1p1f1/Amon/tas
SAR/CCCma/CCCma-01/1pctCO2/r1i1p1f1/Amon/tas
SAR/HCCPR/HCCPR-01/1pctCO2/r1i1p1f1/Amon/tas
SAR/DKRZ/DKRZ-01/1pct