# Download CMIP6 4xCO2 data

This uses `netcdf-scm` to grab and process data.

See https://gitlab.com/netcdf-scm/netcdf-scm. This is based on one of the notebook examples.

Nicholls, Z, Lewis, J, Makin, M, et al. Regionally aggregated, stitched and de-drifted CMIP-climate data, processed with netCDF-SCM v2.0.0. Geosci Data J. 2021; 00: 1–45. https://doi.org/10.1002/gdj3.113

In [None]:
from climateforcing.utils import mkdir_p
import netcdf_scm.io
import os
import glob
import requests
import zipfile
from tqdm import tqdm 
import scmdata
import matplotlib.pyplot as plt
import platform

In [None]:
# Show the operating system name; winapi_path only rewrites paths when this is "Windows"
platform.system()

In [None]:
# Location of the downloaded zip archive, relative to this notebook.
# Every download is written to this same path.
_zip_path_parts = ("..", "data", "netcdf-scm", "4xCO2_data.zip")
ZIP_FILE = os.path.join(*_zip_path_parts)
ZIP_FILE

In [None]:
# Directory that the zip archives are extracted into, relative to this notebook.
_data_dir_parts = ("..", "data", "netcdf-scm", "cmip_data")
DATA_DIR = os.path.join(*_data_dir_parts)
# Create the directory up front so the extraction step has somewhere to write
mkdir_p(DATA_DIR)

In [None]:
# from https://stackoverflow.com/questions/40419395/python-zipfile-extractall-ioerror-on-windows-when-extracting-files-from-long-pat
class ZipfileLongPaths(zipfile.ZipFile):
    """``zipfile.ZipFile`` that passes each extraction target through ``winapi_path``."""

    def _extract_member(self, member, targetpath, pwd):
        """Extract ``member`` to the ``winapi_path``-converted ``targetpath``.

        All arguments are forwarded unchanged to the parent implementation,
        apart from the rewritten target path.
        """
        converted_target = winapi_path(targetpath)
        return super()._extract_member(member, converted_target, pwd)
    
    
def winapi_path(dos_path, encoding=None):
    """Return an absolute path, using the Windows extended-length form on Windows.

    Parameters
    ----------
    dos_path
        Path to convert; it is made absolute with ``os.path.abspath``.
    encoding
        Unused. Kept so existing callers that pass it keep working.

    Returns
    -------
    str
        On Windows, the absolute path prefixed with ``\\\\?\\`` (or
        ``\\\\?\\UNC\\`` for paths starting with two backslashes). On every
        other platform, the absolute path unchanged.
    """
    path = os.path.abspath(dos_path)
    if platform.system() != "Windows":
        return path

    # Already in extended-length form: prefixing again would turn
    # "\\?\C:\..." into "\\?\UNC\?\C:\...", which is not a valid path.
    if path.startswith("\\\\?\\"):
        return path

    if path.startswith("\\\\"):
        # UNC share: drop the leading "\\" and use the UNC long-path prefix
        return "\\\\?\\UNC\\" + path[2:]

    return "\\\\?\\" + path

In [None]:
# Experiments and variables to request from the download API
experiments = (
    "abrupt-4xCO2",
)
variables = (
    "tas",
    "rsdt",
    "rsut",
    "rlut",
)

# The endpoint is the same for every request, so define it once
url = "https://cmip6.science.unimelb.edu.au/api/v1/download_zip"
# (connect, read) timeouts in seconds. Without a timeout requests waits
# forever on a stalled connection. The read timeout is generous because
# the server may need a while before it starts sending the archive
# (NOTE(review): tune if downloads time out).
request_timeout = (30, 600)
# Size of the pieces streamed to disk, in bytes
download_chunk_size = 1024 * 1024

for exp in experiments:
    for var in variables:
        print(f"Downloading {exp} {var}")
        params = (
            ("experiment_id", exp),
            ("variable_id", var),
            ("timeseriestype", "average-year-mid-year"),
            ("normalised", "21-yr-running-mean"),
        )

        # Stream the response to disk instead of holding the whole archive
        # in memory; the context manager releases the connection on error.
        with requests.get(
            url, params=params, stream=True, timeout=request_timeout
        ) as r:
            r.raise_for_status()

            # ZIP_FILE is overwritten on every iteration; it is only a
            # staging file for the extraction that follows.
            with open(ZIP_FILE, "wb") as f:
                for chunk in r.iter_content(chunk_size=download_chunk_size):
                    f.write(chunk)

        # The context manager closes the archive even if extraction fails
        with ZipfileLongPaths(ZIP_FILE, "r") as zip_ref:
            zip_ref.extractall(DATA_DIR)

print("Finished")

In [None]:
# Recursively find every .MAG file that sits below an "abrupt-4xCO2"
# directory anywhere inside DATA_DIR.
mag_pattern = os.path.join(DATA_DIR, "**", "abrupt-4xCO2", "**", "*.MAG")
available_files = glob.glob(winapi_path(mag_pattern), recursive=True)
print(f"{len(available_files)} available files")
available_files[:5]

In [None]:
# Load each MAG file found above, with a progress bar
db = []
for mag_file in tqdm(available_files, position=0, leave=True):
    db.append(netcdf_scm.io.load_mag_file(mag_file, "CMIP6Output"))

In [None]:
# Merge the individually loaded runs and keep only the "World" region
db = (
    scmdata.run_append(db)
    .filter(region="World")
)
db.head()

In [None]:
# Variables that must all be present for a model/member/scenario to be kept
required_vars = {"tas", "rlut", "rsut", "rsdt"}
# Every kept time series is relabelled so that its first year is this one
force_first_year = 1850

out = []
grouped_runs = db.groupby(["climate_model", "member_id", "scenario"])
for model_run in tqdm(grouped_runs, position=0, leave=True):
    # Metadata identifying this group, used in the messages below
    climate_model = model_run.get_unique_meta("climate_model", True)
    scenario = model_run.get_unique_meta("scenario", True)
    member_id = model_run.get_unique_meta("member_id", True)

    # Skip groups that lack any of the required variables
    available_vars = model_run["variable"].unique()
    missing_vars = [v for v in required_vars if v not in available_vars]
    if missing_vars:
        print(f"Not all required data for {climate_model} {member_id} {scenario}")
        print("Available vars: {}".format(available_vars))
        continue

    # rndt = rsdt - rsut - rlut
    # (presumably the net downward top-of-atmosphere flux; confirm against
    # the CMIP variable definitions)
    shortwave_difference = model_run.filter(variable="rsdt").subtract(
        model_run.filter(variable="rsut"),
        op_cols={"variable": "rsdt - rsut"},
    )
    rndt = shortwave_difference.subtract(
        model_run.filter(variable="rlut"),
        op_cols={"variable": "rndt"},
    )

    # Original variables plus rndt as a table with one column per year
    combined_run = scmdata.run_append([model_run, rndt])
    yearly = combined_run.timeseries(time_axis="year", drop_all_nan_times=True)

    # Require at least 150 yearly columns
    n_years = yearly.shape[1]
    if n_years < 150:
        print(f"Time series is too short in {climate_model} {member_id} {scenario}")
        continue

    # Shift the year labels so the first column becomes force_first_year
    first_year = yearly.columns[0]
    yearly.columns = yearly.columns.map(
        lambda year: force_first_year + year - first_year
    )

    out.append(scmdata.ScmRun(yearly))

out = scmdata.run_append(out)
out.head()

In [None]:
# One figure per scenario: "tas" time series coloured by climate model
for scenario_runs in out.groupby("scenario"):
    scenario = scenario_runs.get_unique_meta("scenario", True)
    tas_runs = scenario_runs.filter(variable="tas")
    ax = tas_runs.lineplot(hue="climate_model", time_axis="year")
    ax.set_title(scenario)
    plt.show()

In [None]:
# Display the combined `out` object as the cell output
out

In [None]:
# Write the combined data next to the downloaded files
csv_path = os.path.join("..", "data", "netcdf-scm", "4xCO2.csv")
out.to_csv(csv_path)