# Download and manipulate CMIP6 data

In [21]:
import glob
import os
import requests
import zipfile

import netcdf_scm.io
import pandas as pd
from scmdata import ScmRun, run_append
import tqdm

In [4]:
# Directory (relative to this notebook) that holds the extracted CMIP6 files.
DATA_DIR = os.path.join(os.pardir, "data_input", "cmip6")
DATA_DIR

'../data_input/cmip6'

In [7]:
# Path the downloaded archive is written to before it is extracted.
zip_file = os.path.join(os.curdir, "cmip6_data.zip")

In [10]:
# Experiments and variables to request from the download API.
experiments_to_download = (
    # "historical",  # currently disabled
    "ssp245",
)

variables_to_download = (
    "tas",
)

# The endpoint does not change between requests, so define it once.
url = "https://cmip6.science.unimelb.edu.au/api/v1/download_zip"

# (connect, read) timeouts in seconds. requests waits indefinitely when no
# timeout is given; the read timeout is generous because the server can take
# minutes to answer.
download_timeout = (30, 600)

# Only download when DATA_DIR does not exist yet.
# NOTE(review): if a run fails after the first archive has been extracted,
# DATA_DIR already exists and a re-run skips the remaining downloads -- delete
# DATA_DIR to force a fresh download.
if not os.path.isdir(DATA_DIR):
    print("Downloading relevant data")
    for exp in tqdm.tqdm(experiments_to_download, desc="experiment"):
        for var in tqdm.tqdm(variables_to_download, desc="variable"):
            params = (
                ("experiment_id", exp),
                ("variable_id", var),
                ("normalised", "21-yr-running-mean"),
                ("timeseriestype", "average-year-mid-year"),
            )
            r = requests.get(url, params=params, timeout=download_timeout)
            r.raise_for_status()

            # The archive is overwritten on every iteration; it is extracted
            # immediately below, so nothing is lost.
            with open(zip_file, "wb") as f:
                f.write(r.content)

            # Context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(zip_file, "r") as zip_ref:
                zip_ref.extractall(DATA_DIR)

Downloading relevant data


experiment:   0%|                                         | 0/1 [00:00<?, ?it/s]
variable:   0%|                                           | 0/1 [00:00<?, ?it/s][A
variable: 100%|██████████████████████████████████| 1/1 [02:02<00:00, 122.53s/it][A
experiment: 100%|████████████████████████████████| 1/1 [02:02<00:00, 122.55s/it]


In [14]:
# Recursively collect every "NORMED*tas*.MAG" file that sits somewhere below
# an "average-year-mid-year" directory inside DATA_DIR.
# NOTE(review): the pattern does not filter on experiment, so the "ssp245"
# in the variable name relies on only ssp245 data being present in DATA_DIR.
tas_pattern = os.path.join(
    DATA_DIR, "**", "average-year-mid-year", "**", "NORMED*tas*.MAG"
)
ssp245_tas_files = glob.glob(tas_pattern, recursive=True)
ssp245_tas_files.sort()  # sorted order, same as wrapping the glob in sorted()
len(ssp245_tas_files)

193

In [34]:
# Load the "World" region timeseries from every MAG file into a list of runs;
# a later cell combines them with run_append.
out_table = []
load_kwargs = {"drs": "CMIP6Output"}

for mag_file in tqdm.tqdm(ssp245_tas_files):
    tas = netcdf_scm.io.load_mag_file(mag_file, **load_kwargs).filter(region="World")

    # Guard against files that were normalised differently from what was
    # requested at download time; the path in the message identifies the file.
    assert tas.metadata["normalisation method"] == "21-yr-running-mean", mag_file

    # Sanity checks only: the return values are not needed. With
    # no_duplicates=True, scmdata is expected to raise if a file holds more
    # than one climate model or ensemble member.
    tas.get_unique_meta("climate_model", no_duplicates=True)
    tas.get_unique_meta("member_id", no_duplicates=True)

    out_table.append(tas)

100%|█████████████████████████████████████████| 193/193 [00:10<00:00, 17.95it/s]


In [38]:
# Combine the per-file runs into a single run. The name `out_table` is reused
# for the result, so guard on the type: re-running this cell would otherwise
# hand run_append something that is no longer the list of runs.
if isinstance(out_table, list):
    out_table = run_append(out_table)

In [39]:
# Preview the first rows of the combined run.
out_table.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,time,1850-07-01,1851-07-01,1852-07-01,1853-07-01,1854-07-01,1855-07-01,1856-07-01,1857-07-01,1858-07-01,1859-07-01,...,2091-07-01,2092-07-01,2093-07-01,2094-07-01,2095-07-01,2096-07-01,2097-07-01,2098-07-01,2099-07-01,2100-07-01
activity_id,climate_model,member_id,mip_era,model,region,scenario,unit,variable,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
ScenarioMIP,AWI-CM-1-1-MR,r1i1p1f1,CMIP6,unspecified,World,ssp245,K,tas,-0.12319,-0.16213,-0.149941,-0.088612,0.103306,0.120271,-0.049101,-0.1782,-0.057063,-0.034576,...,2.79077,2.88588,3.19958,3.02918,2.94943,2.93476,2.95843,2.78831,2.80314,2.72852
ScenarioMIP,FGOALS-g3,r1i1p1f1,CMIP6,unspecified,World,ssp245,K,tas,0.153702,-0.050636,0.013877,0.026406,-0.025644,-0.070094,-0.103168,-0.124343,-0.053982,0.122305,...,2.37216,2.35156,2.29518,2.45317,2.4793,2.50959,2.29738,2.41345,2.35094,2.31073
ScenarioMIP,FGOALS-g3,r2i1p1f1,CMIP6,unspecified,World,ssp245,K,tas,0.153702,-0.050636,0.013877,0.026406,-0.025644,-0.070094,-0.103168,-0.124343,-0.053982,0.122305,...,2.30861,2.17882,2.3085,2.22876,2.32978,2.24084,2.06921,2.22329,2.18072,2.23223
ScenarioMIP,FGOALS-g3,r3i1p1f1,CMIP6,unspecified,World,ssp245,K,tas,0.153702,-0.050636,0.013877,0.026406,-0.025644,-0.070094,-0.103168,-0.124343,-0.053982,0.122305,...,2.17587,2.11436,2.25363,2.15746,2.1521,2.11867,2.14883,2.0443,2.17207,2.1743
ScenarioMIP,FGOALS-g3,r4i1p1f1,CMIP6,unspecified,World,ssp245,K,tas,0.153702,-0.050636,0.013877,0.026406,-0.025644,-0.070094,-0.103168,-0.124343,-0.053982,0.122305,...,2.16055,2.15168,2.07621,1.99055,2.19387,2.17568,2.11488,2.12646,2.18215,2.1091


In [49]:
# Convert the run into a plain pandas DataFrame (metadata in the index, one
# column per time step) for the groupby below. `out_table` is overwritten, so
# skip the conversion if this cell has already run and it is a DataFrame.
if not isinstance(out_table, pd.DataFrame):
    out_table = out_table.timeseries()

In [52]:
# Average all rows that share a climate model (the "climate_model" index
# level), giving one mean timeseries per model.
rows_by_model = out_table.groupby(level="climate_model")
out_table_means = rows_by_model.mean()
out_table_means

time,1850-07-01,1851-07-01,1852-07-01,1853-07-01,1854-07-01,1855-07-01,1856-07-01,1857-07-01,1858-07-01,1859-07-01,...,2091-07-01,2092-07-01,2093-07-01,2094-07-01,2095-07-01,2096-07-01,2097-07-01,2098-07-01,2099-07-01,2100-07-01
climate_model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACCESS-CM2,0.012021,0.01469,-0.02566,0.014244,0.011469,-0.032908,-0.045573,0.03636,0.01106,0.016916,...,3.264263,3.30312,3.258563,3.294217,3.27574,3.396337,3.347633,3.35654,3.356443,3.37894
ACCESS-ESM1-5,0.082331,0.063298,-0.030325,0.078419,0.153294,0.034055,0.095915,0.031679,0.015508,0.038038,...,3.108643,3.146743,3.166507,3.201133,3.16767,3.205203,3.106107,3.057073,3.118143,3.18293
AWI-CM-1-1-MR,-0.12319,-0.16213,-0.149941,-0.088612,0.103306,0.120271,-0.049101,-0.1782,-0.057063,-0.034576,...,2.79077,2.88588,3.19958,3.02918,2.94943,2.93476,2.95843,2.78831,2.80314,2.72852
CESM2,-0.071711,-0.018635,0.066435,-0.0338,-0.02793,0.003034,-0.045934,-0.17626,-0.057909,0.040286,...,3.296847,3.35961,3.28547,3.325703,3.32741,3.32014,3.40588,3.29246,3.332263,3.50105
CESM2-WACCM,-0.007388,-0.167219,-0.044075,-0.07216,-0.126604,-0.040556,0.033457,0.001567,-0.133822,0.013627,...,3.127264,3.24112,3.28061,3.166578,3.259208,3.27598,3.21818,3.35567,3.305712,3.371228
CMCC-CM2-SR5,0.155986,0.21293,0.081849,0.36575,0.321307,0.323679,0.41698,0.446327,0.399858,0.321027,...,4.26443,4.06793,4.0302,4.11558,4.14314,4.29738,4.24253,4.20861,4.33033,4.20242
CMCC-ESM2,0.060162,0.171146,0.077076,0.064636,0.092505,0.244028,0.310062,0.206949,0.209377,0.303318,...,3.63031,3.79593,4.06731,4.0213,3.88465,3.84139,3.92808,3.97119,3.99952,3.80113
CNRM-CM6-1,-0.037678,-0.080929,-0.072735,-0.038629,-0.043558,-0.026948,-0.066927,0.018007,0.031398,0.014664,...,3.229203,3.269717,3.246305,3.286088,3.3138,3.385165,3.320107,3.35468,3.331033,3.405877
CNRM-CM6-1-HR,-0.152081,-0.206964,-0.149775,-0.096587,-0.094674,0.001441,-0.098072,-0.100262,-0.158562,-0.057563,...,3.70282,3.54782,3.54115,3.67602,3.68518,3.58099,3.65679,3.7739,3.77244,3.6454
CNRM-ESM2-1,-0.059424,0.011749,0.05713,-0.044778,-0.023126,-0.069598,-0.050815,-0.020062,-0.100811,0.010272,...,3.119142,3.229674,3.25211,3.285418,3.27191,3.303256,3.289732,3.361942,3.26159,3.379134


In [53]:
# Write the per-model means to CSV. Create the output directory first so the
# export does not fail when "../data_output" does not exist yet.
output_csv = os.path.join("..", "data_output", "ssp245.csv")
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
out_table_means.to_csv(output_csv)