# Goal:

Write a function that plots a summary of each climate model.

In [1]:
import netCDF4
import os

import matplotlib.pyplot as plt
import matplotlib
import numpy as np

In [2]:
old_dataset_dir = "Datasets/"
new_dataset_dir = "Datasets_test/"

In [3]:
model_names = ["ECHAM5", "GISS", "iCESM", "iHadCM3", "isoGSM"]

In [4]:
folder = os.path.join(new_dataset_dir, "ECHAM5/", "Original/")
files = {
    "isotopes" : netCDF4.Dataset(os.path.join(folder, "isotopes.nc")),
    "tsurf" : netCDF4.Dataset(os.path.join(folder, "tsurf.nc")),
    "prec" : netCDF4.Dataset(os.path.join(folder, "prec.nc"))
    }

files["tsurf"].variables["tsurf"]

<class 'netCDF4._netCDF4.Variable'>
float32 tsurf(time, lat, lon)
    long_name: surface temperature
    units: K
    code: 169
    table: 128
unlimited dimensions: time
current shape = (12000, 48, 96)
filling on, default _FillValue of 9.969209968386869e+36 used

In [47]:
def get_variable_names(ds):
    return list(ds.variables.keys())

def get_timesteps_variable(ds, varname):
    if "time" in ds.variables[varname].dimensions:
        return ds["time"][:].data
    elif "t" in ds.variables[varname].dimensions:
        return ds["t"][:].data
    else:
        print("The given variable has no time-dimension.")

def get_shape_variable(ds, varname):
    return ds.variables[varname][:].data.shape

def get_data_variable(ds, varname):
    return ds.variables[varname][:].data
        
        
def get_time_units(ds):
    if "time" in ds.variables.keys():
        return ds.variables["time"].units
    elif "t" in ds.variables.keys():
        return ds.variables["t"].units
    else:
        print("Unknown name for time variable")
        
def get_calendar(ds):
    if "time" in ds.variables.keys():
        return ds.variables["time"].calendar
    elif "t" in ds.variables.keys():
        return ds.variables["t"].calendar
    else:
        print("Unknown name for time variable")

def get_latitudes(ds):
    if "lat" in ds.variables.keys():
        return ds.variables["lat"][:].data
    if "latitude" in ds.variables.keys():
        return ds.variables["latitude"][:].data
    
def get_longitudes(ds):
    if "lon" in ds.variables.keys():
        return ds.variables["lon"][:].data
    if "longitude" in ds.variables.keys():
        return ds.variables["longitude"][:].data



def get_missing_value_fill_value_variable(ds, varname):
    fv = None
    mv = None
    if "_FillValue" in ds.variables[varname].ncattrs():
        fv = ds.variables[varname]._FillValue
    if "missing_value" in ds.variables[varname].ncattrs():
        mv = ds.variables[varname].missing_value
    return mv,fv



def summary_model_data(model_name, base_folder):
    folder = os.path.join(base_folder, model_name, "Original/")
    files = {
        "isotopes" : netCDF4.Dataset(os.path.join(folder, "isotopes.nc")),
        "tsurf" : netCDF4.Dataset(os.path.join(folder, "tsurf.nc")),
        "prec" : netCDF4.Dataset(os.path.join(folder, "prec.nc"))
        }    
    standard_var_names = {
        "isotopes": "d18O",
        "tsurf": "tsurf",
        "prec": "prec"
    }
    """
    print("--- Variables ---")
    for name, ds in files.items():
        print(name)
        print("Available variables:")
        print("{}: {}".format(name, get_variable_names(ds)))
    """
    print("--- Time ---")
    for name, ds in files.items():
        print(name)
        print("Calendar: {}".format(get_calendar(ds)))
        print("Time Units: {}".format(get_time_units(ds)))
        print("First 12 timesteps: {}".format(get_timesteps_variable(ds, standard_var_names[name])[:12]))
    """
    print("--- Data ---")
    for name, ds in files.items():    
        print(name)
        print("Lenth time axis: {}".format(len(get_timesteps_variable(ds, standard_var_names[name]))))
        print("Data shape: {}".format(get_shape_variable(ds, standard_var_names[name])))
        print("Latitudes: {}".format(get_latitudes(ds)))
        print("Longitudes: {}".format(get_longitudes(ds)))
        
        
        print("Missing value, Fill value: {}".format(get_missing_value_fill_value_variable(ds, standard_var_names[name])))
        
        plt.title("{}, variable {}, histogram of occuring values".format(model_name, name))
        plt.hist(ds.variables[standard_var_names[name]][:].data.flatten())
        plt.yscale("log")
        plt.show()
        
        plt.title("{}, variable {}, mean state".format(model_name, name))
        norm = matplotlib.colors.Normalize(vmin=-1e8, vmax=1e8)
        im = np.mean(np.squeeze(ds.variables[standard_var_names[name]][:].data),axis=0)
        print(im.shape)
        plt.imshow(im, aspect="equal", norm=norm)
        plt.colorbar()
        plt.show()
    """

In [48]:
for m in model_names:
    print(m)
    summary_model_data(m, new_dataset_dir)
    print("")

ECHAM5
--- Time ---
isotopes
Calendar: proleptic_gregorian
Time Units: day as %Y%m%d.%f
First 12 timesteps: [8500115.75 8500215.75 8500315.75 8500415.75 8500515.75 8500615.75
 8500715.75 8500815.75 8500915.75 8501015.75 8501115.75 8501215.75]
tsurf
Calendar: proleptic_gregorian
Time Units: day as %Y%m%d.%f
First 12 timesteps: [8500131.75 8500228.75 8500331.75 8500430.75 8500531.75 8500630.75
 8500731.75 8500831.75 8500930.75 8501031.75 8501130.75 8501231.75]
prec
Calendar: proleptic_gregorian


  return ds.variables["time"].calendar
  return ds.variables["time"].units


Time Units: month as %Y%m.%f
First 12 timesteps: [85001.5 85002.5 85003.5 85004.5 85005.5 85006.5 85007.5 85008.5 85009.5
 85010.5 85011.5 85012.5]

GISS
--- Time ---
isotopes
Calendar: proleptic_gregorian
Time Units: month as %Y%m.%f
First 12 timesteps: [85001.5 85002.5 85003.5 85004.5 85005.5 85006.5 85007.5 85008.5 85009.5
 85010.5 85011.5 85012.5]
tsurf
Calendar: proleptic_gregorian
Time Units: month as %Y%m.%f
First 12 timesteps: [85001.5 85002.5 85003.5 85004.5 85005.5 85006.5 85007.5 85008.5 85009.5
 85010.5 85011.5 85012.5]
prec
Calendar: proleptic_gregorian
Time Units: month as %Y%m.%f
First 12 timesteps: [85001.5 85002.5 85003.5 85004.5 85005.5 85006.5 85007.5 85008.5 85009.5
 85010.5 85011.5 85012.5]

iCESM
--- Time ---
isotopes
Calendar: proleptic_gregorian
Time Units: months since 850-1-15 00:00:00
First 12 timesteps: [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11.]
tsurf
Calendar: 365_day
Time Units: days since 0850-01-01 00:00:00
First 12 timesteps: [ 31.  59.  90. 120.

  return ds.variables["t"].calendar
  return ds.variables["t"].units


First 12 timesteps: [588. 589. 590. 591. 592. 593. 594. 595. 596. 597. 598. 599.]

isoGSM
--- Time ---
isotopes
Calendar: standard
Time Units: months since 850-1-15 00:00:00
First 12 timesteps: [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11.]
tsurf
Calendar: standard
Time Units: months since 850-1-15 00:00:00
First 12 timesteps: [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11.]
prec
Calendar: standard
Time Units: months since 850-1-15 00:00:00
First 12 timesteps: [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11.]



In [38]:
folder = os.path.join("Datasets_test", "GISS", "Original/")
files = {
    "isotopes" : netCDF4.Dataset(os.path.join(folder, "isotopes.nc")),
    "tsurf" : netCDF4.Dataset(os.path.join(folder, "tsurf.nc")),
    "prec" : netCDF4.Dataset(os.path.join(folder, "prec.nc"))
    }    

In [39]:
files["isotopes"]

if "time" in files["isotopes"].variables.keys():
    a = files["isotopes"].variables["time"].units
elif "t" in files["isotopes"].variables.keys():
    a = files["isotopes"].variables["t"].units
else:
    print("Unknown name for time variable")

  a = files["isotopes"].variables["time"].units


In [42]:
def get_time_conversion_function(time_units):
    """
    The climate models use different calendars and notations to denote files. For simplicity, this function returns
    a function that takes in timesteps and returns a year and month.
    """
    if time_units = "day as %Y%m%d.%f":
        def f(timesteps):
            assert type(timesteps) == numpy.ndarray
            # trim of subdaily scale
            timesteps = [t.split(".")[0] for t in timesteps.astype(str)]
            ms = np.array([int(t[-4:-2]) for t in timesteps]) - 1 # want month from 0-11
            ys = np.array([int(t[:-4]) for t in timesteps])
            return ms, ys
    elif time_units = "month as %Y%m.%f":
        def f(timesteps):
            assert type(timesteps) == numpy.ndarray
            # trim of subdaily scale
            timesteps = [t.split(".")[0] for t in timesteps.astype(str)]
            ms = np.array([int(t[-2:]) for t in timesteps]) - 1 # want month from 0-11
            ys = np.array([int(t[::-2]) for t in timesteps])
            return ms, ys        
    elif time_units = "months since 850-1-15 00:00:00":
        def f(timesteps):
            assert type(timesteps) == numpy.ndarray
            y_ref = 850
            m_ref = 0
            
            ms = timesteps % 12 + m_ref
            ys = timesteps // 12 + y_ref
            return ms, ys             
    elif time_units = "days since 0850-01-01 00:00:00":
        def f(timesteps):
            assert type(timesteps) == numpy.ndarray
            y_ref = 850
            m_ref = 0
            m_l = [0, 31,  59,  90, 120, 151, 181, 212, 243, 273, 304, 334]
            ms = timesteps % 365
            ys = timesteps // 365 + y_ref
            return ms, ys   
        
    elif time_units = "months since 801-1-15 00:00:00":
    elif time_units = "months since 850-1-15 00:00:00":
    else:
        raise Error("Invalid time units")

        
        
        
        
        
        
        
        def extract_dates(timesteps):
            assert type(timesteps) == numpy.ndarray
            # trim of subdaily scale
            timesteps = [t.split(".")[0] for t in timesteps.astype(str)]
            ds = [int(t[-2:]) for t in timesteps]
            ms = [int(t[-4:-2]) for t in timesteps]
            ys = [int(t[:-4]) for t in timesteps]
            return ds, ms, ys
    elif dataset_description["CLIMATE_MODEL"] == "GISS":
        def extract_dates(timesteps):
            assert type(timesteps) == numpy.ndarray
            # trim of subdaily scale
            timesteps = [t.split(".")[0] for t in timesteps.astype(str)]
            ms = [int(t[-2:]) for t in timesteps]
            ys = [int(t[:-2]) for t in timesteps]
            return ds, ms, ys        
    elif dataset_description["CLIMATE_MODEL"] == "iCESM":
        
    else: 
        raise NotImplementedError("Invalid climate model name.")

SyntaxError: unexpected EOF while parsing (Temp/ipykernel_17224/441874789.py, line 15)

In [45]:
a = files["isotopes"].variables["time"][:].data

In [46]:
from datetime import datetime
date = datetime.strptime(str(a[0]), 'Y%m%d.%f').date()
date

ValueError: time data '85001.5' does not match format 'Y%m%d.%f'

In [31]:
a.astype(str)

array(['8500115.75', '8500215.75', '8500315.75', ..., '18491015.75',
       '18491115.75', '18491215.75'], dtype='<U32')

In [35]:
timesteps = [t.split(".")[0] for t in a.astype(str)]
ds = [t[-2:] for t in timesteps]
ms = [t[-4:-2] for t in timesteps]
ys = [t[:-4] for t in timesteps]

In [37]:
ms

['01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',

In [9]:
import numpy as np
a = set([(1,2),(2,3),(4,5)])
b = set([(2,3),(4,5)])

a.intersection(b)

{(2, 3), (4, 5)}