In [72]:
# imports
import ocw.dataset_processor as dsp
import ocw.data_source.local as local
import datetime as dt
from netCDF4 import num2date, Dataset, MFDataset
import numpy as np
import matplotlib.pyplot as plt
from sys import getsizeof
from glob import glob

# filepaths
home_dir = '/mnt/norgay/Datasets/'
pinatubo_home_dir = home_dir+'Pinatubo/Total_Effect_Estimation/Data/'
obs_home_dir = home_dir+'Climate/Berkeley_Earth/'

obs_dir = obs_home_dir+'Raw_NetCDF_1deg_x_1deg/TAVG/'
regridded_dir = obs_home_dir+'regridded_NetCDF_MPI-ESM-LR-grid/TAVG/'
mod_hist_dir = pinatubo_home_dir+'raw/MPI-ESM-LR/tas/'
mod_fut_dir = pinatubo_home_dir+'bias_corrected/'

mod_hist_prefix = mod_hist_dir+'tas_day_MPI-ESM-LR_historical_'

In [73]:
## settings

# vars
mod_var = 'tas'
clim_var = 'climatology'
obs_var = 'temperature'

# time range
ref_start_date = 1975
ref_end_date = 2005


In [74]:
## functions
def rounddown(flt, divisor=10):
    return int(flt/divisor)*divisor

def roundup(flt,divisor=10):
    return int(np.ceil(float(flt)/divisor)*divisor)

def temporal_slice_by_year(ds,year_start,year_end):
    start_date = dt.datetime(year_start,1,1)
    end_date = dt.datetime(year_end+1,1,1)

    timeStart = min(np.nonzero(ds.times >= start_date)[0])
    timeEnd = max(np.nonzero(ds.times <= end_date)[0])
    ds.times = ds.times[timeStart:timeEnd + 1]
    ds.values = ds.values[timeStart:timeEnd + 1, :]
    
    return ds
    
def round_datetime_to_hour(dt_obj):
    dt_plus = dt_obj + dt.timedelta(minutes=30)
    return dt_plus.replace(minute=0,second=0, microsecond=0)
# make it able to apply to numpy arrays
round_datetime_to_hour = np.vectorize(round_datetime_to_hour)

In [None]:
# get list of historical model output files to use
r*i1p1_18500101-18591231.nc'

In [75]:
# get list of observation netCDFs
start = rounddown(ref_start_date,10)
stop = roundup(ref_end_date,10)
obs_files = [obs_dir+'Complete_TAVG_Daily_LatLong1_{}.nc'.format(y) for y in range(start,stop,10)]
# obs_files = obs_files[:2]    # for testing only (comment out for running all files)

# get climatology
ds_clim = Dataset(obs_files[0],'r')
clim = ds_clim.variables['climatology'][:] + 273.15      # convert to Kelvin

# get reference grid
ds_grid = local.load_file(glob(mod_hist_path_pattern)[0],mod_var)
lats = ds_grid.lats
lons = ds_grid.lons
del ds_grid

In [76]:
# load anomaly observations 
ds_obs = local.load_dataset_from_multiple_netcdf_files(
    obs_var, time_name='date_number', file_list=obs_files)

NC file 1/4 /mnt/norgay/Datasets/Climate/Berkeley_Earth/Raw_NetCDF_1deg_x_1deg/TAVG/Complete_TAVG_Daily_LatLong1_1970.nc
NC file 2/4 /mnt/norgay/Datasets/Climate/Berkeley_Earth/Raw_NetCDF_1deg_x_1deg/TAVG/Complete_TAVG_Daily_LatLong1_1980.nc
NC file 3/4 /mnt/norgay/Datasets/Climate/Berkeley_Earth/Raw_NetCDF_1deg_x_1deg/TAVG/Complete_TAVG_Daily_LatLong1_1990.nc
NC file 4/4 /mnt/norgay/Datasets/Climate/Berkeley_Earth/Raw_NetCDF_1deg_x_1deg/TAVG/Complete_TAVG_Daily_LatLong1_2000.nc


In [77]:
# data is calculated as anomolies, so add back on climatology
days_of_year = np.array([],dtype=int)
for t in obs_files:
    days_of_year = np.append(days_of_year,Dataset(t).variables['day_of_year'][:].astype(int))
    
for x,d in enumerate(days_of_year):
    ds_obs.values[x,:,:] = ds_obs.values[x,:,:] + clim[d-1,:,:]

In [78]:
# regrid
ds_obs = dsp.spatial_regrid(ds_obs,lats,lons)

# convert times to 12-noon on day of interest
ds_obs.times = round_datetime_to_hour(ds_obs.times)

# save
ds_obs.units = 'K'
dsp.write_netcdf(ds_obs,regridded_dir+'tas_BEST_1970-2010.nc')

In [40]:
## ONLY USE TO RELOAD WITHOUT RE-REGRIDDING (COMMENT OUT IF RUNNING FROM SCRATCH)
# ds_obs = local.load_file(regridded_dir+'tas_BEST_1970-2010.nc','temperature')

In [80]:
# load historical (reference) model
ds_mod_hist = local.load_multiple_files(mod_hist_path_pattern, variable_name='tas')[0]

# slice only relevant dates
ds_mod_hist = [temporal_slice_by_year(ds,ref_start_date,ref_end_date) for ds in ds_mod_hist]

ValueError: min() arg is an empty sequence

In [82]:
temporal_slice_by_year(ds_mod_hist[0],1975,1985)

ValueError: min() arg is an empty sequence

In [17]:
## take temporal subset by month

# first slice 
# for i in range(1,13):
for i in [1]:
    obs_mon = dsp.temporal_subset(i,i,ds_obs)
    mod_hist_mon = [dsp.temporal_subset(i,i,r) for r in ds_mod_hist]

  target_dataset.times[time_index],
  dout = self.data[indx]
  dout._mask = _mask[indx]


In [19]:
mod_hist_mon[0].times

array([datetime.datetime(1850, 1, 1, 12, 0),
       datetime.datetime(1850, 1, 2, 12, 0),
       datetime.datetime(1850, 1, 3, 12, 0),
       datetime.datetime(1850, 1, 4, 12, 0),
       datetime.datetime(1850, 1, 5, 12, 0),
       datetime.datetime(1850, 1, 6, 12, 0),
       datetime.datetime(1850, 1, 7, 12, 0),
       datetime.datetime(1850, 1, 8, 12, 0),
       datetime.datetime(1850, 1, 9, 12, 0),
       datetime.datetime(1850, 1, 10, 12, 0),
       datetime.datetime(1850, 1, 11, 12, 0),
       datetime.datetime(1850, 1, 12, 12, 0),
       datetime.datetime(1850, 1, 13, 12, 0),
       datetime.datetime(1850, 1, 14, 12, 0),
       datetime.datetime(1850, 1, 15, 12, 0),
       datetime.datetime(1850, 1, 16, 12, 0),
       datetime.datetime(1850, 1, 17, 12, 0),
       datetime.datetime(1850, 1, 18, 12, 0),
       datetime.datetime(1850, 1, 19, 12, 0),
       datetime.datetime(1850, 1, 20, 12, 0),
       datetime.datetime(1850, 1, 21, 12, 0),
       datetime.datetime(1850, 1, 22, 12, 0

TODO:
    - regrid (and save?)
    - temporal subsets by month
    - load all of MPI reference model
    - create adjustment weights
    - load each MPI projection timeslice
    - bias correct