# Read data and create timeseries using PICES LME

Look at SST, ocean currents, chl-a

In [None]:
import xarray as xr
import numpy as np
from math import pi
#import datetime as dt
#import os
#from os.path import exists
import matplotlib.pyplot as plt
#import cartopy.crs as ccrs
#from datetime import *; from dateutil.relativedelta import *
#from scipy.interpolate import make_interp_spline, BSpline
#from scipy.signal import savgol_filter
#import sys
#import geopandas as gpd
import cartopy.crs as ccrs
#from cartopy.examples.arrows import sample_data
#from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
#from matplotlib.figure import Figure


# Read in PICES mask

In [None]:
# Read the PICES region mask. The data are loaded into memory inside the
# ``with`` block so the file handle is released before the mask is used,
# instead of closing the dataset and then reading from it lazily.
filename = './data/PICES_all_mask.nc'
with xr.open_dataset(filename) as ds_pices:
    ds_pices.load()
# Quick-look plot of the region ids, colour-scaled to the 11..26 range.
ds_pices.region_mask.plot(vmin=11,vmax=26)

In [None]:
# Open the ERDDAP griddap dataset 'ncdcOisst2Agg'
# (presumably the OISST sea-surface-temperature aggregate - confirm).
aggr_url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/ncdcOisst2Agg'
ds = xr.open_dataset(aggr_url)
# Take the first index along 'altitude', then discard the variables that are
# not needed here ('anom', 'err', 'ice').
ds = ds.isel(altitude=0)
ds = ds.drop({'anom','err','ice'})
ds

In [None]:
# Monthly climatology: average every time step that falls in the same
# calendar month, leaving one field per month.
sst_climatology = ds.groupby('time.month').mean(dim='time')


In [None]:
# Monthly anomalies: group the SST data (not the climatology) by calendar
# month and subtract that month's climatological mean from every time step.
# The previous version referenced an undefined name `climatology` and grouped
# `sst_climatology`, which has no 'time' coordinate left to group on.
sst_anomalies = ds.groupby('time.month') - sst_climatology
sst_anomalies

In [None]:
# Daily product URL, kept for reference only; the monthly dataset below is the one that is opened.
#url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/pmlEsaCCI31OceanColorDaily'
aggr_url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/pmlEsaCCI31OceanColorMonthly'
# NOTE: this rebinds `ds`, replacing whatever dataset an earlier cell stored in it.
ds = xr.open_dataset(aggr_url)
ds

In [None]:
# Keep only the 'chlor_a' variable and restrict it to the latitude slice
# 90 -> 20 (the slice order suggests latitude is stored north-to-south - confirm).
# After this line `ds` is a DataArray, not a Dataset.
ds = ds['chlor_a'].sel(latitude=slice(90, 20))
ds
# Disabled climatology/anomaly code, kept as written.
# NOTE(review): it refers to `climatology`, which no visible cell defines,
# and to `ds.chlor_a` although `ds` is already the chlor_a array at this point.
#chl_climatology = ds.chlor_a.groupby('time.month').mean('time')
#chl_anomalies = ds.chlor_a.groupby('time.month') - climatology


In [None]:
# Open the ERDDAP griddap dataset 'erdlasFnWind10' and derive its monthly
# climatology and anomalies.
aggr_url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/erdlasFnWind10'
ds = xr.open_dataset(aggr_url)
# One mean field per calendar month.
wnd_climatology = ds.groupby('time.month').mean('time')
# Anomalies must be built from this dataset and its own climatology; the
# previous line was copied from the SST cell and used `sst_climatology` and
# an undefined `climatology`.
wnd_anomalies = ds.groupby('time.month') - wnd_climatology

In [None]:
# Open the ERDDAP griddap dataset 'jplOscar_LonPM180' and derive its monthly
# climatology and anomalies.
aggr_url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/jplOscar_LonPM180'
ds = xr.open_dataset(aggr_url)
# One mean field per calendar month.
cur_climatology = ds.groupby('time.month').mean('time')
# Anomalies must be built from this dataset and its own climatology; the
# previous line was copied from the SST cell and used `sst_climatology` and
# an undefined `climatology`.
cur_anomalies = ds.groupby('time.month') - cur_climatology

In [None]:
# Local (Windows drive F:) locations of the input datasets ...
dir_data_sst = 'F:/data/sst/cmc/CMC0.2deg/v2/monthly/'
dir_data_sss = 'F:/data/model_data/CMEM/global-reanalysis-phy-001-030-monthly/'
dir_data_oscar = 'F:/data/sat_data/oscar/L4/oscar_third_deg/'
dir_data_ccmp = 'F:/data/sat_data/ccmp/v02.0/'
dir_data_mld = 'F:/data/model_data/godas/'
# ... and of the outputs and auxiliary files.
dir_timeseries = 'F:/data/NASA_biophysical/timeseries_data/'
dir_figs = 'F:/data/NASA_biophysical/pices/figures/'
dir_shp = 'F:/data/NASA_biophysical/pices/shapefiles/'

# Processing log for the input data:
#   oscar - reran all monthly files & climatology, updated through 2018 (2/1/2019)
#   ccmp  - added RVort; new monthly files and climatology created from 6-hourly data (4/2/2019)
#   sst   - updated all files, reprocessed monthly files and climatology (2/1/2019)
#   sss   - rerunning 2018, calculating climatology (2/2/2019)
#   ssh   - rerunning 2018, calculating climatology (2/2/2019)
#   mld   - reran climatology, downloaded final 2018 file (2/1/2019)

def weighted_mean_of_subset(ds,data_in,data_in2,cond):
    """Return the area-weighted mean of ``ds`` over the cells selected by ``cond``.

    Each grid cell is weighted by an area computed from the spacing of the
    first two ``lat``/``lon`` points and the cosine of the cell latitude.
    A cell contributes only where ``cond`` holds and both ``data_in`` and
    ``data_in2`` are finite.

    Parameters
    ----------
    ds : xarray object with ``lat`` and ``lon`` coordinates
        Data to average; also supplies the grid spacing.
    data_in, data_in2 : array-like aligned with the grid
        Fields whose non-finite values mask cells out of the average.
    cond : boolean array-like
        Region selection passed to ``where``.

    Returns
    -------
    ``ds`` summed over ``lon`` and ``lat`` with the area weights, divided by
    the summed weights.
    """
    earth_radius_m = 6.37e6  # radius of earth in m
    lat = ds.lat
    lon = ds.lon
    # Grid spacing taken from the first two points, so a regular grid is assumed.
    dlat_deg = (lat[0] - lat[1]).data
    dlon_deg = (lon[0] - lon[1]).data
    dlat_rad = np.deg2rad(dlat_deg)
    dlon_rad = np.deg2rad(dlon_deg)
    cell_area = earth_radius_m**2 * dlat_rad * dlon_rad * np.cos(np.deg2rad(lat))
    # Restrict the weights to the region, then to cells with data in both inputs.
    weights = cell_area.where(cond)
    both_finite = np.isfinite(data_in) & np.isfinite(data_in2)
    weights = weights.where(both_finite)
    weight_total = weights.sum(dim=('lon', 'lat'))
    weighted_total = (ds * weights).sum(dim=('lon', 'lat'))
    return weighted_total / weight_total


def get_climatology_filename(data_type):
    """Return the path of the monthly climatology file for ``data_type``.

    Parameters
    ----------
    data_type : str
        One of 'oscar', 'sss', 'ssh', 'sst', 'ccmp' or 'mld'.
        'sss' and 'ssh' share the same climatology file.

    Returns
    -------
    str
        The matching ``dir_data_*`` directory joined with the climatology
        file name.

    Raises
    ------
    ValueError
        If ``data_type`` is not one of the supported names. Previously an
        unknown name fell through every ``if`` and failed with an
        ``UnboundLocalError`` on ``return filename``.
    """
    if data_type == 'oscar':
        return dir_data_oscar + 'climatology_2000_2018_monthly_data_oscar.nc'
    if data_type in ('sss', 'ssh'):
        return dir_data_sss + 'clim/climatology_1993_2017_mercatorglorys12v1_gl12_mean.nc'
    if data_type == 'sst':
        return dir_data_sst + 'monthly_climatology_1992_2017_120000-CMC-L4_GHRSST-SSTfnd-CMC0.2deg-GLOB-v02.0-fv02.0.nc'
    if data_type == 'ccmp':
        return dir_data_ccmp + 'monthly/climatology_1988_2018_CCMP_Wind_Analysis_L3.0.nc'
    if data_type == 'mld':
        return dir_data_mld + 'monthly_climatology_dbss_obml_1992_2018.nc'
    raise ValueError('unknown data_type for climatology file: ' + repr(data_type))

def get_data_filename(data_type,lyr):
    """Return the path of the monthly data file for ``data_type`` and year ``lyr``.

    Parameters
    ----------
    data_type : str
        One of 'oscar', 'sss', 'ssh', 'sst', 'ccmp' or 'mld'.
        'sss' and 'ssh' share the same file.
    lyr : int
        Year; it is converted with ``str`` and embedded in the file name.

    Returns
    -------
    str
        Path to the data file. For 'sss'/'ssh' and ``lyr == 2018`` a fixed
        path to a different product is returned instead of the per-year
        reanalysis file.

    Raises
    ------
    ValueError
        If ``data_type`` is not one of the supported names. Previously an
        unknown name fell through every ``if`` and failed with an
        ``UnboundLocalError`` on ``return filename``.
    """
    year = str(lyr)
    if data_type == 'oscar':
        return dir_data_oscar + year + 'monthly_data_oscar.nc'
    if data_type in ('sss', 'ssh'):
        if lyr == 2018:
            # 2018 is read from the analysis/forecast product at a fixed path.
            return 'F:/data/model_data/CMEM/global-analysis-forecast-phys_001_015/monthly/year_subset_metoffice_coupled_orca025_GL4_SAL_b2018_dm20180208.nc'
        return dir_data_sss + year + '/' + 'year_subset_mercatorglorys12v1_gl12_mean_' + year + '.nc'
    if data_type == 'sst':
        return dir_data_sst + year + 'monthly_average_' + '120000-CMC-L4_GHRSST-SSTfnd-CMC0.2deg-GLOB-v02.0-fv02.0.nc'
    if data_type == 'ccmp':
        return dir_data_ccmp + 'monthly/CCMP_Wind_Analysis_' + year + '_V02.0_L3.0_RSS.nc'
    if data_type == 'mld':
        return dir_data_mld + 'dbss_obml.' + year + '.nc'
    raise ValueError('unknown data_type for data file: ' + repr(data_type))

def get_monthly_oscar(lyr,iclim):
    """Open the monthly OSCAR file for year ``lyr``, or the OSCAR climatology.

    Parameters
    ----------
    lyr : int
        Year of the data file; ignored when the climatology is requested.
    iclim : int
        0 selects the data file for ``lyr``; any other value selects the
        climatology file.

    Returns
    -------
    xarray.Dataset
        The dataset with longitudes wrapped to [-180, 180), sorted by ``lon``
        and ``lat``, and with two derived variables: ``spd`` (magnitude of
        ``u``, ``v``) and ``dir`` (``arctan2(v, u)`` in degrees).
    """
    if iclim==0:
        filename = get_data_filename('oscar',lyr)
    else:
        filename = get_climatology_filename('oscar')
    print(filename)
    ds = xr.open_dataset(filename)
    # Keep longitudes 20.0..379.9 before wrapping (presumably this drops
    # points that would duplicate after the wrap - confirm against the file).
    ds = ds.sel(lon=slice(20.0,379.9))
    ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180)).sortby('lon').sortby('lat')
    ds['spd'] = (ds.u**2 + ds.v**2)**.5
    # np.rad2deg replaces the former `* 180./pi`, which relied on a bare `pi`
    # that this file does not import.
    ds['dir'] = np.rad2deg(np.arctan2(ds.v, ds.u))
    # NOTE(review): close() is called on the derived dataset, not on the
    # object returned by open_dataset - confirm it releases the file.
    ds.close()
    return ds

def get_monthly_mld(lyr,iclim):
    """Open the monthly mixed-layer file for year ``lyr``, or its climatology.

    ``iclim == 0`` selects the data file for ``lyr``; any other value selects
    the climatology. The chosen path is printed, longitudes are wrapped to
    [-180, 180) and the dataset is sorted by ``lon`` then ``lat``.
    """
    filename = get_data_filename('mld',lyr) if iclim==0 else get_climatology_filename('mld')
    print(filename)
    mld = xr.open_dataset(filename)
    wrapped_lon = ((mld.lon + 180) % 360) - 180
    mld = mld.assign_coords(lon=wrapped_lon)
    mld = mld.sortby('lon').sortby('lat')
    # NOTE(review): close() is called on the derived dataset, not on the
    # object returned by open_dataset - confirm it releases the file.
    mld.close()
    return mld

def get_monthly_ccmp(lyr,iclim):
    """Open the monthly CCMP wind file for year ``lyr``, or the CCMP climatology.

    Parameters
    ----------
    lyr : int
        Year of the data file; ignored when the climatology is requested.
    iclim : int
        0 selects the data file for ``lyr``; any other value selects the
        climatology file.

    Returns
    -------
    xarray.Dataset
        The dataset with longitudes wrapped to [-180, 180), sorted by ``lon``
        and ``lat``, and with two derived variables: ``spd`` (magnitude of
        ``uwnd``, ``vwnd``) and ``dir`` (``arctan2(vwnd, uwnd)`` in degrees).
    """
    if iclim==0:
        filename = get_data_filename('ccmp',lyr)
    else:
        filename = get_climatology_filename('ccmp')
    print(filename)
    ds = xr.open_dataset(filename)
    ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180)).sortby('lon').sortby('lat')
    ds['spd'] = (ds.uwnd**2 + ds.vwnd**2)**.5
    # np.rad2deg replaces the former `* 180./pi`, which relied on a bare `pi`
    # that this file does not import.
    ds['dir'] = np.rad2deg(np.arctan2(ds.vwnd, ds.uwnd))
    # NOTE(review): close() is called on the derived dataset, not on the
    # object returned by open_dataset - confirm it releases the file.
    ds.close()
    return ds

def get_monthly_sst(lyr,iclim):
    """Open the monthly SST file for year ``lyr``, or the SST climatology.

    ``iclim == 0`` selects the data file for ``lyr``; any other value selects
    the climatology. The chosen path is printed and the dataset is returned
    after ``close()`` has been called on it.
    """
    filename = get_data_filename('sst',lyr) if iclim==0 else get_climatology_filename('sst')
    print(filename)
    sst = xr.open_dataset(filename)
    sst.close()
    return sst

def get_monthly_sss(lyr,iclim):
    """Open the salinity file for year ``lyr``, or the climatology.

    ``iclim == 0`` selects the data file for ``lyr``; any other value selects
    the climatology. The list of variables dropped while opening depends on
    ``lyr`` only: for years before 2018 every variable named below is
    skipped, otherwise only 'zos'. The path is printed after the file has
    been opened, and the dataset is returned after ``close()``.
    """
    filename = get_data_filename('sss',lyr) if iclim==0 else get_climatology_filename('sss')
    if lyr<2018:
        unwanted = ['mlotst','bottomT','sithick','siconc','usi','vsi','thetao','uo','vo','zos']
    else:
        unwanted = ['zos']
    sss = xr.open_dataset(filename,drop_variables=unwanted)
    print(filename)
    sss.close()
    return sss

def get_monthly_ssh(lyr,iclim):
    """Open the sea-surface-height file for year ``lyr``, or the climatology.

    The files are the same ones used for salinity, so the 'sss' names are
    looked up. ``iclim == 0`` selects the data file for ``lyr``; any other
    value selects the climatology. The list of variables dropped while
    opening depends on ``lyr`` only: for years before 2018 every variable
    named below is skipped, otherwise only 'so'.
    """
    # same file as sss in both cases
    filename = get_data_filename('sss',lyr) if iclim==0 else get_climatology_filename('sss')
    print(filename)
    if lyr<2018:
        unwanted = ['mlotst','bottomT','sithick','siconc','usi','vsi','thetao','uo','vo','so']
    else:
        unwanted = ['so']
    ssh = xr.open_dataset(filename,drop_variables=unwanted)
    ssh.close()
    return ssh



In [None]:
# Build regional time series (mean, climatology, anomaly) for every data type
# and every PICES region, one year at a time, and write them to disk.
darray = ['oscar','mld','ccmp','sst','sss','ssh']
dir_shp = './data/'
data_dir = './data/'
data_fig = './figures/'

# Read the PICES region mask and load it into memory so the file handle is
# released before the mask is used.
filename = './data/PICES_all_mask.nc'
with xr.open_dataset(filename) as ds_pices:
    ds_pices.load()


def _clim_on_data_time(ds_clim, var_names, time):
    """Rebuild the climatology with the data year's time coordinate so both align."""
    return xr.Dataset(
        data_vars={name: (('time','lat','lon'), ds_clim[name]) for name in var_names},
        coords={'lat': ds_clim.lat, 'lon': ds_clim.lon, 'time': time})


def _add_speed_and_direction(ds_vec, u_name, v_name):
    """Add 'spd' and 'dir' (degrees) to ds_vec in place from its vector components."""
    ds_vec['spd'] = (ds_vec[u_name]**2 + ds_vec[v_name]**2)**.5
    ds_vec['dir'] = np.rad2deg(np.arctan2(ds_vec[v_name], ds_vec[u_name]))


#region = np.arange(0,21)
for itype in range(0,6):
    dtype = darray[itype]
    yearly_mean, yearly_clim, yearly_anom = [], [], []
    for lyr in range(1993,2019):
        # Load the data year (ds) and the climatology (ds2); data_in/data_in2
        # are the fields whose finite values define the valid cells.
        if itype == 0:
            ds = get_monthly_oscar(lyr,0)
            ds = ds.drop({'spd','dir'})  # recomputed later from the regional means
            ds2 = get_monthly_oscar(lyr,1)
            ds2 = _clim_on_data_time(ds2, ['v','u'], ds.time)
            data_in = ds.u
            data_in2 = ds2.u
        elif itype == 1:
            ds = get_monthly_mld(lyr,0)
            ds2 = get_monthly_mld(lyr,1)
            ds2 = _clim_on_data_time(ds2, ['dbss_obml'], ds.time)
            data_in = ds.dbss_obml
            data_in2 = ds2.dbss_obml
        elif itype == 2:
            ds = get_monthly_ccmp(lyr,0)
            ds = ds.drop({'spd','dir'})  # recomputed later from the regional means
            ds2 = get_monthly_ccmp(lyr,1)
            ds2 = _clim_on_data_time(ds2, ['vwnd','uwnd','rel_vorticity'], ds.time)
            data_in = ds.uwnd
            data_in2 = ds2.uwnd
        elif itype == 3:
            ds = get_monthly_sst(lyr,0)
            ds2 = get_monthly_sst(lyr,1)
            ds2 = _clim_on_data_time(ds2, ['analysed_sst'], ds.time)
            data_in = ds.analysed_sst
            data_in2 = ds2.analysed_sst
        elif itype == 4:
            ds = get_monthly_sss(lyr,0)
            ds2 = get_monthly_sss(lyr,1)
            ds2 = ds2.drop('depth') #0.49m
            ds2 = _clim_on_data_time(ds2, ['so'], ds.time)
            data_in = ds.so
            data_in2 = ds2.so
        elif itype == 5:
            ds = get_monthly_ssh(lyr,0)
            ds2 = get_monthly_ssh(lyr,1)
            ds2 = ds2.drop('depth') #0.49m
            ds2 = _clim_on_data_time(ds2, ['zos'], ds.time)
            data_in = ds.zos
            data_in2 = ds2.zos

        # The mask depends only on the data grid, so put it on that grid once
        # per year instead of once per region.
        mask_interp = ds_pices.interp_like(ds,method='nearest')

        # Now iterate over regions 11..24.
        region_mean, region_clim, region_anom = [], [], []
        coord_region = []
        for iregion in range(11,25):
            cond = (mask_interp.region_mask==iregion)
            ds_mean = weighted_mean_of_subset(ds,data_in,data_in2,cond)
            ds_mean_clim = weighted_mean_of_subset(ds2,data_in,data_in2,cond)
            ds_anom = ds_mean - ds_mean_clim
            # For currents and winds, speed and direction are recalculated
            # from the regional means of the vector components.
            if itype == 0:
                for ds_vec in (ds_mean, ds_anom, ds_mean_clim):
                    _add_speed_and_direction(ds_vec, 'u', 'v')
            elif itype == 2:
                for ds_vec in (ds_mean, ds_anom, ds_mean_clim):
                    _add_speed_and_direction(ds_vec, 'uwnd', 'vwnd')
            region_mean.append(ds_mean)
            region_clim.append(ds_mean_clim)
            region_anom.append(ds_anom)
            coord_region.append(iregion)
        ds_box = xr.concat(region_mean,dim='region')
        ds_box_clim = xr.concat(region_clim,dim='region')
        ds_box_anom = xr.concat(region_anom,dim='region')
        yearly_mean.append(ds_box)
        yearly_clim.append(ds_box_clim)
        yearly_anom.append(ds_box_anom)

    # Stack all years along time and label the region axis with the region ids.
    ds_newbox = xr.concat(yearly_mean,dim='time')
    ds_newbox_clim = xr.concat(yearly_clim,dim='time')
    ds_newbox_anom = xr.concat(yearly_anom,dim='time')
    ds_newbox.coords['region']=coord_region
    ds_newbox_clim.coords['region']=coord_region
    ds_newbox_anom.coords['region']=coord_region
    print(ds_newbox)

    # NOTE(review): output goes to dir_timeseries, not to the data_dir set at
    # the top of this cell - confirm that is intended.
    filename_out = dir_timeseries + dtype + '_pices_data_v2.nc'
    ds_newbox.to_netcdf(filename_out)
    filename_out_clim = dir_timeseries + dtype + '_pices_data_minus_clim_v2.nc'
    ds_newbox_anom.to_netcdf(filename_out_clim)
    df = ds_newbox_anom.to_dataframe()
    filename_out = dir_timeseries + dtype + '_pices_data_minus_clim_v2.csv'
    df.to_csv(filename_out)
    filename_out_clim = dir_timeseries + dtype + '_pices_data_clim_v2.nc'
    ds_newbox_clim.to_netcdf(filename_out_clim)
    df = ds_newbox_clim.to_dataframe()
    filename_out = dir_timeseries + dtype + '_pices_data_clim_v2.csv'
    df.to_csv(filename_out)

    print('out!')


# old testing code
This was used to fine-tune the masking used in the weighted-mean calculation.

In [None]:
# Single-year, single-region check of the weighted-mean masking (SST, 2005, region 14).
lyr = 2005
iregion = 14
ds = get_monthly_sst(lyr, 0)
ds2 = get_monthly_sst(lyr, 1)
# Put the climatology on the data year's time axis so the two datasets align.
ds2 = xr.Dataset(
    data_vars={'analysed_sst': (('time', 'lat', 'lon'), ds2['analysed_sst'])},
    coords={'lat': ds2['lat'], 'lon': ds2['lon'], 'time': ds['time']},
)
data_in = ds['analysed_sst']
data_in2 = ds2['analysed_sst']
mask_interp = ds_pices.interp_like(ds, method='nearest')
cond = (mask_interp['region_mask'] == iregion)
ds_mean = weighted_mean_of_subset(ds, data_in, data_in2, cond)
ds_mean_clim = weighted_mean_of_subset(ds2, data_in, data_in2, cond)
# 273.15 is subtracted before printing (presumably a Kelvin-to-Celsius conversion - confirm).
print(ds_mean['analysed_sst'].data - 273.15)
print(ds_mean_clim['analysed_sst'].data - 273.15)


# Same calculation written out inline for the climatology, to compare with
# the function result printed above.
R = 6.37e6  # radius of earth in m
# Grid spacing taken from the first two points of each axis.
grid_dy = (ds['lat'][0] - ds['lat'][1]).data
grid_dx = (ds['lon'][0] - ds['lon'][1]).data
dϕ = np.deg2rad(grid_dy)
dλ = np.deg2rad(grid_dx)
dA = R**2 * dϕ * dλ * np.cos(np.deg2rad(ds['lat']))
# Cell areas inside the region, converted from m2 to km2.
pixel_area = dA.where(cond) / 1000000
both_finite = np.isfinite(data_in) & np.isfinite(data_in2)
pixel_area = pixel_area.where(both_finite)
total_ocean_area = pixel_area.sum(dim=('lon', 'lat'))
data_weighted_mean = (ds2 * pixel_area).sum(dim=('lon', 'lat')) / total_ocean_area
print(data_weighted_mean['analysed_sst'].data - 273.15)

