In [None]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import sys
import pandas as pd

# Directory prefix used to build the LME mask path below.
data_dir = 'F:/data/NASA_biophysical/pices/shapefiles/LME/'

# NetCDF file opened later in the notebook as the LME region mask.
filename_lme = f'{data_dir}all_mask.nc'

# Date bounds kept as ISO strings; nothing in the visible cells reads them yet.
inital_date, final_date = '1980-01-01', '2020-02-01'

# SST inputs: a single NOAA monthly-mean file, and a directory prefix that is
# globbed with '*.nc' when the CMC files are opened.
filename_noaa = 'F:/data/sst/noaa_oisst/sst.mnmean.nc'
filename_cmc = 'F:/data/sst/cmc/CMC0.2deg/v2/monthly/'

def weighted_mean_of_data(data_in,data_cond):
    """Area-weighted mean of ``data_in`` over the pixels selected by ``data_cond``.

    Each pixel is weighted by ``R**2 * dlat * dlon * cos(lat)``. The grid
    spacing is taken from the first two ``lat`` / ``lon`` values only, so a
    regularly spaced grid with at least two points per axis is assumed.

    Parameters
    ----------
    data_in : xarray Dataset (a DataArray also works)
        Data with ``lat`` and ``lon`` coordinates given in degrees.
    data_cond : boolean xarray object
        True where a pixel belongs to the region (e.g. one LME of the mask);
        must be broadcastable against the ``lat`` / ``lon`` grid.

    Returns
    -------
    Same kind of object as ``data_in`` with ``lon`` and ``lat`` summed out.
    The global attributes and, for a Dataset, each variable's attributes are
    copied over from ``data_in``. The result is NaN where the region contains
    no valid pixel (zero total area).
    """
    global_attrs = data_in.attrs
    R = 6.37e6 #radius of earth in m

    # abs() keeps pixel areas positive whether the axes run ascending or
    # descending; the sign cancels in the final ratio either way.
    grid_dy = np.abs((data_in.lat[0]-data_in.lat[1]).data)
    grid_dx = np.abs((data_in.lon[0]-data_in.lon[1]).data)
    dlat_rad = np.deg2rad(grid_dy)
    dlon_rad = np.deg2rad(grid_dx)
    dA = R**2 * dlat_rad * dlon_rad * np.cos(np.deg2rad(data_in.lat))

    # Area is NaN outside the requested region, so sum() skips those pixels.
    pixel_area = dA.where(data_cond)

    sum_data = (data_in*pixel_area).sum(dim=('lon', 'lat'),keep_attrs=True)

    # Count the area of every non-missing pixel. The previous data_in/data_in
    # indicator turned exact zeros into NaN (0/0), which removed their area
    # from the denominator while they still added 0 to the numerator.
    total_ocean_area = (data_in.notnull()*pixel_area).sum(dim=('lon', 'lat'))

    data_weighted_mean = sum_data/total_ocean_area
    data_weighted_mean.attrs = global_attrs  #save global attributes

    # Per-variable attributes exist only on a Dataset; a DataArray has no
    # data_vars, so the loop is skipped for it.
    for name in getattr(data_in, 'data_vars', ()):
        data_weighted_mean[name].attrs = data_in[name].attrs

    return data_weighted_mean


## Read in LME mask

In [None]:
# Open the LME region-mask file named in the configuration cell.
ds_mask = xr.open_dataset(filename_lme)
# The file handle is closed straight away; the Dataset object is still printed below.
ds_mask.close()
print(ds_mask)

In [None]:
# Plot the region_mask variable, then print the LME_names entry stored with the mask.
ds_mask.region_mask.plot()
print(ds_mask.LME_names)

In [None]:
# Plot only the pixels whose mask value equals 3; where() blanks every other pixel.
ds_mask.region_mask.where(ds_mask.region_mask==3).plot()

## Read in 30 years of CMC data

In [None]:
# Open every .nc file under filename_cmc and stack them along 'time' in the
# order the glob returns them (combine='nested').
# 'sq_sst' and 'mask' are discarded; drop_vars replaces the backward-compatibility
# Dataset.drop spelling and takes the names as a list.
ds = xr.open_mfdataset(
    filename_cmc+'*.nc',
    combine='nested',
    concat_dim='time',
).drop_vars(['sq_sst', 'mask'])
ds.close()
print(ds)

In [None]:
# Quick-look map of analysed_sst at position 10 along its first dimension.
ds.analysed_sst[10,:,:].plot()

In [None]:
# Opens the same mask file again and rebinds ds_mask; the handle is closed
# immediately and the Dataset is shown as the cell output.
ds_mask = xr.open_dataset(filename_lme)
ds_mask.close()
ds_mask

In [None]:
# Build area-weighted SST time series and monthly anomalies for every LME
# region from the dataset currently bound to `ds`, then write each product
# as CSV (regions 2-66) and NetCDF (all regions).

#interpolate mask
# nearest-neighbour interpolation keeps the region ids as whole numbers on the SST grid
mask_interp = ds_mask.interp_like(ds,method='nearest')

#mask data using climatology so that each month has the same data
# skipna=False leaves the climatology NaN wherever any year is missing, so the
# comparison is False there and that pixel is blanked for that calendar month.
ds_climatology = ds.groupby('time.month').mean('time',keep_attrs=True,skipna=False)
ds_masked = ds.groupby('time.month').where(ds_climatology>-3)

global_attributes = ds.attrs
region_means = []
region_anomalies = []
for ilme in range(67):

    #create mean for lme region
    cond = (mask_interp.region_mask==ilme)
    data_mean = weighted_mean_of_data(ds_masked,cond).assign_coords(region=ilme)

    #make climatology and anomalies using .groupby method
    data_climatology = data_mean.sel(time=slice('1993-01-01','2019-12-31')).groupby('time.month').mean('time',keep_attrs=True,skipna=False)
    data_anomaly = data_mean.groupby('time.month') - data_climatology
    data_anomaly.attrs = global_attributes

    region_means.append(data_mean)
    region_anomalies.append(data_anomaly)

# Concatenate once per product; re-concatenating inside the loop copies the
# accumulated result on every iteration.
ds_ts = xr.concat(region_means, dim='region')
ds_anom = xr.concat(region_anomalies, dim='region')
ds_ts = ds_ts.assign_coords(region_name=ds_mask.LME_names)
ds_anom = ds_anom.assign_coords(region_name=ds_mask.LME_names)

# Regional means: the CSV has 273.15 subtracted (K -> degC, assuming
# analysed_sst is stored in kelvin); the NetCDF keeps the values unchanged.
tem=ds_ts.isel(region=slice(2,67)).transpose().load()
df = pd.DataFrame(data=tem.analysed_sst.data-273.15,index=tem.time.data,columns=ds_ts.region_name[2:67])
df.to_csv('F:/data/NASA_biophysical/LME_timeseries/cmc_sst_v1.csv')
ds_ts.to_netcdf('F:/data/NASA_biophysical/LME_timeseries/cmc_sst_v1.nc')

# Regional anomalies. The NetCDF must hold ds_anom; writing ds_ts here would
# duplicate the absolute time series under the anomaly file name.
tem=ds_anom.isel(region=slice(2,67)).transpose().load()
df = pd.DataFrame(data=tem.analysed_sst.data,index=tem.time.data,columns=ds_ts.region_name[2:67])
df.to_csv('F:/data/NASA_biophysical/LME_timeseries/cmc_sst_anom_v1.csv')
ds_anom.to_netcdf('F:/data/NASA_biophysical/LME_timeseries/cmc_sst_anom_v1.nc')


In [None]:
#ds_ts.analysed_sst[1,:].plot(lw=3)
#ds_anom.analysed_sst[1,:].plot()

In [None]:
# Open the NOAA monthly-mean SST file through the path constant from the
# configuration cell rather than a second copy of the same literal.
ds = xr.open_dataset(filename_noaa)
ds.close()

# Wrap longitudes into [-180, 180), sort so they increase again, and rename
# 'sst' to 'analysed_sst', the variable name the regional-mean cell reads.
ds = (
    ds.assign_coords(lon=(ds.lon + 180) % 360 - 180)
    .sortby('lon')
    .rename({'sst':'analysed_sst'})
)
print(ds)

In [None]:
# Build area-weighted SST time series and monthly anomalies for every LME
# region from the dataset currently bound to `ds`, then write each product
# as CSV (regions 2-66) and NetCDF (all regions).

#interpolate mask
# nearest-neighbour interpolation keeps the region ids as whole numbers on the SST grid
mask_interp = ds_mask.interp_like(ds,method='nearest')

#mask data using climatology so that each month has the same data
# skipna=False leaves the climatology NaN wherever any year is missing, so the
# comparison is False there and that pixel is blanked for that calendar month.
ds_climatology = ds.groupby('time.month').mean('time',keep_attrs=True,skipna=False)
ds_masked = ds.groupby('time.month').where(ds_climatology>-3)

global_attributes = ds.attrs
region_means = []
region_anomalies = []
for ilme in range(67):

    #create mean for lme region
    cond = (mask_interp.region_mask==ilme)
    data_mean = weighted_mean_of_data(ds_masked,cond).assign_coords(region=ilme)

    #make climatology and anomalies using .groupby method
    data_climatology = data_mean.sel(time=slice('1854-01-01','2019-12-31')).groupby('time.month').mean('time',keep_attrs=True,skipna=False)
    data_anomaly = data_mean.groupby('time.month') - data_climatology
    data_anomaly.attrs = global_attributes

    region_means.append(data_mean)
    region_anomalies.append(data_anomaly)

# Concatenate once per product; re-concatenating inside the loop copies the
# accumulated result on every iteration.
ds_ts = xr.concat(region_means, dim='region')
ds_anom = xr.concat(region_anomalies, dim='region')
ds_ts = ds_ts.assign_coords(region_name=ds_mask.LME_names)
ds_anom = ds_anom.assign_coords(region_name=ds_mask.LME_names)

# Regional means, written without any unit conversion.
tem=ds_ts.isel(region=slice(2,67)).transpose().load()
df = pd.DataFrame(data=tem.analysed_sst.data,index=tem.time.data,columns=ds_ts.region_name[2:67])
df.to_csv('F:/data/NASA_biophysical/LME_timeseries/noaa_oisst_v1.csv')
ds_ts.to_netcdf('F:/data/NASA_biophysical/LME_timeseries/noaa_oisst_v1.nc')

# Regional anomalies. The NetCDF must hold ds_anom (ds_ts was written here
# before), and the file name now matches the CSV: it was misspelled
# 'noaa_oisstt_anom_v1.nc'. Update any reader that relied on the old name.
tem=ds_anom.isel(region=slice(2,67)).transpose().load()
df = pd.DataFrame(data=tem.analysed_sst.data,index=tem.time.data,columns=ds_ts.region_name[2:67])
df.to_csv('F:/data/NASA_biophysical/LME_timeseries/noaa_oisst_anom_v1.csv')
ds_anom.to_netcdf('F:/data/NASA_biophysical/LME_timeseries/noaa_oisst_anom_v1.nc')
