# Exercise Idea
Download the daily global sea surface temperature data for a specific region of interest using the MUR-JPL-L4-GLOB-v4.1 dataset. Mask out the land area from the dataset and save the data as netCDF files.


## Dataset Information
Dataset Name: MUR-JPL-L4-GLOB-v4.1 Aggregation <br>
Data Provider: NASA Jet Propulsion Laboratory (JPL) <br>
Data Source: GHRSST Global High Resolution Sea Surface Temperature (SST) <br>
Description: MUR-JPL-L4-GLOB-v4.1 is a gridded Level 4 analysis dataset that merges multi-sensor observations from various satellites and in-situ platforms to provide global coverage of sea surface temperature. The dataset is available at 1 km spatial resolution with a temporal resolution of one day. <br>
Website: https://podaac.jpl.nasa.gov/Multi-scale_Ultra-high_Resolution_MUR-SST

In [None]:
# Imports
import os
from datetime import datetime, timedelta

import netCDF4
import numpy as np
import xarray as xr
from global_land_mask import globe
from siphon.catalog import TDSCatalog
from xarray.backends import NetCDF4DataStore

# Folder in which the downloaded netCDF files will be written
save_folder_path = '/path/to/your/data/'

# Downloading data and pre-processing
# Look up the MUR SST aggregation in the JPL THREDDS catalog
ds = TDSCatalog(
    'https://thredds.jpl.nasa.gov/thredds/catalog_ghrsst_gds2.xml'
).datasets['MUR-JPL-L4-GLOB-v4.1 Aggregation']

# Temporal settings: for each of `n_years` consecutive years beginning with
# `start_year`, `n_days` consecutive days are requested, starting on the
# first day of `start_month`.
start_year, start_month = 2020, 4
n_years, n_days = 4, 61

# Spatial settings: latitude/longitude bounds of the region of interest
north, south = 60, 54
west, east = 7, 15

# Subset-service client for the dataset and the query object that the
# download loop fills in and re-submits for every requested day
ncss = ds.subset()
query = ncss.query()

# The bounding box and the requested variable are identical for every
# request, so they are set once here instead of on every iteration.
query.lonlat_box(north=north, south=south, east=east, west=west)
query.variables('analysed_sst')

for j in range(n_years):
    # First day of the download window in year `start_year + j`
    first_day = datetime(start_year + j, start_month, 1)

    for i in range(n_days):
        # Request a one-day window starting at the i-th day of the period
        day = first_day + timedelta(days=i)
        query.time_range(day, day + timedelta(days=1))

        raw = ncss.get_data(query)

        # The context manager closes the dataset once the file is written,
        # so handles do not pile up over the n_years * n_days requests.
        with xr.open_dataset(NetCDF4DataStore(raw)) as sst:
            # Convert from Kelvin to degrees Celsius
            sst['analysed_sst'] = (('time', 'lat', 'lon'),
                                   sst.analysed_sst.data - 273.15)

            # Mask land: build the ocean mask directly in (lat, lon) order
            # so it lines up with the data without any transposes.
            lat_grid, lon_grid = np.meshgrid(sst.lat.values, sst.lon.values,
                                             indexing='ij')
            is_in_ocean = globe.is_ocean(lat_grid, lon_grid)

            # Select on the mask itself rather than multiplying by it and
            # testing for zero: a genuine 0.0 degC ocean pixel must not be
            # turned into NaN. The 2-D mask broadcasts over the time axis.
            masked_sst = np.where(is_in_ocean, sst.analysed_sst.data, np.nan)
            sst['masked_sst'] = (('time', 'lat', 'lon'), masked_sst)

            # Save to netCDF, named after the first time stamp in the
            # response; you can choose any naming scheme you like.
            date_stamp = sst.time.dt.strftime("%Y%m%d").values.tolist()[0]
            sst.to_netcdf(os.path.join(save_folder_path,
                                       f'sst_{date_stamp}.nc'))