## Load satellite and remote sensing data
 - This will compromise the driver data for the XGB machine learning algorithm

In [3]:
%matplotlib inline

import numpy as np
import pandas as pd
import xarray as xr
# import xarray.ufuncs as xu
# import cmocean as cm
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.ticker import AutoMinorLocator
# import cartopy.crs as ccrs
# import cartopy.feature as cfeature
import os

### Clean up satellite data

In [4]:
dir_raw = '/home/jovyan/MLGeo_Autumn2022_MMStoll/Final_Project/Data/'
date_range = ['1994-01','2007-11']

In [5]:
#SST
#https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.html
ds_sst = xr.open_dataset(f'{dir_raw}/sst.mnmean.nc')
ds_sst_NP = ds_sst.sel(time = slice(f'{date_range[0]}',f'{date_range[1]}'))

In [6]:
#SSS
# https://www.metoffice.gov.uk/hadobs/en4/download-en4-2-2.html

In [7]:
#xCO2
#https://gml.noaa.gov/ccgg/mbl/data.php
ds_xco2 = pd.read_csv(f'{dir_raw}/co2_GHGreference.388125861_surface.txt', sep = " ",skiprows=90)
ds_xco2

Unnamed: 0.1,Unnamed: 0,1994.208333,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,355.8640,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 484,Unnamed: 485,364.7190,Unnamed: 487,Unnamed: 488,Unnamed: 489,Unnamed: 490,Unnamed: 491,Unnamed: 492,0.1630.2
0,,1994.229167,,,,,355.8930,,,,...,,,364.7580,,,,,,,0.1280
1,,1994.250000,,,,,355.9210,,,,...,,,364.7030,,,,,,,0.1020
2,,1994.270833,,,,,355.9480,,,,...,,,364.5820,,,,,,,0.0900
3,,1994.291667,,,,,355.9780,,,,...,,,364.4310,,,,,,,0.0900
4,,1994.312500,,,,,356.0100,,,,...,,,364.2850,,,,,,,0.1020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
653,,2007.833333,,,,,382.2090,,,,...,,,383.7660,,,,,,,0.1250
654,,2007.854167,,,,,382.1960,,,,...,,,384.9030,,,,,,,0.1560
655,,2007.875000,,,,,382.1800,,,,...,,,385.9360,,,,,,,0.1790
656,,2007.895833,,,,,382.1630,,,,...,,,386.8680,,,,,,,0.1910


### Load satellite data

In [None]:
def load_satellite_data(dir_data='/home/jovyan/MLGeo_Autumn2022_MMStoll/Final_Project/LDEO_HPD_MMS/Data/'):
    '''
    load_satellite_data(dir_data='/home/jovyan/MLGeo_Autumn2022_MMStoll/Final_Project/LDEO_HPD_MMS/Data/')
        loads a dataset with satellite observations
    '''
    # Observations dictionary
    dict_data = {'spco2': f'{dir_data}/spco2_1x1_mon_SOCATv2019_199801-201712.nc', 
                 'sst': f'{dir_data}/sst_1x1_mon_NOAAOIv2_199801-201712.nc',
                 'sss': f'{dir_data}/sss_1x1_mon_EN421_199801-201712.nc',
                 'chl': f'{dir_data}/chl_1x1_mon_globColour_199801-201712.nc',
                 'mld': f'{dir_data}/mld_1x1_clim_deBoyer_199801-201712.nc',
                 'xco2': f'{dir_data}/xco2_1x1_mon_globalview_199801-201712.nc'}

    ###  Load all variables into common dataset
    ds_obs = xr.merge([xr.open_dataset(dict_data['sst']),
                       xr.open_dataset(dict_data['sss']),
                       xr.open_dataset(dict_data['chl']),
                       xr.open_dataset(dict_data['mld']),
                       xr.open_dataset(dict_data['xco2']),
                       xr.open_dataset(dict_data['spco2'])])
    
    return ds_obs

In [None]:
def load_inputs(dir_data='/local/data/artemis/observations/neural_net_data/inputs_1982_2017'):
    '''
    load_satellite_date(dir_data='/local/data/artemis/observations/neural_net_data')
        loads a dataset with satellite observations
    '''
    # Observations dictionary
    dict_data = {'spco2': f'{dir_data}/spco2_1x1_mon_SOCATv2019_198201-201712.nc', 
                 'sst': f'{dir_data}/sst_1x1_mon_NOAAOIv2_198201-201712.nc',
                 'sss': f'{dir_data}/sss_1x1_mon_EN421_198201-201712.nc',
                 'chl': f'{dir_data}/chl_1x1_mon_globColour_198201-201712.nc',
                 'mld': f'{dir_data}/mld_1x1_clim_deBoyer_198201-201712.nc',
                 'xco2': f'{dir_data}/xco2_1x1_mon_globalview_198201-201712.nc'}

    ###  Load all variables into common dataset
    ds_obs = xr.merge([xr.open_dataset(dict_data['sst']),
                       xr.open_dataset(dict_data['sss']),
                       xr.open_dataset(dict_data['chl']),
                       xr.open_dataset(dict_data['mld']),
                       xr.open_dataset(dict_data['xco2']),
                       xr.open_dataset(dict_data['spco2'])])
    
    return ds_obs