# Post-processing MARIS seawater dump

In [None]:
import pandas as pd
from fastcore.xtras import L
from pathlib import Path

In [None]:
# Path of the MARIS seawater Excel dump, relative to the user's home directory.
fname = 'pro/data/maris/2023-03-28 MARIS all seawater data.xlsx'
# Read the workbook into the raw DataFrame used by the rest of the notebook.
df = pd.read_excel(Path.home().joinpath(fname))

In [None]:
# Inspect the column labels available in the raw dump.
df.columns

In [None]:
# Preview the first rows of the raw dump.
df.head()

## Default export

In [None]:
# Raw column name -> short column name used in the default export.
# Keeping each pair on one line makes the positional correspondence between
# `cols_of_interest` and `cols_name` explicit.
_rename_map = {
    'sample_id': 'smp_id',
    'DisplayLat': 'lat',
    'DisplayLong': 'lon',
    'sampdepth': 'depth',
    'profile_id': 'profile_id',
    'begperiod': 'begperiod',
    'nusymbol': 'nusymbol',
    'unit_corr': 'unit',
    'activity_corr': 'activity',
    'uncertaint_corr': 'uncertainty',
    'ref_id': 'ref_id',
}

# Raw columns to keep, in output order.
cols_of_interest = list(_rename_map.keys())

# Short names assigned to those columns, in the same order.
cols_name = list(_rename_map.values())

In [None]:
# Keep only the columns of interest and relabel them with the short names;
# `cols_name` is matched to `cols_of_interest` by position.
df_selected = df[cols_of_interest].set_axis(cols_name, axis='columns')

In [None]:
# Preview the first rows of the renamed selection.
df_selected.head()

In [None]:
# Keep only rows with a non-negative activity (rows failing the comparison are dropped).
df_selected = df_selected.loc[df_selected['activity'].ge(0)]

In [None]:
# Keep only rows with a non-negative depth (rows failing the comparison are dropped).
df_selected = df_selected.loc[df_selected['depth'].ge(0)]

In [None]:
# Largest 137Cs activity among the retained rows (row mask and column picked in one .loc).
df_selected.loc[df_selected['nusymbol'].eq('137Cs'), 'activity'].max()

In [None]:
# Print a summary of the selection (columns, dtypes, non-null counts) before exporting.
df_selected.info()

In [None]:
# Write the selection to CSV under the home directory, leaving out the index column.
out_fname = Path.home().joinpath('pro/data/maris/maris-seawater-2023-04-15.csv')
df_selected.to_csv(out_fname, index=False)

## LaMer clustering

In [None]:
# Inspect the raw column labels again before building the LaMer subset.
df.columns

In [None]:
# Select 137Cs only.
# Take an explicit copy: this frame is modified in place afterwards (missing
# depths in `sampdepth` are reset through `.loc`), and assigning into a
# filtered selection of `df` would otherwise raise pandas'
# SettingWithCopyWarning.
df_selected = df[df['nusymbol'] == '137Cs'].copy()
# Show the resulting (rows, columns) as the cell output.
df_selected.shape

In [None]:
# Missing depths are encoded as -1 in `sampdepth`; set them to 0 in place.
df_selected.loc[df_selected['sampdepth'].eq(-1), 'sampdepth'] = 0

In [None]:
# Keep the shallow samples only (sampdepth <= 100 m).
df_selected = df_selected.loc[df_selected['sampdepth'].le(100)]
# Show the resulting (rows, columns) as the cell output.
df_selected.shape

In [None]:
# Restrict the selection to the IHO sea areas of interest.
# To be added: north of Iceland, ...
iho_of_interest = [
    'Bay of Biscay',
    'Bristol Channel',
    'Caribbean Sea',
    'Celtic Sea',
    'Gulf of Guinea',
    'Gulf of Mexico',
    'Inner Seas off the West Coast of Scotland',
    'Labrador Sea',
    'North Atlantic Ocean',
    'English Channel',
    "Irish Sea and St. George's Channel",
    'North Sea',
    'Skagerrak',
    'Baltic Sea',
    'Gulf of Bothnia',
    'Gulf of Finland',
    'Gulf of Riga',
    'Kattegat',
]

# Keep the rows whose `areaname` is one of the listed areas.
df_selected = df_selected.loc[df_selected['areaname'].isin(iho_of_interest)]
# Show the resulting (rows, columns) as the cell output.
df_selected.shape

In [None]:
# Preview the first rows of the current selection.
df_selected.head()

In [None]:
# Exclude filtered water: drop the rows flagged 'Y' in `filtered`.
df_selected = df_selected.loc[df_selected['filtered'].ne('Y')]
# Show the resulting (rows, columns) as the cell output.
df_selected.shape


In [None]:
# Raw columns kept for the LaMer export, in output order.
# They are relabelled by position with the short names in `cols_name`.
cols_of_interest = [
    'sample_id',
    'DisplayLat',
    'DisplayLong',
    'areaname',
    'area_id',
    'sampdepth',
    'profile_id',
    'begperiod',
    'nusymbol',
    'unit_corr',
    'activity_corr',
    'uncertaint_corr',
    'ref_id',
]

In [None]:
# Short column names for the LaMer export, in output order.
# They are applied by position to the columns listed in `cols_of_interest`.
cols_name = [
    'smp_id',
    'lat',
    'lon',
    'area_name',
    'area_id',
    'depth',
    'profile_id',
    'begperiod',
    'nusymbol',
    'unit',
    'activity',
    'uncertainty',
    'ref_id',
]

In [None]:
# Preview the first rows of the selection before the columns are reduced and renamed.
df_selected.head()

In [None]:
# Keep only the columns of interest and relabel them with the short names;
# `cols_name` is matched to `cols_of_interest` by position.
df_selected = df_selected[cols_of_interest].set_axis(cols_name, axis='columns')

In [None]:
# Preview the first rows of the renamed selection.
df_selected.head()

In [None]:
# Flag the rows located exactly at (0, 0): both `lat` and `lon` equal to zero.
bad_loc = df_selected['lat'].eq(0) & df_selected['lon'].eq(0)
# Show the boolean mask as the cell output.
bad_loc


In [None]:
# Drop the rows flagged in `bad_loc`.
df_selected = df_selected.loc[~bad_loc]

In [None]:
# Write the LaMer selection to CSV under the home directory, leaving out the index column.
out_fname = Path.home().joinpath('pro/data/maris/maris-seawater-lamer-2023-07-25.csv')
df_selected.to_csv(out_fname, index=False)