In [None]:
#| default_exp utils

# Utilities
> Various utilities

In [None]:
#| export
from pathlib import Path
from netCDF4 import Dataset
from fastcore.test import test_eq
import fastcore.all as fc
import pandas as pd
import numpy as np
import requests
from shapely import MultiPoint
from operator import attrgetter
from dataclasses import dataclass

from marisco.configs import species_lut_path, sediments_lut_path

import jellyfish as jf
from collections.abc import Callable

## Callbacks

In [None]:
#| export
class Callback:
    # Base class for callbacks: `run_cbs` executes callbacks sorted by their `order` attribute.
    order = 0

In [None]:
#| export
def run_cbs(cbs, obj=None):
    "Call every callback of `cbs` on `obj`, by increasing `order`, logging callback docstrings in `obj.logs`."
    ordered = sorted(cbs, key=attrgetter('order'))
    for callback in ordered:
        doc = callback.__doc__
        # Only documented callbacks are logged; `obj` must then expose a `logs` list.
        if doc:
            obj.logs.append(doc)
        callback(obj)

In [None]:
#| export
class Transformer():
    "Run callbacks against this object and return `dfs`; docstrings of the callbacks run are collected in `logs`."
    def __init__(self, 
                 dfs, # Data exposed to the callbacks (as `self.dfs`) and returned by `__call__`
                 cbs=None, # Callbacks to run; `None` means no callback
                ): 
        fc.store_attr()
        self.logs = []
        
    def callback(self):
        "Run the stored callbacks on this instance."
        # `cbs` defaults to `None`, which `run_cbs` cannot sort: treat it as "no callback".
        run_cbs([] if self.cbs is None else self.cbs, self)
        
    def __call__(self):
        "Run the callbacks, then return `dfs`."
        self.callback()
        return self.dfs

## Validation

In [None]:
#| export
def has_valid_varname(
    var_names:list, # Variable names to check
    cdl_path:str, # Path to MARIS CDL file (point of truth)
    group = None, # Name of the CDL group to restrict the check to (all groups if `None`)
):
    "Check that all proposed variable names are in MARIS CDL, printing each name that is not."
    with Dataset(cdl_path) as nc:
        if group is not None:
            # Materialise the names of the requested group while the file is still open
            allowed = set(nc.groups[group].variables.keys())
        else:
            # Union of the variable names of every group
            allowed = {name for grp in nc.groups.values() for name in grp.variables.keys()}

    has_valid = True
    # Go through every name (not only the first one) so that all invalid names are reported
    for name in var_names:
        if name not in allowed:
            has_valid = False
            if group is not None:
                print(f'"{name}" variable name not found in group "{group}" of MARIS CDL')
            else:
                print(f'"{name}" variable name not found in MARIS CDL')
    return has_valid

In [None]:
VARNAMES = ['lat', 'lon']
test_eq(has_valid_varname(VARNAMES, './files/nc/maris-cdl.nc'), True)

In [None]:
VARNAMES = ['ba140_invalid', 'ba140_dl']
test_eq(has_valid_varname(VARNAMES, './files/nc/maris-cdl.nc'), False)

"ba140_invalid" variable name not found in MARIS CDL


TODO (NEED TO BE DISCUSSED): 

    - update `./files/nc/maris-cdl.nc` to include bio_group

In [None]:
# VARNAMES = ['bio_group', 'body_part', 'species']
# test_eq(has_valid_varname(VARNAMES, './files/nc/maris-cdl.nc'), True)

In [None]:
# VARNAMES = ['bio_group', 'body_part', 'species']
# test_eq(has_valid_varname(VARNAMES,  nc_tpl_path()), True)

## Geoprocessing

In [None]:
#| export
def get_bbox(df, # DataFrame holding one point per row
             coord_cols=('lon', 'lat') # Names of the x and y coordinate columns, in that order
            ):
    "Return the envelope of the points read from the `coord_cols` columns of `df`."
    x, y = coord_cols
    # Pair the two columns directly: `iterrows` builds a Series per row, which is slow
    # and coerces both coordinates to a common dtype.
    return MultiPoint(list(zip(df[x], df[y]))).envelope

In [None]:
df = pd.DataFrame({'lon': np.linspace(-10, 5, 20), 'lat':  np.linspace(40, 50, 20)})
bbox = get_bbox(df);

In [None]:
# To get `lon_min`, `lat_min`, `lon_max`, `lat_max`
bbox.bounds

(-10.0, 40.0, 5.0, 50.0)

In [None]:
# And its Well-Known Text representation
bbox.wkt

'POLYGON ((-10 40, 5 40, 5 50, -10 50, -10 40))'

In [None]:
# If unique (lon, lat)
df = pd.DataFrame({'lon': [0, 0], 'lat':  [1, 1]})
bbox = get_bbox(df);

In [None]:
bbox.bounds

(0.0, 1.0, 0.0, 1.0)

## Downloaders

In [None]:
#| export
def download_files_in_folder(owner:str, # GitHub account owning the repository
                             repo:str, # Repository name
                             src_dir:str, # Folder of the repository to download files from
                             dest_dir:str, # Local folder where the files are written
                             branch:str='master' # Branch to list and download the files from
                             ):
    "Download the files (sub-folders are skipped) of a GitHub repository folder into `dest_dir`."
    # Make a GET request to the GitHub API to get the contents of the folder,
    # on the same branch as the one the files are downloaded from.
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{src_dir}"
    response = requests.get(url, params={'ref': branch})

    if response.status_code == 200:
        contents = response.json()

        # Iterate over the entries and download the ones that are files
        for item in contents:
            if item["type"] == "file":
                fname = item["name"]
                download_file(owner, repo, src_dir, dest_dir, fname, branch=branch)
    else:
        print(f"Error: {response.status_code}")

def download_file(owner, # GitHub account owning the repository
                  repo, # Repository name
                  src_dir, # Folder of the repository containing the file
                  dest_dir, # Local folder where the file is written
                  fname, # Name of the file to download
                  branch='master' # Branch to download the file from
                  ):
    "Download a single file of a GitHub repository into `dest_dir`, printing the outcome."
    # Make a GET request to get the raw file contents
    url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{src_dir}/{fname}"
    response = requests.get(url)

    if response.status_code == 200:
        # Save the file locally
        with open(Path(dest_dir) / fname, "wb") as file:
            file.write(response.content)
        print(f"{fname} downloaded successfully.")
    else:
        print(f"Error: {response.status_code}")

## WoRMS
The [World Register of Marine Species (WoRMS)](https://www.marinespecies.org) is an authoritative classification and catalogue of marine names. It provides a REST API (among others) that allows "fuzzy" matching of any species name you might encounter in marine data sources against its own database. There are several types of matches as described [here](https://www.marinespecies.org/tutorial_taxonmatch.php).

In [None]:
#| export
def match_worms(
    name:str # Name of species to look up in WoRMS
    ):
    "Lookup `name` in WoRMS (fuzzy match); return the decoded JSON response, or -1 on any non-200 status."
    response = requests.get(
        'https://www.marinespecies.org/rest/AphiaRecordsByMatchNames',
        params={'scientificnames[]': [name], 'marine_only': 'true'},
        headers={'accept': 'application/json'},
    )
    # Only a 200 status is treated as a successful request
    if response.status_code != 200:
        return -1
    return response.json()

For instance:

In [None]:
#|eval: false
match_worms('Aristeus antennatus')

[[{'AphiaID': 107083,
   'url': 'https://www.marinespecies.org/aphia.php?p=taxdetails&id=107083',
   'scientificname': 'Aristeus antennatus',
   'authority': '(Risso, 1816)',
   'status': 'accepted',
   'unacceptreason': None,
   'taxonRankID': 220,
   'rank': 'Species',
   'valid_AphiaID': 107083,
   'valid_name': 'Aristeus antennatus',
   'valid_authority': '(Risso, 1816)',
   'parentNameUsageID': 106807,
   'kingdom': 'Animalia',
   'phylum': 'Arthropoda',
   'class': 'Malacostraca',
   'order': 'Decapoda',
   'family': 'Aristeidae',
   'genus': 'Aristeus',
   'citation': 'DecaNet eds. (2024). DecaNet. Aristeus antennatus (Risso, 1816). Accessed through: World Register of Marine Species at: https://www.marinespecies.org/aphia.php?p=taxdetails&id=107083 on 2024-06-10',
   'lsid': 'urn:lsid:marinespecies.org:taxname:107083',
   'isMarine': 1,
   'isBrackish': 0,
   'isFreshwater': 0,
   'isTerrestrial': 0,
   'isExtinct': 0,
   'match_type': 'exact',
   'modified': '2022-08-24T09:48:1

In [None]:
# open dbo_species
#from tqdm import tqdm
#results = []
#species = pd.read_excel(species_lut_path()).species
#for i, name in tqdm(enumerate(species), total=len(species)):
#    if i > 1:
#        worms_match = match_worms(name)
#        if worms_match != -1:
#            results.append(worms_match[0][0])

In [None]:
# np.unique(np.array([result['phylum'] for result in results]))

array(['Annelida', 'Arthropoda', 'Bryozoa', 'Chaetognatha', 'Charophyta',
       'Chlorophyta', 'Chordata', 'Cnidaria', 'Ctenophora',
       'Echinodermata', 'Mollusca', 'Myzozoa', 'Ochrophyta', 'Porifera',
       'Rhodophyta', 'Tracheophyta'], dtype='<U13')

In [None]:
#len(maris_worms_matches)

In [None]:
#maris_worms_matches = fc.load_pickle('./files/pkl/maris-worms-matches.pkl')

In [None]:
#np.unique(np.array([result['phylum'] for result in maris_worms_matches]))

In [None]:
#len([result for result in maris_worms_matches if result['status'] == 'accepted'])

## Marisco look-up table fuzzy matching
Using https://jamesturk.github.io/jellyfish fuzzy matching distance metrics.


In [None]:
#| export
@dataclass
class Match:
    "Container for the result of a name match (field meanings below are inferred from their names)."
    matched_id: int # presumably the id of the matched MARIS look-up table entry — TODO confirm against callers
    matched_maris_name: str # presumably the name of the matched MARIS look-up table entry — TODO confirm
    source_name: str # presumably the name as given by the data provider — TODO confirm
    match_score: int # presumably the distance returned by the matching function — TODO confirm

In [None]:
#| export
def match_maris_lut(
    lut_path: str, # Path to MARIS species authoritative species look-up table
    data_provider_name: str, # Name of data provider nomenclature item to look up 
    maris_id: str, # Id of MARIS lookup table nomenclature item to match
    maris_name: str, # Name of MARIS lookup table nomenclature item to match
    dist_fn: Callable = jf.levenshtein_distance, # Distance function
    nresults: int = 10 # Maximum number of results to return
) -> pd.DataFrame:
    """
    Fuzzy matching data provider and MARIS lookup tables (e.g biota species, sediments, ...).

    Returns the `nresults` rows with the lowest `dist_fn` score, restricted to the
    `maris_id`, `maris_name` and `score` columns.
    """
    # Load the look-up table, discard rows without a name and make ids integers
    lut = (pd.read_excel(lut_path)
             .dropna(subset=[maris_name])
             .astype({maris_id: 'int'}))

    # Case-insensitive distance between the looked-up name and each name of the table (element-wise)
    scores = lut[maris_name].str.lower().apply(
        lambda candidate: dist_fn(data_provider_name.lower(), candidate))

    # Lowest scores (closest names) first, truncated to `nresults` rows
    ranked = lut.assign(score=scores).sort_values(by='score', ascending=True)
    return ranked.iloc[:nresults][[maris_id, maris_name, 'score']]

Below is an example trying to match the name "PLANKTON" with the `dbo_species_cleaned.xlsx` MARIS biota species lookup table:

In [None]:
lut_fname = '../files/lut/dbo_species_cleaned.xlsx'
match_maris_lut(lut_fname, data_provider_name='PLANKTON', 
                maris_id='species_id', maris_name='species')

Unnamed: 0,species_id,species,score
281,280,Plankton,0
696,695,Zooplankton,3
633,632,Palaemon,4
697,696,Phytoplankton,5
812,811,Chanos,5
160,159,Neuston,5
234,233,Penaeus,6
1458,1457,Lamnidae,6
1438,1437,Labrus,6
1527,1526,Favites,6


Below is an example trying to match the name "GLACIAL" with the `dbo_sedtype.xlsx` MARIS sediment lookup table:

In [None]:
# sediments_lut_path()
lut_fname = '../files/lut/dbo_sedtype.xlsx'
match_maris_lut(lut_fname, data_provider_name='GLACIAL', 
                maris_id='sedtype_id', maris_name='sedtype')

Unnamed: 0,sedtype_id,sedtype,score
26,25,Glacial,0
3,2,Gravel,4
2,1,Clay,5
51,50,Glacial clay,5
4,3,Marsh,6
7,6,Sand,6
13,12,Silt,6
15,14,Sludge,6
27,26,Soft,7
52,51,Soft clay,7


In [None]:
#def match_maris_species(
#    lut_path:str, # Path to MARIS species authoritative species look-up table
#    name:str, # Name of species to look up 
#    col_lookup:str='species', # Name of the column where the character strings match
#    dist_fn:Callable=jf.levenshtein_distance, # Jellyfish distance to use
#    coi:list=['species_id', 'species', 'Taxonname', 'TaxonDBID'], # Columns of interest to display
#    nresults:int=10 # Maximum number of results to return
#    ):
#    "Fuzzy matching biota species provided by the data provider and MARIS one."
#    df = pd.read_excel(lut_path)
#    df = df.dropna(subset=col_lookup)
#    df = df.astype({'species_id':'int'})
#    results = []
#    for _, row in df.iterrows():
#        score = dist_fn(name.lower(), row[col_lookup].lower())
#        result = row[coi].to_dict()
#        result['score'] = score
#        results.append(result)
#    return pd.DataFrame(results).sort_values(by='score', ascending=True)[:nresults]

In [None]:
# species_lut_path()
#lut_fname = '../files/lut/dbo_species_cleaned.xlsx'
#match_maris_species(lut_fname, 'PLANKTON')

In [None]:
#def match_maris_sediment(
#    name:str, # Name of sediment to look up 
#    col_lookup:str='sedtype', # Name of the column where the character strings match
#    dist_fn:Callable=jf.levenshtein_distance, # Jellyfish distance to use
#    coi:list=['sedtype_id', 'sedtype'], # Columns of interest to display
#    nresults:int=10 # Maxiumn number of results to return
#    ):
#    "Fuzzy matching sediments type provided by the data provider and MARIS one."
#    df = pd.read_excel(sediments_lut_path())
#    df = df.dropna(subset=col_lookup)
#    df = df.astype({'sedtype_id':'int'})
#    results = []
#    for _, row in df.iterrows():
#        score = dist_fn(name.lower(), row[col_lookup].lower())
#        result = row[coi].to_dict()
#        result['score'] = score
#        results.append(result)
#    return pd.DataFrame(results).sort_values(by='score', ascending=True)[:nresults]

In [None]:
#match_maris_sediment('GLACIAL')

In [None]:
#| export
def has_valid_varname(
    var_names:list, # variable names
    cdl_path:str, # Path to MARIS CDL file (point of truth)
    group = None, # Check if the variable names is contained in the group
):
    "Check that proposed variable names are in MARIS CDL"
    # Collect the variable names of every group of the CDL
    with Dataset(cdl_path) as nc:
        names_by_group = {grp.name: list(grp.variables.keys()) for grp in nc.groups.values()}

    if group is None:
        # No group requested: accept a name found in any group
        allowed_vars = list({name for names in names_by_group.values() for name in names})
    else:
        allowed_vars = names_by_group[group]

    # Report every name that is not allowed
    missing = [name for name in var_names if name not in allowed_vars]
    for name in missing:
        if group is None:
            print(f'"{name}" variable name not found in MARIS CDL')
        else:
            print(f'"{name}" variable name not found in group "{group}" of MARIS CDL')
    return not missing

In [None]:
# test=has_valid_varname(VARNAMES, nc_tpl_path(),'seawater')
# test

In [None]:
VARNAMES = ['lat', 'lon']
test_eq(has_valid_varname(VARNAMES, './files/nc/maris-cdl.nc','seawater'), True)

In [None]:
# VARNAMES = ['ba140_invalid', 'ba140_dl']
# test_eq(has_valid_varname(VARNAMES, nc_tpl_path()), False)

"ba140_invalid" variable name not found in MARIS CDL


In [None]:
# VARNAMES = ['bio_group', 'body_part', 'species']
# test_eq(has_valid_varname(VARNAMES, nc_tpl_path()), True)

## Geoprocessing

In [None]:
#| export
def get_bbox(df, # DataFrame holding one point per row
             coord_cols=('lon', 'lat') # Names of the x and y coordinate columns, in that order
            ):
    "Return the envelope of the points read from the `coord_cols` columns of `df`."
    x, y = coord_cols
    # Pair the two columns directly: `iterrows` builds a Series per row, which is slow
    # and coerces both coordinates to a common dtype.
    return MultiPoint(list(zip(df[x], df[y]))).envelope

In [None]:
df = pd.DataFrame({'lon': np.linspace(-10, 5, 20), 'lat':  np.linspace(40, 50, 20)})
bbox = get_bbox(df);

In [None]:
# To get `lon_min`, `lat_min`, `lon_max`, `lat_max`
bbox.bounds

(-10.0, 40.0, 5.0, 50.0)

In [None]:
# And its Well-Known Text representation
bbox.wkt

'POLYGON ((-10 40, 5 40, 5 50, -10 50, -10 40))'

In [None]:
# If unique (lon, lat)
df = pd.DataFrame({'lon': [0, 0], 'lat':  [1, 1]})
bbox = get_bbox(df);

In [None]:
bbox.bounds

(0.0, 1.0, 0.0, 1.0)

## Downloaders

In [None]:
#| export
def download_files_in_folder(owner:str, # GitHub account owning the repository
                             repo:str, # Repository name
                             src_dir:str, # Folder of the repository to download files from
                             dest_dir:str, # Local folder where the files are written
                             branch:str='master' # Branch to list and download the files from
                             ):
    "Download the files (sub-folders are skipped) of a GitHub repository folder into `dest_dir`."
    # Make a GET request to the GitHub API to get the contents of the folder,
    # on the same branch as the one the files are downloaded from.
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{src_dir}"
    response = requests.get(url, params={'ref': branch})

    if response.status_code == 200:
        contents = response.json()

        # Iterate over the entries and download the ones that are files
        for item in contents:
            if item["type"] == "file":
                fname = item["name"]
                download_file(owner, repo, src_dir, dest_dir, fname, branch=branch)
    else:
        print(f"Error: {response.status_code}")

def download_file(owner, # GitHub account owning the repository
                  repo, # Repository name
                  src_dir, # Folder of the repository containing the file
                  dest_dir, # Local folder where the file is written
                  fname, # Name of the file to download
                  branch='master' # Branch to download the file from
                  ):
    "Download a single file of a GitHub repository into `dest_dir`, printing the outcome."
    # Make a GET request to get the raw file contents
    url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{src_dir}/{fname}"
    response = requests.get(url)

    if response.status_code == 200:
        # Save the file locally
        with open(Path(dest_dir) / fname, "wb") as file:
            file.write(response.content)
        print(f"{fname} downloaded successfully.")
    else:
        print(f"Error: {response.status_code}")

## WoRMS
The [World Register of Marine Species (WoRMS)](https://www.marinespecies.org) is an authoritative classification and catalogue of marine names. It provides a REST API (among others) that allows "fuzzy" matching of any species name you might encounter in marine data sources against its own database. There are several types of matches as described [here](https://www.marinespecies.org/tutorial_taxonmatch.php).

In [None]:
#| export
def match_worms(
    name:str # Name of species to look up in WoRMS
    ):
    "Lookup `name` in WoRMS (fuzzy match); return the decoded JSON response, or -1 on any non-200 status."
    response = requests.get(
        'https://www.marinespecies.org/rest/AphiaRecordsByMatchNames',
        params={'scientificnames[]': [name], 'marine_only': 'true'},
        headers={'accept': 'application/json'},
    )
    # Only a 200 status is treated as a successful request
    if response.status_code != 200:
        return -1
    return response.json()

For instance:

In [None]:
#|eval: false
match_worms('Aristeus antennatus')

[[{'AphiaID': 107083,
   'url': 'https://www.marinespecies.org/aphia.php?p=taxdetails&id=107083',
   'scientificname': 'Aristeus antennatus',
   'authority': '(Risso, 1816)',
   'status': 'accepted',
   'unacceptreason': None,
   'taxonRankID': 220,
   'rank': 'Species',
   'valid_AphiaID': 107083,
   'valid_name': 'Aristeus antennatus',
   'valid_authority': '(Risso, 1816)',
   'parentNameUsageID': 106807,
   'kingdom': 'Animalia',
   'phylum': 'Arthropoda',
   'class': 'Malacostraca',
   'order': 'Decapoda',
   'family': 'Aristeidae',
   'genus': 'Aristeus',
   'citation': 'DecaNet eds. (2024). DecaNet. Aristeus antennatus (Risso, 1816). Accessed through: World Register of Marine Species at: https://www.marinespecies.org/aphia.php?p=taxdetails&id=107083 on 2024-06-10',
   'lsid': 'urn:lsid:marinespecies.org:taxname:107083',
   'isMarine': 1,
   'isBrackish': 0,
   'isFreshwater': 0,
   'isTerrestrial': 0,
   'isExtinct': 0,
   'match_type': 'exact',
   'modified': '2022-08-24T09:48:1

In [None]:
# open dbo_species
#from tqdm import tqdm
#results = []
#species = pd.read_excel(species_lut_path()).species
#for i, name in tqdm(enumerate(species), total=len(species)):
#    if i > 1:
#        worms_match = match_worms(name)
#        if worms_match != -1:
#            results.append(worms_match[0][0])

In [None]:
# np.unique(np.array([result['phylum'] for result in results]))

array(['Annelida', 'Arthropoda', 'Bryozoa', 'Chaetognatha', 'Charophyta',
       'Chlorophyta', 'Chordata', 'Cnidaria', 'Ctenophora',
       'Echinodermata', 'Mollusca', 'Myzozoa', 'Ochrophyta', 'Porifera',
       'Rhodophyta', 'Tracheophyta'], dtype='<U13')

In [None]:
#len(maris_worms_matches)

In [None]:
#maris_worms_matches = fc.load_pickle('./files/pkl/maris-worms-matches.pkl')

In [None]:
#np.unique(np.array([result['phylum'] for result in maris_worms_matches]))

In [None]:
#len([result for result in maris_worms_matches if result['status'] == 'accepted'])

## Marisco look-up table fuzzy matching
Using https://jamesturk.github.io/jellyfish fuzzy matching distance metrics.


In [None]:
#| export
@dataclass
class Match:
    "Container for the result of a name match (field meanings below are inferred from their names)."
    matched_id: int # presumably the id of the matched MARIS look-up table entry — TODO confirm against callers
    matched_maris_name: str # presumably the name of the matched MARIS look-up table entry — TODO confirm
    source_name: str # presumably the name as given by the data provider — TODO confirm
    match_score: int # presumably the distance returned by the matching function — TODO confirm

In [None]:
#| export
def match_maris_lut(
    lut_path: str, # Path to MARIS species authoritative species look-up table
    data_provider_name: str, # Name of data provider nomenclature item to look up 
    maris_id: str, # Id of MARIS lookup table nomenclature item to match
    maris_name: str, # Name of MARIS lookup table nomenclature item to match
    dist_fn: Callable = jf.levenshtein_distance, # Distance function
    nresults: int = 10 # Maximum number of results to return
) -> pd.DataFrame:
    """
    Fuzzy matching data provider and MARIS lookup tables (e.g biota species, sediments, ...).

    Returns the `nresults` rows with the lowest `dist_fn` score, restricted to the
    `maris_id`, `maris_name` and `score` columns.
    """
    # Load the look-up table, discard rows without a name and make ids integers
    lut = (pd.read_excel(lut_path)
             .dropna(subset=[maris_name])
             .astype({maris_id: 'int'}))

    # Case-insensitive distance between the looked-up name and each name of the table (element-wise)
    scores = lut[maris_name].str.lower().apply(
        lambda candidate: dist_fn(data_provider_name.lower(), candidate))

    # Lowest scores (closest names) first, truncated to `nresults` rows
    ranked = lut.assign(score=scores).sort_values(by='score', ascending=True)
    return ranked.iloc[:nresults][[maris_id, maris_name, 'score']]

Below is an example trying to match the name "PLANKTON" with the `dbo_species_cleaned.xlsx` MARIS biota species lookup table:

In [None]:
lut_fname = '../files/lut/dbo_species_cleaned.xlsx'
match_maris_lut(lut_fname, data_provider_name='PLANKTON', 
                maris_id='species_id', maris_name='species')

Unnamed: 0,species_id,species,score
281,280,Plankton,0
696,695,Zooplankton,3
633,632,Palaemon,4
697,696,Phytoplankton,5
812,811,Chanos,5
160,159,Neuston,5
234,233,Penaeus,6
1458,1457,Lamnidae,6
1438,1437,Labrus,6
1527,1526,Favites,6


Below is an example trying to match the name "GLACIAL" with the `dbo_sedtype.xlsx` MARIS sediment lookup table:

In [None]:
# sediments_lut_path()
lut_fname = '../files/lut/dbo_sedtype.xlsx'
match_maris_lut(lut_fname, data_provider_name='GLACIAL', 
                maris_id='sedtype_id', maris_name='sedtype')

Unnamed: 0,sedtype_id,sedtype,score
26,25,Glacial,0
3,2,Gravel,4
2,1,Clay,5
51,50,Glacial clay,5
4,3,Marsh,6
7,6,Sand,6
13,12,Silt,6
15,14,Sludge,6
27,26,Soft,7
52,51,Soft clay,7


In [None]:
#def match_maris_species(
#    lut_path:str, # Path to MARIS species authoritative species look-up table
#    name:str, # Name of species to look up 
#    col_lookup:str='species', # Name of the column where the character strings match
#    dist_fn:Callable=jf.levenshtein_distance, # Jellyfish distance to use
#    coi:list=['species_id', 'species', 'Taxonname', 'TaxonDBID'], # Columns of interest to display
#    nresults:int=10 # Maximum number of results to return
#    ):
#    "Fuzzy matching biota species provided by the data provider and MARIS one."
#    df = pd.read_excel(lut_path)
#    df = df.dropna(subset=col_lookup)
#    df = df.astype({'species_id':'int'})
#    results = []
#    for _, row in df.iterrows():
#        score = dist_fn(name.lower(), row[col_lookup].lower())
#        result = row[coi].to_dict()
#        result['score'] = score
#        results.append(result)
#    return pd.DataFrame(results).sort_values(by='score', ascending=True)[:nresults]

In [None]:
# species_lut_path()
#lut_fname = '../files/lut/dbo_species_cleaned.xlsx'
#match_maris_species(lut_fname, 'PLANKTON')

In [None]:
#def match_maris_sediment(
#    name:str, # Name of sediment to look up 
#    col_lookup:str='sedtype', # Name of the column where the character strings match
#    dist_fn:Callable=jf.levenshtein_distance, # Jellyfish distance to use
#    coi:list=['sedtype_id', 'sedtype'], # Columns of interest to display
#    nresults:int=10 # Maxiumn number of results to return
#    ):
#    "Fuzzy matching sediments type provided by the data provider and MARIS one."
#    df = pd.read_excel(sediments_lut_path())
#    df = df.dropna(subset=col_lookup)
#    df = df.astype({'sedtype_id':'int'})
#    results = []
#    for _, row in df.iterrows():
#        score = dist_fn(name.lower(), row[col_lookup].lower())
#        result = row[coi].to_dict()
#        result['score'] = score
#        results.append(result)
#    return pd.DataFrame(results).sort_values(by='score', ascending=True)[:nresults]

In [None]:
#match_maris_sediment('GLACIAL')