In [1]:
import sys
import os
# Put the project's src directory at the front of the module search path.
# The relative path is resolved against the current working directory, so
# adjust it if the notebook is run from a different location. The icpoes
# imports below presumably rely on this entry — confirm against the repo layout.
sys.path.insert(0, os.path.abspath('../../src'))

from icpoes.analyser import ConcentrationAnalyser
from icpoes.check_stds import check_standards
import xarray as xr
import re

In [2]:
import xarray as xr

def get_labels_by_letter(dataset: "xr.Dataset", coord: str, letters: "str | list", inverse: bool = False) -> list:
    """
    Get the labels of a dataset coordinate that start with any of the given prefixes.

    Parameters:
    - dataset: xr.Dataset, the dataset containing the coordinate.
    - coord: str, the name of the coordinate to filter.
    - letters: str or list of str, the prefix(es) to match. A bare string is
      treated as ONE prefix (e.g. 'SL' matches only labels starting with 'SL'),
      not as a sequence of single-character prefixes.
    - inverse: bool, if True, return labels that do NOT start with any of the given prefixes.

    Returns:
    - list of str, the matching labels in coordinate order. An empty `letters`
      matches nothing (so `inverse=True` then returns every label).
    """
    # A plain string would otherwise be iterated character by character, which
    # silently turns 'SL' into the two prefixes 'S' and 'L'.
    if isinstance(letters, str):
        prefixes = (letters,)
    else:
        prefixes = tuple(letters)

    # Coordinate values may be numpy string scalars; normalise to plain str once.
    labels = [str(label) for label in dataset.coords[coord].values]

    # str.startswith accepts a tuple and returns True if any prefix matches;
    # comparing against `inverse` flips the selection when requested.
    return [label for label in labels if label.startswith(prefixes) != inverse]


In [3]:
import xarray as xr

def replace_and_multiply_entries(ds1, ds2, labels_to_replace, multiplier, dim="sample_name"):
    """
    Replaces entries in ds1 with entries from ds2 for the specified labels,
    after multiplying the variables in ds2 by the provided multiplier.

    Parameters:
    -----------
    ds1 : xarray.Dataset
        The original dataset. Every label in `labels_to_replace` must exist
        along `dim`, otherwise xarray's `drop_sel` raises.
    ds2 : xarray.Dataset
        The dataset containing replacement entries. Every label in
        `labels_to_replace` must exist along `dim`, otherwise `sel` raises.
    labels_to_replace : str or list
        A single label or a list of labels indicating which entries to replace.
    multiplier : numeric
        The factor by which to multiply the variables in ds2 for replacement.
    dim : str, optional
        Name of the dimension/coordinate holding the labels
        (default: "sample_name").

    Returns:
    --------
    xarray.Dataset
        A new dataset with the replacements applied, sorted by `dim`.
        Neither input is modified.
    """
    # Wrap a lone label in a list so that `.sel` keeps `dim` as a real
    # dimension instead of collapsing it to a scalar coordinate.
    if isinstance(labels_to_replace, (str, bytes)):
        labels_to_replace = [labels_to_replace]

    # Remove the entries that are about to be replaced from ds1.
    ds1_kept = ds1.drop_sel({dim: labels_to_replace})

    # Pull the replacement entries from ds2 and scale every data variable.
    replacements = ds2.sel({dim: labels_to_replace}) * multiplier

    # Stitch the two pieces back together along the label dimension and
    # restore a deterministic order, since concat appends the replacements last.
    combined = xr.concat([ds1_kept, replacements], dim=dim)
    return combined.sortby(dim)



In [4]:
soils = '../../data/soils.nc'
deccan_soils = '../../data/deccan_soils.nc'
sukinda = '../../data/sukinda.nc'

# Factor applied to rerun measurements taken from the soils run before they
# replace the original entries (presumably the dilution factor — TODO confirm).
rerun_multiplier = 15

sukinda_ds = xr.open_dataset(sukinda)
deccan_ds = xr.open_dataset(deccan_soils)
soils_ds = xr.open_dataset(soils)

# Sukinda reruns in the soils run: labels of the exact form S<digits>.
# The prefix filter alone would also keep other labels that start with 'S'.
sukinda_reruns = get_labels_by_letter(soils_ds, 'sample_name', 'S')
sukinda_reruns = [s for s in sukinda_reruns if re.fullmatch(r'S\d+', s)]

# Put diluted reruns into sukinda dataset
sukinda_ds = replace_and_multiply_entries(sukinda_ds, soils_ds, sukinda_reruns, rerun_multiplier)

# Keep only S<digits> entries in the Sukinda dataset: drop everything that does
# not start with 'S', plus 'S...' labels that are not of the form S<digits>.
extraneous_labels = get_labels_by_letter(sukinda_ds, 'sample_name', 'S', inverse=True)
non_sample_s_labels = get_labels_by_letter(sukinda_ds, 'sample_name', 'S')
extraneous_labels = extraneous_labels + [s for s in non_sample_s_labels if not re.fullmatch(r'S\d+', s)]
sukinda_ds = sukinda_ds.drop_sel(sample_name=extraneous_labels)

# D10 was rerun in the soils run as well: move it into the Deccan dataset.
deccan_ds = replace_and_multiply_entries(deccan_ds, soils_ds, 'D10', rerun_multiplier)
# drop_sel returns a new dataset rather than modifying in place, so the result
# must be assigned back; otherwise D10 stays in soils_ds and is written out
# with the cleaned soils data.
soils_ds = soils_ds.drop_sel(sample_name='D10')

In [5]:
# Label prefixes identifying the standards in the soils run, and the labels
# listed as the calibration line.
standard_names = ['NIST 1640a_', 'Cam-Tap-Water_', 'SPS-SW2 10%_', 'SLRS-6_']
calibration_line = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']

# Resolve the prefixes to the concrete labels present in soils_ds.
standards = get_labels_by_letter(soils_ds, 'sample_name', standard_names)

# Strip standards, the Sukinda reruns (already merged into sukinda_ds) and the
# calibration-line entries, in that order.
soils_ds = (
    soils_ds
    .drop_sel(sample_name=standards)
    .drop_sel(sample_name=sukinda_reruns)
    .drop_sel(sample_name=calibration_line)
)

In [6]:
# Label prefixes identifying the standards in the Deccan run.
standard_names = ['SPS-SW2 10%_', 'SLRS-6_', 'Cam-Tap-Water_']

# Resolve the concrete labels to remove before touching deccan_ds.
standards = get_labels_by_letter(deccan_ds, 'sample_name', standard_names)
t_samples = get_labels_by_letter(deccan_ds, 'sample_name', 'T')

# Remove, in order: standards, labels starting with 'T', the calibration-line
# entries, and the three named blank entries.
deccan_ds = (
    deccan_ds
    .drop_sel(sample_name=standards)
    .drop_sel(sample_name=t_samples)
    .drop_sel(sample_name=calibration_line)
    .drop_sel(sample_name=['BW', 'Filter Blank', 'Blank'])
)

# Display the cleaned dataset as the cell output.
deccan_ds

In [7]:
# Write each cleaned dataset to its NetCDF file under the results directory.
for cleaned_ds, output_path in (
    (sukinda_ds, '../../results/sukinda_cleaned.nc'),
    (deccan_ds, '../../results/deccan_cleaned.nc'),
    (soils_ds, '../../results/soils_cleaned.nc'),
):
    cleaned_ds.to_netcdf(output_path)