# Loading

> Data loading utilities

In [1]:
#| default_exp loading

In [14]:
#| export
import pandas as pd
import fastcore.all as fc
from pathlib import Path
import re

In [20]:
# | export
def load_mir(src_dir):
    """
    Load MIR spectra of K spiked soil samples.

    Every entry of `src_dir` whose stem ends in ``-<digit>-<digit>``
    (e.g. ``soil-0-1``) is read as a header-less, two-column CSV
    (wavenumber, absorbance). Only rows with 649 < wavenumber < 4000
    (both bounds exclusive) are kept, and the spectra are then pivoted into a
    samples x wavenumbers table.

    Parameters:
    src_dir (str or Path-like object): Directory containing the spectra files.

    Returns:
    tuple: Tuple containing the array of absorbance values,
           array of wavenumbers (columns), and array of sample names (rows).

    Raises:
    ValueError: If no entry of `src_dir` matches the expected naming pattern.
    """
    # Accept plain strings as well as Path objects.
    src_dir = Path(src_dir)
    pattern = re.compile(r'-\d-\d$')

    # `iterdir` is the stdlib equivalent of fastcore's patched `Path.ls()`,
    # so the function no longer depends on that monkey-patch being active.
    fnames = [f for f in src_dir.iterdir() if pattern.search(f.stem)]
    if not fnames:
        raise ValueError(
            f"No spectra files matching '{pattern.pattern}' found in {src_dir}"
        )

    dfs = [
        pd.read_csv(fname, header=None, names=['wavenumber', 'absorbance'])
        .query('649 < wavenumber < 4000')
        .assign(name=fname.stem)
        for fname in fnames
    ]

    # pivot_table sorts both axes and, with its default aggregation (mean),
    # averages any duplicated (name, wavenumber) pairs.
    df_combined = pd.concat(dfs).pivot_table(
        values='absorbance', index='name', columns='wavenumber'
    )

    return (
        df_combined.values,
        df_combined.columns.values,
        df_combined.index.values.astype('U'),
    )


For example:

In [21]:
# |eval: false
# The spectra are looked up under the current user's home directory.
src_dir = Path.home() / 'pro/data/k-spiking/mir'
X, wavenumbers, names = load_mir(src_dir)

In [22]:
# | export
def load_nir(fname):
    """
    Read the NIR spectra of K spiked soil samples from an Excel workbook.

    The 'Results' sheet is loaded with its 'Sample ID' column as the row
    index; the remaining column labels are returned as the spectral axis.

    Parameters:
    fname (str or Path-like object): File name or path of the Excel file.

    Returns:
    tuple: Tuple containing the array of spectral values,
           array of wavenumbers (columns), and array of sample names (rows).
    """
    spectra = pd.read_excel(fname, sheet_name='Results', index_col='Sample ID')
    # Label both axes for readability; the returned arrays are unaffected.
    spectra = spectra.rename_axis(index='name', columns='wavenumber')

    # Sample names are returned as a NumPy unicode array.
    sample_names = spectra.index.values.astype('U')
    return spectra.values, spectra.columns.values, sample_names

For example:

In [23]:
# |eval: false
# The workbook is looked up under the current user's home directory.
fname = Path.home() / 'pro/data/k-spiking/nir/2023-12-8 _FT-NIR-K-spiked soil.xlsx'
# Call the loader defined in this notebook (`load_nir`).
X, wavenumbers, names = load_nir(fname)

In [24]:
#| hide
# Run nbdev's export step for this project.
import nbdev
nbdev.nbdev_export()