# Preprocessing

> Data preprocessing utilities.

In [1]:
#| default_exp preprocessing

In [24]:
#| export
import fastcore.all as fc
from pathlib import Path
from sklearn.base import BaseEstimator, TransformerMixin
from typing import List
import numpy as np

In [5]:
#|export
class MeanCenter(BaseEstimator, TransformerMixin):
    "Mean center spectra: subtract from each row of `X` that row's own mean."

    def fit(self, X, y=None):
        "Nothing is learned from the data; return the transformer itself."
        return self

    def transform(self, X, y=None):
        "Return `X` minus its per-row mean (taken along axis 1)."
        # `keepdims=True` keeps the means as a column so they broadcast across each row.
        row_means = X.mean(axis=1, keepdims=True)
        return X - row_means

In [38]:
#|export
class MeanReplicates(BaseEstimator, TransformerMixin):
    "Select replicates of the specified sample and average their spiked versions."
    def __init__(self,
                 smp_name:str, # Sample of interest: 'LUI', 'SPA1' or 'TM4.1'.
                 names:List, # Names of scanned samples and replicates.
                ):
        # Stores `smp_name` and `names` as attributes of the same name.
        fc.store_attr()

    def _mean(self, X, substring):
        "Average (column-wise) the rows of `X` whose entry in `self.names` contains `substring`."
        # One boolean per name; used as a row mask, so `self.names` must align with the rows of `X`.
        row_mask = [substring in name for name in self.names]
        selected = X[row_mask, :]
        return selected.mean(axis=0)

    def fit(self, X, y=None):
        "Nothing is learned from the data; return the transformer itself."
        return self

    def transform(self, X, y=None):
        "Return one averaged row per '<smp_name>-<idx>' tag (idx 0 to 3), each mean centered."
        # NOTE(review): indices 0-3 presumably identify the spiked versions of the sample; confirm.
        tags = [f'{self.smp_name}-{idx}' for idx in range(4)]
        averaged = np.array([self._mean(X, tag) for tag in tags])
        row_means = averaged.mean(axis=1, keepdims=True)
        return averaged - row_means

In [39]:
#| hide
# Export the cells marked `#| export` in this notebook to the library module.
import nbdev
nbdev.nbdev_export()