# Filter Matrix Directory

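> `FilterMatrixDirectory`: a `Directory` subclass that locates the matrix, barcodes, and features files in a folder and caches them as a single AnnData file.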

In [None]:
#| default_exp utils.filter_mat_dir

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import os
from pathlib import Path
from dataclasses import dataclass, field, KW_ONLY
from typing import Optional, List, ClassVar

### Filter Matrix Directory Viewer

In [None]:
#| export
import pandas as pd

In [None]:
#| export
from iza.static import (
    ADATA, MATRIX, BARCODES, FEATURES, EXT_H5, EXT_MTX, EXT_TSV,
    GENE_SYMBOL, ENSEMBL_ID
)
from iza.types import (
    AnnData
)
from tqdm.auto import tqdm

In [None]:
#| export
try: 
    import scanpy as sc, scprep

    # NOTE: Directory defined in _02_utils/_02_directory.ipynb
    @dataclass
    class FilterMatrixDirectory(Directory):
        """A directory containing a count matrix, its barcodes and its features.

        On construction the three input files are combined into a single
        AnnData file (``ADATA_FILE``) stored next to them, unless that file
        already exists. Use `get_adata` to load the cached object.

        NOTE(review): `Directory` is not imported in the visible part of this
        file; confirm it is in scope when the exported module is loaded.
        """
        _: KW_ONLY
        # File names (not full paths) expected inside ``self.dirname``.
        ADATA_FILE: ClassVar[str] = f'{ADATA}{EXT_H5}'
        MATRIX_FILE: ClassVar[str] = f'{MATRIX}{EXT_MTX}'
        BARCODES_FILE: ClassVar[str] = f'{BARCODES}{EXT_TSV}'
        FEATURES_FILE: ClassVar[str] = f'{FEATURES}{EXT_TSV}'

        def __post_init__(self):
            """Build the cached AnnData file when it is not present yet.

            Any error raised by `make_adata` propagates unchanged.
            """
            if not self.has_adata:
                self.make_adata()

        def __repr__(self):
            """Return a status line followed by the parent class's repr."""
            status = f'FilteredMatrix(valid: {self.is_valid}, adata: {self.has_adata})'
            return status + '\n' + super().__repr__()

        @property
        def adata_filename(self) -> str:
            """Full path of the cached AnnData file."""
            return os.path.join(self.dirname, self.ADATA_FILE)

        @property
        def matrix_filename(self) -> str:
            """Full path of the matrix file."""
            return os.path.join(self.dirname, self.MATRIX_FILE)

        @property
        def barcodes_filename(self) -> str:
            """Full path of the barcodes file."""
            return os.path.join(self.dirname, self.BARCODES_FILE)

        @property
        def features_filename(self) -> str:
            """Full path of the features file."""
            return os.path.join(self.dirname, self.FEATURES_FILE)

        @property
        def has_adata(self) -> bool:
            """Whether the cached AnnData file exists on disk."""
            return os.path.isfile(self.adata_filename)

        @property
        def has_matrix(self) -> bool:
            """Whether the matrix file exists on disk."""
            return os.path.isfile(self.matrix_filename)

        @property
        def has_barcodes(self) -> bool:
            """Whether the barcodes file exists on disk."""
            return os.path.isfile(self.barcodes_filename)

        @property
        def has_features(self) -> bool:
            """Whether the features file exists on disk."""
            return os.path.isfile(self.features_filename)

        @property
        def is_valid(self) -> bool:
            """Whether the matrix, barcodes and features files all exist."""
            return all([self.has_matrix, self.has_barcodes, self.has_features])

        def make_adata(self) -> AnnData:
            """Create the cached AnnData file and return the AnnData object.

            If the cache file already exists it is loaded and returned instead
            of being rebuilt, so the method always returns an AnnData object.

            Raises:
                FileNotFoundError: if any of the matrix, barcodes or features
                    files is missing.
            """
            if self.has_adata:
                return self.get_adata()

            # Fail before any parsing starts, naming every missing input.
            required = (
                (self.has_features, self.features_filename),
                (self.has_barcodes, self.barcodes_filename),
                (self.has_matrix, self.matrix_filename),
            )
            missing = [path for present, path in required if not present]
            if missing:
                raise FileNotFoundError(
                    'cannot build {}: missing {}'.format(
                        self.adata_filename, ', '.join(missing)
                    )
                )

            stages = (FEATURES, BARCODES, MATRIX, 'combine', ADATA)
            desc = os.path.basename(self.dirname)

            with tqdm(total=len(stages), desc=desc, leave=True) as progress:
                progress.set_postfix(stage=FEATURES)
                features = pd.read_csv(self.features_filename, sep='\t', header=None)
                features.columns = [ENSEMBL_ID, GENE_SYMBOL, 'feature_type']
                # Index by the constant, not by a hard-coded attribute name.
                features.index = pd.Series(features[ENSEMBL_ID].copy().values)
                progress.update()

                progress.set_postfix(stage=BARCODES)
                barcodes = pd.read_csv(self.barcodes_filename, sep='\t', header=None)
                barcodes.columns = [BARCODES]
                barcodes.index = pd.Series(barcodes[BARCODES].copy().values)
                progress.update()

                progress.set_postfix(stage=MATRIX)
                # Transposed so rows line up with barcodes and columns with features.
                matrix = scprep.io.load_mtx(self.matrix_filename, sparse=True).T
                progress.update()

                progress.set_postfix(stage='combine')
                data = pd.DataFrame.sparse.from_spmatrix(
                    matrix, columns=features.index, index=barcodes.index
                )
                del matrix  # release the raw matrix before the next allocation
                progress.update()

                progress.set_postfix(stage=ADATA)
                # The AnnData `dtype` argument is deprecated; cast X explicitly.
                # NOTE(review): `data.values` materialises a dense array; kept so
                # the layout of X in the written file is unchanged.
                dense = data.values.astype('float32', copy=False)
                adata = sc.AnnData(X=dense, obs=barcodes, var=features)
                adata.write(self.adata_filename)
                progress.update()

            return adata

        def get_adata(self) -> AnnData:
            """Load and return the AnnData object stored in the cache file."""
            return sc.read_h5ad(self.adata_filename)

except ImportError as err:
    @dataclass
    class FilterMatrixDirectory(Directory):
        """Placeholder defined when the guarded imports above fail.

        It exposes the same file-name constants as the full implementation,
        but creating an instance always raises ImportError.
        """
        _: KW_ONLY
        # File names (not full paths) of the expected directory contents.
        ADATA_FILE: ClassVar[str] = '{}{}'.format(ADATA, EXT_H5)
        MATRIX_FILE: ClassVar[str] = '{}{}'.format(MATRIX, EXT_MTX)
        BARCODES_FILE: ClassVar[str] = '{}{}'.format(BARCODES, EXT_TSV)
        FEATURES_FILE: ClassVar[str] = '{}{}'.format(FEATURES, EXT_TSV)

        def __post_init__(self):
            """Refuse instantiation: the optional dependencies are unavailable."""
            message = 'FilterMatrixDirectory requires scprep and scanpy to be installed'
            raise ImportError(message)
        
    pass

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()