Skip to content

Commit

Permalink
feat(gunc): Add method to parse contig_assignments file.
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronmussig committed Mar 29, 2022
1 parent 3d69636 commit 625c597
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
6 changes: 6 additions & 0 deletions docs/source/gunc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,9 @@ Progenes reference DB

.. autoclass:: magna.gunc.GuncAllScores
:members:

Helper functions
----------------

.. autofunction:: magna.gunc.read_contig_assignments_tsv

17 changes: 16 additions & 1 deletion magna/gunc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,21 @@
from magna.util.io import download_file


def read_contig_assignments_tsv(path: str) -> pd.DataFrame:
    """Load a GUNC ``contig_assignments`` output file into a DataFrame.

    The file is parsed as tab-separated text. The ``contig``, ``tax_level``
    and ``assignment`` columns are read as ``object`` and the
    ``count_of_genes_assigned`` column as ``np.uintc``.

    Args:
        path: The path to the GUNC contig_assignments TSV file.

    Returns:
        The parsed table as a ``pandas.DataFrame``.
    """
    # The three descriptive columns are forced to object so that values
    # which look numeric are not converted to numbers.
    text_columns = ('contig', 'tax_level', 'assignment')
    column_types = {name: object for name in text_columns}
    column_types['count_of_genes_assigned'] = np.uintc
    return pd.read_csv(path, sep='\t', dtype=column_types)


def gunc_max_css_scores_gtdb_r95() -> pd.DataFrame:
"""Return the max clade separation score (CSS) for the R95 GTDB."""
path = os.path.join(MAGNA_DIR, 'gunc', 'gtdb_95.maxcss_level.feather')
Expand All @@ -18,7 +33,7 @@ def gunc_max_css_scores_gtdb_r95() -> pd.DataFrame:

def gunc_contig_assignment_gtdb_r95() -> pd.DataFrame:
    """Return the contig assignment for the R95 GTDB.

    The data is read from the feather file
    ``gunc/GUNC.gtdb_95.contig_assignments.feather`` under ``MAGNA_DIR``.

    Returns:
        The contents of the feather file as a ``pandas.DataFrame``.

    Raises:
        IOError: If the feather file does not exist at the expected path.
    """
    path = os.path.join(MAGNA_DIR, 'gunc', 'GUNC.gtdb_95.contig_assignments.feather')
    # Check up front so the error message names the expected path.
    if not os.path.isfile(path):
        raise IOError(f'{path} does not exist.')
    return pd.read_feather(path)
Expand Down

0 comments on commit 625c597

Please sign in to comment.