In [1]:
# JG 3/9/18
# This notebook converts a DADA2 biom table (generated by following the DADA2 tutorial)
# into a QIIME-compatible biom file.
# It also moves the sequence data into a separate FASTA file and renames the OTUs,
# since the full sequences are often not needed in the table itself.

# Imports:
import os

import pandas as pd

In [3]:
# Usage log -- previous runs of this converter:
#
# Date: 3/9/18
# Sequence data: amoA and nxrB biom files
#
# NOTE: DADA_biom is defined in the next cell (In [2]); that cell has to be
# executed before this one, otherwise the calls below raise NameError.

# Absolute, machine-specific locations of the DADA2 tables to convert.
nxrb_file = '/Users/jimbo/Desktop/SCBNR_reseq/reseq_nxrB/DADA2/DADA2_nxrB_biom.txt'
amoa_file = '/Users/jimbo/Downloads/reseq_amoA/DADA2/DADA2_amoA_biom.txt'

# nxrB conversion, currently disabled:
#nxrb_biom = DADA_biom(nxrb_file)
#nxrb_biom.split_write('nxrB_relabeled', 'nxrB')

# Load the amoA table, then write "amoa_relabeled_biom2.txt" and
# "amoa_relabeled_rep_seqs.fasta" into the input file's directory, with the
# variants labelled "amoA_0", "amoA_1", ...
amoa_biom = DADA_biom(amoa_file)
amoa_biom.split_write('amoa_relabeled', 'amoA')

In [2]:
# Class for manipulating biom file:

class DADA_biom:
    """Convert a DADA2 sequence table into a re-indexed biom table plus a FASTA file.

    The input is a tab-separated table with samples as rows and exact sequence
    variants as columns.  On load the table is transposed, so every row of
    ``df`` corresponds to one column of the input file.

    Attributes:
        df:   The transposed table.  Its index entries are what gets written
              as sequences to the FASTA file.
        path: Directory part of the input path; output files are written there
              (empty string means the current working directory).

    Methods:
        split_write(file_out, otu_prefix):
            Writes "{file_out}_rep_seqs.fasta" and "{file_out}_biom2.txt"
            into ``path``.
        write_seqs(file_out, otu_prefix):
            Writes only the FASTA file, to the exact path ``file_out``.
    """

    def __init__(self, path_in):
        """Read the tab-separated table at ``path_in`` and transpose it.

        Args:
            path_in: Path of the DADA2 table (samples as rows, sequence
                variants as columns).
        """
        # NOTE(review): no index_col is passed, so whether the sample-name
        # column ends up as the index (and therefore as the column labels
        # after transposing) depends on the header layout of the file --
        # confirm against a real DADA2 export.
        table = pd.read_csv(path_in, sep='\t')
        self.df = table.transpose()
        # dirname() yields '' for a bare file name, which os.path.join() below
        # treats as "current directory" instead of producing "<file>/<output>".
        self.path = os.path.dirname(path_in)

    def _otu_labels(self, otu_prefix):
        """Return the labels "{otu_prefix}_0", "{otu_prefix}_1", ... -- one per row of ``df``."""
        return ['{0}_{1}'.format(otu_prefix, n) for n in range(len(self.df.index))]

    def split_write(self, file_out, otu_prefix):
        """Write the FASTA file and the relabelled table next to the input file.

        Args:
            file_out: Prefix for the output file names.  The files are
                "{file_out}_biom2.txt" and "{file_out}_rep_seqs.fasta".
            otu_prefix: Prefix for the generated row labels / FASTA headers
                ("{otu_prefix}_0", "{otu_prefix}_1", ...).

        ``self.df`` is left untouched, so the method can be called repeatedly
        (e.g. with different prefixes) and still sees the original index.
        """
        biom_out = os.path.join(self.path, '{0}_biom2.txt'.format(file_out))
        fasta_out = os.path.join(self.path, '{0}_rep_seqs.fasta'.format(file_out))

        # Sequences first: they are taken from the still-original index.
        self.write_seqs(fasta_out, otu_prefix)

        # Relabel a copy rather than self.df; overwriting the index in place
        # would discard the sequences and corrupt any later FASTA export.
        relabeled = self.df.copy()
        relabeled.index = self._otu_labels(otu_prefix)
        relabeled.to_csv(biom_out, sep='\t')

    def write_seqs(self, file_out, otu_prefix):
        """Write every index entry of ``df`` as one FASTA record to ``file_out``.

        Record ``n`` has the header ">{otu_prefix}_{n}", i.e. the same label
        that split_write() assigns to row ``n`` of the relabelled table.
        """
        labels = self._otu_labels(otu_prefix)
        with open(file_out, 'w') as f:
            for label, seq in zip(labels, self.df.index):
                # format() instead of "+" so non-string index labels do not raise.
                f.write('>{0}\n{1}\n'.format(label, seq))
    


    