In [51]:
import screed # A Python library for reading FASTA and FASTQ file formats.
from Bio import SeqIO 

class ReadSeq:
    """Readers that load sequence data from text, FASTA and FASTQ files.

    The methods keep no state on the instance: each call opens the given
    file, parses it and returns the result.
    """

    def readText(self, inputfile):
        """Return the whole content of a text file as one sequence string.

        Newline and tab characters are removed; every other character is
        kept unchanged.
        """
        with open(inputfile, "r") as seqfile:
            seq = seqfile.read()
        return seq.replace("\n", "").replace("\t", "")

    def readFASTA(self, inputfile):
        """Return the sequence data of a FASTA file as one string.

        The first line is always treated as the header line and skipped.
        Header lines of any further records (lines starting with ">") are
        skipped as well, so the sequences of a multi-record file are
        concatenated without record names leaking into the result.
        Newline and tab characters are removed. An empty file yields "".
        """
        with open(inputfile, "r") as f:
            # The name / info line carries no sequence data.
            f.readline()
            chunks = [line for line in f if not line.startswith(">")]
        seq = "".join(chunks)
        return seq.replace("\n", "").replace("\t", "")

    def readFastq(self, filename):
        """Return the base sequences and quality strings of a FASTQ file.

        The file is consumed four lines at a time (name, bases, placeholder,
        qualities). Reading stops at the first record whose base line is
        empty, which is also how the end of the file is detected.

        Returns:
            A tuple ``(sequences, qualities)`` of two equally long lists of
            strings with trailing whitespace stripped.
        """
        sequences = []
        qualities = []
        with open(filename) as fh:
            while True:
                fh.readline()  # skip name line
                seq = fh.readline().rstrip()  # base sequence
                fh.readline()  # skip placeholder line
                qual = fh.readline().rstrip()  # base quality line
                if not seq:
                    break
                sequences.append(seq)
                qualities.append(qual)
        return sequences, qualities

    def readFastaWithScreed(self, inputfile):
        """Read a FASTA file with screed and return one sequence.

        If the file holds several records, the sequence of the last record
        is returned. A file without records yields "".
        """
        # Pre-bind the result so a file without records does not end in an
        # UnboundLocalError at the return statement.
        seq = ""
        with screed.open(inputfile) as seqfile:
            for read in seqfile:
                seq = read.sequence
        return seq

    def readFastqWithScreed(self, inputfile):
        """Read a FASTQ file with screed and return one base sequence.

        If the file holds several records, the sequence of the last record
        is returned; quality strings are not returned. A file without
        records yields "".
        """
        seq = ""
        with screed.open(inputfile) as seqfile:
            for read in seqfile:
                seq = read.sequence
        return seq

    def readFastqWithBiopython(self, inputfile):
        """Read a FASTQ (or FASTA) file with Biopython and return one sequence.

        The parser is chosen from the first character of the file: "@"
        selects Biopython's "fastq" format, anything else falls back to
        "fasta", so FASTA input keeps working with this method.

        If the file holds several records, the ``seq`` attribute of the
        last record is returned. A file without records yields "".
        """
        seq = ""
        with open(inputfile) as seqfile:
            # Peek at the first character, then rewind for the real parse.
            first_char = seqfile.read(1)
            seqfile.seek(0)
            file_format = "fastq" if first_char == "@" else "fasta"
            for record in SeqIO.parse(seqfile, file_format):
                seq = record.seq
        return seq

In [52]:
# Create one ReadSeq instance; the cells below call its reader methods.
data = ReadSeq() 

In [20]:
# Load the FASTA file with the hand-written reader; seq1 is a single string.
seq1 = data.readFASTA("../data/Haemophilus_influenzae.fasta")

In [21]:
# readFastq returns a (sequences, qualities) tuple, so seq2 holds both lists.
seq2 = data.readFastq("../data/SRR835775_1.first1000.fastq")

In [30]:
# The .txt extension suggests plain sequence text rather than FASTQ records,
# so load it with readText (returns one string) instead of the FASTQ parser.
seq3 = data.readText("../data/dna.txt")

In [53]:
# Read the same FASTA file again, this time through the screed-based reader.
seq4 = data.readFastaWithScreed("../data/Haemophilus_influenzae.fasta")

In [55]:
# NOTE: the input here is a FASTA file even though the method name says Fastq.
seq5 = data.readFastqWithBiopython("../data/Haemophilus_influenzae.fasta")