In [None]:
# default_exp softwares.fastq.star

# STAR

In [5]:
# export

from pybiotools4p.softwares.base import Base, modify_cmd


In [6]:
# export

class Star(Base):
    '''
    Command-line builder for STAR.

    The ``cmd_*`` methods format command text from templates; nothing in
    this class executes STAR. Methods decorated with ``modify_cmd`` return
    whatever that decorator (imported from ``pybiotools4p.softwares.base``)
    produces from the formatted template.
    '''

    def __init__(self, software, fd):
        '''
        :param software: STAR executable name or path, forwarded to ``Base``
        :param fd: mapping-like object holding default option strings; this
            class reads the keys ``build_index``, ``align`` and ``mirna_align``
        '''
        super().__init__(software)
        self._default = fd

    def cmd_version(self):
        '''
        :return: shell snippet that echoes ``repr(self)`` and then calls the
            software with ``--version``
        '''
        template = 'echo {repr};{software} --version'
        return template.format(repr=repr(self), software=self._software)

    @modify_cmd
    def cmd_build_index(self, star_index_dir, reference, gtf, read_length):
        '''
        Format the genome index generation command.

        :param star_index_dir: value placed after ``--genomeDir``
        :param reference: value placed after ``--genomeFastaFiles``
        :param gtf: value placed after ``--sjdbGTFfile``
        :param read_length: value placed after ``--sjdbOverhang``.
            NOTE(review): it is used verbatim; STAR's manual suggests
            read length - 1 for that option, so confirm what callers pass.
        :return: the formatted command text
        '''
        # The template is kept verbatim (including whitespace) so the text
        # handed to modify_cmd does not change.
        template = r'''
{star} {build_index} \
        --genomeDir {star_index_dir} \
        --genomeFastaFiles {reference} \
        --sjdbGTFfile {gtf} \
        --sjdbOverhang {read_length}        
        '''
        return template.format(
            star=self._software,
            build_index=self._default['build_index'],
            star_index_dir=star_index_dir,
            reference=reference,
            gtf=gtf,
            read_length=read_length,
        )

    @modify_cmd
    def cmd_align(self, star_idx, fq1, fq2, prefix, gtf, read_length, miRNA=False):
        '''
        Format the alignment command.

        :param star_idx: value placed after ``--genomeDir``
        :param fq1: first value placed after ``--readFilesIn``
        :param fq2: second value placed after ``--readFilesIn``
        :param prefix: value placed after ``--outFileNamePrefix``
        :param gtf: value placed after ``--sjdbGTFfile``
        :param read_length: value placed after ``--sjdbOverhang``
        :param miRNA: when truthy, the ``mirna_align`` default options are
            appended after ``--sjdbOverhang``; otherwise nothing is appended
        :return: the formatted command text
        '''
        # The template is kept verbatim (including whitespace) so the text
        # handed to modify_cmd does not change.
        template = r'''
{star} {align_paras} \
    --genomeDir {star_idx} \
    --readFilesIn {fq1} {fq2} \
    --outFileNamePrefix {prefix} \
    --sjdbGTFfile {gtf}  \
    --sjdbOverhang {read_length} {mp}    
            '''
        software = self._software
        align_options = self._default['align']
        # The miRNA defaults are looked up only when they are requested.
        if miRNA:
            mirna_options = self._default['mirna_align']
        else:
            mirna_options = ''
        return template.format(
            star=software,
            align_paras=align_options,
            mp=mirna_options,
            star_idx=star_idx,
            fq1=fq1,
            fq2=fq2,
            prefix=prefix,
            gtf=gtf,
            read_length=read_length,
        )

    def __repr__(self):
        '''
        :return: ``star:`` followed by the configured software string
        '''
        label = 'star:'
        return label + self._software

    def __str__(self):
        '''
        :return: the expanded name of the tool
        '''
        return 'Spliced Transcripts Alignment to a Reference'

In [7]:
# Demo setup: read the default option strings and build a Star instance.
import configparser

config=configparser.ConfigParser()
# The path is relative to the working directory. ConfigParser.read() skips
# files it cannot open, so a wrong directory surfaces later as a missing
# 'star' section rather than as an error here.
config.read('pybiotools4p/default.ini')

# The 'star' section supplies the defaults that Star reads by key
# ('build_index', 'align', 'mirna_align').
star=Star('STAR',config['star'])
# Bare expression: the cell displays repr(star).
star

star:STAR

In [8]:
# Show the version-check command text; the command is only printed, not run.
print(star.cmd_version())

echo star:STAR;STAR --version


In [9]:
# print() uses Star.__str__, which returns the tool's expanded name.
print(star)

Spliced Transcripts Alignment to a Reference


In [10]:
# Example inputs shared by this cell and the alignment cells that follow.
reference='biology-test-data/fasta/Homo_sapiens.GRCh38.dna.primary_assembly.chromosome22.fa'
gtf='biology-test-data/gtf/HS.22.gtf'
star_index_dir='./pybiotools/star_index'
# Passed through unchanged as the --sjdbOverhang value.
read_length=99
fq1='./biology-test-data/fastq/HS.22.r1.fq.gz'
fq2='./biology-test-data/fastq/HS.22.r2.fq.gz'
prefix='./pybiotools/HS_22'

# Print the index-generation command text for the example reference and GTF.
print(star.cmd_build_index(star_index_dir, reference, gtf, read_length))

STAR --runThreadN 16 --runMode genomeGenerate \
        --genomeDir ./pybiotools/star_index \
        --genomeFastaFiles biology-test-data/fasta/Homo_sapiens.GRCh38.dna.primary_assembly.chromosome22.fa \
        --sjdbGTFfile biology-test-data/gtf/HS.22.gtf \
        --sjdbOverhang 99


In [11]:
# Alignment command with both FASTQ files; miRNA=False appends no extra options.
print(star.cmd_align(star_index_dir, fq1, fq2, prefix, gtf, read_length,miRNA=False))

STAR --runThreadN 16 \
--outSAMstrandField intronMotif \
--readFilesCommand zcat \
--outSAMtype BAM SortedByCoordinate \
    --genomeDir ./pybiotools/star_index \
    --readFilesIn ./biology-test-data/fastq/HS.22.r1.fq.gz ./biology-test-data/fastq/HS.22.r2.fq.gz \
    --outFileNamePrefix ./pybiotools/HS_22 \
    --sjdbGTFfile biology-test-data/gtf/HS.22.gtf  \
    --sjdbOverhang 99


In [12]:
# miRNA example: fq2 is an empty string, so only fq1 follows --readFilesIn,
# and miRNA=True appends the 'mirna_align' defaults to the command.
print(star.cmd_align(star_index_dir, fq1, '', prefix+'_mirna', gtf, read_length,miRNA=True))

STAR --runThreadN 16 \
--outSAMstrandField intronMotif \
--readFilesCommand zcat \
--outSAMtype BAM SortedByCoordinate \
    --genomeDir ./pybiotools/star_index \
    --readFilesIn ./biology-test-data/fastq/HS.22.r1.fq.gz  \
    --outFileNamePrefix ./pybiotools/HS_22_mirna \
    --sjdbGTFfile biology-test-data/gtf/HS.22.gtf  \
    --sjdbOverhang 99 --alignEndsType EndToEnd \
--outFilterMismatchNmax 1 \
--outFilterMultimapScoreRange 0 \
--quantMode TranscriptomeSAM GeneCounts \
--outReadsUnmapped Fastx  \
--outFilterMultimapNmax 10 \
--outSAMunmapped Within \
--outFilterScoreMinOverLread 0 \
--outFilterMatchNminOverLread 0 \
--outFilterMatchNmin 16 \
--alignSJDBoverhangMin 1000 \
--alignIntronMax 1 \
--outWigType wiggle \
--outWigStrand Stranded \
--outWigNorm RPM
