Skip to content

Commit

Permalink
Added --gff3-genes-as-transcripts option for virus genomes
Browse files Browse the repository at this point in the history
  • Loading branch information
bli25 committed Jun 27, 2018
1 parent cb87607 commit 14abd93
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 4 deletions.
14 changes: 10 additions & 4 deletions rsem-gff3-to-gtf
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ class Transcript:
self.index = 0
return self

def next(self):
def __next__(self):
if self.index == len(self.results):
raise StopIteration
interval = self.results[self.index]
Expand Down Expand Up @@ -217,6 +217,7 @@ def flush_out(fout):
parser = HelpOnErrorParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter, description = "Convert GFF3 files to GTF files.")
parser.add_argument("input_GFF3_file", help = "Input GFF3 file.")
parser.add_argument("output_GTF_file", help = "Output GTF file.")
parser.add_argument("--make-genes-as-transcripts", help = "GFF3 file does not contain transcripts, make each gene as a transcript.", action = "store_true")
parser.add_argument("--RNA-patterns", help = "Types of RNAs to be extracted, e.g. mRNA,rRNA", metavar = "<patterns>")
parser.add_argument("--extract-sequences", help = "If GFF3 file contains reference sequences, extract them to the specified file", metavar = "<output.fa>")
args = parser.parse_args()
Expand Down Expand Up @@ -274,11 +275,16 @@ with open(args.input_GFF3_file) as fin:
my_assert(gid not in gid2gname,
"Gene {0} appears multiple times! Last occurrence is at line {1}:\n{2}".format(gid, feature.line_no, feature.line))
gid2gname[gid] = feature.getAttribute("Name")
elif feature.feature_type == "transcript":

if args.make_genes_as_transcripts:
feature.feature_type = feature.original_type = "transcript"
feature.attribute_dict["Parent"] = [feature.attribute_dict["ID"]]

if feature.feature_type == "transcript":
transcript = getTranscript(feature.getAttribute("ID", True), feature)
transcript.setTranscript(feature)
else:
assert feature.feature_type == "exon"

if feature.feature_type == "exon":
for parent in feature.getAttribute("Parent", True):
transcript = getTranscript(parent, feature)
transcript.addExon(feature)
Expand Down
9 changes: 9 additions & 0 deletions rsem-prepare-reference
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ my $status;
my $gtfF = "";
my $gff3F = "";
my $gff3_RNA_patterns = "";
my $gff3_genes_as_transcripts = 0;
my $gtf_sources = "None";
my $mappingF = "";
my $polyAChoice = 1; # 0, --polyA, add polyA tails for all isoforms; 1, default, no polyA tails; 2, --no-polyA-subset
Expand All @@ -43,6 +44,7 @@ my $star_sjdboverhang = 100;
GetOptions("gtf=s" => \$gtfF,
"gff3=s" => \$gff3F,
"gff3-RNA-patterns=s" => \$gff3_RNA_patterns,
"gff3-genes-as-transcripts" => \$gff3_genes_as_transcripts,
"trusted-sources=s" => \$gtf_sources,
"transcript-to-gene-map=s" => \$mappingF,
"allele-to-gene-map=s" => \$alleleMappingF,
Expand Down Expand Up @@ -116,6 +118,9 @@ if ($gff3F ne "") {
if ($gff3_RNA_patterns ne "") {
$command .= " --RNA-patterns $gff3_RNA_patterns";
}
if ($gff3_genes_as_transcripts) {
$command .= " --make-genes-as-transcripts";
}
$command .= " $gff3F $gtfF";
&runCommand($command)
}
Expand Down Expand Up @@ -260,6 +265,10 @@ The annotation file is in GFF3 format instead of GTF format. RSEM will first con
<pattern> is a comma-separated list of transcript categories, e.g. "mRNA,rRNA". Only transcripts that match the <pattern> will be extracted. (Default: "mRNA")
=item B<--gff3-genes-as-transcripts>
This option is designed for atypical organisms, such as viruses, whose GFF3 files contain only genes. RSEM will treat each gene as a unique transcript when it converts the GFF3 file into GTF format.
=item B<--trusted-sources> <sources>
<sources> is a comma-separated list of trusted sources, e.g. "ENSEMBL,HAVANA". Only transcripts coming from these sources will be extracted. If this option is off, all sources are accepted. (Default: off)
Expand Down

0 comments on commit 14abd93

Please sign in to comment.