Skip to content

Commit

Permalink
Merge pull request #376 from VoSeq/genbankfasta
Browse files Browse the repository at this point in the history
Genbankfasta. Ref #373
  • Loading branch information
carlosp420 committed Jan 7, 2018
2 parents 2bfb0d7 + dafbd11 commit 8190787
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 5 deletions.
14 changes: 14 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -582,3 +582,17 @@ and place them in the ``config.json`` configuration file of VoSeq:
* Save and exit.

Thus, every picture that you upload into your VoSeq installation will be uploaded into your Flickr account.

Lineages
========

GenBank FASTA files have an optional lineage field. You can define the lineage for each superfamily in the form:

LINEAGES = {
# superfamily: lineage from domain Eukaryota to suborder Ditrysia
"Papilionoidea": "Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; ",
"Hesperioidea": "Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; ",
"Hedyloidea": "Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; ",
}

by editing the file ``voseq/create_dataset/utils.py``.
4 changes: 2 additions & 2 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ django-haystack==2.5.0
easy-thumbnails==2.2
flickrapi==2.0

dataset-creator==0.3.17
seqrecord-expanded==0.2.8
dataset-creator==0.3.19
seqrecord-expanded==0.2.9
degenerate-dna==0.0.9
36 changes: 33 additions & 3 deletions voseq/create_dataset/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

from seqrecord_expanded import SeqRecordExpanded
from seqrecord_expanded.exceptions import MissingParameterError
from seqrecord_expanded.exceptions import TranslationErrorMixedGappedSeq
Expand All @@ -13,6 +15,13 @@
from public_interface.models import Sequences
from public_interface.models import Vouchers

# Lineage from domain Eukaryota to suborder Ditrysia, shared by every
# superfamily listed below. The trailing "; " is part of the value.
_EUKARYOTA_TO_DITRYSIA = (
    "Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; "
    "Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; "
)

# Maps a superfamily name to its lineage string.
LINEAGES = {
    "Papilionoidea": _EUKARYOTA_TO_DITRYSIA,
    "Hesperioidea": _EUKARYOTA_TO_DITRYSIA,
    "Hedyloidea": _EUKARYOTA_TO_DITRYSIA,
}


class CreateDataset(object):
"""Accepts form input to create a dataset in several formats.
Expand Down Expand Up @@ -171,29 +180,49 @@ def build_seq_obj(self, code, gene_code, our_taxon_names, all_seqs):
seq = self.create_seq_record(this_voucher_seqs)

if code in our_taxon_names:
lineage = self.get_lineage(code)
seq_record = SeqRecordExpanded(
seq,
voucher_code=code.replace(" ", "_"),
taxonomy=our_taxon_names[code],
gene_code=gene_code,
reading_frame=self.gene_codes_metadata[gene_code]['reading_frame'],
table=self.gene_codes_metadata[gene_code]['genetic_code'],
lineage=lineage,
)
return seq_record
else:
return None

def get_lineage(self, code):
    """Build the lineage string for the voucher identified by ``code``.

    The lineage starts with the entry registered in ``LINEAGES`` for the
    voucher's superfamily (nothing when the superfamily is not registered)
    and continues with the voucher's own ranks: family, subfamily, tribe,
    subtribe, genus, species and subspecies.

    Names that are empty or ``None`` are skipped, so the result never holds
    empty elements or a leading/trailing separator.

    :param code: voucher code used to look up the ``Vouchers`` record.
    :return: names joined by ``"; "``; an empty string when no name is known.

    The ``Vouchers.objects.get`` lookup is not guarded here, so anything it
    raises propagates to the caller.
    """
    voucher = Vouchers.objects.get(code=code)

    # Split the registered prefix into names as well, so prefix and ranks go
    # through the same filtering and joining below.
    names = LINEAGES.get(voucher.superfamily, "").split(";")
    names += [
        voucher.family, voucher.subfamily, voucher.tribe, voucher.subtribe,
        voucher.genus, voucher.species, voucher.subspecies,
    ]
    return "; ".join(
        name.strip() for name in names if name and name.strip())

def extract_sequence_from_all_seqs_in_db(self, all_seqs, code, gene_code):
    """Return the sequences stored for a voucher and gene code.

    :param all_seqs: mapping indexed first by voucher code and then by gene
        code.
    :param code: voucher code to look up.
    :param gene_code: gene code to look up for that voucher.
    :return: the value stored at ``all_seqs[code][gene_code]``, or ``'?'``
        when either key is missing.

    A missing voucher or a missing gene code records exactly one warning in
    ``self.warnings``.
    """
    try:
        # Both lookups fail the same way, so one handler covers a missing
        # voucher and a missing gene code alike.
        return all_seqs[code][gene_code]
    except KeyError:
        self.warnings += [
            'Could not find sequences for voucher {0} and gene_code {1}'.format(
                code, gene_code)]
        return '?'

Expand Down Expand Up @@ -242,7 +271,8 @@ def get_gene_codes_metadata(self):
"""
:return: dictionary with genecode and base pair number.
"""
queryset = Genes.objects.all().values('gene_code', 'length', 'reading_frame', 'genetic_code')
queryset = Genes.objects.all().values(
'gene_code', 'length', 'reading_frame', 'genetic_code')
gene_codes_metadata = dict()
for i in queryset:
gene_code = i['gene_code']
Expand Down

0 comments on commit 8190787

Please sign in to comment.