# The cell below defines the classes used to map to the tables of the c_elegans database.

In [1]:
from sqlalchemy import Column, INTEGER, TEXT, NUMERIC, REAL, ForeignKey
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship

Base = declarative_base()

class Feature(Base):
    """ORM mapping for one row of the ``features`` table.

    The column names (seqid, source, type, start, end, score, strand, phase)
    look like the standard GFF3 fields -- presumably the table was loaded
    from a GFF3 annotation file; confirm against the loader.
    """

    __tablename__ = 'features'

    featureid = Column(INTEGER, primary_key=True)
    seqid = Column(TEXT)
    source = Column(TEXT)
    type = Column(TEXT, index=True)
    start = Column(INTEGER, index=True)
    end = Column(INTEGER, index=True)
    score = Column(REAL, index=True)
    strand = Column(TEXT)
    phase = Column(TEXT)
    # Attribute rows whose ``featureid`` foreign key points at this feature.
    # Kept in sync with ``Attribute.parent`` through ``back_populates``.
    child = relationship('Attribute', back_populates='parent')

    def __repr__(self):
        """Return a single-line summary of this row's column values."""
        # Built from adjacent f-strings so the result stays on one line; a
        # triple-quoted literal would embed the source line break and its
        # indentation in the output.
        return (
            f'<Feature(featureid = {self.featureid}, seqid = {self.seqid}, '
            f'source = {self.source}, type = {self.type}, '
            f'start = {self.start}, end = {self.end}, score = {self.score}, '
            f'strand = {self.strand}, phase = {self.phase})>'
        )

class Attribute(Base):
    """ORM mapping for one row of the ``attributes`` table.

    Each row holds a single ``tag``/``value`` pair and is linked to a row of
    the ``features`` table through the ``featureid`` foreign key.
    """

    __tablename__ = 'attributes'

    id = Column(INTEGER, primary_key=True)
    # INTEGER so the foreign key has the same type as the primary key it
    # references (features.featureid is declared INTEGER).
    featureid = Column(INTEGER, ForeignKey('features.featureid'), index=True)
    tag = Column(TEXT, index=True)
    value = Column(TEXT)
    # The Feature this attribute belongs to; kept in sync with
    # ``Feature.child`` through ``back_populates``.
    parent = relationship('Feature', back_populates='child')

    def __repr__(self):
        """Return a one-line summary (the surrogate ``id`` is not shown)."""
        return f'<Attribute(featureid = {self.featureid}, tag = {self.tag}, value = {self.value})>'

In [2]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import func

In [3]:
# Engine for the SQLite database file c_elegans_core.sqlite.
# echo=True makes the engine log every SQL statement it emits; those are the
# "INFO sqlalchemy.engine" lines shown in the cell outputs below.
engine = create_engine('sqlite:///c_elegans_core.sqlite', echo=True)
# Session factory bound to that engine.
Session = sessionmaker(bind=engine)
# The single session instance used by all queries in the following cells.
db = Session()

### The classes created above describe the data in the tables named by their `__tablename__` attributes. They serve as the mapped entities for interacting with the database, so queries are written against them.

In [4]:
# Total number of rows in the features table. As the echoed SQL shows,
# Query.count() wraps the full entity SELECT in a subquery and counts its rows.
db.query(Feature).count()

2018-03-22 15:20:45,786 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2018-03-22 15:20:45,787 INFO sqlalchemy.engine.base.Engine ()
2018-03-22 15:20:45,789 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2018-03-22 15:20:45,789 INFO sqlalchemy.engine.base.Engine ()
2018-03-22 15:20:45,791 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2018-03-22 15:20:45,792 INFO sqlalchemy.engine.base.Engine SELECT count(*) AS count_1 
FROM (SELECT features.featureid AS features_featureid, features.seqid AS features_seqid, features.source AS features_source, features.type AS features_type, features.start AS features_start, features."end" AS features_end, features.score AS features_score, features.strand AS features_strand, features.phase AS features_phase 
FROM features) AS anon_1
2018-03-22 15:20:45,793 INFO sqlalchemy.engine.base.Engine ()


659621

In [26]:
# Number of features per feature type, returned as a list of
# (type, count) tuples, one per distinct value of features.type.
db.query(Feature.type, func.count(Feature.type)).group_by(Feature.type).all()

2018-03-22 15:29:20,836 INFO sqlalchemy.engine.base.Engine SELECT features.type AS features_type, count(features.type) AS count_1 
FROM features GROUP BY features.type
2018-03-22 15:29:20,837 INFO sqlalchemy.engine.base.Engine ()


[('CDS', 222859),
 ('chromosome', 7),
 ('exon', 269904),
 ('five_prime_UTR', 30960),
 ('gene', 20222),
 ('lnc_RNA', 283),
 ('mRNA', 33391),
 ('miRNA', 454),
 ('ncRNA', 8402),
 ('ncRNA_gene', 24765),
 ('piRNA', 15364),
 ('pre_miRNA', 257),
 ('pseudogene', 1791),
 ('pseudogenic_transcript', 1827),
 ('rRNA', 22),
 ('snRNA', 130),
 ('snoRNA', 345),
 ('tRNA', 634),
 ('three_prime_UTR', 28004)]

In [27]:
# Number of distinct tag values in the attributes table
# (SELECT DISTINCT tag wrapped in a COUNT(*) subquery, per the echoed SQL).
db.query(Attribute.tag).distinct().count()

2018-03-22 15:29:45,833 INFO sqlalchemy.engine.base.Engine SELECT count(*) AS count_1 
FROM (SELECT DISTINCT attributes.tag AS attributes_tag 
FROM attributes) AS anon_1
2018-03-22 15:29:45,834 INFO sqlalchemy.engine.base.Engine ()


16