# 210707 Upgrade-database-schemas

In [None]:
from pathlib import Path
from contextlib import redirect_stderr
import sys

In [2]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import alembic.command as cmd

In [3]:
from midas.db.migrate import get_alembic_config
from midas.db.models import *
from midas.db.migrate import get_alembic_config, current_revision

## Setup

In [4]:
TODAY = '2021-07-07'
DATESTR = '210707'

In [5]:
testdb_dir = Path('/home/jared/projects/midas/data/databases/testdb_210126')
refseq_dir = Path('/home/jared/projects/midas/data/databases/refseq-curated/2.0')

src_files = dict(
    testdb=testdb_dir / 'testdb_210126.db',
    refseq=refseq_dir / 'refseq-curated-2.0-r3-210707.db',
)

dst_files = dict(
    testdb=testdb_dir / f'testdb_210126-r2-{DATESTR}.db',
    refseq=refseq_dir / f'refseq-curated-2.0-r4-{DATESTR}.db',
)

## Run upgrades

In [6]:
with redirect_stderr(sys.stdout):
    for key in ['testdb', 'refseq']:
        src = src_files[key]
        dst = dst_files[key]

        print('*' * len(key))
        print(key)
        print('*' * len(key))
        print()

        print('Copying from src...')
        !cp {src} {dst}
        engine = create_engine(f'sqlite:///{dst}')

        print('Current revision:', current_revision(engine))

        print('Upgrading...')
        config = get_alembic_config(engine)
        cmd.upgrade(config, 'head')

        print('\n' * 3)

******
testdb
******

Copying from src...
Current revision: d961d0698083
Upgrading...
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade d961d0698083 -> 7c1a8837b74d, Rename Genome columns
INFO  [alembic.runtime.migration] Running upgrade 7c1a8837b74d -> 1c060eb1fc83, AnnotatedGenome taxonomy relationship
INFO  [alembic.runtime.migration] Running upgrade 1c060eb1fc83 -> b1c356705db2, Rename ReferenceGenomeSet table and relationships
INFO  [alembic.runtime.migration] Running upgrade b1c356705db2 -> c43540b80d50, Update key version cols




******
refseq
******

Copying from src...
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
Current revision: d961d0698083
Upgrading...
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-trans

## Update metadata

### Testdb

In [7]:
testdb_engine = create_engine(f'sqlite:///{dst_files["testdb"]}')

In [8]:
testdb_session = sessionmaker(testdb_engine)()

#### Genome set

In [9]:
testdb_gset = testdb_session.query(ReferenceGenomeSet).one()
testdb_gset.__dict__

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x7feeb543e130>,
 'version': '1.0',
 'key': 'midas/test/testdb_210126',
 'description': 'Database containing artificial genomes, to be used for end-to-end testing',
 'extra': {'date_created': '2021-01-26'},
 'id': 1,
 'name': 'testdb_210126'}

In [10]:
testdb_gset.key = 'gambit/testdb_210126'

In [11]:
testdb_extra = dict(testdb_gset.extra)
testdb_extra['revision'] = dict(
    num=2,
    date_created=TODAY,
    description='Upgraded schema to latest revision c43540b80d50, removed prefix from genome keys.'
)

testdb_gset.extra = testdb_extra

In [12]:
testdb_session.commit()

#### Genome keys

In [13]:
for g in testdb_session.query(Genome):
    assert g.key.startswith('midas/testdb_210126/')
    g.key = g.key.split('/')[-1]

In [14]:
testdb_session.commit()

#### Finish

In [15]:
testdb_engine.execute('VACUUM;')

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7feeb52b71c0>

### Refseq

In [16]:
refseq_engine = create_engine(f'sqlite:///{dst_files["refseq"]}')

In [17]:
refseq_session = sessionmaker(refseq_engine)()

#### Genome set

In [18]:
refseq_gset = refseq_session.query(ReferenceGenomeSet).one()
refseq_gset.__dict__

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x7feeb52c7b20>,
 'version': '2.0',
 'key': 'midas/refseq-bacterial-assemblies/curated',
 'description': 'Curated subset of RefSeq bacterial genomes in NCBI assembly database',
 'extra': {'revision': {'num': 3,
   'date_created': '2021-07-07',
   'description': 'Updated to alembic revision d961d0698083. Removed Entrez ESummary data. Fixed refseq_acc values incorrectly stored in genbank_acc column.'}},
 'id': 3,
 'name': 'MIDAS curated RefSeq bacterial WGS assemblies'}

In [19]:
refseq_extra = dict(refseq_gset.extra)
refseq_extra['revision'] = dict(
    num=4,
    date_created=TODAY,
    description='Upgraded schema to latest revision c43540b80d50.'
)

refseq_gset.extra = refseq_extra

In [20]:
refseq_session.commit()

### Finish

In [21]:
refseq_engine.execute('VACUUM;')

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7feeb52cfa30>