Skip to content

Commit

Permalink
Merge pull request #141 from pagreene/codify-rds
Browse files Browse the repository at this point in the history
Codify RDS instance creation.
  • Loading branch information
pagreene committed Oct 22, 2020
2 parents 0061190 + edd637c commit 69877bd
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 17 deletions.
33 changes: 19 additions & 14 deletions indra_db/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
DB_CONFIG_DIR = path.expanduser('~/.config/indra')
DB_CONFIG_PATH = path.join(DB_CONFIG_DIR, 'db_config.ini')

DB_STR_FMT = "{prefix}://{username}{password}{host}{port}/{name}"
PRINCIPAL_ENV_PREFIX = 'INDRADB'
READONLY_ENV_PREFIX = 'INDRARO'
S3_DUMP_ENV_VAR = 'INDRA_DB_S3_PREFIX'
Expand Down Expand Up @@ -53,6 +52,24 @@
CONFIG = None


def build_db_url(**kwargs):
    """Build a database URL string from its individual components.

    The result has the form
    ``{prefix}://{username}{password}{host}{port}/{name}``. Each optional
    component is rendered, together with its separator (``:password``,
    ``@host``, ``:port``), only when it is given and non-empty. Missing,
    ``None``, and empty values are all rendered as an empty string.

    Parameters
    ----------
    dialect : Optional[str]
        The scheme of the URL. If the `dialect` key is given it is used,
        otherwise the `prefix` key is used.
    prefix : Optional[str]
        Fallback for the scheme when `dialect` is not given.
    driver : Optional[str]
        Appended verbatim to the scheme when both are non-empty.
    username : Optional[str]
    password : Optional[str]
    host : Optional[str]
    port : Optional[Union[str, int]]
    name : Optional[str]
        The name of the database.

    Any other keyword arguments are ignored.

    Returns
    -------
    str
        The assembled URL.
    """
    fmt = "{prefix}://{username}{password}{host}{port}/{name}"

    def component(key, lead=''):
        # Treat absent, None, and empty values alike so that the URL never
        # contains the literal text "None" and no KeyError is raised.
        value = kwargs.get(key)
        return (lead + str(value)) if value else ''

    # The scheme comes from `dialect` when that key is present, else `prefix`.
    prefix = kwargs.get('dialect', kwargs.get('prefix')) or ''
    driver = kwargs.get('driver')
    if driver and prefix:
        prefix += driver

    return fmt.format(
        prefix=prefix,
        username=component('username'),
        password=component('password', ':'),
        host=component('host', '@'),
        port=component('port', ':'),
        name=component('name'),
    )


def _get_urls_from_env(prefix):
return {k[len(prefix):].lower(): v
for k, v in environ.items()
Expand All @@ -71,19 +88,7 @@ def _load_config():
CONFIG[section[4:]] = def_dict
continue

# Extract all the database connection data
if def_dict['host']:
def_dict['host'] = '@' + def_dict['host']
def_dict['prefix'] = def_dict['dialect']
if def_dict['driver']:
def_dict['prefix'] += def_dict['driver']
if def_dict['port']:
def_dict['port'] = ':' + def_dict['port']
if def_dict['password']:
def_dict['password'] = ':' + def_dict['password']

# Get the role of the database
url = DB_STR_FMT.format(**def_dict)
url = build_db_url(**def_dict)
if def_dict.get('role') == 'readonly':
# Include the entry both with and without the -ro. This is only
# needed when sometimes a readonly database has the same name
Expand Down
137 changes: 136 additions & 1 deletion indra_db/databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
import json
import random
import logging
import string
from io import BytesIO
from numbers import Number
from functools import wraps
from datetime import datetime
from time import sleep

from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

Expand All @@ -24,6 +26,7 @@
from sqlalchemy.engine.url import make_url

from indra.util import batch_iter
from indra_db.config import CONFIG, build_db_url
from indra_db.util import S3Path
from indra_db.exceptions import IndraDbException
from indra_db.schemas import principal_schema, readonly_schema
Expand Down Expand Up @@ -156,6 +159,42 @@ def __init__(self, table, issue):
super(IndraTableError, self).__init__(self, msg)


class RdsInstanceNotFoundError(IndraDbException):
    """Raised when no RDS instance has the requested identifier."""

    def __init__(self, instance_identifier):
        message = (f"No instance with name \"{instance_identifier}\" "
                   f"found on RDS.")
        super().__init__(message)


def get_instance_attribute(attribute, instance_identifier):
    """Look up one attribute in the description of an RDS instance.

    Parameters
    ----------
    attribute : str
        The attribute to retrieve. It is tried as given, then capitalized,
        then as ``DBInstance<Capitalized>`` (e.g. "status" is matched by the
        description key "DBInstanceStatus").
    instance_identifier : str
        The identifier of the instance; it must equal the
        "DBInstanceIdentifier" entry of the description exactly.

    Returns
    -------
    The value stored under the first matching key of the description.

    Raises
    ------
    ValueError
        If the instance is found but none of the attribute patterns match.
    RdsInstanceNotFoundError
        If no described instance has the given identifier.
    """
    # Load boto3 locally to avoid unnecessary dependencies.
    import boto3
    rds = boto3.client('rds')

    # The descriptions are returned in pages, so walk every page rather than
    # only the first response; otherwise instances beyond the first page
    # would be reported as missing.
    paginator = rds.get_paginator('describe_db_instances')
    for page in paginator.paginate():
        for desc in page['DBInstances']:
            if desc['DBInstanceIdentifier'] != instance_identifier:
                continue

            # Try to match some common patterns for attribute labels.
            capitalized = attribute.capitalize()
            for label in (attribute, capitalized, f'DBInstance{capitalized}'):
                if label in desc:
                    return desc[label]

            # Give up explosively if none of the patterns match.
            raise ValueError(f"Invalid attribute: {attribute}. Did you mean "
                             f"one of these: {list(desc.keys())}?")

    # No instance with this identifier was described on any page.
    raise RdsInstanceNotFoundError(instance_identifier)


class DatabaseManager(object):
"""An object used to access INDRA's database.
Expand Down Expand Up @@ -194,6 +233,10 @@ class DatabaseManager(object):
For more sophisticated examples, several use cases can be found in
`indra.tests.test_db`.
"""
_instance_type = NotImplemented
_instance_name_fmt = NotImplemented
_db_name = NotImplemented

def __init__(self, url, label=None):
self.url = make_url(url)
self.session = None
Expand Down Expand Up @@ -231,8 +274,86 @@ def __del__(self, *args, **kwargs):
except:
print("Failed to execute rollback of database upon deletion.")

@classmethod
def create_instance(cls, instance_name, size, tag_dict=None):
    """Allocate the resources on RDS for a database, and return handle.

    Parameters
    ----------
    instance_name : str
        A name for the instance. It is lower-cased and inserted into the
        class's `_instance_name_fmt` to form the RDS instance identifier.
    size : int
        Passed to RDS as the `AllocatedStorage` of the new instance.
    tag_dict : Optional[dict]
        Tags to attach to the instance, given as a simple key-value mapping.

    Returns
    -------
    An instance of this class, connected via a URL built from the new
    instance's endpoint, the configured master user, and a freshly
    generated password.

    Raises
    ------
    AssertionError
        If the response from RDS does not echo the requested identifier.
    """
    # Load boto3 locally to avoid unnecessary dependencies.
    import boto3
    # The password is a credential, so draw it from a cryptographically
    # secure source rather than the `random` module.
    import secrets
    rds = boto3.client('rds')

    # Convert tags to boto3's list-of-dicts format.
    tags = ([{'Key': k, 'Value': v} for k, v in tag_dict.items()]
            if tag_dict else [])

    # Create a new password.
    alphabet = string.ascii_letters + string.digits
    password = ''.join(secrets.choice(alphabet) for _ in range(24))

    # Load the rds general config settings.
    rds_config = CONFIG['rds-settings']

    # Create the database.
    inp_identifier = cls._instance_name_fmt.format(
        name=instance_name.lower()
    )
    resp = rds.create_db_instance(
        DBInstanceIdentifier=inp_identifier,
        DBName=cls._db_name,
        AllocatedStorage=size,
        DBInstanceClass=cls._instance_type,
        Engine='postgres',
        MasterUsername=rds_config['master_user'],
        MasterUserPassword=password,
        VpcSecurityGroupIds=[rds_config['security_group']],
        AvailabilityZone=rds_config['availability_zone'],
        DBSubnetGroupName='default',
        Tags=tags,
        DeletionProtection=True
    )

    # Perform a basic sanity check. This is raised explicitly (rather than
    # with an `assert` statement) so the check is not stripped when Python
    # runs with -O; the exception type is kept the same for callers.
    if resp['DBInstance']['DBInstanceIdentifier'] != inp_identifier:
        raise AssertionError(
            f"Bad response from creating RDS instance {inp_identifier}:\n"
            f"{resp}"
        )

    # Wait for the database to be created.
    logger.info("Waiting for database to be created...")
    while get_instance_attribute('status', inp_identifier) == 'creating':
        sleep(5)

    # Use the given info to return a handle to the new database.
    # NOTE(review): newer SQLAlchemy releases may only accept "postgresql"
    # as the dialect name -- confirm against the pinned version.
    endpoint = get_instance_attribute('endpoint', inp_identifier)
    url_str = build_db_url(dialect='postgres', host=endpoint['Address'],
                           port=endpoint['Port'], password=password,
                           name=cls._db_name,
                           username=rds_config['master_user'])
    return cls(url_str)

def get_config_string(self):
    """Build a config entry for this handle.

    This is useful after using `create_instance`.

    Returns
    -------
    str
        One ``key = value`` line per connection field, joined by newlines.
        Fields without a value are written as ``key =``.
    """
    url = self.url
    fields = [
        ('dialect', url.drivername),
        ('driver', None),
        ('username', url.username),
        ('password', url.password_original),
        ('host', url.host),
        ('port', url.port),
        ('name', url.database),
    ]

    lines = []
    for label, setting in fields:
        if setting:
            lines.append(f'{label} = {setting}')
        else:
            # Leave the right-hand side empty for unset fields.
            lines.append(f'{label} =')
    return '\n'.join(lines)

def get_env_string(self):
    """Return the value to store in an environment variable for this handle.

    This is useful after using `create_instance`.

    Returns
    -------
    str
        The string form of this handle's URL.
    """
    env_value = str(self.url)
    return env_value

def grab_session(self):
"Get an active session with the database."
"""Get an active session with the database."""
if not self.available:
return
if self.session is None or not self.session.is_active:
Expand Down Expand Up @@ -906,6 +1027,12 @@ def pg_restore(self, dump_file, **options):

class PrincipalDatabaseManager(DatabaseManager):
"""This class represents the methods special to the principal database."""

# Note that these are NOT guaranteed to apply to older deployed instances.
_instance_type = 'db.m5.large'
_instance_name_fmt = 'indradb-{name}'
_db_name = 'indradb_principal'

def __init__(self, host, label=None):
super(self.__class__, self).__init__(host, label)
if not self.available:
Expand Down Expand Up @@ -1166,6 +1293,9 @@ def __init__(self, key, is_latest_str, is_last_modified):

class ReadonlyDatabaseManager(DatabaseManager):
"""This class represents the readonly database."""
_instance_type = 'db.m5.xlarge'
_instance_name_fmt = 'indradb-readonly-{name}'
_db_name = 'indradb_readonly'

def __init__(self, host, label=None):
super(self.__class__, self).__init__(host, label)
Expand All @@ -1182,6 +1312,11 @@ def __init__(self, host, label=None):
setattr(self, tbl.__name__, tbl)
self.__non_source_cols = None

def get_config_string(self):
    """Build a config entry for this handle, marked as a readonly database.

    Returns
    -------
    str
        The parent class's config entry, preceded by a ``role = readonly``
        line.
    """
    base_entry = super(ReadonlyDatabaseManager, self).get_config_string()
    return '\n'.join(['role = readonly', base_entry])

def get_source_names(self) -> set:
"""Get a list of the source names as they appear in SourceMeta cols."""
all_cols = set(self.get_column_names(self.SourceMeta))
Expand Down
12 changes: 10 additions & 2 deletions indra_db/resources/default_db_config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# the format defined in `indra_db.config.build_db_url`, with a name starting with
# INDRADB<db_name_in_all_caps>

# Tests Databases:
# Test Databases:
# ----------------
# Any name with 'test' in it (ex: 'test', 'test1', 'local_test', etc.) may be
# used in testing. Each test database will be tried in order, from top to
Expand Down Expand Up @@ -51,4 +51,12 @@ prefix =
# ------------------
[aws-lambda]
role =
function =
function =


# AWS RDS Config:
# ---------------
[aws-rds-settings]
master_user =
security_group =
availability_zone =
8 changes: 8 additions & 0 deletions indra_db/tests/test_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from indra_db.config import build_db_url


def test_build_db_url():
    """Check that typical connection parameters yield the expected URL."""
    params = dict(host="host", password="password", dialect="postgres",
                  username="user", port=10, name="db")
    expected_url = "postgres://user:password@host:10/db"
    res_url = build_db_url(**params)
    assert res_url == expected_url, res_url

0 comments on commit 69877bd

Please sign in to comment.