Skip to content

Commit

Permalink
Fix #16, fix #17, fix #18, fix #37 - Add query interface
Browse files Browse the repository at this point in the history
Note that there are still some open issues with this - they
are documented in #39, #42, #43, #44 and #45.
  • Loading branch information
mxsasha committed Jul 6, 2018
1 parent 7af6f00 commit 32dc8f4
Show file tree
Hide file tree
Showing 18 changed files with 1,483 additions and 37 deletions.
35 changes: 35 additions & 0 deletions irrd/conf/__init__.py
@@ -1,3 +1,4 @@
import logging.config
from typing import Any
import os

Expand All @@ -12,3 +13,37 @@ def get_setting(setting_name: str) -> Any:
if env_key in os.environ:
return os.environ[env_key]
return default


# Logging configuration in the stdlib dictConfig schema (version 1).
# Everything is sent to the console at DEBUG level; loggers created
# before this module is imported keep working (disable_existing_loggers=False).
_FORMATTERS = {
    # Verbose format, syslog-style, including timestamp and PID.
    'verbose': {
        'format': '%(asctime)s irrd[%(process)d]: %(levelname)s %(name)s: %(message)s'
    },
    # Compact format used by the console handler.
    'simple': {
        'format': '%(levelname)s %(name)s: %(message)s'
    },
}

_HANDLERS = {
    # Discards all records; available for selectively muting loggers.
    'null': {
        'level': 'DEBUG',
        'class': 'logging.NullHandler',
    },
    'console': {
        'level': 'DEBUG',
        'class': 'logging.StreamHandler',
        'formatter': 'simple'
    },
}

LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': _FORMATTERS,
    'handlers': _HANDLERS,
    'loggers': {
        # Root logger: all records propagate here and go to the console.
        '': {
            'handlers': ['console'],
            'level': 'DEBUG',
            'propagate': True,
        },
    }
}

logging.config.dictConfig(LOGGING)
18 changes: 9 additions & 9 deletions irrd/db/alembic/versions/28dc1cd85bdc_initial_db.py
Expand Up @@ -29,13 +29,16 @@ def upgrade():
sa.Column('ip_version', sa.Integer(), nullable=True),
sa.Column('ip_first', postgresql.INET(), nullable=True),
sa.Column('ip_last', postgresql.INET(), nullable=True),
sa.Column('asn_first', sa.Integer(), nullable=True),
sa.Column('asn_last', sa.Integer(), nullable=True),
sa.Column('ip_size', sa.DECIMAL(scale=0), nullable=True),
sa.Column('asn_first', sa.BigInteger(), nullable=True),
sa.Column('asn_last', sa.BigInteger(), nullable=True),
sa.Column('created', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
sa.Column('updated', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
sa.PrimaryKeyConstraint('pk'),
sa.UniqueConstraint('rpsl_pk', 'source', name='rpsl_pk_source_unique')
)
op.create_index(op.f('ix_rpsl_objects_ip_first'), 'rpsl_objects', ['ip_first'], unique=False)
op.create_index(op.f('ix_rpsl_objects_ip_last'), 'rpsl_objects', ['ip_last'], unique=False)
op.create_index(op.f('ix_rpsl_objects_asn_first'), 'rpsl_objects', ['asn_first'], unique=False)
op.create_index(op.f('ix_rpsl_objects_asn_last'), 'rpsl_objects', ['asn_last'], unique=False)
op.create_index(op.f('ix_rpsl_objects_ip_version'), 'rpsl_objects', ['ip_version'], unique=False)
Expand All @@ -58,18 +61,15 @@ def upgrade():
[sa.text("((parsed_data->'member-of'))")], unique=False, postgresql_using='gin')
op.create_index(op.f('ix_rpsl_objects_parsed_data_mp_members'), 'rpsl_objects',
[sa.text("((parsed_data->'mp-members'))")], unique=False,
postgresql_using='gin') # ### end Alembic commands ###

op.create_index('ix_rpsl_objects_ip_first', 'rpsl_objects', [sa.text('ip_first inet_ops')], unique=False,
postgresql_using='gist')
op.create_index('ix_rpsl_objects_ip_last', 'rpsl_objects', [sa.text('ip_last inet_ops')], unique=False,
postgresql_using='gist')
postgresql_using='gin')
op.create_index(op.f('ix_rpsl_objects_parsed_data_origin'), 'rpsl_objects', [sa.text("((parsed_data->'origin'))")],
unique=False, postgresql_using='gin')


def downgrade():
# Manually added
op.drop_index(op.f('ix_rpsl_objects_ip_last'), table_name='rpsl_objects')
op.drop_index(op.f('ix_rpsl_objects_ip_first'), table_name='rpsl_objects')
op.drop_index(op.f('ix_rpsl_objects_ip_last'), table_name='rpsl_objects')
op.drop_index(op.f('ix_rpsl_objects_parsed_data_zone_c'), table_name='rpsl_objects')
op.drop_index(op.f('ix_rpsl_objects_parsed_data_role'), table_name='rpsl_objects')
op.drop_index(op.f('ix_rpsl_objects_parsed_data_members'), table_name='rpsl_objects')
Expand Down
70 changes: 63 additions & 7 deletions irrd/db/api.py
@@ -1,16 +1,19 @@
import logging
from datetime import datetime
from typing import List, Set

import sqlalchemy as sa
from IPy import IP
from sqlalchemy.dialects import postgresql as pg

from irrd.utils.validators import parse_as_number, ValidationError
from . import engine
from .models import RPSLDatabaseObject
from irrd.rpsl.parser import RPSLObject
from irrd.rpsl.rpsl_objects import lookup_field_names

MAX_RECORDS_CACHE_BEFORE_INSERT = 5000
logger = logging.getLogger(__name__)
MAX_RECORDS_CACHE_BEFORE_INSERT = 500


class RPSLDatabaseQuery:
Expand All @@ -32,9 +35,12 @@ class RPSLDatabaseQuery:
def __init__(self):
self.statement = sa.select([
self.columns.pk,
self.columns.object_class,
self.columns.rpsl_pk,
self.columns.object_text
])
self.columns.parsed_data,
self.columns.object_text,
self.columns.source,
]).order_by(self.columns.ip_first.asc(), self.columns.asn_first.asc())
self._lookup_attr_counter = 0
self._query_frozen = False

Expand All @@ -44,7 +50,7 @@ def pk(self, pk: str):

def rpsl_pk(self, rpsl_pk: str):
"""Filter on an exact RPSL PK (e.g. 192.0.2.0/24,AS23456)."""
return self._filter(self.columns.rpsl_pk == rpsl_pk)
return self._filter(self.columns.rpsl_pk == rpsl_pk.upper())

def sources(self, sources: List[str]):
"""
Expand All @@ -53,7 +59,7 @@ def sources(self, sources: List[str]):
Sources list must be an iterable. Will match objects from any
of the mentioned sources.
"""
fltr = self.columns.source.in_(sources)
fltr = self.columns.source.in_([s.upper() for s in sources])
return self._filter(fltr)

def object_classes(self, object_classes: List[str]):
Expand All @@ -68,6 +74,7 @@ def object_classes(self, object_classes: List[str]):

def lookup_attr(self, attr_name: str, attr_value: str):
"""Filter on a lookup attribute, e.g. mnt-by."""
attr_name = attr_name.lower()
if attr_name not in self.lookup_field_names:
raise ValueError(f"Invalid lookup attribute: {attr_name}")
self._check_query_frozen()
Expand Down Expand Up @@ -106,7 +113,7 @@ def ip_less_specific(self, ip: IP):

def ip_less_specific_one_level(self, ip: IP):
"""
Filter any less specifics or exact matches of a prefix.
Filter one level less specific of a prefix.
Due to implementation details around filtering, this must
always be the last call on a query object, or unpredictable
Expand Down Expand Up @@ -157,6 +164,54 @@ def asn(self, asn: int):
fltr = sa.and_(self.columns.asn_first <= asn, self.columns.asn_last >= asn)
return self._filter(fltr)

def text_search(self, value: str):
    """
    Search the database for a specific free text.

    In order, this attempts:
    - If the value is a valid AS number, return all as-block, as-set and
      aut-num objects relating to or including that AS number.
    - If the value is a valid IP address or network, return all objects
      that relate to that resource and any less specifics.
    - Otherwise, return all objects where the RPSL primary key is exactly
      this value, or where it (case-insensitively) matches part of a
      person/role name (not nic-hdl, their actual person/role attribute
      value).

    Returns self, so calls can be chained.
    """
    self._check_query_frozen()
    try:
        _, asn = parse_as_number(value)
        return self.object_classes(['as-block', 'as-set', 'aut-num']).asn(asn)
    except ValidationError:
        pass

    try:
        ip = IP(value)
        return self.ip_less_specific(ip)
    except ValueError:
        pass

    # BUG FIX: the original read `counter = ++self._lookup_attr_counter`,
    # which in Python is a double unary plus and never increments the
    # counter. Repeated free-text searches on one query would then reuse
    # the same bind-parameter name and overwrite each other's values.
    # Increment explicitly instead, consistent with other filter methods.
    self._lookup_attr_counter += 1
    counter = self._lookup_attr_counter
    fltr = sa.or_(
        self.columns.rpsl_pk == value,
        sa.and_(
            self.columns.object_class == 'person',
            sa.text(f"parsed_data->>'person' ILIKE :lookup_attr_text_search{counter}")
        ),
        sa.and_(
            self.columns.object_class == 'role',
            sa.text(f"parsed_data->>'role' ILIKE :lookup_attr_text_search{counter}")
        ),
    )
    self.statement = self.statement.where(fltr).params(
        **{f'lookup_attr_text_search{counter}': '%' + value + '%'}
    )
    return self

def first_only(self):
    """Restrict the query to return at most a single row; returns self."""
    limited_statement = self.statement.limit(1)
    self.statement = limited_statement
    return self

def _filter(self, fltr):
self._check_query_frozen()
self.statement = self.statement.where(fltr)
Expand Down Expand Up @@ -188,6 +243,7 @@ def execute_query(self, query: RPSLDatabaseQuery):
"""Execute an RPSLDatabaseQuery within the current transaction."""
# To be able to query objects that were just created, flush the cache.
self._flush_rpsl_object_upsert_cache()
logger.debug(f'Executing: {query}')
result = self._connection.execute(query.statement)
for row in result:
yield dict(row)
Expand Down Expand Up @@ -232,7 +288,7 @@ def upsert_rpsl_object(self, rpsl_object: RPSLObject):
'ip_size': ip_size,
'asn_first': rpsl_object.asn_first,
'asn_last': rpsl_object.asn_last,
'updated': datetime.utcnow(), # TODO: timezones between created/updates are now inconsistent
'updated': datetime.utcnow(),
})
self._rpsl_pk_source_seen.add(rpsl_pk_source)

Expand Down
12 changes: 5 additions & 7 deletions irrd/db/models.py
Expand Up @@ -29,11 +29,11 @@ class RPSLDatabaseObject(Base): # type: ignore
object_text = sa.Column(sa.Text, nullable=False)

ip_version = sa.Column(sa.Integer, index=True)
ip_first = sa.Column(pg.INET)
ip_last = sa.Column(pg.INET)
ip_first = sa.Column(pg.INET, index=True)
ip_last = sa.Column(pg.INET, index=True)
ip_size = sa.Column(sa.DECIMAL(scale=0))
asn_first = sa.Column(sa.Integer, index=True)
asn_last = sa.Column(sa.Integer, index=True)
asn_first = sa.Column(sa.BigInteger, index=True)
asn_last = sa.Column(sa.BigInteger, index=True)

created = sa.Column(sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False)
updated = sa.Column(sa.DateTime(timezone=True), server_default=sa.func.now(), onupdate=sa.func.now(), nullable=False)
Expand All @@ -42,8 +42,6 @@ class RPSLDatabaseObject(Base): # type: ignore
def __table_args__(cls): # noqa
args = [
sa.UniqueConstraint('rpsl_pk', 'source', name='rpsl_pk_source_unique'),
sa.Index('ix_rpsl_objects_ip_first', sa.text("ip_first inet_ops"), postgresql_using='gist'),
sa.Index('ix_rpsl_objects_ip_last', sa.text("ip_last inet_ops"), postgresql_using='gist'),
]
for name in lookup_field_names():
index_name = 'ix_rpsl_objects_parsed_data_' + name.replace('-', '_')
Expand All @@ -56,7 +54,7 @@ def __repr__(self):


expected_lookup_field_names = {
'zone-c', 'member-of', 'mnt-by', 'role', 'members', 'person', 'source', 'mp-members'}
'zone-c', 'member-of', 'mnt-by', 'role', 'members', 'person', 'source', 'mp-members', 'origin'}
if sorted(lookup_field_names()) != sorted(expected_lookup_field_names): # pragma: no cover
raise RuntimeError(f"Field names of lookup fields do not match expected set. Indexes may be missing. "
f"Expected: {expected_lookup_field_names}, actual: {lookup_field_names()}")
63 changes: 54 additions & 9 deletions irrd/db/tests/test_api.py
Expand Up @@ -35,6 +35,7 @@ def irrd_database():
RPSLDatabaseObject.metadata.drop_all(engine)


# noinspection PyTypeChecker
@pytest.fixture()
def database_handler_with_route():
rpsl_object_route_v4 = Mock(
Expand Down Expand Up @@ -96,11 +97,11 @@ def test_object_writing(self, monkeypatch, irrd_database):

# There should be two entries with MNT-CORRECT in the db now.
query = RPSLDatabaseQuery()
result = [i for i in self.dh.execute_query(query)] # Loop to exhaust the generator
result = list(self.dh.execute_query(query))
assert len(result) == 2

query.lookup_attr('mnt-by', 'MNT-CORRECT')
result = [i for i in self.dh.execute_query(query)]
result = list(self.dh.execute_query(query))
assert len(result) == 2

rpsl_obj_ignored = Mock(
Expand All @@ -121,12 +122,13 @@ def test_object_writing(self, monkeypatch, irrd_database):
self.dh.rollback()

query = RPSLDatabaseQuery()
result = [i for i in self.dh.execute_query(query)] # Loop to exhaust the generator
result = list(self.dh.execute_query(query))
assert len(result) == 2

self.dh._connection.close()


# noinspection PyTypeChecker
class TestRPSLDatabaseQueryLive:

def test_matching_filters(self, irrd_database, database_handler_with_route):
Expand All @@ -142,6 +144,10 @@ def test_matching_filters(self, irrd_database, database_handler_with_route):
self._assert_match(RPSLDatabaseQuery().ip_more_specific(IP('192.0.0.0/21')))
self._assert_match(RPSLDatabaseQuery().ip_less_specific(IP('192.0.2.0/24')))
self._assert_match(RPSLDatabaseQuery().ip_less_specific(IP('192.0.2.0/25')))
self._assert_match(RPSLDatabaseQuery().text_search('192.0.2.0/24'))
self._assert_match(RPSLDatabaseQuery().text_search('192.0.2.0/25'))
self._assert_match(RPSLDatabaseQuery().text_search('192.0.2.1'))
self._assert_match(RPSLDatabaseQuery().text_search('192.0.2.0/24,AS23456'))

def test_chained_filters(self, irrd_database, database_handler_with_route):
self.dh = database_handler_with_route
Expand Down Expand Up @@ -169,6 +175,43 @@ def test_non_matching_filters(self, irrd_database, database_handler_with_route):
self._assert_no_match(RPSLDatabaseQuery().asn(23455))
self._assert_no_match(RPSLDatabaseQuery().ip_more_specific(IP('192.0.2.0/24')))
self._assert_no_match(RPSLDatabaseQuery().ip_less_specific(IP('192.0.2.0/23')))
self._assert_no_match(RPSLDatabaseQuery().text_search('192.0.2.0/23'))
self._assert_no_match(RPSLDatabaseQuery().text_search('AS2914'))
self._assert_no_match(RPSLDatabaseQuery().text_search('23456'))
# RPSL pk searches are case sensitive
self._assert_no_match(RPSLDatabaseQuery().text_search('192.0.2.0/24,as23456'))

def test_text_search_person_role(self, irrd_database):
    """
    text_search() should match partial person/role names case-insensitively.

    BUG FIX: the original declared the role fixture with
    rpsl_object_class='person' and a 'person' parsed_data key, so the
    role branch of text_search was never actually exercised (the match
    succeeded through the person branch instead). Declared as a real
    role object so both branches are covered.
    """
    rpsl_object_person = Mock(
        pk=lambda: 'PERSON',
        rpsl_object_class='person',
        parsed_data={'person': 'my person-name', 'source': 'TEST'},
        render_rpsl_text=lambda: 'object-text',
        ip_version=lambda: None,
        ip_first=None,
        ip_last=None,
        asn_first=None,
        asn_last=None,
    )
    rpsl_object_role = Mock(
        pk=lambda: 'ROLE',
        rpsl_object_class='role',
        parsed_data={'role': 'my role-name', 'source': 'TEST'},
        render_rpsl_text=lambda: 'object-text',
        ip_version=lambda: None,
        ip_first=None,
        ip_last=None,
        asn_first=None,
        asn_last=None,
    )
    self.dh = DatabaseHandler()
    self.dh.upsert_rpsl_object(rpsl_object_person)
    self.dh.upsert_rpsl_object(rpsl_object_role)

    # Partial, attribute-value match — not the nic-hdl / primary key.
    self._assert_match(RPSLDatabaseQuery().text_search('person-name'))
    self._assert_match(RPSLDatabaseQuery().text_search('role-name'))

    self.dh._connection.close()

def test_more_less_specific_filters(self, irrd_database, database_handler_with_route):
self.dh = database_handler_with_route
Expand Down Expand Up @@ -228,6 +271,11 @@ def test_more_less_specific_filters(self, irrd_database, database_handler_with_r
assert len(rpsl_pks) == 1, f"Failed query: {q}"
assert '192.0.2.0/25,AS23456' in rpsl_pks

q = RPSLDatabaseQuery().ip_less_specific(IP('192.0.2.0/25')).first_only()
rpsl_pks = [r['rpsl_pk'] for r in self.dh.execute_query(q)]
assert len(rpsl_pks) == 1, f"Failed query: {q}"
assert '192.0.2.0/25,AS23456' in rpsl_pks

q = RPSLDatabaseQuery().sources(['TEST']).ip_less_specific_one_level(IP('192.0.2.0/27'))
self._assert_match(q)

Expand All @@ -243,12 +291,9 @@ def test_invalid_lookup_attribute(self):

def _assert_match(self, query):
__tracebackhide__ = True
assert generator_len(self.dh.execute_query(query)) == 1, f"Failed query: {query}"
assert len(list(self.dh.execute_query(query))) == 1, f"Failed query: {query}"

def _assert_no_match(self, query):
__tracebackhide__ = True
assert not generator_len(self.dh.execute_query(query)), f"Failed query: {query}"


def generator_len(generator):
return len([i for i in generator])
result = list(self.dh.execute_query(query))
assert not len(result), f"Failed query: {query}: unexpected output: {result}"
3 changes: 2 additions & 1 deletion irrd/rpsl/fields.py
Expand Up @@ -236,10 +236,11 @@ def parse(self, value: str, messages: RPSLParserMessages, strict_validation=True
messages.error(f"Invalid set name {value}: component {component} is a reserved word")
return None

parsed_as_number = None
try:
parsed_as_number, _ = parse_as_number(component)
except ValidationError as ve:
parsed_as_number = None
pass
if not re_generic_name.match(component.upper()) and not parsed_as_number:
messages.error(
f"Invalid set {value}: component {component} is not a valid AS number nor a valid set name"
Expand Down

0 comments on commit 32dc8f4

Please sign in to comment.