Skip to content

Commit

Permalink
Moved metadata_store into database and content_discovery (#7773)
Browse files Browse the repository at this point in the history
  • Loading branch information
qstokkink committed Dec 14, 2023
1 parent 6817a03 commit 5923954
Show file tree
Hide file tree
Showing 86 changed files with 834 additions and 1,003 deletions.
2 changes: 1 addition & 1 deletion src/tribler/core/components/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from tribler.core.components.libtorrent.download_manager.download_manager import DownloadManager
from tribler.core.components.libtorrent.settings import LibtorrentSettings
from tribler.core.components.libtorrent.torrentdef import TorrentDef
from tribler.core.components.metadata_store.db.store import MetadataStore
from tribler.core.components.database.db.store import MetadataStore
from tribler.core.config.tribler_config import TriblerConfig
from tribler.core.tests.tools.common import TESTS_DATA_DIR
from tribler.core.utilities.path_util import Path
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from asyncio import Future

from ipv8.requestcache import RandomNumberCache
from tribler.core.components.metadata_store.utils import RequestTimeoutException


class SelectRequest(RandomNumberCache):
Expand All @@ -25,6 +24,10 @@ def on_timeout(self):
self.timeout_callback(self)


class RequestTimeoutException(Exception):
pass


class EvaSelectRequest(SelectRequest):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@
from tribler.core.components.ipv8.eva.protocol import EVAProtocol
from tribler.core.components.ipv8.eva.result import TransferResult
from tribler.core.components.knowledge.community.knowledge_validator import is_valid_resource
from tribler.core.components.metadata_store.db.orm_bindings.torrent_metadata import LZ4_EMPTY_ARCHIVE, entries_to_chunk
from tribler.core.components.metadata_store.db.store import ObjState
from tribler.core.components.database.db.orm_bindings.torrent_metadata import LZ4_EMPTY_ARCHIVE, entries_to_chunk
from tribler.core.components.database.db.store import ObjState
from tribler.core.components.torrent_checker.torrent_checker.dataclasses import HealthInfo
from tribler.core.upgrade.tags_to_knowledge.previous_dbs.knowledge_db import ResourceType
from tribler.core.utilities.pony_utils import run_threaded
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from ipv8.community import CommunitySettings
from tribler.core.components.database.db.tribler_database import TriblerDatabase
from tribler.core.components.metadata_store.db.store import MetadataStore
from tribler.core.components.database.db.store import MetadataStore
from tribler.core.components.torrent_checker.torrent_checker.torrent_checker import TorrentChecker
from tribler.core.utilities.notifier import Notifier

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from asyncio import Future
from binascii import hexlify
from operator import attrgetter
from random import choices, randint, random
from random import choices, randint
from typing import List
from unittest.mock import AsyncMock, Mock, patch

Expand All @@ -30,9 +30,9 @@
from tribler.core.components.database.db.layers.tests.test_knowledge_data_access_layer_base import \
Resource, TestKnowledgeAccessLayerBase
from tribler.core.components.database.db.tribler_database import TriblerDatabase
from tribler.core.components.metadata_store.db.orm_bindings.torrent_metadata import LZ4_EMPTY_ARCHIVE, NEW
from tribler.core.components.metadata_store.db.serialization import CHANNEL_THUMBNAIL, NULL_KEY, REGULAR_TORRENT
from tribler.core.components.metadata_store.db.store import MetadataStore
from tribler.core.components.database.db.orm_bindings.torrent_metadata import LZ4_EMPTY_ARCHIVE, NEW
from tribler.core.components.database.db.serialization import CHANNEL_THUMBNAIL, NULL_KEY, REGULAR_TORRENT
from tribler.core.components.database.db.store import MetadataStore
from tribler.core.components.content_discovery.community.content_discovery_community import ContentDiscoveryCommunity
from tribler.core.components.torrent_checker.torrent_checker.torrent_checker import TorrentChecker
from tribler.core.components.torrent_checker.torrent_checker.torrentchecker_session import HealthInfo
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from ipv8.peerdiscovery.network import Network
from tribler.core.components.component import Component
from tribler.core.components.content_discovery.community.content_discovery_community import ContentDiscoveryCommunity
from tribler.core.components.database.database_component import DatabaseComponent
from tribler.core.components.ipv8.ipv8_component import INFINITE, Ipv8Component
from tribler.core.components.metadata_store.metadata_store_component import MetadataStoreComponent
from tribler.core.components.reporter.reporter_component import ReporterComponent
from tribler.core.components.torrent_checker.torrent_checker_component import TorrentCheckerComponent

Expand All @@ -18,15 +18,15 @@ async def run(self):
await self.get_component(ReporterComponent)

self._ipv8_component = await self.require_component(Ipv8Component)
metadata_store_component = await self.require_component(MetadataStoreComponent)
database_component = await self.require_component(DatabaseComponent)
torrent_checker_component = await self.require_component(TorrentCheckerComponent)

self.community = ContentDiscoveryCommunity(ContentDiscoveryCommunity.settings_class(
my_peer = self._ipv8_component.peer,
endpoint = self._ipv8_component.ipv8.endpoint,
network = Network(),
maximum_payload_size = self.session.config.content_discovery_community.maximum_payload_size,
metadata_store=metadata_store_component.mds,
metadata_store=database_component.mds,
torrent_checker=torrent_checker_component.torrent_checker,
notifier=self.session.notifier
))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from marshmallow.fields import Integer, String

from tribler.core.components.database.restapi.schema import MetadataParameters


class RemoteQueryParameters(MetadataParameters):
uuid = String()
channel_pk = String(description='Channel to query, must also define origin_id')
origin_id = Integer(default=None, description='Peer id to query, must also define channel_pk')
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from binascii import hexlify, unhexlify

from aiohttp import web
from aiohttp_apispec import docs, querystring_schema
from marshmallow.fields import List, String

from ipv8.REST.schema import schema
from tribler.core.components.content_discovery.community.content_discovery_community import ContentDiscoveryCommunity
from tribler.core.components.content_discovery.restapi.schema import RemoteQueryParameters
from tribler.core.components.restapi.rest.rest_endpoint import HTTP_BAD_REQUEST, MAX_REQUEST_SIZE, RESTEndpoint, \
RESTResponse
from tribler.core.utilities.utilities import froze_it


@froze_it
class SearchEndpoint(RESTEndpoint):
"""
This endpoint is responsible for searching in channels and torrents present in the local Tribler database.
"""
path = '/search'

def __init__(self,
popularity_community: ContentDiscoveryCommunity,
middlewares=(),
client_max_size=MAX_REQUEST_SIZE):
super().__init__(middlewares, client_max_size)
self.popularity_community = popularity_community

def setup_routes(self):
self.app.add_routes([web.put('/remote', self.remote_search)])

@classmethod
def sanitize_parameters(cls, parameters):
sanitized = dict(parameters)
if "max_rowid" in parameters:
sanitized["max_rowid"] = int(parameters["max_rowid"])
if "channel_pk" in parameters:
sanitized["channel_pk"] = unhexlify(parameters["channel_pk"])
if "origin_id" in parameters:
sanitized["origin_id"] = int(parameters["origin_id"])
return sanitized


@docs(
tags=['Metadata'],
summary="Perform a search for a given query.",
responses={200: {
'schema': schema(RemoteSearchResponse={'request_uuid': String(), 'peers': List(String())})},
"examples": {
'Success': {
"request_uuid": "268560c0-3f28-4e6e-9d85-d5ccb0269693",
"peers": ["50e9a2ce646c373985a8e827e328830e053025c6", "107c84e5d9636c17b46c88c3ddb54842d80081b0"]
}
}
},
)
@querystring_schema(RemoteQueryParameters)
async def remote_search(self, request):
self._logger.info('Create remote search request')
# Query remote results from the GigaChannel Community.
# Results are returned over the Events endpoint.
try:
sanitized = self.sanitize_parameters(request.query)
except (ValueError, KeyError) as e:
return RESTResponse({"error": f"Error processing request parameters: {e}"}, status=HTTP_BAD_REQUEST)
self._logger.info(f'Parameters: {sanitized}')

request_uuid, peers_list = self.popularity_community.send_search_request(**sanitized)
peers_mid_list = [hexlify(p.mid).decode() for p in peers_list]

return RESTResponse({"request_uuid": str(request_uuid), "peers": peers_mid_list})
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import uuid
from unittest.mock import Mock

import pytest

from tribler.core.components.content_discovery.restapi.search_endpoint import SearchEndpoint
from tribler.core.components.restapi.rest.base_api_test import do_request
from tribler.core.utilities.unicode import hexlify


@pytest.fixture(name="mock_content_discovery_community")
def fixture_mock_content_discovery_community():
return Mock()


@pytest.fixture(name="endpoint")
def fixture_endpoint(mock_content_discovery_community):
return SearchEndpoint(mock_content_discovery_community)


async def test_create_remote_search_request(rest_api, mock_content_discovery_community):
"""
Test that remote search call is sent on a REST API search request
"""
sent = {}
peers = []
request_uuid = uuid.uuid4()

def mock_send(**kwargs):
sent.update(kwargs)
return request_uuid, peers

# Test querying for keywords
mock_content_discovery_community.send_search_request = mock_send
search_txt = "foo"
await do_request(
rest_api,
f'search/remote?txt_filter={search_txt}&max_rowid=1',
request_type="PUT",
expected_code=200,
expected_json={"request_uuid": str(request_uuid), "peers": peers},
)
assert sent['txt_filter'] == search_txt
sent.clear()

# Test querying channel data by public key, e.g. for channel preview purposes
channel_pk = "ff"
await do_request(
rest_api, f'search/remote?channel_pk={channel_pk}&metadata_type=torrent', request_type="PUT", expected_code=200
)
assert hexlify(sent['channel_pk']) == channel_pk


async def test_create_remote_search_request_illegal(rest_api):
"""
Test that remote search call is sent on a REST API search request
"""
response = await do_request(
rest_api,
'search/remote?origin_id=a',
request_type="PUT",
expected_code=400
)
assert "error" in response
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from tribler.core.components.key.key_component import KeyComponent
from tribler.core.components.knowledge.knowledge_component import KnowledgeComponent
from tribler.core.components.libtorrent.libtorrent_component import LibtorrentComponent
from tribler.core.components.metadata_store.metadata_store_component import MetadataStoreComponent
from tribler.core.components.content_discovery.content_discovery_component import ContentDiscoveryComponent
from tribler.core.components.session import Session
from tribler.core.components.socks_servers.socks_servers_component import SocksServersComponent
Expand All @@ -15,8 +14,7 @@

async def test_content_discovery_component(tribler_config):
components = [DatabaseComponent(), SocksServersComponent(), LibtorrentComponent(), TorrentCheckerComponent(),
KnowledgeComponent(), MetadataStoreComponent(), KeyComponent(), Ipv8Component(),
ContentDiscoveryComponent()]
KnowledgeComponent(), KeyComponent(), Ipv8Component(), ContentDiscoveryComponent()]
async with Session(tribler_config, components) as session:
comp = session.get_instance(ContentDiscoveryComponent)
assert comp.community
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
import re
from functools import cmp_to_key

from tribler.core.components.metadata_store.category_filter.family_filter import default_xxx_filter
from tribler.core.components.metadata_store.category_filter.init_category import getCategoryInfo
from tribler.core.components.database.category_filter.family_filter import default_xxx_filter
from tribler.core.components.database.category_filter.init_category import getCategoryInfo
from tribler.core.utilities.install_dir import get_lib_path
from tribler.core.utilities.unicode import recursive_unicode

CATEGORY_CONFIG_FILE = get_lib_path() / 'components/metadata_store/category_filter/category.conf'
CATEGORY_CONFIG_FILE = get_lib_path() / 'components/database/category_filter/category.conf'


def cmp_rank(a, b):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

WORDS_REGEXP = re.compile('[a-zA-Z0-9]+')

termfilename = get_lib_path() / 'components' / 'metadata_store' / 'category_filter' / 'filter_terms.filter'
termfilename = get_lib_path() / 'components' / 'database' / 'category_filter' / 'filter_terms.filter'


def initTerms(filename):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# !ACHTUNG! We must first read the line into a file, then release the lock, and only then pass it to regex compiler.
# Otherwise, there is an annoying race condition that reads in an empty file!
with open(get_lib_path() / 'components' / 'metadata_store' / 'category_filter' / 'level2.regex', encoding="utf-8") as f:
with open(get_lib_path() / 'components' / 'database' / 'category_filter' / 'level2.regex', encoding="utf-8") as f:
regex = f.read().strip()
stoplist_expression = re.compile(regex, re.IGNORECASE)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from tribler.core.components.metadata_store.category_filter.category import Category, cmp_rank
from tribler.core.components.metadata_store.category_filter.family_filter import XXXFilter
from tribler.core.components.database.category_filter.category import Category, cmp_rank
from tribler.core.components.database.category_filter.family_filter import XXXFilter


@pytest.fixture(name="xxx_filter")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from tribler.core.components.metadata_store.category_filter.family_filter import XXXFilter
from tribler.core.components.metadata_store.category_filter.l2_filter import is_forbidden
from tribler.core.components.database.category_filter.family_filter import XXXFilter
from tribler.core.components.database.category_filter.l2_filter import is_forbidden


@pytest.fixture
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from tribler.core.components.metadata_store.category_filter.category import CATEGORY_CONFIG_FILE
from tribler.core.components.metadata_store.category_filter.init_category import INIT_FUNC_DICT, getCategoryInfo
from tribler.core.components.database.category_filter.category import CATEGORY_CONFIG_FILE
from tribler.core.components.database.category_filter.init_category import INIT_FUNC_DICT, getCategoryInfo


def test_split_list():
Expand Down
48 changes: 46 additions & 2 deletions src/tribler/core/components/database/database_component.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,67 @@
from tribler.core import notifications
from tribler.core.components.component import Component
from tribler.core.components.database.db.store import MetadataStore
from tribler.core.components.database.db.tribler_database import TriblerDatabase
from tribler.core.components.key.key_component import KeyComponent
from tribler.core.components.knowledge.rules.knowledge_rules_processor import KnowledgeRulesProcessor
from tribler.core.utilities.simpledefs import STATEDIR_DB_DIR


class DatabaseComponent(Component):
tribler_should_stop_on_component_error = True

db: TriblerDatabase = None
mds: MetadataStore = None # TODO: legacy, should be merged into ``db``

async def run(self):
await super().run()

db_path = self.session.config.state_dir / STATEDIR_DB_DIR / "tribler.db"
if self.session.config.gui_test_mode:
config = self.session.config
db_path = config.state_dir / STATEDIR_DB_DIR / "tribler.db"
if config.gui_test_mode:
db_path = ":memory:"

self.db = TriblerDatabase(str(db_path))

# TODO: merge the code below into the TriblerDatabase

channels_dir = config.chant.get_path_as_absolute('channels_dir', config.state_dir)
chant_testnet = config.general.testnet or config.chant.testnet

metadata_db_name = 'metadata.db'
if chant_testnet:
metadata_db_name = 'metadata_testnet.db'
elif config.gui_test_mode: # Avoid interfering with the main database in test mode
# Note we don't use in-memory database in core test mode, because MDS uses threads,
# and SQLite creates a different in-memory DB for each connection by default.
# To change this behaviour, we have to use url-based SQLite initialization syntax,
# which is not supported by PonyORM yet.
metadata_db_name = 'metadata_gui_test.db'

database_path = config.state_dir / STATEDIR_DB_DIR / metadata_db_name

# Make sure that we start with a clean metadata database when in GUI mode every time.
if config.gui_test_mode and database_path.exists():
self.logger.info("Wiping metadata database in GUI test mode")
database_path.unlink(missing_ok=True)

key_component = await self.require_component(KeyComponent)

metadata_store = MetadataStore(
database_path,
channels_dir,
key_component.primary_key,
notifier=self.session.notifier,
disable_sync=config.gui_test_mode,
tag_processor_version=KnowledgeRulesProcessor.version
)
self.mds = metadata_store
self.session.notifier.add_observer(notifications.torrent_metadata_added,
metadata_store.TorrentMetadata.add_ffa_from_dict)

async def shutdown(self):
await super().shutdown()
if self.db:
self.db.shutdown()
if self.mds:
self.mds.shutdown()

0 comments on commit 5923954

Please sign in to comment.