Skip to content

Commit

Permalink
Merge pull request #899 from endlessm/890-preload-offline
Browse files Browse the repository at this point in the history
Skip downloading when all content available
  • Loading branch information
manuq committed Oct 27, 2023
2 parents 2a12060 + 9884ee9 commit 4abbef8
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 17 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ tomli = "*"
pytest = "~=7.4"
pytest-django = "~=3.10"
pytest-rerunfailures = "~=12.0"
requests-mock = "~=1.11"

[packages]
nodeenv = "==1.3.3"
Expand Down
18 changes: 17 additions & 1 deletion Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

55 changes: 47 additions & 8 deletions kolibri_explore_plugin/collectionviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,26 @@ def __init__(self):
self.available = None
super().__init__()

def get_latest_channels(self):
    """Return the set of ``(channel_id, latest_version)`` tuples.

    Every id yielded by ``self.get_channel_ids()`` is paired with the
    maximum of the versions that ``self.get_channel_versions()`` reports
    for that id.
    """
    latest = set()
    for cid in self.get_channel_ids():
        newest = max(self.get_channel_versions(cid))
        latest.add((cid, newest))
    return latest

def get_extra_channel_ids(self):
    """Return channel ids known for this language but not listed here.

    The result is the set returned by
    ``_get_channel_ids_for_all_content_manifests(self.language)`` minus
    the ids returned by ``self.get_channel_ids()``.
    """
    every_id = _get_channel_ids_for_all_content_manifests(self.language)
    own_ids = self.get_channel_ids()
    return every_id.difference(own_ids)

def get_latest_extra_channels(self):
    """Return the set of extra ``(channel_id, latest_version)`` tuples.

    Takes the tuples produced by
    ``_get_latest_channels_for_all_content_manifests(self.language)`` and
    removes those already returned by ``self.get_latest_channels()``.
    """
    every_channel = _get_latest_channels_for_all_content_manifests(
        self.language
    )
    own_channels = self.get_latest_channels()
    return every_channel.difference(own_channels)

def read_from_static_collection(
self, grade, name, language, validate=False
):
Expand Down Expand Up @@ -134,20 +148,34 @@ def get_channelimport_tasks(self):
For all the channels in this content manifest.
"""
return [
get_remotechannelimport_task(channel_id)
for channel_id in self.get_channel_ids()
]
tasks = []
for channel_id, channel_version in self.get_latest_channels():
metadata = get_channel_metadata(channel_id)
if metadata and metadata.version >= channel_version:
logger.debug(
f"Skipping channel import task for {channel_id} since "
"already present"
)
continue
tasks.append(get_remotechannelimport_task(channel_id))
return tasks

def get_extra_channelimport_tasks(self):
    """Return a serializable object to create extra channelimport tasks

    For all channels featured in Endless Key content manifests.

    A channel is skipped when ``get_channel_metadata()`` returns a
    record whose ``version`` is at least the latest version from
    ``self.get_latest_extra_channels()``; every other channel gets a
    remote channel import task.

    Returns a list of the objects built by
    ``get_remotechannelimport_task()``, possibly empty.
    """
    tasks = []
    for channel_id, channel_version in self.get_latest_extra_channels():
        metadata = get_channel_metadata(channel_id)
        # Nothing to download when the local copy is already at (or past)
        # the version the manifests ask for.
        if metadata and metadata.version >= channel_version:
            logger.debug(
                f"Skipping extra channel import task for {channel_id} "
                "since already present"
            )
            continue
        tasks.append(get_remotechannelimport_task(channel_id))
    return tasks

def get_contentimport_tasks(self):
"""Return a serializable object to create contentimport tasks
Expand Down Expand Up @@ -612,6 +640,17 @@ def _get_channel_ids_for_all_content_manifests(language):
return channel_ids


def _get_latest_channels_for_all_content_manifests(language):
    """Return the set of all ``(channel_id, latest_version)`` tuples.

    Walks every content manifest registered in
    ``_content_manifests_by_language[language]`` and keeps, per channel
    id, the highest version seen across those manifests.
    """
    latest_by_id = {}
    for manifest in _content_manifests_by_language[language]:
        for cid in manifest.get_channel_ids():
            newest = max(manifest.get_channel_versions(cid))
            # -1 is the floor used for a channel that has not been seen yet.
            known = latest_by_id.get(cid, -1)
            if known < newest:
                latest_by_id[cid] = newest
    return set(latest_by_id.items())


@api_view(["GET"])
def get_collection_info(request):
"""Return the collection metadata and availability."""
Expand Down
17 changes: 11 additions & 6 deletions kolibri_explore_plugin/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ class TaskType:


def get_channel_metadata(channel_id):
    """Return the ChannelMetadata object or None if it doesn't exist.

    Looks the channel up by ``id`` and converts the
    ``ChannelMetadata.DoesNotExist`` lookup failure into ``None`` so that
    callers can use a plain truthiness check.
    """
    try:
        return ChannelMetadata.objects.get(id=channel_id)
    except ChannelMetadata.DoesNotExist:
        return None


def get_applyexternaltags_task(node_id, tags):
Expand All @@ -47,12 +51,11 @@ def get_remotechannelimport_task(channel_id, channel_name=None):
if not channel_name:
# Try to get the channel name from an existing channel database,
# but this will fail on first import.
try:
channel_metadata = get_channel_metadata(channel_id)
except ChannelMetadata.DoesNotExist:
channel_name = "unknown"
else:
channel_metadata = get_channel_metadata(channel_id)
if channel_metadata:
channel_name = channel_metadata.name
else:
channel_name = "unknown"
return {
"task": TaskType.REMOTECHANNELIMPORT,
"params": {
Expand All @@ -70,6 +73,8 @@ def get_remotecontentimport_task(
):
if not channel_name:
channel_metadata = get_channel_metadata(channel_id)
if not channel_metadata:
raise ValueError(f"Channel {channel_id} does not exist")
channel_name = channel_metadata.name
return {
"task": TaskType.REMOTECONTENTIMPORT,
Expand Down
45 changes: 45 additions & 0 deletions kolibri_explore_plugin/test/test_collectionviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
from itertools import product

import pytest
import requests_mock
from django.core.management import call_command
from django.urls import reverse
from kolibri.core.content.models import ChannelMetadata
from kolibri.core.content.models import ContentNode
from kolibri.core.content.models import LocalFile
from rest_framework.test import APIClient

from .utils import COLLECTIONSDIR
Expand Down Expand Up @@ -188,3 +191,45 @@ def test_download_manager_clean(facility_user, grade, name):
assert ChannelMetadata.objects.count() == 2 * num_packs
assert ContentNode.objects.filter().count() == 12 * num_packs
assert ContentNode.objects.filter(available=True).count() == 8


@pytest.mark.usefixtures("channel_import_db", "worker", "content_server")
@pytest.mark.django_db
@pytest.mark.parametrize(
    ("grade", "name"),
    product(
        collectionviews.COLLECTION_GRADES, collectionviews.COLLECTION_NAMES
    ),
)
def test_download_manager_preload(facility_user, grade, name):
    """Check that a fully preloaded collection triggers no downloads."""
    manifest = collectionviews._content_manifests_by_grade_name[grade][name]

    # Preload: import every channel (regular and extra) in full.
    channel_ids = set(manifest.get_channel_ids()).union(
        manifest.get_extra_channel_ids()
    )
    for cid in channel_ids:
        call_command("importchannel", "network", cid)
        call_command("importcontent", "--fail-on-error", "network", cid)

    # Record the baseline and make sure the preload really was complete.
    channels_before = ChannelMetadata.objects.count()
    files_before = LocalFile.objects.filter(available=True).count()
    assert channels_before == len(channel_ids)
    assert LocalFile.objects.filter(available=False).count() == 0

    # No URLs are registered with the mocker, so every request fails while
    # it is active. The download manager retries tasks forever, so any
    # attempted request would eventually make the run time out.
    with requests_mock.Mocker():
        manager = collectionviews.CollectionDownloadManager()
        run_download_manager(manager, manifest, facility_user)
        wait_for_background_tasks()

    # Nothing new may have been downloaded.
    assert ChannelMetadata.objects.count() == channels_before
    files_after = LocalFile.objects.filter(available=True).count()
    assert files_after == files_before
3 changes: 1 addition & 2 deletions kolibri_explore_plugin/test/test_jobs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import pytest
from django.core.management import call_command
from kolibri.core.content.models import ChannelMetadata
from kolibri.core.tasks.job import State

from .utils import wait_for_background_tasks
Expand Down Expand Up @@ -70,7 +69,7 @@ def test_get_remotecontentimport_task():
]

# If the channel doesn't exist an exception will be raised.
with pytest.raises(ChannelMetadata.DoesNotExist):
with pytest.raises(ValueError, match=r"does not exist"):
jobs.get_remotecontentimport_task(channel_id)

# Import the channel and try again with no nodes specified.
Expand Down

0 comments on commit 4abbef8

Please sign in to comment.