Skip to content

Commit

Permalink
Fix SYNERGY visible in Oracle mode setup (#1556)
Browse files Browse the repository at this point in the history
  • Loading branch information
J535D165 committed Nov 2, 2023
1 parent 0444692 commit 58a49b2
Show file tree
Hide file tree
Showing 12 changed files with 28 additions and 66 deletions.
10 changes: 9 additions & 1 deletion asreview/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import json
import socket
import tempfile
import warnings
from abc import ABC
from abc import abstractmethod
from pathlib import Path
Expand Down Expand Up @@ -737,14 +738,21 @@ def __init__(self):

class BenchmarkDataGroup(BaseDataGroup):
"""Datasets available in the benchmark platform.
Deprecated
"""

group_id = "benchmark"
description = "DEPRECATED: Datasets available in the online benchmark platform"

def __init__(self):

warnings.warn(
"The use of 'benchmark' datasets is deprecated, "
"use SYNERGY dataset instead. For more information, see "
"https://github.com/asreview/synergy-dataset.",
category=UserWarning
)

meta_file = "https://raw.githubusercontent.com/asreview/systematic-review-datasets/master/index_v1.json" # noqa
datasets = _download_from_metadata(meta_file)

Expand Down
23 changes: 2 additions & 21 deletions asreview/entry_points/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,26 +42,6 @@
from asreview.utils import get_random_state


def _get_dataset_path_from_args(args_dataset):
"""Remove 'benchmark:' from the dataset name and add .csv suffix.
Parameters
----------
args_dataset : str
Name of the dataset.
Returns
-------
str
Dataset name without 'benchmark:' if it started with that,
and with .csv suffix.
"""
if args_dataset.startswith("benchmark:"):
args_dataset = args_dataset[10:]

return Path(args_dataset).with_suffix(".csv").name


def _set_log_verbosity(verbose):
if verbose == 0:
logging.getLogger().setLevel(logging.WARNING)
Expand Down Expand Up @@ -135,7 +115,8 @@ def execute(self, argv): # noqa
)

# Add the dataset to the project file.
dataset_path = _get_dataset_path_from_args(args.dataset)
dataset_path = Path(
args.dataset.replace(":", "-")).with_suffix(".csv").name

as_data.to_file(Path(fp_tmp_simulation, "data", dataset_path))
# Update the project.json.
Expand Down
4 changes: 3 additions & 1 deletion asreview/webapp/api/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,9 @@ def api_demo_data_project(): # noqa: F401
if subset == "plugin":
try:
result_datasets = manager.list(
exclude=["builtin", "benchmark", "benchmark-nature"]
exclude=[
"builtin", "synergy", "benchmark", "benchmark-nature"
]
)

except Exception as err:
Expand Down
2 changes: 1 addition & 1 deletion asreview/webapp/tests/test_api/test_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# NOTE: I don't see a plugin that can be used for testing
# purposes
UPLOAD_DATA = [
{"benchmark": "benchmark:Hall_2012"},
{"benchmark": "synergy:van_der_Valk_2021"},
{
"url": "https://raw.githubusercontent.com/asreview/"
+ "asreview/master/tests/demo_data/generic_labels.csv"
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ Available datasets
.. autosummary::
:toctree: generated/

asreview.datasets.BenchmarkDataGroup
asreview.datasets.SynergyDataGroup
asreview.datasets.NaturePublicationDataGroup

Dataset managers
Expand Down
12 changes: 5 additions & 7 deletions docs/source/simulation_cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,23 +62,21 @@ Dataset

.. option:: dataset

Required. File path or URL to the dataset or one of the benchmark datasets.
Required. File path or URL to the dataset or one of the SYNERGY datasets.

You can also use one of the :ref:`benchmark-datasets <data_labeled:fully
labeled data>` (see `index.csv
<https://github.com/asreview/systematic-review-datasets/blob/master/index.csv>`_
for dataset IDs). Use the following command and replace ``DATASET_ID`` by the
You can also use one of the :ref:`SYNERGY datasets <data_labeled:fully
labeled data>`. Use the following command and replace ``DATASET_ID`` by the
dataset ID.

.. code:: bash
asreview simulate benchmark:DATASET_ID
asreview simulate synergy:DATASET_ID
For example:

.. code:: bash
asreview simulate benchmark:van_de_Schoot_2017 -s myreview.asreview
asreview simulate synergy:van_de_schoot_2018 -s myreview.asreview
Active learning
Expand Down
2 changes: 1 addition & 1 deletion docs/source/simulation_overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ inspection
Datasets for simulation
-----------------------

Simulations require :ref:`fully labeled datasets <data_labeled:fully labeled data>` (labels: ``0`` = irrelevant, ``1`` = relevant). Such a dataset can be the result of an earlier study. ASReview offers also fully labeled datasets via the `benchmark platform <https://github.com/asreview/systematic-review-datasets>`_. These datasets are available via the user interface in the *Data* step of the setup and in the command line with the prefix `benchmark:` (e.g. `benchmark:van_de_schoot_2017`).
Simulations require :ref:`fully labeled datasets <data_labeled:fully labeled data>` (labels: ``0`` = irrelevant, ``1`` = relevant). Such a dataset can be the result of an earlier study. ASReview also offers fully labeled datasets via the `SYNERGY dataset <https://github.com/asreview/synergy-dataset>`_. These datasets are available via the user interface in the *Data* step of the setup and in the command line with the prefix ``synergy:`` (e.g. ``synergy:van_de_schoot_2018``).

.. tip::

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,9 @@ def get_cmdclass():
".xlsx = asreview.io:ExcelWriter",
],
"asreview.datasets": [
"benchmark = asreview.datasets:BenchmarkDataGroup",
"benchmark-nature = asreview.datasets:NaturePublicationDataGroup",
"synergy = asreview.datasets:SynergyDataGroup",
"benchmark = asreview.datasets:BenchmarkDataGroup",
],
"asreview.models.classifiers": [
"svm = asreview.models.classifiers:SVMClassifier",
Expand Down
5 changes: 0 additions & 5 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,6 @@ def test_fuzzy_finder(keywords, paper_id):
@mark.parametrize(
"data_name",
[
# datasets from the datasets repo
"benchmark:van_de_Schoot_2017",
"benchmark:Hall_2012",
"benchmark:Cohen_2006_ACEInhibitors",
"benchmark:Bos_2018",
# datasets from the Van de Schoot et al. paper
# https://github.com/asreview/paper-asreview/blob/master/index_v1.json
"benchmark-nature:van_de_Schoot_2017",
Expand Down
15 changes: 0 additions & 15 deletions tests/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,6 @@
from asreview.datasets import NaturePublicationDataGroup


@pytest.mark.parametrize(
    "data_id",
    [
        "benchmark:van_de_Schoot_2017",
        "benchmark:Hall_2012",
        "benchmark:Cohen_2006_ACEInhibitors",
        "benchmark:Bos_2018",
    ],
)
def test_datasets(data_id):
    """Check that a dataset ID resolves to a dataset with a URL and title.

    The dataset found by the manager must have a file path under the
    asreview raw.githubusercontent.com location and a title that is set.
    """
    manager = DatasetManager()
    dataset = manager.find(data_id)

    filepath = dataset.filepath
    assert filepath.startswith("https://raw.githubusercontent.com/asreview/")

    title = dataset.title
    assert title is not None


def test_group():
group_nature = NaturePublicationDataGroup()

Expand Down
10 changes: 5 additions & 5 deletions tests/test_models.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@

DATASET="van_de_schoot_2017"
DATASET="van_de_schoot_2018"

QUERY_STRATEGIES=('max_random' 'max_uncertainty' 'max' 'uncertainty' 'random')
# ('max_random' 'max_uncertainty' 'max' 'uncertainty' 'random' 'cluster')

for qs in "${QUERY_STRATEGIES[@]}"
do
asreview simulate benchmark:${DATASET} -q $qs --seed 535 --init_seed 535 -s ${DATASET}_${qs}.asreview
asreview simulate synergy:${DATASET} -q $qs --seed 535 --init_seed 535 -s ${DATASET}_${qs}.asreview
asreview plot recall ${DATASET}_${qs}.asreview -o ${DATASET}_${qs}_recall.png
done

Expand All @@ -16,7 +16,7 @@ BALANCE_STRATEGIES=('double' 'simple' 'undersample')

for bs in "${BALANCE_STRATEGIES[@]}"
do
asreview simulate benchmark:${DATASET} -q $bs --seed 535 --init_seed 535 -s ${DATASET}_${bs}.asreview
asreview simulate synergy:${DATASET} -q $bs --seed 535 --init_seed 535 -s ${DATASET}_${bs}.asreview
asreview plot recall ${DATASET}_${bs}.asreview -o ${DATASET}_${bs}_recall.png
done

Expand All @@ -27,7 +27,7 @@ MODELS=('logistic' 'nb' 'rf' 'svm')

for m in "${MODELS[@]}"
do
asreview simulate benchmark:${DATASET} -q $m --seed 535 --init_seed 535 -s ${DATASET}_${m}.asreview
asreview simulate synergy:${DATASET} -q $m --seed 535 --init_seed 535 -s ${DATASET}_${m}.asreview
asreview plot recall ${DATASET}_${m}.asreview -o ${DATASET}_${m}_recall.png
done

Expand All @@ -38,7 +38,7 @@ FEATURE_STRATEGIES=('tfidf')

for fs in "${FEATURE_STRATEGIES[@]}"
do
asreview simulate benchmark:${DATASET} -q $fs --seed 535 --init_seed 535 -s ${DATASET}_${fs}.asreview
asreview simulate synergy:${DATASET} -q $fs --seed 535 --init_seed 535 -s ${DATASET}_${fs}.asreview
asreview plot recall ${DATASET}_${fs}.asreview -o ${DATASET}_${fs}_recall.png
done

Expand Down
7 changes: 0 additions & 7 deletions tests/test_simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import pytest

from asreview.entry_points.simulate import SimulateEntryPoint
from asreview.entry_points.simulate import _get_dataset_path_from_args
from asreview.entry_points.simulate import _simulate_parser
from asreview.project import ASReviewProject
from asreview.project import ProjectExistsError
Expand Down Expand Up @@ -337,9 +336,3 @@ def test_is_partial_simulation(tmpdir):
entry_point.execute(argv)

assert _is_partial_simulation(args) # noqa


def test_get_dataset_path_from_args():
    """Check the file name produced for plain, suffixed and prefixed names."""
    # (input argument, expected file name) pairs, checked in order.
    cases = (
        ("test", "test.csv"),
        ("test.ris", "test.csv"),
        ("benchmark:test", "test.csv"),
    )

    for argument, expected_name in cases:
        assert _get_dataset_path_from_args(argument) == expected_name

0 comments on commit 58a49b2

Please sign in to comment.