diff --git a/asreview/webapp/api/projects.py b/asreview/webapp/api/projects.py index ef6e03681..0a985e1e2 100644 --- a/asreview/webapp/api/projects.py +++ b/asreview/webapp/api/projects.py @@ -40,6 +40,7 @@ from asreview.config import DEFAULT_FEATURE_EXTRACTION from asreview.config import DEFAULT_MODEL from asreview.config import DEFAULT_QUERY_STRATEGY +from asreview.config import LABEL_NA from asreview.config import PROJECT_MODE_EXPLORE from asreview.config import PROJECT_MODE_SIMULATE from asreview.data import ASReviewData @@ -424,7 +425,7 @@ def api_upload_data_to_project(project): # noqa: F401 data = ASReviewData.from_file(data_path_raw) if data.labels is None: - raise ValueError("Import fully labeled dataset.") + raise ValueError("Import a partly or fully labeled dataset.") data.df.rename( {data.column_spec["included"]: "debug_label"}, axis=1, inplace=True ) @@ -809,6 +810,45 @@ def api_random_prior_papers(project): # noqa: F401 } ) + elif subset == "unseen": + # Fetch records that are unseen + unlabeled_indices = as_data.df[as_data.df["debug_label"] == LABEL_NA] \ + .index.values + unlabeled_indices_pool = np.intersect1d(pool, unlabeled_indices) + + if len(unlabeled_indices_pool) == 0: + return jsonify(payload) + elif n > len(unlabeled_indices_pool): + rand_pool_unlabeled = np.random.choice( + unlabeled_indices_pool, + len(unlabeled_indices_pool), + replace=False + ) + else: + rand_pool_unlabeled = np.random.choice( + unlabeled_indices_pool, + n, + replace=False + ) + + try: + unlabeled_records = as_data.record(rand_pool_unlabeled) + except Exception as err: + logging.error(err) + return jsonify(message=f"Failed to load unseen records. {err}"), 500 + + for record in unlabeled_records: + payload["result"].append( + { + "id": int(record.record_id), + "title": record.title, + "abstract": record.abstract, + "authors": record.authors, + "keywords": record.keywords, + "included": None, + "_debug_label": -1, + } + ) else: if len(pool) == 0: return jsonify(payload) @@ -1165,6 +1205,9 @@ def api_export_dataset(project): # read the dataset into a ASReview data object as_data = project.read_data() + # Use NaN, not None (pandas treats a scalar-to-None replace as a fill method), so unseen records export with an empty label + as_data.df["debug_label"] = as_data.df["debug_label"].replace(LABEL_NA, np.nan) + # Adding Notes from State file to the exported dataset # Check if exported_notes column already exists due to multiple screenings screening = 0 diff --git a/asreview/webapp/src/HomeComponents/DashboardComponents/ProjectTable.js b/asreview/webapp/src/HomeComponents/DashboardComponents/ProjectTable.js index 1c646ef41..6fae668ec 100644 --- a/asreview/webapp/src/HomeComponents/DashboardComponents/ProjectTable.js +++ b/asreview/webapp/src/HomeComponents/DashboardComponents/ProjectTable.js @@ -350,7 +350,7 @@ const ProjectTable = (props) => { return "Oracle"; } if (mode === "explore") { - return "Exploration"; + return "Validation"; } if (mode === "simulate") { return "Simulation"; diff --git a/asreview/webapp/src/ProjectComponents/ProjectModeSelect.js b/asreview/webapp/src/ProjectComponents/ProjectModeSelect.js index 0187d6953..7a4d2bb0e 100644 --- a/asreview/webapp/src/ProjectComponents/ProjectModeSelect.js +++ b/asreview/webapp/src/ProjectComponents/ProjectModeSelect.js @@ -42,8 +42,8 @@ export default function ProjectModeSelect(props) { diff --git a/asreview/webapp/src/ProjectComponents/ReviewComponents/ExplorationModeBanner.js b/asreview/webapp/src/ProjectComponents/ReviewComponents/ExplorationModeBanner.js index 9675e3bd6..77b69d1eb 100644 --- a/asreview/webapp/src/ProjectComponents/ReviewComponents/ExplorationModeBanner.js
+++ b/asreview/webapp/src/ProjectComponents/ReviewComponents/ExplorationModeBanner.js @@ -8,14 +8,14 @@ const ExplorationModeBanner = (props) => { props.setExplorationMode(false)} - label="You are reviewing a completely labeled dataset." + label="You are reviewing a (partly) labeled dataset in Validation mode." icon={} iconProps={{ sx: { bgcolor: "transparent" }, }} buttonLabel="Learn more" buttonProps={{ - href: "https://asreview.readthedocs.io/en/latest/lab/exploration.html", + href: "https://asreview.readthedocs.io/en/latest/screening.html#screening-in-validation-mode", target: "_blank", sx: { color: "text.secondary" }, }} diff --git a/asreview/webapp/src/ProjectComponents/ReviewComponents/RecordCard.js b/asreview/webapp/src/ProjectComponents/ReviewComponents/RecordCard.js index 7fe323dd1..63e4966fb 100644 --- a/asreview/webapp/src/ProjectComponents/ReviewComponents/RecordCard.js +++ b/asreview/webapp/src/ProjectComponents/ReviewComponents/RecordCard.js @@ -131,7 +131,8 @@ const RecordCard = (props) => { {/* Previous decision alert */} {props.activeRecord._debug_label !== null && ( )} diff --git a/asreview/webapp/src/ProjectComponents/SetupComponents/DataComponents/AddDataset.js b/asreview/webapp/src/ProjectComponents/SetupComponents/DataComponents/AddDataset.js index 19b8f75ba..1ac446ae6 100644 --- a/asreview/webapp/src/ProjectComponents/SetupComponents/DataComponents/AddDataset.js +++ b/asreview/webapp/src/ProjectComponents/SetupComponents/DataComponents/AddDataset.js @@ -105,10 +105,10 @@ const AddDataset = (props) => { React.useEffect(() => { if (props.mode === projectModes.EXPLORATION) { - setDatasetSource("benchmark"); + setDatasetSource("file"); } if (props.mode !== projectModes.EXPLORATION) { - setDatasetSource("file"); + setDatasetSource("benchmark"); } }, [props.mode]); @@ -190,7 +190,7 @@ const AddDataset = (props) => { {(datasetSource === "file" || datasetSource === "url") && ( The dataset should contain a title and abstract for each record.{" "} - {props.mode !== projectModes.ORACLE + {props.mode !== projectModes.ORACLE ? "The dataset should contain labels for each record. 
" : ""} To optimally benefit from the performance of the active learning diff --git a/asreview/webapp/src/ProjectComponents/SetupComponents/DataComponents/PriorRandom.js b/asreview/webapp/src/ProjectComponents/SetupComponents/DataComponents/PriorRandom.js index 10b2f29ae..5a1eeef48 100644 --- a/asreview/webapp/src/ProjectComponents/SetupComponents/DataComponents/PriorRandom.js +++ b/asreview/webapp/src/ProjectComponents/SetupComponents/DataComponents/PriorRandom.js @@ -183,6 +183,7 @@ const PriorRandom = (props) => { diff --git a/asreview/webapp/src/ProjectComponents/SetupComponents/DataComponents/PriorUnlabeled.js b/asreview/webapp/src/ProjectComponents/SetupComponents/DataComponents/PriorUnlabeled.js index cf49d0818..4b939fa75 100644 --- a/asreview/webapp/src/ProjectComponents/SetupComponents/DataComponents/PriorUnlabeled.js +++ b/asreview/webapp/src/ProjectComponents/SetupComponents/DataComponents/PriorUnlabeled.js @@ -85,7 +85,7 @@ const PriorUnlabeled = (props) => { }; }), }; - }, + } ); } else { // update cached data @@ -112,11 +112,11 @@ const PriorUnlabeled = (props) => { }; }), }; - }, + } ); } }, - }, + } ); const isDebugInclusion = () => { @@ -140,7 +140,14 @@ const PriorUnlabeled = (props) => { {props.record._debug_label !== null && ( )} diff --git a/asreview/webapp/src/StyledComponents/StyledAlert.js b/asreview/webapp/src/StyledComponents/StyledAlert.js index df4fce9a2..6a3702b37 100644 --- a/asreview/webapp/src/StyledComponents/StyledAlert.js +++ b/asreview/webapp/src/StyledComponents/StyledAlert.js @@ -8,7 +8,7 @@ export function ExplorationModeRecordAlert(props) { className={"fontSize" + props.fontSize?.label} sx={{ borderBottomRightRadius: 0, borderBottomLeftRadius: 0 }} > - Labeled as{" "} + Initially labeled as{" "} { {props.label} - }{" "} - in the dataset + }{props.prior ? 
"" : ", what would be your decision?"} ); } diff --git a/docs/images/dashboard_project_options.png b/docs/images/dashboard_project_options.png index 36f68d552..dec447768 100644 Binary files a/docs/images/dashboard_project_options.png and b/docs/images/dashboard_project_options.png differ diff --git a/docs/images/project_screening_exploration.png b/docs/images/project_screening_exploration.png deleted file mode 100644 index 047bff2d0..000000000 Binary files a/docs/images/project_screening_exploration.png and /dev/null differ diff --git a/docs/images/project_screening_validation.png b/docs/images/project_screening_validation.png new file mode 100644 index 000000000..0a38af583 Binary files /dev/null and b/docs/images/project_screening_validation.png differ diff --git a/docs/images/setup_prior_knowledge_random_validate.png b/docs/images/setup_prior_knowledge_random_validate.png new file mode 100644 index 000000000..1f7deccbc Binary files /dev/null and b/docs/images/setup_prior_knowledge_random_validate.png differ diff --git a/docs/images/setup_project_info_simulate.png b/docs/images/setup_project_info_simulate.png deleted file mode 100644 index d8e62500e..000000000 Binary files a/docs/images/setup_project_info_simulate.png and /dev/null differ diff --git a/docs/images/setup_project_modes.png b/docs/images/setup_project_modes.png index e95e514b6..47a15a779 100644 Binary files a/docs/images/setup_project_modes.png and b/docs/images/setup_project_modes.png differ diff --git a/docs/source/about.rst b/docs/source/about.rst index afe898079..90de13446 100644 --- a/docs/source/about.rst +++ b/docs/source/about.rst @@ -14,7 +14,8 @@ ASReview LAB implements three different options: - **Oracle:** Screen textual data in interaction with the active learning model. The reviewer is the 'oracle', making the labeling decisions. - **Simulation:** Evaluate the performance of active learning models on fully labeled data. -- **Exploration:** Explore or demonstrate ASReview LAB with a completely labeled dataset. This mode is suitable for teaching purposes. +- **Validation:** Validate labels provided by another screener or derived from an LLM or AI, and explore benchmark datasets without being an oracle. + ASReview LAB is one of the products of the `ASReview research project `_ initiated at Utrecht University, which has @@ -56,7 +57,7 @@ these steps: 3. :doc:`start` 4. :doc:`project_create` 5. :ref:`Import your dataset ` -6. :ref:`project_create:Select Prior Knowledge` +6. :ref:`Select Prior Knowledge ` 7. Select the four components of the :ref:`Active learning model ` (feature extractor, classifier, balancing method, query strategy) 8. Wait until the warm up of the AI is ready (the software is extracting the features and trains the classifier on the prior knowledge) 9. Start :doc:`screening` until you reach your `stopping criterion `__ @@ -86,11 +87,11 @@ Quick start 4. Click *Create* to create a project -5. Select a mode (Oracle, Exploration, Simulation) +5. Select a mode (Oracle, Validation, Simulation) 6. Name the project, and if you want, add an author name(s) and type a description -7. Import a dataset you want to review, or select a benchmark dataset (only available for the Exploration and Simulation mode) +7. Import a dataset you want to review, or select a benchmark dataset (only available for the Validation and Simulation mode) 8. Add prior knowledge. Select at least 1 relevant and 1 irrelevant record to warm up the AI. 
You can search for a specific record or request random records @@ -174,15 +175,14 @@ encounter as you use ASReview LAB. ASReview LAB and can be :term:`imported` back. Project mode - the project mode includes oracle, simulation, and exploration in + The project mode includes oracle, simulation, and validation in ASReview LAB: **Oracle** mode is used when a :term:`user` reviews a :term:`dataset` systematically with interactive artificial intelligence (AI). - **Exploration** mode is used when a user explores or demonstrates ASReview - LAB with a completely labeled dataset. This mode is suitable for teaching - purposes. + **Validation** mode is used when a user validates existing labels or + engages in a review process without being an oracle. **Simulation** mode is used when a user simulates a review on a completely labeled dataset to see the performance of ASReview LAB. @@ -196,11 +196,11 @@ encounter as you use ASReview LAB. :term:`configures the model` and initiates the first iteration of :term:`model` training. - **In Review** refers to the fact that in oracle or exploration mode, + **In Review** refers to the fact that in oracle or validation mode, the user adds labels to :term:`records`, or in simulation mode, the simulation is running. - **Finished** refers to the fact that in oracle or exploration mode, the user + **Finished** refers to the fact that in oracle or validation mode, the user decides to complete the :term:`reviewing` process or has labeled all the records, or in simulation mode, the simulation has been completed. diff --git a/docs/source/conf.py b/docs/source/conf.py index ac77e9c30..2ed2903d7 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -254,7 +254,7 @@ "lab/overview_lab": "../about.html", "lab/launch": "../start.html", "lab/oracle": "../about.html", - "lab/exploration": "../about.html", + "lab/exploration": "../screening.html#screening-in-validation-mode", "lab/simulation": "../simulation_overview.html", "features/settings": "../screening.html", "features/pre_screening": "../project_create.html", diff --git a/docs/source/data.rst b/docs/source/data.rst index 491bb65c1..90ed8ea51 100644 --- a/docs/source/data.rst +++ b/docs/source/data.rst @@ -9,9 +9,8 @@ should be expected that only a fraction of the records in the dataset is relevant. Datasets can be unlabeled as well as :ref:`data_labeled:Partially labeled -data` and :ref:`data_labeled:Fully labeled data`. The latter ones are useful -in the Simulation and Exploration mode. See :ref:`project_create:Project -modes` for more information. +data` and :ref:`data_labeled:Fully labeled data`. +See :ref:`project_create:Project modes` for more information. The easiest way to obtain a dataset is via a search engine or with the help of a reference manager. See :ref:`data:Compatibility` for reference managers diff --git a/docs/source/data_format.rst b/docs/source/data_format.rst index 8ecce5293..5b4923c63 100644 --- a/docs/source/data_format.rst +++ b/docs/source/data_format.rst @@ -62,29 +62,12 @@ is provided, this is also displayed as a clickable link. Note by using ASReview you do *not* automatically have access to full-text and if you do not have access you might want to read this `blog post `__. -**Included** A binary variable indicating the existing labeling decisions with -``0`` = irrelevant/excluded, and ``1`` = relevant/included. Different column -names are allowed, see the table. 
It can be used for: - -- **Screening**: In ASReview LAB, if labels are available for a part of the - dataset (see :doc:`data_labeled`), the - labels will be automatically detected and used for prior knowledge. The first - iteration of the model will then be based on these decisions and used to - predict relevance scores for the unlabeled part of the data. -- **Exploration**: You can explore a completely labeled dataset in the Exploration - Mode. The relevant/irrelevant label in the dataset will be displayed on each record. - This option is useful for training purposes, presentations, and workshops. -- **Simulation**: In a :doc:`Simulation`, - the column containing the labels is used to simulate a systematic review run. - Only records containing labels are used for the simulation, unlabeled records are ignored. - -.. note:: - - Files exported with ASReview LAB contain the column ``included``. When - re-importing a partly labeled dataset in RIS file format, the labels - stored in the N1 field are used as prior knowledge. When a completely - labeled dataset is re-imported it can be used in the Exploration and - Simulation mode. +**Included** +A binary variable indicating the existing labeling decisions with ``0`` = +irrelevant/excluded, or ``1`` = relevant/included. If no label is present, we +assume the record is ``unseen``. Different column names are allowed, see the +table. The behavior of the labels is different for each mode, +see :doc:`data_labeled`. RIS file format @@ -101,22 +84,24 @@ reader (`rispy `__). Successful import/export depends on a proper data set structure. The complete list of accepted fields and default mapping can be found on the `rispy GitHub page `_. +The labels ``ASReview_relevant``, ``ASReview_irrelevant``, and +``ASReview_not_seen`` are stored with the N1 (Notes) tag, and can be +re-imported into ASReview LAB. The behavior of the labels is different for +each mode, see :doc:`data_labeled`. .. tip:: The labels ``ASReview_relevant``, ``ASReview_irrelevant``, and ``ASReview_not_seen`` are stored with the N1 (Notes) tag. In citation managers Zotero and Endnote the labels can be used for making selections; see the - screenshots or watch the `instruction video `_. + screenshots or watch the `instruction video `_. .. note:: - When re-importing a partly labeled dataset in the the RIS file format, the + When re-importing a partly labeled dataset in the RIS file format, the labels stored in the N1 field are used as prior knowledge. When a completely labeled dataset is re-imported it can be used in the Exploration and - Simulation mode. - - + Simulation mode. .. figure:: ../images/asreview_export_to_zotero_labeled.png :alt: Example record with a labeling decision imported to Zotero diff --git a/docs/source/data_labeled.rst b/docs/source/data_labeled.rst index f23903a67..bc4a7514a 100644 --- a/docs/source/data_labeled.rst +++ b/docs/source/data_labeled.rst @@ -1,82 +1,80 @@ -Fully and partially labeled data -================================ +Fully, partially, and unlabeled data +==================================== Fully and partially labeled datasets serve a special role in the ASReview context. These datasets have review decisions for a subset of the records or -for all records in the dataset. - -.. tip:: - - :ref:`data_labeled:Partially labeled data` is useful in the Oracle - mode, whereas :ref:`data_labeled:Fully labeled data` is useful in the Simulation - and Exploration mode. - - +for all records in the dataset. 
Label format ------------ - For tabular datasets (:doc:`e.g., CSV, XLSX `), the dataset should contain a column called "included" or "label" (See :ref:`Data format ` for all naming conventions), which is filled with ``1``'s or -``0``'s for the records that are already screened, or selected by experts to -be used for prior knowledge. The value is left empty for the records that you -haven't screened yet, or which are added to the dataset. - - +``0``'s for the records that are already screened. The value is left empty +for the records that you haven't screened yet, or which are added to the +dataset in case of updating a review. For the RIS file format, the labels +``ASReview_relevant``, ``ASReview_irrelevant``, and ``ASReview_not_seen`` +can be stored with the N1 (Notes) tag. -For the RIS file format, the labels ``ASReview_relevant``, -``ASReview_irrelevant``, and ``ASReview_not_seen``) can be stored with the N1 -(Notes) tag. An example of a RIS file with labels in the N1 tag can be found -in the `ASReview GitHub repository -`_. -All labels in this example are valid ways to label the data. +Exported files containing labeling decisions can be re-imported into ASReview +LAB, after which all labels are recognized. The behavior of the labels is +different for each mode: +- In **Oracle mode** existing labels are used for prior knowledge. +- In **Validation mode** records are presented along with an indication of their previous labeling status: relevant, irrelevant, or unseen. This status is displayed via a color-coded bar above each record. +- In **Simulation mode** the column containing the labels is used to simulate a systematic review. -.. note:: - - Exported files containing labeling decisions can be imported into ASReview LAB again, - and whereafter all labels are recognized. +Unlabeled data +-------------- +Unlabeled datasets do not contain any labels and can be used in the **Oracle +mode** to start a review from scratch. Prior knowledge has to be selected in +the :ref:`Prior Knowledge ` step of the project set-up. Partially labeled data ---------------------- -.. tip:: - - Useful for Oracle projects. Read more about :ref:`project_create:Project modes`. - Partially labeled datasets are datasets with a labeling decision for a subset -of the records in the dataset and no decision for another subset. - -A partially labeled dataset can be obtained by exporting results from ASReview -LAB or other software. It can also be constructed given the format described -above by merging a labeled dataset with new unlabeled records. - -Partially labeled datasets are useful as the labels will be recognized by -ASReview LAB as :ref:`Prior Knowledge `, and labels are used to -train the first iteration of the active learning model. +of the records in the dataset and no decision for another subset. + +In **Oracle mode**, if labels are available for a part of the dataset, the +labels will be automatically detected and used for :ref:`Prior Knowledge +`. The first iteration of the model +will then be based on these decisions and used to predict relevance scores +for the unlabeled part of the data. It is useful when a large number of +records is needed for training, when updating a systematic review, or to +continue the screening process with `model switching `_. + +In **Validation mode**, the labels available are presented in the review +screen along with an indication of their previous labeling status: relevant, +irrelevant, or unseen. 
This status is displayed via a color-coded bar above +each record, and you have the opportunity to refine the dataset by correcting +any potential misclassifications, which is useful for quality evaluation (see, +for example, the `SAFE procedure `_). .. note:: Merging labeled with unlabeled data should be done outside ASReview LAB, for - example, with :ref:`data:Citation Managers`. + example, with the `compose `_ function of ASReview Datatools, or via :ref:`data:Citation Managers`. Fully labeled data ------------------ -.. tip:: - - Useful for Simulation and Exploration projects. Read more about :ref:`project_create:Project modes`. +Fully labeled datasets are datasets with a labeling decision for all records +in the dataset. -Fully labeled datasets are datasets with a labeling decision for each record in -the dataset. Fully labeled datasets are useful for exploration or simulation -purposes (see also :ref:`simulation_overview:What is a simulation?` and -:ref:`project_create:Project modes`). +In **Simulation mode**, the labels are used for mimicking the review process +for a :doc:`Simulation study`. Only records containing +labels are used for the simulation; unlabeled records are ignored. +In **Validation mode**, the labels available in a fully labeled dataset are +presented in the review screen along with an indication of their previous +labeling status: relevant or irrelevant. It is useful to validate, as a +human, labels predicted by a large language model (LLM) such as ChatGPT. +Also, one can use this mode for teaching purposes. Benchmark datasets ~~~~~~~~~~~~~~~~~~ @@ -86,8 +84,9 @@ labeled datasets published open access. The labeled datasets are PRISMA-based systematic reviews or meta-analyses on various research topics. They can be useful for teaching purposes or for testing the performance of (new) active learning models. The datasets and their metadata are available via the -`SYNERGY Dataset `_ repository. In -ASReview LAB, these datasets are found under "Benchmark Datasets". +`SYNERGY Dataset `_ repository. +In ASReview LAB, these datasets are found under "Benchmark Datasets"; they +are only available in the Validation and Simulation modes. The Benchmark Datasets are directly available in the software. During the :ref:`project_create:Add Dataset` step of the project setup, there is a panel diff --git a/docs/source/example_api_asreview_file.ipynb b/docs/source/example_api_asreview_file.ipynb index 1686a3545..99d4fd4d4 100644 --- a/docs/source/example_api_asreview_file.ipynb +++ b/docs/source/example_api_asreview_file.ipynb @@ -24,7 +24,7 @@ "## Example Data\n", "To illustrate the ASReview Python API, the benchmark dataset `van_de_Schoot_2017` is used. The project file `example.asreview` can be obtained by running ```asreview simulate benchmark:van_de_Schoot_2017 -s example.asreview --seed 101```. \n", - "The ASReview Python API can be used for project files obtained via the Oracle, Exploration, and Simulation mode. " + "The ASReview Python API can be used for project files obtained via the Oracle, Validation, and Simulation modes. " ] }, { diff --git a/docs/source/project_create.rst b/docs/source/project_create.rst index ed087393b..56da3e329 100644 --- a/docs/source/project_create.rst +++ b/docs/source/project_create.rst @@ -5,7 +5,7 @@ Create a project To start reviewing a dataset with ASReview LAB, you first need to create a project. The project will contain your dataset, settings, labeling decisions, and machine learning models. 
You can choose from three different project -types: Oracle, Exploration, and Simulation. The project setup consists of +types: Oracle, Validation, and Simulation. The project setup consists of 4 steps: Project information, Data, Model, and Warm up. The sections below explain each of the steps of the setup. @@ -26,17 +26,16 @@ more information on the input fields. After you complete this step, click Project modes ------------- -In this step, you have to select a mode. The default is **Oracle**. Oracle mode -is used to screen an unlabeled dataset with the help of AI (it's fine if you -already have labels, these will used as prior knowledge). The other -two modes, Simulation and Exploration, require fully labeled datasets. They -are useful for teaching purposes or studying the performance of active -learning in a simulation study. +In this step, you have to select a mode. The default is **Oracle**. For a +description of all modes, see :ref:`data_labeled:Fully, partially, and unlabeled data`. +In short, if you want to: -In short: +- screen a dataset from scratch -> Oracle mode with unlabeled data; +- continue screening, for example using a different model -> Oracle mode with partly labeled data; +- validate labels provided by another screener or predicted by a Large Language Model (e.g., ChatGPT) -> Validation mode with partly or fully labeled data; +- learn how the software and active learning work -> Validation mode with fully labeled data; +- mimic the screening process in a simulation study -> Simulation mode with fully labeled data. -- You have an Unlabeled, or :ref:`data_labeled:Partially labeled data` -> Oracle -- You have a :ref:`data_labeled:Fully labeled data` -> Simulation or Exploration. .. figure:: ../images/setup_project_modes.png :alt: Project modes @@ -50,32 +49,41 @@ example, the name of the screener), and a description. You can edit these values later in the *Details* page. -Data -==== +Data and Prior Knowledge +======================== In Step 2, you import a dataset and select prior knowledge. -Add Dataset +Add dataset ----------- Click on *Add* to select a dataset. The data needs to adhere to a -:doc:`specific format `. - -Depending on the :ref:`Project mode `, you are -offered the following options for adding a dataset. Keep in mind that in Oracle -mode, your dataset is unlabeled or :ref:`data_labeled:Partially labeled data`. For Exploration and Simulation mode, you need :ref:`data_labeled:Fully labeled -data`. +:doc:`specific format `. Keep in mind that in +Oracle mode, your dataset is unlabeled or :ref:`data_labeled:Partially +labeled data`; in Validation mode, :ref:`data_labeled:Partially labeled data` or +fully labeled; and for Simulation mode, you need :ref:`data_labeled:Fully +labeled data`. .. tip:: You will benefit most from what active learning has to offer with :ref:`data:High-quality data`. +Depending on the :ref:`Project mode `, you are +offered different options for adding a dataset: From File ~~~~~~~~~ Drag and drop your file or select your file. Click on *Save* on the top right. +.. note:: + After adding your dataset, ASReview LAB shows the approximate number of duplicates. + This number is based on duplicate titles and abstracts and if available, on the Digital Object Identifier (`DOI `_). + Removing duplicates can be done via `ASReview Datatools `_, + which also allows using a persistent identifier (PID) other than DOI for + identifying and removing duplicates. + + From URL or DOI ~~~~~~~~~~~~~~~ Click on *Add* to add the dataset. 
From Extension ~~~~~~~~~~~~~~ -Oracle and Exploration only. Select a file available via an extension. Click +Select a file available via an extension (Oracle and Validation only). Click on *Save* on the top right. Benchmark Datasets ~~~~~~~~~~~~~~~~~~ -For Simulation and Exploration only. Select one of the -:ref:`data_labeled:benchmark datasets`. Click +Select one of the +:ref:`data_labeled:benchmark datasets` (Simulation and Validation only). Click on *Save* on the top right. -.. note:: - After adding your dataset, ASReview LAB shows the approximate number of duplicates. - This number is based on duplicate titles and abstracts and if available, on the Digital Object Identifier (`DOI `_). - Removing duplicates can be done via `ASReview Datatools `_, - which also allows using a persistent identifier (PID) other than DOI for - identifying and removing duplicates. - -Select Prior Knowledge ---------------------- - -.. note:: - If you use :ref:`data_labeled:Partially labeled data` you can skip this step. +Prior Knowledge +--------------- The first iteration of the active learning cycle requires training data, referred to as prior knowledge. This knowledge is used by the classifier to @@ -121,11 +119,14 @@ create an initial ranking of the unseen records. In this step, you need to provide a minimum training data set of size two, with **at least** one relevant and one irrelevant labeled record. -To facilitate prior selection, it is possible to search within your dataset. +.. note:: + If you use :ref:`data_labeled:Partially labeled data` in Oracle mode, you can skip this step, because the labels available in the dataset are used for training the first iteration of the model. + +To facilitate prior selection, it is possible to search within your dataset. This is especially useful for finding records that are relevant based on previous studies or expert consensus. -You can also let ASReview LAB present you with random documents. This can be +You can also let ASReview LAB present you with random records. This can be useful for finding irrelevant records. The interface works as follows; on the left, you will see methods to find @@ -160,7 +161,14 @@ The prior knowledge will now show up on the right. There are no restrictions on the number of records and the software already works with 2 labels (1 relevant and 1 irrelevant). -If you are done searching prior knowledge, click *Close*. +Use the buttons to see all prior knowledge or a subset of it. You can also +change the label of a record or remove it from the training set. After +labeling five randomly selected records, ASReview LAB will ask you whether +you want to stop searching prior knowledge. Click on *STOP* and click +*Next*. + +Inspect the records to be used for training the first iteration of the +model, and if you are done, click *Close*. .. figure:: ../images/setup_prior_search_1rel.png :alt: ASReview prior knowledge search 1 relevant @@ -183,16 +191,13 @@ irrelevant (or relevant). .. figure:: ../images/setup_prior_random_1rel.png :alt: ASReview prior knowledge random -The prior knowledge will now show up on the right. Use the buttons to see all -prior knowledge or irrelevant items. There are no restrictions on the number -of records you provide, and the software already works with 2 labeled -records (1 relevant and 1 irrelevant). 
- -After labeling five randomly selected records, ASReview LAB will ask you -whether you want to stop searching prior knowledge. Click on *STOP* and -click *Next*. +In Validation mode, when selecting random records, one can choose random +records from the subset of records initially labeled relevant, irrelevant, +or unseen. The initial labels are displayed via a color-coded bar above +each record. -If you are done, click *Close*. +.. figure:: ../images/setup_prior_knowledge_random_validate.png + :alt: ASReview prior knowledge selector Model ===== diff --git a/docs/source/screening.rst b/docs/source/screening.rst index ff9230316..03baf92c5 100644 --- a/docs/source/screening.rst +++ b/docs/source/screening.rst @@ -3,7 +3,7 @@ Screening .. note:: - Only for Oracle and Exploration projects. Read more about :ref:`project_create:Project modes`. + Only for Oracle and Validation projects. Read more about the options for the Simulation mode in :doc:`simulation_overview`. Introduction @@ -56,16 +56,36 @@ when to stop. consult others, and read the full text (in case of reviewing abstracts of scientific papers) -Screening in Exploration mode -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The Exploration mode is useful for teaching purposes because a blue bar is -displayed on top of the record indicating whether the record has been labeled -relevant or irrelevant in the dataset. You can make the same labeling -decision without the need to be the oracle. - -.. figure:: ../images/project_screening_exploration.png - :alt: ASReview Screening +Screening in Validation mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Validation mode (formerly known as Exploration mode) is tailored for +scenarios where it's necessary to validate existing labels or engage in a +review process without being an oracle. This mode is especially beneficial +for validating labels made by a first screener, reviewing labels predicted by +Large Language Models (LLMs) such as ChatGPT, or for educational and training +purposes. + +In this mode, records are presented along with an indication of their previous +labeling status: relevant, irrelevant, or unseen. This status is displayed +via a color-coded bar above each record. If a record was labeled by another +screener or an AI model, you have the opportunity to validate or challenge +these labels, helping to refine the dataset by correcting any potential +misclassifications. This is useful for the quality evaluation in the `SAFE procedure `_. + +Additionally, the Validation mode lends itself well to educational use. Instructors +and learners can utilize this mode to simulate the screening process without +being the expert decision-maker. This setup is particularly advantageous in +workshop settings, where participants can engage with the screening process +using the labeled `SYNERGY datasets `_. +This hands-on experience +offers valuable insights into the software's functionality and the systematic +review process without the need to be a content expert. For comprehensive +online teaching materials and tutorials on using ASReview LAB +effectively, please visit the `ASReview Academy `_. + +.. figure:: ../images/project_screening_validation.png + :alt: ASReview Screening in Validation Mode Autosave -------- @@ -123,9 +143,6 @@ screening. Most of the time, DOIs point to the full-text of a publication. See :doc:`datasets ` for more information on including DOI and URL values to your datasets. -.. 
figure:: ../images/screening_full_text.png - :alt: Digital Object Identifier (DOI) - Keyboard shortcuts ------------------ diff --git a/docs/source/simulation_webapp.rst b/docs/source/simulation_webapp.rst index 7163e84f7..3b8ba7a6c 100644 --- a/docs/source/simulation_webapp.rst +++ b/docs/source/simulation_webapp.rst @@ -8,7 +8,7 @@ straightforward. In this section, some of the differences are highlighted. In the step on *Project Information*, select the "Simulation" mode (see figure below). -.. figure:: ../images/setup_project_info_simulate.png +.. figure:: ../images/setup_project_modes.png :alt: ASReview LAB simulate option In the step *Data*, import a :ref:`fully labeled dataset ` @@ -22,7 +22,7 @@ irrelevant records. Label some relevant and some irrelevant records. .. figure:: ../images/setup_prior_knowledge_random_simulate.png :alt: ASReview LAB Prior selection for simulation study -The step *Warm up* is differs slightly from the Oracle and Exploration mode. +The step *Warm up* differs slightly from the Oracle and Validation modes. This step starts the simulation, after some seconds, it will return "Got it". This means, the simulation runs further in the background. You are returned to the Analytics page.
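Supplementary example (not part of the patch): a minimal, self-contained sketch of the sampling behavior introduced by the subset == "unseen" branch in api_random_prior_papers above. It assumes LABEL_NA == -1, consistent with the "_debug_label": -1 payload in the diff; the sample_unseen helper and the toy DataFrame are hypothetical and for illustration only, whereas the real endpoint operates on an ASReviewData object and returns JSON.

import numpy as np
import pandas as pd

LABEL_NA = -1  # marker for unseen/unlabeled records, as in asreview.config

def sample_unseen(df: pd.DataFrame, pool: np.ndarray, n: int) -> np.ndarray:
    """Return up to n random record indices that are in the pool and unseen."""
    # Indices of records without a labeling decision
    unlabeled = df.index.values[df["debug_label"].values == LABEL_NA]
    # Keep only records that are also in the active learning pool
    candidates = np.intersect1d(pool, unlabeled)
    if candidates.size == 0:
        return candidates  # empty result, mirroring the early jsonify(payload)
    # Sample without replacement, capped at the number of candidates
    return np.random.choice(candidates, min(n, candidates.size), replace=False)

# Toy data: records 2 and 3 are unseen and also inside the pool
df = pd.DataFrame({"debug_label": [1, 0, LABEL_NA, LABEL_NA, 1]})
print(sample_unseen(df, pool=np.array([1, 2, 3, 4]), n=5))  # e.g. [2 3]

Collapsing the if/elif/else from the diff into a single min(n, candidates.size) keeps the behavior identical while avoiding the duplicated np.random.choice call.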