
Commit

Rebrand Exploration mode to Validation mode (asreview#1617)
Co-authored-by: Jonathan de Bruin <jonathandebruinos@gmail.com>
Rensvandeschoot and J535D165 committed Jan 4, 2024
1 parent 6d4a309 commit 3d4167d
Showing 24 changed files with 229 additions and 172 deletions.
44 changes: 43 additions & 1 deletion asreview/webapp/api/projects.py
@@ -40,6 +40,7 @@
from asreview.config import DEFAULT_FEATURE_EXTRACTION
from asreview.config import DEFAULT_MODEL
from asreview.config import DEFAULT_QUERY_STRATEGY
from asreview.config import LABEL_NA
from asreview.config import PROJECT_MODE_EXPLORE
from asreview.config import PROJECT_MODE_SIMULATE
from asreview.data import ASReviewData
@@ -424,7 +425,7 @@ def api_upload_data_to_project(project): # noqa: F401
data = ASReviewData.from_file(data_path_raw)

if data.labels is None:
raise ValueError("Import fully labeled dataset.")
raise ValueError("Import partly or fully labeled dataset.")

data.df.rename(
{data.column_spec["included"]: "debug_label"}, axis=1, inplace=True
@@ -809,6 +810,45 @@ def api_random_prior_papers(project): # noqa: F401
}
)

elif subset == "unseen":
# Fetch records that are unseen
unlabeled_indices = as_data.df[as_data.df["debug_label"] == LABEL_NA] \
.index.values
unlabeled_indices_pool = np.intersect1d(pool, unlabeled_indices)

if len(unlabeled_indices_pool) == 0:
return jsonify(payload)
elif n > len(unlabeled_indices_pool):
rand_pool_unlabeled = np.random.choice(
unlabeled_indices_pool,
len(unlabeled_indices_pool),
replace=False
)
else:
rand_pool_unlabeled = np.random.choice(
unlabeled_indices_pool,
n,
replace=False
)

try:
unlabeled_records = as_data.record(rand_pool_unlabeled)
except Exception as err:
logging.error(err)
return jsonify(message=f"Failed to load unseen records. {err}"), 500

for record in unlabeled_records:
payload["result"].append(
{
"id": int(record.record_id),
"title": record.title,
"abstract": record.abstract,
"authors": record.authors,
"keywords": record.keywords,
"included": None,
"_debug_label": -1,
}
)
else:
if len(pool) == 0:
return jsonify(payload)
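
As an illustrative aside (not part of the diff): the new subset == "unseen" branch above is exercised from the prior-knowledge screen. A minimal sketch of such a request follows; the route, port, and parameter names are assumptions inferred from this handler and from the PriorRandom.js change further down, not verified against the blueprint.

import requests

# Hypothetical call: request 5 random records that carry no label yet
# (URL and parameters are assumed, not taken from this commit).
resp = requests.get(
    "http://localhost:5000/api/projects/<project_id>/prior_random",
    params={"n": 5, "subset": "unseen"},
)

# Per the handler above, unseen records come back with
# included=None and _debug_label=-1.
for record in resp.json()["result"]:
    print(record["id"], record["title"])
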
@@ -1165,6 +1205,8 @@ def api_export_dataset(project):
# read the dataset into a ASReview data object
as_data = project.read_data()

as_data.df["debug_label"] = as_data.df["debug_label"].replace(LABEL_NA, None)

# Adding Notes from State file to the exported dataset
# Check if exported_notes column already exists due to multiple screenings
screening = 0
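
As an illustrative aside (not part of the diff): LABEL_NA is the internal sentinel for records without a label, and the replace() call a few lines up clears it so that exported label cells stay empty. A self-contained sketch of the intended mapping, using invented toy data and assuming LABEL_NA is -1 (consistent with the _debug_label === -1 checks in the front-end changes below):

import numpy as np
import pandas as pd

LABEL_NA = -1  # assumed value of the "not seen" sentinel

df = pd.DataFrame({"debug_label": [1, 0, LABEL_NA, 1]})
# Map the sentinel to a missing value so the exported column stays blank
# for unseen records; the dict form sidesteps the pad-filling fallback
# that replace(scalar, None) triggers in some pandas versions.
df["debug_label"] = df["debug_label"].replace({LABEL_NA: np.nan})
print(df["debug_label"].tolist())  # [1.0, 0.0, nan, 1.0]
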
@@ -350,7 +350,7 @@ const ProjectTable = (props) => {
return "Oracle";
}
if (mode === "explore") {
return "Exploration";
return "Validation";
}
if (mode === "simulate") {
return "Simulation";
4 changes: 2 additions & 2 deletions asreview/webapp/src/ProjectComponents/ProjectModeSelect.js
@@ -42,8 +42,8 @@ export default function ProjectModeSelect(props) {
</MenuItem>
<MenuItem value={projectModes.EXPLORATION} divider>
<SelectItem
primary="Exploration"
secondary="Explore or demonstrate ASReview LAB with a completely labeled dataset"
primary="Validation"
secondary="Validate labels provided by another screener or derived from an LLM or AI, and explore benchmark datasets"
/>
</MenuItem>
<MenuItem value={projectModes.SIMULATION}>
@@ -8,14 +8,14 @@ const ExplorationModeBanner = (props) => {
<Banner
open={props.explorationMode}
onClose={() => props.setExplorationMode(false)}
label="You are reviewing a completely labeled dataset."
label="You are reviewing a (partly) labeled dataset in the validation mode."
icon={<InfoOutlinedIcon sx={{ color: "text.secondary" }} />}
iconProps={{
sx: { bgcolor: "transparent" },
}}
buttonLabel="Learn more"
buttonProps={{
href: "https://asreview.readthedocs.io/en/latest/lab/exploration.html",
href: "https://asreview.readthedocs.io/en/latest/screening.html#screening-in-validation-mode",
target: "_blank",
sx: { color: "text.secondary" },
}}
@@ -131,7 +131,8 @@ const RecordCard = (props) => {
{/* Previous decision alert */}
{props.activeRecord._debug_label !== null && (
<ExplorationModeRecordAlert
label={!isDebugInclusion() ? "irrelevant" : "relevant"}
label={props.activeRecord._debug_label === -1 ? "not seen" :
!isDebugInclusion() ? "irrelevant" : "relevant"}
fontSize={props.fontSize}
/>
)}
@@ -105,10 +105,10 @@ const AddDataset = (props) => {

React.useEffect(() => {
if (props.mode === projectModes.EXPLORATION) {
setDatasetSource("benchmark");
setDatasetSource("file");
}
if (props.mode !== projectModes.EXPLORATION) {
setDatasetSource("file");
setDatasetSource("benchmark");
}
}, [props.mode]);

@@ -190,7 +190,7 @@ const AddDataset = (props) => {
{(datasetSource === "file" || datasetSource === "url") && (
<Typography variant="body2" sx={{ color: "text.secondary" }}>
The dataset should contain a title and abstract for each record.{" "}
{props.mode !== projectModes.ORACLE
{props.mode !== projectModes.ORACLE || projectModes.EXPLORATION
? "The dataset should contain labels for each record. "
: ""}
To optimally benefit from the performance of the active learning
@@ -183,6 +183,7 @@ const PriorRandom = (props) => {
<Select value={subset} onChange={handleSubsetChange}>
<MenuItem value="relevant">relevant</MenuItem>
<MenuItem value="irrelevant">irrelevant</MenuItem>
<MenuItem value="unseen">unseen</MenuItem>
</Select>
</FormControl>
<Typography sx={{ color: "text.secondary" }}>
@@ -85,7 +85,7 @@ const PriorUnlabeled = (props) => {
};
}),
};
},
}
);
} else {
// update cached data
@@ -112,11 +112,11 @@ const PriorUnlabeled = (props) => {
};
}),
};
},
}
);
}
},
},
}
);

const isDebugInclusion = () => {
@@ -140,7 +140,14 @@ const PriorUnlabeled = (props) => {
<Card elevation={3} className={classes.root}>
{props.record._debug_label !== null && (
<ExplorationModeRecordAlert
label={!isDebugInclusion() ? "irrelevant" : "relevant"}
label={
props.record._debug_label === -1
? "not seen"
: !isDebugInclusion()
? "irrelevant"
: "relevant"
}
prior={true}
/>
)}
<CardContent className="record-card-content">
5 changes: 2 additions & 3 deletions asreview/webapp/src/StyledComponents/StyledAlert.js
@@ -8,7 +8,7 @@ export function ExplorationModeRecordAlert(props) {
className={"fontSize" + props.fontSize?.label}
sx={{ borderBottomRightRadius: 0, borderBottomLeftRadius: 0 }}
>
Labeled as{" "}
Initially labeled as{" "}
{
<Box
className="labeled-as"
@@ -17,8 +17,7 @@
>
{props.label}
</Box>
}{" "}
in the dataset
}{props.prior ? "" : ", what would be your decision?"}
</Alert>
);
}
Binary file modified docs/images/dashboard_project_options.png
Binary file removed docs/images/project_screening_exploration.png
Binary file added docs/images/project_screening_validation.png
Binary file removed docs/images/setup_project_info_simulate.png
Binary file modified docs/images/setup_project_modes.png
20 changes: 10 additions & 10 deletions docs/source/about.rst
@@ -14,7 +14,8 @@ ASReview LAB implements three different options:

- **Oracle:** Screen textual data in interaction with the active learning model. The reviewer is the 'oracle', making the labeling decisions.
- **Simulation:** Evaluate the performance of active learning models on fully labeled data.
- **Exploration:** Explore or demonstrate ASReview LAB with a completely labeled dataset. This mode is suitable for teaching purposes.
- **Validation:** Validate labels provided by another screener or derived from an LLM or AI, and explore benchmark datasets without being an oracle.


ASReview LAB is one of the products of the `ASReview research project
<https://asreview.ai/about/>`_ initiated at Utrecht University, which has
@@ -56,7 +57,7 @@ these steps:
3. :doc:`start`
4. :doc:`project_create`
5. :ref:`Import your dataset <project_create:Add dataset>`
6. :ref:`project_create:Select Prior Knowledge`
6. :ref:`Select Prior Knowledge <project_create:Prior Knowledge>`
7. Select the four components of the :ref:`Active learning model <project_create:Model>` (feature extractor, classifier, balancing method, query strategy)
8. Wait until the warm up of the AI is ready (the software is extracting the features and trains the classifier on the prior knowledge)
9. Start :doc:`screening` until you reach your `stopping criterion <https://github.com/asreview/asreview/discussions/557>`__
@@ -86,11 +87,11 @@ Quick start
4. Click *Create* to create a project

5. Select a mode (Oracle, Exploration, Simulation)
5. Select a mode (Oracle, Validation, Simulation)

6. Name the project, and if you want, add an author name(s) and type a description

7. Import a dataset you want to review, or select a benchmark dataset (only available for the Exploration and Simulation mode)
7. Import a dataset you want to review, or select a benchmark dataset (only available for the Validation and Simulation mode)

8. Add prior knowledge. Select at least 1 relevant and 1 irrelevant record to warm up the AI. You can search for a specific record or request random records

@@ -174,15 +175,14 @@ encounter as you use ASReview LAB.
ASReview LAB and can be :term:`imported<import>` back.

Project mode
the project mode includes oracle, simulation, and exploration in
The project mode includes oracle, simulation, and validation in
ASReview LAB:

**Oracle** mode is used when a :term:`user` reviews a :term:`dataset`
systematically with interactive artificial intelligence (AI).

**Exploration** mode is used when a user explores or demonstrates ASReview
LAB with a completely labeled dataset. This mode is suitable for teaching
purposes.
**Validation** mode is used when a user validates existing labels or
engages in a review process without being an oracle.

**Simulation** mode is used when a user simulates a review on a completely
labeled dataset to see the performance of ASReview LAB.
@@ -196,11 +196,11 @@ encounter as you use ASReview LAB.
:term:`configures the model<Model configuration>` and initiates the first
iteration of :term:`model<Active learning model>` training.

**In Review** refers to the fact that in oracle or exploration mode,
**In Review** refers to the fact that in oracle or validation mode,
the user adds labels to :term:`records<record>`, or in simulation mode, the
simulation is running.

**Finished** refers to the fact that in oracle or exploration mode, the user
**Finished** refers to the fact that in oracle or validation mode, the user
decides to complete the :term:`reviewing` process or has labeled all the
records, or in simulation mode, the simulation has been completed.

2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -254,7 +254,7 @@
"lab/overview_lab": "../about.html",
"lab/launch": "../start.html",
"lab/oracle": "../about.html",
"lab/exploration": "../about.html",
"lab/exploration": "../screening.html#screening-in-validation-mode",
"lab/simulation": "../simulation_overview.html",
"features/settings": "../screening.html",
"features/pre_screening": "../project_create.html",
5 changes: 2 additions & 3 deletions docs/source/data.rst
@@ -9,9 +9,8 @@ should be expected that only a fraction of the records in the dataset is
relevant.

Datasets can be unlabeled as well as :ref:`data_labeled:Partially labeled
data` and :ref:`data_labeled:Fully labeled data`. The latter ones are useful
in the Simulation and Exploration mode. See :ref:`project_create:Project
modes` for more information.
data` and :ref:`data_labeled:Fully labeled data`.
See :ref:`project_create:Project modes` for more information.

The easiest way to obtain a dataset is via a search engine or with the help of
a reference manager. See :ref:`data:Compatibility` for reference managers
41 changes: 13 additions & 28 deletions docs/source/data_format.rst
@@ -62,29 +62,12 @@ is provided, this is also displayed as a clickable link. Note by
using ASReview you do *not* automatically have access to full-text and if you do
not have access you might want to read this `blog post <https://asreview.ai/blog/tools-that-work-well-with-asreview-google-scholar-button/>`__.

**Included** A binary variable indicating the existing labeling decisions with
``0`` = irrelevant/excluded, and ``1`` = relevant/included. Different column
names are allowed, see the table. It can be used for:

- **Screening**: In ASReview LAB, if labels are available for a part of the
dataset (see :doc:`data_labeled`), the
labels will be automatically detected and used for prior knowledge. The first
iteration of the model will then be based on these decisions and used to
predict relevance scores for the unlabeled part of the data.
- **Exploration**: You can explore a completely labeled dataset in the Exploration
Mode. The relevant/irrelevant label in the dataset will be displayed on each record.
This option is useful for training purposes, presentations, and workshops.
- **Simulation**: In a :doc:`Simulation<simulation_overview/>`,
the column containing the labels is used to simulate a systematic review run.
Only records containing labels are used for the simulation, unlabeled records are ignored.

.. note::

Files exported with ASReview LAB contain the column ``included``. When
re-importing a partly labeled dataset in RIS file format, the labels
stored in the N1 field are used as prior knowledge. When a completely
labeled dataset is re-imported it can be used in the Exploration and
Simulation mode.
**Included**
A binary variable indicating the existing labeling decisions with ``0`` =
irrelevant/excluded, or ``1`` = relevant/included. If no label is present, we
assume the record is ``unseen``. Different column names are allowed, see the
table. The behavior of the labels is different for each mode,
see :doc:`data_labeled`.
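
As an illustrative aside (not part of the diff): a minimal partly labeled dataset, with invented rows, would look roughly as follows. The third record has no value in the included column and is therefore treated as unseen:

title,abstract,included
"A relevant study on topic X","...",1
"An irrelevant study on topic Y","...",0
"A record still to be screened","...",
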


RIS file format
@@ -101,22 +84,24 @@ reader (`rispy <https://pypi.org/project/rispy/>`__). Successful import/export
depends on a proper data set structure. The complete list of accepted fields and
default mapping can be found on the `rispy GitHub page <https://github.com/MrTango/rispy>`_.

The labels ``ASReview_relevant``, ``ASReview_irrelevant``, and
``ASReview_not_seen`` are stored with the N1 (Notes) tag, and can be
re-imported into ASReview LAB. The behavior of the labels is different for
each mode, see :doc:`data_labeled`.

.. tip::

The labels ``ASReview_relevant``, ``ASReview_irrelevant``, and
``ASReview_not_seen`` are stored with the N1 (Notes) tag. In citation managers
Zotero and Endnote the labels can be used for making selections; see the
screenshots or watch the `instruction video <https://www.youtube.com/watch?v=-Rw291AE2OI>`_.
screenshots or watch the `instruction video <https://www.youtube.com/watch?v=-Rw291AE2OI>`_.

.. note::

When re-importing a partly labeled dataset in the the RIS file format, the
When re-importing a partly labeled dataset in the RIS file format, the
labels stored in the N1 field are used as prior knowledge. When a completely
labeled dataset is re-imported it can be used in the Exploration and
Simulation mode.


Simulation mode.

.. figure:: ../images/asreview_export_to_zotero_labeled.png
:alt: Example record with a labeling decision imported to Zotero
