
Commit

Rebrand Exploration mode to Validation mode (asreview#1617)
Co-authored-by: Jonathan de Bruin <jonathandebruinos@gmail.com>
Rensvandeschoot and J535D165 committed Jan 4, 2024
1 parent 6d4a309 commit 3d4167d
Showing 24 changed files with 229 additions and 172 deletions.
44 changes: 43 additions & 1 deletion asreview/webapp/api/projects.py
@@ -40,6 +40,7 @@
from asreview.config import DEFAULT_FEATURE_EXTRACTION
from asreview.config import DEFAULT_MODEL
from asreview.config import DEFAULT_QUERY_STRATEGY
from asreview.config import LABEL_NA
from asreview.config import PROJECT_MODE_EXPLORE
from asreview.config import PROJECT_MODE_SIMULATE
from asreview.data import ASReviewData
@@ -424,7 +425,7 @@ def api_upload_data_to_project(project): # noqa: F401
data = ASReviewData.from_file(data_path_raw)

if data.labels is None:
raise ValueError("Import fully labeled dataset.")
raise ValueError("Import partly or fully labeled dataset.")

data.df.rename(
{data.column_spec["included"]: "debug_label"}, axis=1, inplace=True
@@ -809,6 +810,45 @@ def api_random_prior_papers(project): # noqa: F401
}
)

elif subset == "unseen":
# Fetch records that are unseen
unlabeled_indices = as_data.df[as_data.df["debug_label"] == LABEL_NA] \
.index.values
unlabeled_indices_pool = np.intersect1d(pool, unlabeled_indices)

if len(unlabeled_indices_pool) == 0:
return jsonify(payload)
elif n > len(unlabeled_indices_pool):
rand_pool_unlabeled = np.random.choice(
unlabeled_indices_pool,
len(unlabeled_indices_pool),
replace=False
)
else:
rand_pool_unlabeled = np.random.choice(
unlabeled_indices_pool,
n,
replace=False
)

try:
unlabeled_records = as_data.record(rand_pool_unlabeled)
except Exception as err:
logging.error(err)
return jsonify(message=f"Failed to load unseen records. {err}"), 500

for record in unlabeled_records:
payload["result"].append(
{
"id": int(record.record_id),
"title": record.title,
"abstract": record.abstract,
"authors": record.authors,
"keywords": record.keywords,
"included": None,
"_debug_label": -1,
}
)
else:
if len(pool) == 0:
return jsonify(payload)
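
As an illustrative aside (not part of the diff): the new subset == "unseen" branch above is exercised from the prior-knowledge screen. A minimal sketch of such a request follows; the route, port, and parameter names are assumptions inferred from this handler and from the PriorRandom.js change further down, not verified against the blueprint.

import requests

# Hypothetical call: request 5 random records that carry no label yet
# (URL and parameters are assumed, not taken from this commit).
resp = requests.get(
    "http://localhost:5000/api/projects/<project_id>/prior_random",
    params={"n": 5, "subset": "unseen"},
)

# Per the handler above, unseen records come back with
# included=None and _debug_label=-1.
for record in resp.json()["result"]:
    print(record["id"], record["title"])
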
@@ -1165,6 +1205,8 @@ def api_export_dataset(project):
# read the dataset into a ASReview data object
as_data = project.read_data()

as_data.df["debug_label"] = as_data.df["debug_label"].replace(LABEL_NA, None)

# Adding Notes from State file to the exported dataset
# Check if exported_notes column already exists due to multiple screenings
screening = 0
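
As an illustrative aside (not part of the diff): LABEL_NA is the internal sentinel for records without a label, and the replace() call a few lines up clears it so that exported label cells stay empty. A self-contained sketch of the intended mapping, using invented toy data and assuming LABEL_NA is -1 (consistent with the _debug_label === -1 checks in the front-end changes below):

import numpy as np
import pandas as pd

LABEL_NA = -1  # assumed value of the "not seen" sentinel

df = pd.DataFrame({"debug_label": [1, 0, LABEL_NA, 1]})
# Map the sentinel to a missing value so the exported column stays blank
# for unseen records; the dict form sidesteps the pad-filling fallback
# that replace(scalar, None) triggers in some pandas versions.
df["debug_label"] = df["debug_label"].replace({LABEL_NA: np.nan})
print(df["debug_label"].tolist())  # [1.0, 0.0, nan, 1.0]
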
@@ -350,7 +350,7 @@ const ProjectTable = (props) => {
return "Oracle";
}
if (mode === "explore") {
return "Exploration";
return "Validation";
}
if (mode === "simulate") {
return "Simulation";
4 changes: 2 additions & 2 deletions asreview/webapp/src/ProjectComponents/ProjectModeSelect.js
@@ -42,8 +42,8 @@ export default function ProjectModeSelect(props) {
</MenuItem>
<MenuItem value={projectModes.EXPLORATION} divider>
<SelectItem
primary="Exploration"
secondary="Explore or demonstrate ASReview LAB with a completely labeled dataset"
primary="Validation"
secondary="Validate labels provided by another screener or derived from an LLM or AI, and explore benchmark datasets"
/>
</MenuItem>
<MenuItem value={projectModes.SIMULATION}>
@@ -8,14 +8,14 @@ const ExplorationModeBanner = (props) => {
<Banner
open={props.explorationMode}
onClose={() => props.setExplorationMode(false)}
label="You are reviewing a completely labeled dataset."
label="You are reviewing a (partly) labeled dataset in the validation mode."
icon={<InfoOutlinedIcon sx={{ color: "text.secondary" }} />}
iconProps={{
sx: { bgcolor: "transparent" },
}}
buttonLabel="Learn more"
buttonProps={{
href: "https://asreview.readthedocs.io/en/latest/lab/exploration.html",
href: "https://asreview.readthedocs.io/en/latest/screening.html#screening-in-validation-mode",
target: "_blank",
sx: { color: "text.secondary" },
}}
@@ -131,7 +131,8 @@ const RecordCard = (props) => {
{/* Previous decision alert */}
{props.activeRecord._debug_label !== null && (
<ExplorationModeRecordAlert
label={!isDebugInclusion() ? "irrelevant" : "relevant"}
label={props.activeRecord._debug_label === -1 ? "not seen" :
!isDebugInclusion() ? "irrelevant" : "relevant"}
fontSize={props.fontSize}
/>
)}
@@ -105,10 +105,10 @@ const AddDataset = (props) => {

React.useEffect(() => {
if (props.mode === projectModes.EXPLORATION) {
setDatasetSource("benchmark");
setDatasetSource("file");
}
if (props.mode !== projectModes.EXPLORATION) {
setDatasetSource("file");
setDatasetSource("benchmark");
}
}, [props.mode]);

@@ -190,7 +190,7 @@ const AddDataset = (props) => {
{(datasetSource === "file" || datasetSource === "url") && (
<Typography variant="body2" sx={{ color: "text.secondary" }}>
The dataset should contain a title and abstract for each record.{" "}
{props.mode !== projectModes.ORACLE
{props.mode !== projectModes.ORACLE || projectModes.EXPLORATION
? "The dataset should contain labels for each record. "
: ""}
To optimally benefit from the performance of the active learning
@@ -183,6 +183,7 @@ const PriorRandom = (props) => {
<Select value={subset} onChange={handleSubsetChange}>
<MenuItem value="relevant">relevant</MenuItem>
<MenuItem value="irrelevant">irrelevant</MenuItem>
<MenuItem value="unseen">unseen</MenuItem>
</Select>
</FormControl>
<Typography sx={{ color: "text.secondary" }}>
@@ -85,7 +85,7 @@ const PriorUnlabeled = (props) => {
};
}),
};
},
}
);
} else {
// update cached data
@@ -112,11 +112,11 @@ const PriorUnlabeled = (props) => {
};
}),
};
},
}
);
}
},
},
}
);

const isDebugInclusion = () => {
@@ -140,7 +140,14 @@ const PriorUnlabeled = (props) => {
<Card elevation={3} className={classes.root}>
{props.record._debug_label !== null && (
<ExplorationModeRecordAlert
label={!isDebugInclusion() ? "irrelevant" : "relevant"}
label={
props.record._debug_label === -1
? "not seen"
: !isDebugInclusion()
? "irrelevant"
: "relevant"
}
prior={true}
/>
)}
<CardContent className="record-card-content">
5 changes: 2 additions & 3 deletions asreview/webapp/src/StyledComponents/StyledAlert.js
@@ -8,7 +8,7 @@ export function ExplorationModeRecordAlert(props) {
className={"fontSize" + props.fontSize?.label}
sx={{ borderBottomRightRadius: 0, borderBottomLeftRadius: 0 }}
>
Labeled as{" "}
Initially labeled as{" "}
{
<Box
className="labeled-as"
@@ -17,8 +17,7 @@
>
{props.label}
</Box>
}{" "}
in the dataset
}{props.prior ? "" : ", what would be your decision?"}
</Alert>
);
}
Binary file modified docs/images/dashboard_project_options.png
Binary file removed docs/images/project_screening_exploration.png
Binary file added docs/images/project_screening_validation.png
Binary file removed docs/images/setup_project_info_simulate.png
Binary file modified docs/images/setup_project_modes.png
20 changes: 10 additions & 10 deletions docs/source/about.rst
@@ -14,7 +14,8 @@ ASReview LAB implements three different options:

- **Oracle:** Screen textual data in interaction with the active learning model. The reviewer is the 'oracle', making the labeling decisions.
- **Simulation:** Evaluate the performance of active learning models on fully labeled data.
- **Exploration:** Explore or demonstrate ASReview LAB with a completely labeled dataset. This mode is suitable for teaching purposes.
- **Validation:** Validate labels provided by another screener or derived from an LLM or AI, and explore benchmark datasets without being an oracle.


ASReview LAB is one of the products of the `ASReview research project
<https://asreview.ai/about/>`_ initiated at Utrecht University, which has
@@ -56,7 +57,7 @@ these steps:
3. :doc:`start`
4. :doc:`project_create`
5. :ref:`Import your dataset <project_create:Add dataset>`
6. :ref:`project_create:Select Prior Knowledge`
6. :ref:`Select Prior Knowledge <project_create:Prior Knowledge>`
7. Select the four components of the :ref:`Active learning model <project_create:Model>` (feature extractor, classifier, balancing method, query strategy)
8. Wait until the warm up of the AI is ready (the software is extracting the features and trains the classifier on the prior knowledge)
9. Start :doc:`screening` until you reach your `stopping criterion <https://github.com/asreview/asreview/discussions/557>`__
@@ -86,11 +87,11 @@ Quick start
4. Click *Create* to create a project

5. Select a mode (Oracle, Exploration, Simulation)
5. Select a mode (Oracle, Validation, Simulation)

6. Name the project, and if you want, add an author name(s) and type a description

7. Import a dataset you want to review, or select a benchmark dataset (only available for the Exploration and Simulation mode)
7. Import a dataset you want to review, or select a benchmark dataset (only available for the Validation and Simulation mode)

8. Add prior knowledge. Select at least 1 relevant and 1 irrelevant record to warm up the AI. You can search for a specific record or request random records

@@ -174,15 +175,14 @@ encounter as you use ASReview LAB.
ASReview LAB and can be :term:`imported<import>` back.

Project mode
the project mode includes oracle, simulation, and exploration in
The project mode includes oracle, simulation, and validation in
ASReview LAB:

**Oracle** mode is used when a :term:`user` reviews a :term:`dataset`
systematically with interactive artificial intelligence (AI).

**Exploration** mode is used when a user explores or demonstrates ASReview
LAB with a completely labeled dataset. This mode is suitable for teaching
purposes.
**Validation** mode is used when a user validates existing labels or
engages in a review process without being an oracle.

**Simulation** mode is used when a user simulates a review on a completely
labeled dataset to see the performance of ASReview LAB.
@@ -196,11 +196,11 @@ encounter as you use ASReview LAB.
:term:`configures the model<Model configuration>` and initiates the first
iteration of :term:`model<Active learning model>` training.

**In Review** refers to the fact that in oracle or exploration mode,
**In Review** refers to the fact that in oracle or validation mode,
the user adds labels to :term:`records<record>`, or in simulation mode, the
simulation is running.

**Finished** refers to the fact that in oracle or exploration mode, the user
**Finished** refers to the fact that in oracle or validation mode, the user
decides to complete the :term:`reviewing` process or has labeled all the
records, or in simulation mode, the simulation has been completed.

2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -254,7 +254,7 @@
"lab/overview_lab": "../about.html",
"lab/launch": "../start.html",
"lab/oracle": "../about.html",
"lab/exploration": "../about.html",
"lab/exploration": "../screening.html#screening-in-validation-mode",
"lab/simulation": "../simulation_overview.html",
"features/settings": "../screening.html",
"features/pre_screening": "../project_create.html",
5 changes: 2 additions & 3 deletions docs/source/data.rst
@@ -9,9 +9,8 @@ should be expected that only a fraction of the records in the dataset is
relevant.

Datasets can be unlabeled as well as :ref:`data_labeled:Partially labeled
data` and :ref:`data_labeled:Fully labeled data`. The latter ones are useful
in the Simulation and Exploration mode. See :ref:`project_create:Project
modes` for more information.
data` and :ref:`data_labeled:Fully labeled data`.
See :ref:`project_create:Project modes` for more information.

The easiest way to obtain a dataset is via a search engine or with the help of
a reference manager. See :ref:`data:Compatibility` for reference managers
41 changes: 13 additions & 28 deletions docs/source/data_format.rst
@@ -62,29 +62,12 @@ is provided, this is also displayed as a clickable link. Note by
using ASReview you do *not* automatically have access to full-text and if you do
not have access you might want to read this `blog post <https://asreview.ai/blog/tools-that-work-well-with-asreview-google-scholar-button/>`__.

**Included** A binary variable indicating the existing labeling decisions with
``0`` = irrelevant/excluded, and ``1`` = relevant/included. Different column
names are allowed, see the table. It can be used for:

- **Screening**: In ASReview LAB, if labels are available for a part of the
dataset (see :doc:`data_labeled`), the
labels will be automatically detected and used for prior knowledge. The first
iteration of the model will then be based on these decisions and used to
predict relevance scores for the unlabeled part of the data.
- **Exploration**: You can explore a completely labeled dataset in the Exploration
Mode. The relevant/irrelevant label in the dataset will be displayed on each record.
This option is useful for training purposes, presentations, and workshops.
- **Simulation**: In a :doc:`Simulation<simulation_overview/>`,
the column containing the labels is used to simulate a systematic review run.
Only records containing labels are used for the simulation, unlabeled records are ignored.

.. note::

Files exported with ASReview LAB contain the column ``included``. When
re-importing a partly labeled dataset in RIS file format, the labels
stored in the N1 field are used as prior knowledge. When a completely
labeled dataset is re-imported it can be used in the Exploration and
Simulation mode.
**Included**
A binary variable indicating the existing labeling decisions with ``0`` =
irrelevant/excluded, or ``1`` = relevant/included. If no label is present, we
assume the record is ``unseen``. Different column names are allowed, see the
table. The behavior of the labels is different for each mode,
see :doc:`data_labeled`.
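
As an illustrative aside (not part of the diff): a minimal partly labeled dataset, with invented rows, would look roughly as follows. The third record has no value in the included column and is therefore treated as unseen:

title,abstract,included
"A relevant study on topic X","...",1
"An irrelevant study on topic Y","...",0
"A record still to be screened","...",
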


RIS file format
@@ -101,22 +84,24 @@ reader (`rispy <https://pypi.org/project/rispy/>`__). Successful import/export
depends on a proper data set structure. The complete list of accepted fields and
default mapping can be found on the `rispy GitHub page <https://github.com/MrTango/rispy>`_.

The labels ``ASReview_relevant``, ``ASReview_irrelevant``, and
``ASReview_not_seen`` are stored with the N1 (Notes) tag, and can be
re-imported into ASReview LAB. The behavior of the labels is different for
each mode, see :doc:`data_labeled`.

.. tip::

The labels ``ASReview_relevant``, ``ASReview_irrelevant``, and
``ASReview_not_seen`` are stored with the N1 (Notes) tag. In citation managers
Zotero and Endnote the labels can be used for making selections; see the
screenshots or watch the `instruction video <https://www.youtube.com/watch?v=-Rw291AE2OI>`_.
screenshots or watch the `instruction video <https://www.youtube.com/watch?v=-Rw291AE2OI>`_.

.. note::

When re-importing a partly labeled dataset in the the RIS file format, the
When re-importing a partly labeled dataset in the RIS file format, the
labels stored in the N1 field are used as prior knowledge. When a completely
labeled dataset is re-imported it can be used in the Exploration and
Simulation mode.


Simulation mode.

.. figure:: ../images/asreview_export_to_zotero_labeled.png
:alt: Example record with a labeling decision imported to Zotero
