Merge branch 'releases/0.x' into bugfixes/resolve-1551
David Fidalgo committed Jun 20, 2022
2 parents 62657ca + 9674c19 commit a9dd2ad
Showing 4 changed files with 44 additions and 4 deletions.
environment_dev.yml: 2 changes (1 addition, 1 deletion)
@@ -32,7 +32,7 @@ dependencies:
- pre-commit==2.15.0
# extra test dependencies
- cleanlab
- datasets>1.17.0,<2.3.0 # TODO: Remove this when 2.3.0 compatibility is resolved
- datasets>1.17.0,<2.3.0 # TODO: push_to_hub fails up to 2.3.2; revisit the pin once a patched release is out
- huggingface_hub != 0.5.0 # some backward comp. problems introduced in 0.5.0
- https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.1.0/en_core_web_sm-3.1.0.tar.gz
- flair==0.10
frontend/components/text2text/results/RecordText2Text.vue: 1 change (1 addition, 0 deletions)
@@ -139,6 +139,7 @@ export default {
&:hover {
::v-deep .button-primary--outline {
opacity: 1 !important;
pointer-events: all;
transition: opacity 0.5s ease-in-out 0.2s !important;
}
}
frontend/plugins/highlight-search.js: 4 changes (2 additions, 2 deletions)
@@ -53,7 +53,7 @@ export default (context, inject) => {
);
text = htmlText(text);
sortedKeywords.forEach((keyword) => {
const regex = new RegExp(`\\b${keyword}\\b`, "gmi");
const regex = new RegExp(`([^a-zA-ZÀ-ÿ\u00f1\u00d1]|^)${keyword}`, "gmi");
text = text.replace(regex, (match) => htmlHighlightText(match));
});

@@ -62,7 +62,7 @@

const keywordsSpans = function (text, keywords) {
return (keywords || []).flatMap((keyword) => {
const regex = new RegExp(`\\b${keyword}\\b`, "gmi");
const regex = new RegExp(`([^a-zA-ZÀ-ÿ\u00f1\u00d1]|^)${keyword}`, "gmi");
return [...text.matchAll(regex)].map((match) => {
return {
start: match.index,
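For context on the regex change above: JavaScript's \b word boundary only counts ASCII letters, digits, and underscore as word characters, so keywords that start or end with an accented letter or ñ were not bounded reliably, which is presumably why it was replaced. The new pattern instead requires the keyword to be preceded by a character outside the Latin-letter ranges (or the start of a line) and drops the trailing boundary. The snippet below is a rough Python re-creation of that pattern (the real code is the JavaScript shown above); the keyword and sample strings are made up for illustration.

import re

# Rough re-creation of the updated boundary pattern; keyword and samples are
# invented for illustration only.
keyword = "año"
pattern = re.compile(
    rf"([^a-zA-ZÀ-ÿ\u00f1\u00d1]|^){re.escape(keyword)}",
    re.IGNORECASE | re.MULTILINE,
)

for text in ["año nuevo", "feliz año", "desengaño"]:
    # matched at a line start or after a non-letter, but not inside another word
    print(text, "->", bool(pattern.search(text)))
# año nuevo -> True
# feliz año -> True
# desengaño -> False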
src/rubrix/client/datasets.py: 41 changes (40 additions, 1 deletion)
@@ -14,7 +14,7 @@
# limitations under the License.
import functools
import logging
from typing import Dict, List, Optional, Tuple, Type, Union
from typing import Any, Dict, List, Optional, Tuple, Type, Union

import pandas as pd
from pkg_resources import parse_version
@@ -516,6 +516,11 @@ def _from_datasets(
row["inputs"] = {
key: val for key, val in row["inputs"].items() if val is not None
}
if row.get("annotation") is not None:
row["annotation"] = cls._parse_annotation_field2(
row["annotation"], dataset.features["annotation"]
)

if row.get("prediction"):
row["prediction"] = (
[
@@ -544,6 +549,40 @@ def _from_datasets(
records.append(TextClassificationRecord.parse_obj(row))
return cls(records)

@staticmethod
def _parse_annotation_field2(
annotation: Union[str, List[str], int, List[int]],
feature: Optional[Any],
) -> Optional[Union[str, List[str], int, List[int]]]:
"""Helper function to parse the annotation field.
Args:
annotation: The value from the annotation column.
feature: The feature of the annotation column to optionally convert ints to strs.
Returns:
The annotation, with integer labels converted to their string names when a ClassLabel feature is available; None if the conversion fails.
"""
import datasets

# extract ClassLabel feature
if isinstance(feature, list):
feature = feature[0]
if isinstance(feature, datasets.Sequence):
feature = feature.feature
if not isinstance(feature, datasets.ClassLabel):
feature = None

if feature is None:
return annotation

try:
return feature.int2str(annotation)
# integers don't have to map to the names ...
# it seems that sometimes -1 is used to denote "no label"
except ValueError:
return None

@classmethod
def _parse_inputs_field(
cls, dataset: "datasets.Dataset", fields: Optional[Union[str, List[str]]]
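The new _parse_annotation_field2 helper above relies on the int-to-name conversion provided by datasets.ClassLabel. The sketch below (with made-up label names, not repository code) shows the two behaviors the helper handles: int2str mapping integers to label names, and the ValueError raised for out-of-range values such as -1, which the helper translates into a missing annotation.

from datasets import ClassLabel

# Minimal sketch with invented label names; not code from the repository.
feature = ClassLabel(names=["negative", "positive"])

print(feature.int2str(1))       # -> 'positive'
print(feature.int2str([0, 1]))  # -> ['negative', 'positive'] for multi-label rows

try:
    feature.int2str(-1)         # -1 is sometimes used to denote "no label"
except ValueError:
    annotation = None           # the helper returns None in this case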
