Skip to content

Commit

Permalink
Merge tag '3.36.2' into stable
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Oct 31, 2023
2 parents a309db8 + 61033ec commit 8d59ef1
Show file tree
Hide file tree
Showing 61 changed files with 897 additions and 175 deletions.
15 changes: 14 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@ Change Log
[next] - TBA
------------


[3.36.2] - 2023-10-31
--------------------
##### Enhancements
* oweditdomain: Add variable filter ([#6603](../../pull/6603))
* IO - Change origin attribute when it is not found on the system ([#6555](../../pull/6555))
* Predictions: Output errors ([#6577](../../pull/6577))

##### Bugfixes
* EmbedderCache - Handle cache persisting when no permissions ([#6611](../../pull/6611))


[3.36.1] - 2023-09-22
--------------------
##### Bugfixes
Expand Down Expand Up @@ -1829,7 +1841,8 @@ Change Log
* Initial version based on Python 1.5.2 and Qt 2.3


[next]: https://github.com/biolab/orange3/compare/3.36.1..HEAD
[next]: https://github.com/biolab/orange3/compare/3.36.2..HEAD
[3.36.2]: https://github.com/biolab/orange3/compare/3.36.1...3.36.2
[3.36.1]: https://github.com/biolab/orange3/compare/3.36.0...3.36.1
[3.36.0]: https://github.com/biolab/orange3/compare/3.35.0...3.36.0
[3.35.0]: https://github.com/biolab/orange3/compare/3.34.1...3.35.0
Expand Down
16 changes: 12 additions & 4 deletions Orange/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from Orange.util import Reprable, OrangeDeprecationWarning, wrap_callback, \
dummy_callback


__all__ = ["Learner", "Model", "SklLearner", "SklModel",
"ReprableWithPreprocessors"]

Expand Down Expand Up @@ -465,10 +466,9 @@ def fix_dim(x):
elif prediction.ndim == 2 + multitarget:
value, probs = None, prediction
else:
raise TypeError("model returned a %i-dimensional array",
prediction.ndim)
raise TypeError(f"model returned a {prediction.ndim}-dimensional array")

# Ensure that we have what we need to return; backmapp everything
# Ensure that we have what we need to return; backmap everything
if probs is None and (ret != Model.Value or backmappers is not None):
probs = one_hot_probs(value)
if probs is not None:
Expand Down Expand Up @@ -596,7 +596,15 @@ def fit(self, X, Y, W=None):
def supports_weights(self):
"""Indicates whether this learner supports weighted instances.
"""
return 'sample_weight' in self.__wraps__.fit.__code__.co_varnames
warnings.warn('SklLearner.supports_weights property is deprecated. All '
'subclasses should redefine the supports_weights attribute. '
'The property will be removed in 3.39.',
OrangeDeprecationWarning)
varnames = self.__wraps__.fit.__code__.co_varnames
# scikit-learn often uses decorators on fit()
if hasattr(self.__wraps__.fit, "__wrapped__"):
varnames = varnames + self.__wraps__.fit.__wrapped__.__code__.co_varnames
return 'sample_weight' in varnames

def __getattr__(self, item):
try:
Expand Down
1 change: 1 addition & 0 deletions Orange/classification/gb.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def score(self, data: Table) -> Tuple[np.ndarray, Tuple[Variable]]:
class GBClassifier(SklLearner, _FeatureScorerMixin):
__wraps__ = skl_ensemble.GradientBoostingClassifier
__returns__ = SklModel
supports_weights = True

def __init__(self,
loss="log_loss",
Expand Down
1 change: 1 addition & 0 deletions Orange/classification/knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@

class KNNLearner(KNNBase, SklLearner):
__wraps__ = skl_neighbors.KNeighborsClassifier
supports_weights = False
1 change: 1 addition & 0 deletions Orange/classification/logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class LogisticRegressionLearner(SklLearner, _FeatureScorerMixin):
__wraps__ = skl_linear_model.LogisticRegression
__returns__ = LogisticRegressionClassifier
preprocessors = SklLearner.preprocessors
supports_weights = True

def __init__(self, penalty="l2", dual=False, tol=0.0001, C=1.0,
fit_intercept=True, intercept_scaling=1, class_weight=None,
Expand Down
1 change: 1 addition & 0 deletions Orange/classification/neural_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class MLPClassifierWCallback(skl_nn.MLPClassifier, NIterCallbackMixin):

class NNClassificationLearner(NNBase, SklLearner):
__wraps__ = MLPClassifierWCallback
supports_weights = False

def _initialize_wrapped(self):
clf = SklLearner._initialize_wrapped(self)
Expand Down
4 changes: 4 additions & 0 deletions Orange/classification/outlier_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class OneClassSVMLearner(_OutlierLearner):
name = "One class SVM"
__wraps__ = OneClassSVM
preprocessors = SklLearner.preprocessors + [AdaptiveNormalize()]
supports_weights = True

def __init__(self, kernel='rbf', degree=3, gamma="auto", coef0=0.0,
tol=0.001, nu=0.5, shrinking=True, cache_size=200,
Expand All @@ -100,6 +101,7 @@ def __init__(self, kernel='rbf', degree=3, gamma="auto", coef0=0.0,
class LocalOutlierFactorLearner(_OutlierLearner):
__wraps__ = LocalOutlierFactor
name = "Local Outlier Factor"
supports_weights = False

def __init__(self, n_neighbors=20, algorithm="auto", leaf_size=30,
metric="minkowski", p=2, metric_params=None,
Expand All @@ -112,6 +114,7 @@ def __init__(self, n_neighbors=20, algorithm="auto", leaf_size=30,
class IsolationForestLearner(_OutlierLearner):
__wraps__ = IsolationForest
name = "Isolation Forest"
supports_weights = True

def __init__(self, n_estimators=100, max_samples='auto',
contamination='auto', max_features=1.0, bootstrap=False,
Expand Down Expand Up @@ -156,6 +159,7 @@ class EllipticEnvelopeLearner(_OutlierLearner):
__wraps__ = EllipticEnvelope
__returns__ = EllipticEnvelopeClassifier
name = "Covariance Estimator"
supports_weights = False

def __init__(self, store_precision=True, assume_centered=False,
support_fraction=None, contamination=0.1,
Expand Down
1 change: 1 addition & 0 deletions Orange/classification/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def wrap(tree, i):
class RandomForestLearner(SklLearner, _FeatureScorerMixin):
__wraps__ = skl_ensemble.RandomForestClassifier
__returns__ = RandomForestClassifier
supports_weights = True

def __init__(self,
n_estimators=10,
Expand Down
1 change: 1 addition & 0 deletions Orange/classification/sgd.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class SGDClassificationLearner(SklLearner):
__wraps__ = SGDClassifier
__returns__ = LinearModel
preprocessors = SklLearner.preprocessors + [Normalize()]
supports_weights = True

def __init__(self, loss='hinge', penalty='l2', alpha=0.0001,
l1_ratio=0.15, fit_intercept=True, max_iter=5,
Expand Down
1 change: 1 addition & 0 deletions Orange/classification/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ class SklTreeLearner(SklLearner):
__wraps__ = skl_tree.DecisionTreeClassifier
__returns__ = SklTreeClassifier
name = 'tree'
supports_weights = True

def __init__(self, criterion="gini", splitter="best", max_depth=None,
min_samples_split=2, min_samples_leaf=1,
Expand Down
2 changes: 2 additions & 0 deletions Orange/classification/xgb.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def score(self, data: Table) -> Tuple[np.ndarray, Tuple[Variable]]:
class XGBClassifier(XGBBase, Learner, _FeatureScorerMixin):
__wraps__ = xgboost.XGBClassifier
__returns__ = SklModel
supports_weights = True

def __init__(self,
max_depth=None,
Expand Down Expand Up @@ -88,6 +89,7 @@ def __init__(self,
class XGBRFClassifier(XGBBase, Learner, _FeatureScorerMixin):
__wraps__ = xgboost.XGBRFClassifier
__returns__ = SklModel
supports_weights = True

def __init__(self,
max_depth=None,
Expand Down
14 changes: 5 additions & 9 deletions Orange/data/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import xlsxwriter
import openpyxl

from Orange.data import _io, Table, Domain, ContinuousVariable
from Orange.data import _io, Table, Domain, ContinuousVariable, update_origin
from Orange.data import Compression, open_compressed, detect_encoding, \
isnastr, guess_data_type, sanitize_variable
from Orange.data.io_base import FileFormatBase, Flags, DataTableMixin, PICKLE_PROTOCOL
Expand Down Expand Up @@ -164,21 +164,15 @@ def read(self):
skipinitialspace=True,
)
data = self.data_table(reader)

# TODO: Name can be set unconditionally when/if
# self.filename will always be a string with the file name.
# Currently, some tests pass StringIO instead of
# the file name to a reader.
if isinstance(self.filename, str):
data.name = path.splitext(
path.split(self.filename)[-1])[0]
data.name = path.splitext(path.split(self.filename)[-1])[0]
if error and isinstance(error, UnicodeDecodeError):
pos, endpos = error.args[2], error.args[3]
warning = ('Skipped invalid byte(s) in position '
'{}{}').format(pos,
('-' + str(endpos)) if (endpos - pos) > 1 else '')
warnings.warn(warning)
self.set_table_metadata(self.filename, data)
update_origin(data, self.filename)
return data
except Exception as e:
error = e
Expand Down Expand Up @@ -215,6 +209,7 @@ def read(self):
if not isinstance(table, Table):
raise TypeError("file does not contain a data table")
else:
update_origin(table, self.filename)
return table

@classmethod
Expand Down Expand Up @@ -264,6 +259,7 @@ def read(self):
try:
cells = self.get_cells()
table = self.data_table(cells)
update_origin(table, self.filename)
table.name = path.splitext(path.split(self.filename)[-1])[0]
if self.sheet and len(self.sheets) > 1:
table.name = '-'.join((table.name, self.sheet))
Expand Down
13 changes: 10 additions & 3 deletions Orange/data/io_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class Flags:
('weight', 'w'),
('.+?=.*?', ''), # general key=value attributes
))
_RE_ALL = re.compile(r'^({})$'.format('|'.join(
RE_ALL = re.compile(r'^({})$'.format('|'.join(
filter(None, flatten(ALL.items())))))

def __init__(self, flags):
Expand All @@ -57,7 +57,7 @@ def __init__(self, flags):
self.attributes = {}
for flag in flags or []:
flag = flag.strip()
if self._RE_ALL.match(flag):
if self.RE_ALL.match(flag):
if '=' in flag:
k, v = flag.split('=', 1)
if not Flags._RE_ATTR_UNQUOTED_STR(v):
Expand Down Expand Up @@ -167,8 +167,15 @@ def _header1(cls, headers: List[List[str]]) -> Tuple[List, List, List]:
2) -||- with type and flags prepended, separated by #,
e.g. d#sex,c#age,cC#IQ
"""

def is_flag(x: str) -> bool:
    """
    Tell whether `x` can be treated as a type/flag prefix of a header cell.

    `x` is accepted when the first item returned by either
    `cls._type_from_flag([x])` or `cls._flag_from_flag([x])` matches
    `Flags.RE_ALL`.
    """
    # NOTE(review): presumably the two helpers split `x` into its type and
    # flag parts -- their implementation is not visible here, confirm.
    return bool(Flags.RE_ALL.match(cls._type_from_flag([x])[0]) or
                Flags.RE_ALL.match(cls._flag_from_flag([x])[0]))

flags, names = zip(*[i.split(cls.HEADER1_FLAG_SEP, 1)
if cls.HEADER1_FLAG_SEP in i else ('', i)
if cls.HEADER1_FLAG_SEP in i and
is_flag(i.split(cls.HEADER1_FLAG_SEP)[0])
else ('', i)
for i in headers[0]])
names = list(names)
return names, cls._type_from_flag(flags), cls._flag_from_flag(flags)
Expand Down
82 changes: 79 additions & 3 deletions Orange/data/io_util.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,27 @@
import os.path
import subprocess
from collections import defaultdict
from typing import Tuple, Optional

import numpy as np
import pandas as pd
from chardet.universaldetector import UniversalDetector

from Orange.data import (
is_discrete_values, MISSING_VALUES, Variable,
DiscreteVariable, StringVariable, ContinuousVariable, TimeVariable,
DiscreteVariable, StringVariable, ContinuousVariable, TimeVariable, Table,
)
from Orange.misc.collections import natural_sorted

__all__ = ["Compression", "open_compressed", "detect_encoding", "isnastr",
"guess_data_type", "sanitize_variable"]
__all__ = [
"Compression",
"open_compressed",
"detect_encoding",
"isnastr",
"guess_data_type",
"sanitize_variable",
"update_origin",
]


class Compression:
Expand Down Expand Up @@ -207,3 +217,69 @@ def mapvalues(arr):
values = [_var.parse(i) for i in orig_values]

return values, var


def _extract_new_origin(
    attr: Variable, table: Table, lookup_dirs: Tuple[str, ...]
) -> Optional[str]:
    """
    Find a directory that can serve as the "origin" of a column with paths.

    Candidates are tried in this order:

    1. the current origin, if it exists on this system;
    2. a directory named like the last component of the current origin that
       lies directly inside one of `lookup_dirs`;
    3. the first of `lookup_dirs` relative to which every non-missing value
       of the column is an existing path.

    Parameters
    ----------
    attr
        Variable whose ``attributes["origin"]`` holds the current origin.
    table
        Table from which the values of `attr` are read.
    lookup_dirs
        Directories to search, in order of preference.

    Returns
    -------
    The selected origin, or None when no candidate matches.
    """
    origin = attr.attributes["origin"]

    # origin exists
    if os.path.exists(origin):
        return origin

    # last dir of origin in lookup dirs
    last_dir = os.path.basename(os.path.normpath(origin))
    for lookup_dir in lookup_dirs:
        candidate = os.path.join(lookup_dir, last_dir)
        if os.path.isdir(candidate):
            return candidate

    # all column paths in lookup dirs
    # Collect the paths once instead of once per lookup directory. Empty
    # strings and NaN/None are missing values; a plain truthiness test would
    # also drop 0.0, i.e. the first value of a discrete variable.
    rel_paths = [
        attr.str_val(value)
        for value in table.get_column(attr)
        if (value != "" if isinstance(value, str) else not pd.isna(value))
    ]
    # all() of an empty sequence is True: without any path there is no
    # evidence for a lookup directory, so leave the origin undecided.
    if not rel_paths:
        return None
    for lookup_dir in lookup_dirs:
        if all(os.path.exists(os.path.join(lookup_dir, p)) for p in rel_paths):
            return lookup_dir

    return None


def update_origin(table: Table, file_path: str):
    """
    Update the "origin" attribute of meta columns that hold file paths.

    When a dataset with file paths in a column is moved to another computer,
    the absolute path stored as the column's origin may no longer be correct.
    This function updates the origin of every string or discrete meta
    variable that has an "origin" attribute.

    The process consists of two steps. First, we identify directories to
    search for files, and in the second step, we check if paths exist.

    Lookup directories:

    1. The directory that contains the file given by `file_path`.
    2. The parent directory of 1. This covers the situation when the user
       places the dataset file in the directory with the files (for example,
       a workflow in a directory with images).

    Possible situations for file search:

    1. The last directory of the origin (basedir) is in one of the lookup
       directories.
    2. The origin doesn't exist in any lookup directory, but the paths in the
       column can be found in one of the lookup directories. This is usually
       the situation when paths in a column are complex
       (e.g. a/b/c/d/file.txt).

    Note: This function updates the existing table in place.

    Parameters
    ----------
    table
        Orange Table to be updated if an origin exists in any column
    file_path
        Path of the loaded dataset for reference. Only paths inside the
        dataset's directory or its parent directory will be considered for
        the new origin. A relative path is resolved against the current
        working directory.
    """
    # Resolve to an absolute path first: for a bare file name
    # os.path.dirname() returns "" for the directory and for its parent, so
    # the parent would never be searched and a relative origin could be stored.
    file_dir = os.path.dirname(os.path.abspath(file_path))
    parent_dir = os.path.dirname(file_dir)
    # At the file-system root file_dir == parent_dir; dict.fromkeys drops the
    # duplicate while keeping the search order.
    lookup_dirs = tuple(dict.fromkeys((file_dir, parent_dir)))
    for attr in table.domain.metas:
        if "origin" in attr.attributes and (attr.is_string or attr.is_discrete):
            new_orig = _extract_new_origin(attr, table, lookup_dirs)
            if new_orig:
                attr.attributes["origin"] = new_orig
12 changes: 7 additions & 5 deletions Orange/data/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1325,7 +1325,7 @@ def __repr__(self):
return s

@classmethod
def concatenate(cls, tables, axis=0):
def concatenate(cls, tables, axis=0, *, ignore_domains=None):
"""
Concatenate tables into a new table, either vertically or horizontally.
Expand All @@ -1346,14 +1346,15 @@ def concatenate(cls, tables, axis=0):
"""
if axis not in (0, 1):
raise ValueError("invalid axis")
if ignore_domains is not None and axis != 0:
raise ValueError("'ignore_domains' is incompatible with 'axis=1'")
if not tables:
raise ValueError('need at least one table to concatenate')

if len(tables) == 1:
return tables[0].copy()

if axis == 0:
conc = cls._concatenate_vertical(tables)
conc = cls._concatenate_vertical(tables, bool(ignore_domains))
else:
conc = cls._concatenate_horizontal(tables)

Expand All @@ -1368,7 +1369,7 @@ def concatenate(cls, tables, axis=0):
return conc

@classmethod
def _concatenate_vertical(cls, tables):
def _concatenate_vertical(cls, tables, ignore_domains=False):
def vstack(arrs):
return [np, sp][any(sp.issparse(arr) for arr in arrs)].vstack(arrs)

Expand All @@ -1387,7 +1388,8 @@ def collect(attr):
return [getattr(arr, attr) for arr in tables]

domain = tables[0].domain
if any(table.domain != domain for table in tables):
if not ignore_domains \
and any(table.domain != domain for table in tables):
raise ValueError('concatenated tables must have the same domain')

conc = cls.from_numpy(
Expand Down

0 comments on commit 8d59ef1

Please sign in to comment.