Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed message for missing embeddings #2554

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions deepchecks/nlp/text_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,12 @@ def n_samples(self) -> int:
@property
def embeddings(self) -> pd.DataFrame:
    """Return the embeddings of the dataset.

    Returns
    -------
    pd.DataFrame
        The embeddings previously supplied via ``set_embeddings`` or
        computed with ``calculate_builtin_embeddings``.

    Raises
    ------
    DeepchecksValueError
        If no embeddings have been set or calculated for this object.
    """
    # Fail fast with an actionable message instead of returning None and
    # letting callers hit an opaque AttributeError downstream.
    if self._embeddings is None:
        raise DeepchecksValueError(
            'Functionality requires embeddings, but the TextData object had none. To use this functionality, '
            'use the set_embeddings method to set your own embeddings with a numpy.array or use '
            'TextData.calculate_builtin_embeddings to add the default deepchecks embeddings.'
        )
    return self._embeddings

def calculate_builtin_embeddings(self, model: str = 'miniLM', file_path: str = 'embeddings.npy'):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,24 @@
# ----------------------------------------------------------------------------
#
"""Module containing common EmbeddingsDrift Check (domain classifier drift) utils."""
import warnings

import numpy as np
import pandas as pd
from numba import NumbaDeprecationWarning
from sklearn.decomposition import PCA
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from umap import UMAP

from deepchecks.core.check_utils.multivariate_drift_utils import auc_to_drift_score, build_drift_plot
from deepchecks.nlp import TextData
from deepchecks.nlp.utils.nlp_plot import two_datasets_scatter_plot

with warnings.catch_warnings():
warnings.simplefilter(action='ignore', category=NumbaDeprecationWarning)
from umap import UMAP

# Max number of samples to use for dimensionality reduction fit (to make calculation faster):
SAMPLES_FOR_REDUCTION_FIT = 1000

Expand Down