In [1]:
import pandas as pd
import torch
from transformers import BertTokenizer, BertModel
from skmultilearn.adapt import MLkNN
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, hamming_loss
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
import gensim.downloader as api

from datasets_utils import get_luxury_data, get_tech_data, get_retail_data, get_big_basket_data
from preprocess import preprocess, with_category_features

device = 'cuda' if torch.cuda.is_available() else 'cpu'

[nltk_data] Downloading package stopwords to /home/stepan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/stepan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/stepan/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
  return torch._C._cuda_getDeviceCount() > 0


In [None]:
luxury_data = get_luxury_data()
tech_data = get_tech_data()
retail_data = get_retail_data()
big_basket_data = get_big_basket_data()

datasets = [big_basket_data, retail_data, luxury_data, tech_data]
dataset_names = ['Big basket', 'Retail', 'Luxury', 'Tech']

# Get datasets with description column preprocessed
tech_data['description'] = tech_data['description'].apply(preprocess)
luxury_data['description'] = luxury_data['description'].apply(preprocess)
retail_data['description'] = retail_data['description'].apply(preprocess)
big_basket_data['description'] = big_basket_data['description'].apply(preprocess)

# Preprocess categories
tech_data = with_category_features(tech_data)
luxury_data = with_category_features(luxury_data)
retail_data = with_category_features(retail_data)
big_basket_data = with_category_features(big_basket_data)

In [None]:
big_basket_X_train, big_basket_X_test, big_basket_y_train, big_basket_y_test = train_test_split(
    big_basket_data['description'],
    big_basket_data[[column for column in big_basket_data.columns if column != 'description']],
    test_size=0.2,
    random_state=13,
)

### TF-IDF

In [4]:
def tfidf_vectorize(train_data, test_data):
    tfidf = TfidfVectorizer()
    tfidf_train_data = tfidf.fit_transform(train_data)
    tfidf_test_data = tfidf.transform(test_data)
    return tfidf_train_data, tfidf_test_data

In [5]:
big_basket_X_train_tfidf, big_basket_X_test_tfidf = tfidf_vectorize(
    train_data=big_basket_X_train,
    test_data=big_basket_X_test,
)

In [11]:
ml_knn_tfidf_grid_cv = GridSearchCV(
    MLkNN(),
    param_grid={
        'k': range(1,3),
        's': [0.5, 0.7, 1.0]
    },
    scoring={
        'accuracy': make_scorer(accuracy_score),
        'micro_precision': make_scorer(precision_score, average='micro'),
        'macro_precision': make_scorer(precision_score, average='macro'),
        'micro_recall': make_scorer(recall_score, average='micro'),
        'macro_recall': make_scorer(recall_score, average='macro'),
        'hamming_loss': make_scorer(hamming_loss),
    },
    refit='hamming_loss',
    verbose=3,
)

ml_knn_tfidf_grid_cv.fit(big_basket_X_train_tfidf, big_basket_y_train.to_numpy())

Fitting 5 folds for each of 6 candidates, totalling 30 fits


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 1/5] END k=1, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  36.1s
[CV 2/5] END k=1, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  36.7s
[CV 3/5] END k=1, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  37.3s
[CV 4/5] END k=1, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  37.7s
[CV 5/5] END k=1, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  38.4s
[CV 1/5] END k=1, s=



In [13]:
ml_knn_tfidf_grid_cv.best_params_

{'k': 1, 's': 0.5}

In [14]:
ml_knn_tfidf_best = MLkNN(**ml_knn_tfidf_grid_cv.best_params_)
ml_knn_tfidf_best.fit(big_basket_X_train_tfidf, big_basket_y_train.to_numpy())

In [15]:
big_basket_y_pred_tfidf = ml_knn_tfidf_best.predict(big_basket_X_test_tfidf)

In [16]:
print(f"Accuracy: {accuracy_score(big_basket_y_test.to_numpy(), big_basket_y_pred_tfidf)}")
print(f"Precision (macro): {precision_score(big_basket_y_test.to_numpy(), big_basket_y_pred_tfidf, average='macro')}")
print(f"Precision (micro): {precision_score(big_basket_y_test.to_numpy(), big_basket_y_pred_tfidf, average='micro')}")
print(f"Recall (macro): {recall_score(big_basket_y_test.to_numpy(), big_basket_y_pred_tfidf, average='macro')}")
print(f"Recall (micro): {recall_score(big_basket_y_test.to_numpy(), big_basket_y_pred_tfidf, average='micro')}")
print(f"Hamming loss: {hamming_loss(big_basket_y_test.to_numpy(), big_basket_y_pred_tfidf)}")

Accuracy: 0.7811588921282799
Precision (macro): 0.7291101900788546
Precision (micro): 0.8513535159378504
Recall (macro): 0.7295349992627546
Recall (micro): 0.8519676204215757
Hamming loss: 0.006487931711145997


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Word2Vec

In [22]:
api.info()['models'].keys()

dict_keys(['fasttext-wiki-news-subwords-300', 'conceptnet-numberbatch-17-06-300', 'word2vec-ruscorpora-300', 'word2vec-google-news-300', 'glove-wiki-gigaword-50', 'glove-wiki-gigaword-100', 'glove-wiki-gigaword-200', 'glove-wiki-gigaword-300', 'glove-twitter-25', 'glove-twitter-50', 'glove-twitter-100', 'glove-twitter-200', '__testing_word2vec-matrix-synopsis'])

In [23]:
wv = api.load('word2vec-google-news-300')

In [62]:
def to_w2v_embedding(sentence):
    embeddings = []

    for word in sentence.split():
        if word in wv:
            embeddings.append(wv[word])
    embeddings = np.array(embeddings)
    return np.mean(embeddings, axis=0)

In [64]:
big_basket_X_train_w2v = big_basket_X_train.apply(to_w2v_embedding)
big_basket_X_test_w2v = big_basket_X_test.apply(to_w2v_embedding)

In [74]:
ml_knn_w2v_grid_cv = GridSearchCV(
    MLkNN(),
    param_grid={
        'k': range(1,3),
        's': [0.5, 0.7, 1.0]
    },
    scoring={
        'accuracy': make_scorer(accuracy_score),
        'micro_precision': make_scorer(precision_score, average='micro'),
        'macro_precision': make_scorer(precision_score, average='macro'),
        'micro_recall': make_scorer(recall_score, average='micro'),
        'macro_recall': make_scorer(recall_score, average='macro'),
        'hamming_loss': make_scorer(hamming_loss),
    },
    refit='hamming_loss',
    verbose=3,
)

ml_knn_w2v_grid_cv.fit(np.array([x for x in big_basket_X_train_w2v]), big_basket_y_train.to_numpy())

Fitting 5 folds for each of 6 candidates, totalling 30 fits


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 1/5] END k=1, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  30.0s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 2/5] END k=1, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  30.6s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 3/5] END k=1, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  30.7s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 4/5] END k=1, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  31.4s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 5/5] END k=1, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  32.0s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 1/5] END k=1, s=0.7; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  30.5s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 2/5] END k=1, s=0.7; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  43.8s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 3/5] END k=1, s=0.7; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time= 1.2min


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 4/5] END k=1, s=0.7; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time= 1.1min


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 5/5] END k=1, s=0.7; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time= 1.1min


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 1/5] END k=1, s=1.0; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  46.0s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 2/5] END k=1, s=1.0; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  28.3s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 3/5] END k=1, s=1.0; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.8s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 4/5] END k=1, s=1.0; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.9s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 5/5] END k=1, s=1.0; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  28.2s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 1/5] END k=2, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.8s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 2/5] END k=2, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.6s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 3/5] END k=2, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  28.2s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 4/5] END k=2, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.6s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 5/5] END k=2, s=0.5; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.7s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 1/5] END k=2, s=0.7; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.6s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 2/5] END k=2, s=0.7; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.4s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 3/5] END k=2, s=0.7; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.2s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 4/5] END k=2, s=0.7; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.5s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 5/5] END k=2, s=0.7; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.6s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 1/5] END k=2, s=1.0; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.4s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 2/5] END k=2, s=1.0; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.0s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 3/5] END k=2, s=1.0; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.3s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 4/5] END k=2, s=1.0; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.0s


Traceback (most recent call last):
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 345, in _score
    y_pred = method_caller(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 87, in _cached_call
    result, _ = _get_response_values(
  File "/home/stepan/HSEPythonCourse/thesis/hse_thesis_final/lib/python3.10/site-packages/sklearn/utils/_response.py", line 198, in _get_response_values
    classes = estimator.classes_
AttributeError: 'MLkNN' object has no attribute 'classes_'



[CV 5/5] END k=2, s=1.0; accuracy: (test=nan) hamming_loss: (test=nan) macro_precision: (test=nan) macro_recall: (test=nan) micro_precision: (test=nan) micro_recall: (test=nan) total time=  27.1s


In [75]:
ml_knn_w2v_grid_cv.best_params_

{'k': 1, 's': 0.5}

In [77]:
ml_knn_w2v_best = MLkNN(**ml_knn_w2v_grid_cv.best_params_)
ml_knn_w2v_best.fit(np.array([x for x in big_basket_X_train_w2v]), big_basket_y_train.to_numpy())

In [78]:
big_basket_y_pred_w2v = ml_knn_w2v_best.predict(np.array([x for x in big_basket_X_test_w2v]))

In [79]:
print(f"Accuracy: {accuracy_score(big_basket_y_test.to_numpy(), big_basket_y_pred_w2v)}")
print(f"Precision (macro): {precision_score(big_basket_y_test.to_numpy(), big_basket_y_pred_w2v, average='macro')}")
print(f"Precision (micro): {precision_score(big_basket_y_test.to_numpy(), big_basket_y_pred_w2v, average='micro')}")
print(f"Recall (macro): {recall_score(big_basket_y_test.to_numpy(), big_basket_y_pred_w2v, average='macro')}")
print(f"Recall (micro): {recall_score(big_basket_y_test.to_numpy(), big_basket_y_pred_w2v, average='micro')}")
print(f"Hamming loss: {hamming_loss(big_basket_y_test.to_numpy(), big_basket_y_pred_w2v)}")

Accuracy: 0.7458090379008746
Precision (macro): 0.7000471649716751
Precision (micro): 0.8255589512626669
Recall (macro): 0.6928356405844773
Recall (micro): 0.8227137933798189
Hamming loss: 0.007675838192419825


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### BERT embeddings

In [None]:
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)

In [None]:
class MLDataset(torch.utils.data.Dataset):
    def __init__(self, df, max_len, tokenizer, target_cols):
        super().__init__()
        self.df = df
        self.max_len = max_len
        self.tokenizer = tokenizer
        self.target_cols = target_cols


    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        text = self.df['description'][index]
        inputs = self.tokenizer.encode_plus(
            text,
            None,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True,
            return_tensors='pt',
            return_attention_mask=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]

        return {
            'ids': ids.clone().detach().flatten(),
            'mask': mask.clone().detach().flatten(),
            'token_type_ids': token_type_ids.clone().detach().flatten(),
            'targets': torch.tensor(self.df[self.target_cols].values[index], dtype=torch.float)
        }


big_basket_bert_train_dataset = MLDataset(
    pd.concat([
        pd.DataFrame(big_basket_X_train.values, columns=['description']).reset_index().drop(['index'], axis=1),
        big_basket_y_train.reset_index().drop(['index'], axis=1),
    ], axis=1),
    200,
    bert_tokenizer,
    big_basket_y_train.columns.values
)
big_basket_bert_test_dataset = MLDataset(
    pd.concat([
        pd.DataFrame(big_basket_X_test.values, columns=['description']).reset_index().drop(['index'], axis=1),
        big_basket_y_test.reset_index().drop(['index'], axis=1),
    ], axis=1),
    200,
    bert_tokenizer,
    big_basket_y_train.columns.values
)

big_basket_bert_train_loader = torch.utils.data.DataLoader(
    big_basket_bert_train_dataset,
    batch_size=8,
    num_workers=4,
    shuffle=True,
    pin_memory=True
)
big_basket_bert_test_loader = torch.utils.data.DataLoader(
    big_basket_bert_test_dataset,
    batch_size=8,
    num_workers=4,
    shuffle=False,
    pin_memory=True
)

In [None]:
bert_model.eval()

bert_train_embeddings = torch.tensor([]).to(device)

with torch.no_grad():
    for batch_idx, data in enumerate(big_basket_bert_train_loader, 0):
        if ((batch_idx + 1) % 100) == 0:
            print(f"Batch: {batch_idx + 1}")
        ids = data['ids'].to(device, dtype=torch.long)
        mask = data['mask'].to(device, dtype=torch.long)
        output = bert_model(ids, mask)
        bert_train_embeddings = torch.cat((bert_train_embeddings, torch.mean(output[0], axis=1))).to(device)

In [9]:
bert_test_embeddings = torch.tensor([]).to(device)

with torch.no_grad():
    for batch_idx, data in enumerate(big_basket_bert_test_loader, 0):
        if ((batch_idx + 1) % 100) == 0:
            print(f"Batch: {batch_idx + 1}")
        ids = data['ids'].to(device, dtype=torch.long)
        mask = data['mask'].to(device, dtype=torch.long)
        output = bert_model(ids, mask)
        bert_test_embeddings = torch.cat((bert_test_embeddings, torch.mean(output[0], axis=1))).to(device)

Batch: 100
Batch: 200
Batch: 300
Batch: 400
Batch: 500
Batch: 600


In [21]:
bert_test_embeddings.shape

torch.Size([5488, 768])

In [14]:
ml_knn_emb_grid_cv = GridSearchCV(
    MLkNN(),
    param_grid={
        'k': range(1,3),
        's': [0.5, 0.7, 1.0]
    },
    scoring={
        'accuracy': make_scorer(accuracy_score),
        'micro_precision': make_scorer(precision_score, average='micro'),
        'macro_precision': make_scorer(precision_score, average='macro'),
        'micro_recall': make_scorer(recall_score, average='micro'),
        'macro_recall': make_scorer(recall_score, average='macro'),
        'hamming_loss': make_scorer(hamming_loss),
    },
    refit='hamming_loss',
    verbose=3,
)

ml_knn_emb_grid_cv.fit(bert_train_embeddings, big_basket_y_train.to_numpy())

Fitting 5 folds for each of 6 candidates, totalling 30 fits


RuntimeError: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
