In [1]:
import torch
import pandas as pd
import numpy as np
from transformers import DebertaTokenizer
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, hamming_loss
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

from bert_dataset import BertDataset
from preprocessing.utils import is_sentence_in_boundaries
from datasets_utils import get_luxury_data, get_tech_data, get_retail_data, get_big_basket_data
from preprocess import preprocess, with_category_features
from deberta import DeBERTa
from bert_dataset import BertDataset
from bert_train import bert_train
from bert_test import bert_test
from utils import (
    tfidf_vectorize,
    w2v_vectorize,
    display_metrics,
    accuracy_ml_score,
    get_splitted_dataset,
)

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

[nltk_data] Downloading package stopwords to /home/stepan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/stepan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/stepan/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [2]:
# Show which device was selected ('cuda' in the recorded run).
device

'cuda'

In [3]:
def load_prepared_dataset(loader, max_tokens):
    """Load one dataset and run the shared preparation pipeline on it.

    The steps are the ones this cell previously spelled out once per dataset:

    1. call ``loader()`` to obtain the raw frame;
    2. replace its ``description`` column with ``preprocess`` applied row-wise;
    3. pass the frame through ``with_category_features``;
    4. keep only the rows whose description satisfies
       ``is_sentence_in_boundaries(description, max_tokens=max_tokens)``.

    Args:
        loader: zero-argument callable returning the raw DataFrame.
        max_tokens: limit forwarded to ``is_sentence_in_boundaries``.

    Returns:
        The prepared and length-filtered DataFrame.
    """
    data = loader()
    data['description'] = data['description'].apply(preprocess)
    data = with_category_features(data)
    in_bounds = data['description'].apply(
        lambda text: is_sentence_in_boundaries(text, max_tokens=max_tokens)
    )
    return data[in_bounds]


# Each dataset keeps its own description-length limit.
big_basket_data = load_prepared_dataset(get_big_basket_data, max_tokens=200)
retail_data = load_prepared_dataset(get_retail_data, max_tokens=250)
luxury_data = load_prepared_dataset(get_luxury_data, max_tokens=100)
tech_data = load_prepared_dataset(get_tech_data, max_tokens=200)

# Build the list only after preparation: the names above are rebound by the
# filtering step, so a list created earlier would still point at the
# unfiltered frames.
# To iterate on a single dataset, shrink both lists, e.g.
# datasets = [big_basket_data]; dataset_names = ['Big basket'].
datasets = [big_basket_data, retail_data, luxury_data, tech_data]
dataset_names = ['Big basket', 'Retail', 'Luxury', 'Tech']

  data[category] = data['category'].apply(lambda x: 1 if category in x else 0)
  data[category] = data['category'].apply(lambda x: 1 if category in x else 0)
  data[category] = data['category'].apply(lambda x: 1 if category in x else 0)
  data[category] = data['category'].apply(lambda x: 1 if category in x else 0)
  data[category] = data['category'].apply(lambda x: 1 if category in x else 0)
  data[category] = data['category'].apply(lambda x: 1 if category in x else 0)
  data[category] = data['category'].apply(lambda x: 1 if category in x else 0)
  data[category] = data['category'].apply(lambda x: 1 if category in x else 0)
  data[category] = data['category'].apply(lambda x: 1 if category in x else 0)
  data[category] = data['category'].apply(lambda x: 1 if category in x else 0)
  data[category] = data['category'].apply(lambda x: 1 if category in x else 0)
  data[category] = data['category'].apply(lambda x: 1 if category in x else 0)
  data[category] = data['category'].apply(lambda x: 

In [4]:
# Training configuration shared by every dataset section below.
MAX_LEN = 512          # passed to BertDataset; presumably the max tokenized length -- TODO confirm
BATCH_SIZE = 1         # batch size for both the train and the test DataLoader
EPOCHS = 1             # forwarded to bert_train
LEARNING_RATE = 1e-5   # AdamW learning rate

# Seed the global RNGs so repeated runs start from the same random state.
# torch's generator drives DataLoader shuffling and any freshly initialised
# model weights; numpy's covers helpers that rely on the global numpy RNG
# (whether get_splitted_dataset does is not visible here).
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

def loss_fn(outputs, targets):
    """Mean binary cross-entropy between raw logits and multi-label targets.

    Args:
        outputs: model logits (no sigmoid applied).
        targets: float tensor of the same shape as ``outputs``.

    Returns:
        A scalar tensor: the loss averaged over all elements.
    """
    bce_with_logits = torch.nn.functional.binary_cross_entropy_with_logits
    return bce_with_logits(outputs, targets)

## Big Basket

In [5]:
# Split the prepared Big Basket frame into train/test inputs and labels.
(
    big_basket_X_train,
    big_basket_X_test,
    big_basket_y_train,
    big_basket_y_test,
) = get_splitted_dataset(big_basket_data)

# Every column but one is counted as a label (104 in the recorded run);
# presumably the excluded column is `description` -- TODO confirm.
BIG_BASKET_NUM_CLASSES = big_basket_data.shape[1] - 1

big_basket_tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')

# Wrap both splits in BertDataset, sharing the tokenizer and MAX_LEN.
big_basket_train_dataset = BertDataset(
    big_basket_X_train, MAX_LEN, big_basket_tokenizer, big_basket_y_train
)
big_basket_test_dataset = BertDataset(
    big_basket_X_test, MAX_LEN, big_basket_tokenizer, big_basket_y_test
)

# Only the training loader shuffles; the test loader keeps dataset order.
big_basket_train_loader = DataLoader(
    big_basket_train_dataset, shuffle=True, batch_size=BATCH_SIZE
)
big_basket_test_loader = DataLoader(
    big_basket_test_dataset, shuffle=False, batch_size=BATCH_SIZE
)

# Build the classifier and move it to the selected device in one step.
big_basket_deberta_model = DeBERTa(BIG_BASKET_NUM_CLASSES).to(device)

big_basket_optimizer = torch.optim.AdamW(
    big_basket_deberta_model.parameters(),
    lr=LEARNING_RATE,
)

In [6]:
# Fine-tune the Big Basket model; bert_train's return value is kept for later inspection.
big_basket_train_result = bert_train(
    model=big_basket_deberta_model,
    optimizer=big_basket_optimizer,
    loss_fn=loss_fn,
    train_loader=big_basket_train_loader,
    num_classes=BIG_BASKET_NUM_CLASSES,
    epochs=EPOCHS,
    device=device,
)

Batch: 500
Epoch: 0, Loss:  0.10997679084539413
Batch: 1000
Epoch: 0, Loss:  0.0956525206565857
Batch: 1500
Epoch: 0, Loss:  0.06453629583120346
Batch: 2000
Epoch: 0, Loss:  0.05032150819897652
Batch: 2500
Epoch: 0, Loss:  0.04961768165230751
Batch: 3000
Epoch: 0, Loss:  0.08449603617191315
Batch: 3500
Epoch: 0, Loss:  0.05415613204240799
Batch: 4000
Epoch: 0, Loss:  0.05776608735322952
Batch: 4500
Epoch: 0, Loss:  0.06503487378358841
Batch: 5000
Epoch: 0, Loss:  0.05860677361488342
Batch: 5500
Epoch: 0, Loss:  0.032551076263189316
Batch: 6000
Epoch: 0, Loss:  0.060763660818338394
Batch: 6500
Epoch: 0, Loss:  0.007163521833717823
Batch: 7000
Epoch: 0, Loss:  0.09875167906284332
Batch: 7500
Epoch: 0, Loss:  0.012125995941460133
Batch: 8000
Epoch: 0, Loss:  0.08352023363113403
Batch: 8500
Epoch: 0, Loss:  0.05259224399924278
Batch: 9000
Epoch: 0, Loss:  0.03458310291171074
Batch: 9500
Epoch: 0, Loss:  0.04687275364995003
Batch: 10000
Epoch: 0, Loss:  0.033162690699100494
Batch: 10500
Epo

In [7]:
# Evaluate the fine-tuned Big Basket model on the held-out split.
big_basket_test_result = bert_test(
    model=big_basket_deberta_model,
    loss_fn=loss_fn,
    validation_loader=big_basket_test_loader,
    device=device,
    num_classes=BIG_BASKET_NUM_CLASSES,
)

Batch: 100
Batch: 200
Batch: 300
Batch: 400
Batch: 500
Batch: 600
Batch: 700
Batch: 800
Batch: 900
Batch: 1000
Batch: 1100
Batch: 1200
Batch: 1300
Batch: 1400
Batch: 1500
Batch: 1600
Batch: 1700
Batch: 1800
Batch: 1900
Batch: 2000
Batch: 2100
Batch: 2200
Batch: 2300
Batch: 2400
Batch: 2500
Batch: 2600
Batch: 2700
Batch: 2800
Batch: 2900
Batch: 3000
Batch: 3100
Batch: 3200
Batch: 3300
Batch: 3400
Batch: 3500
Batch: 3600
Batch: 3700
Batch: 3800
Batch: 3900
Batch: 4000
Batch: 4100
Batch: 4200
Batch: 4300
Batch: 4400
Batch: 4500
Batch: 4600
Batch: 4700
Batch: 4800
Batch: 4900
Batch: 5000
Batch: 5100
Batch: 5200


In [8]:
# Print the mean of every recorded test metric under its display label.
# Keys are spelled exactly as they are looked up in the bert_test result
# (including 'accuracy_adapated').
for label, key in [
    ("Accuracy (subset)", 'accuracy_subset'),
    ("Accuracy (ML)", 'accuracy_adapated'),
    ("Precision (macro)", 'macro_precision'),
    ("Precision (micro)", 'micro_precision'),
    ("Recall (macro)", 'macro_recall'),
    ("Recall (micro)", 'micro_recall'),
    ("Hamming loss", 'hamming_loss'),
]:
    print(f"{label}: {torch.tensor(big_basket_test_result[key]).mean()}")

Accuracy (subset): 0.5961429476737976
Accuracy (ML): 0.9908991084544119
Precision (macro): 0.9966203441104177
Precision (micro): 0.8235362702464233
Recall (macro): 0.9942787643439941
Recall (micro): 0.7316758051301444
Hamming loss: 0.009100891649723053


In [9]:
# Display the 'correct_predictions' array next to the number of classes
# (one entry per class in the recorded output).
big_basket_test_result['correct_predictions'], BIG_BASKET_NUM_CLASSES

(array([5251., 5288., 5263., 5286., 5263., 5222., 5277., 5177., 5282.,
        5280., 5225., 5205., 5141., 5268., 5250., 5284., 5233., 5179.,
        5271., 5278., 5246., 5200., 5230., 5221., 5266., 5263., 5168.,
        5278., 5258., 5266., 5226., 5249., 5262., 5265., 5241., 5277.,
        5166., 5244., 5256., 5270., 5266., 5274., 5276., 5271., 5274.,
        5278., 5286., 5099., 5227., 5266., 5247., 5265., 5243., 5250.,
        5268., 5228., 5283., 4735., 5281., 5243., 5209., 5268., 5284.,
        5279., 5280., 5230., 5262., 5283., 5289., 5205., 5270., 5189.,
        5271., 5286., 5285., 5286., 5271., 5094., 5255., 5282., 5279.,
        5207., 5282., 5268., 5285., 5275., 5278., 5289., 5211., 5249.,
        5272., 5189., 5276., 5181., 4952., 5217., 5167., 5228., 5279.,
        5247., 5231., 5220., 5273., 5282.]),
 104)

In [10]:
# Optional: uncomment to save the fine-tuned Big Basket weights to disk.
# torch.save(big_basket_deberta_model.state_dict(), "big_basket_deberta_weights.bin")

## Retail

In [5]:
# Split the prepared Retail frame into train/test inputs and labels.
(
    retail_X_train,
    retail_X_test,
    retail_y_train,
    retail_y_test,
) = get_splitted_dataset(retail_data)

# Every column but one is counted as a label (23 in the recorded run);
# presumably the excluded column is `description` -- TODO confirm.
RETAIL_NUM_CLASSES = retail_data.shape[1] - 1

retail_tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')

# Wrap both splits in BertDataset, sharing the tokenizer and MAX_LEN.
retail_train_dataset = BertDataset(
    retail_X_train, MAX_LEN, retail_tokenizer, retail_y_train
)
retail_test_dataset = BertDataset(
    retail_X_test, MAX_LEN, retail_tokenizer, retail_y_test
)

# Only the training loader shuffles; the test loader keeps dataset order.
retail_train_loader = DataLoader(
    retail_train_dataset, shuffle=True, batch_size=BATCH_SIZE
)
retail_test_loader = DataLoader(
    retail_test_dataset, shuffle=False, batch_size=BATCH_SIZE
)

# Build the classifier and move it to the selected device in one step.
retail_deberta_model = DeBERTa(RETAIL_NUM_CLASSES).to(device)

retail_optimizer = torch.optim.AdamW(
    retail_deberta_model.parameters(),
    lr=LEARNING_RATE,
)

In [6]:
# Fine-tune the Retail model; bert_train's return value is kept for later inspection.
retail_train_result = bert_train(
    model=retail_deberta_model,
    optimizer=retail_optimizer,
    loss_fn=loss_fn,
    train_loader=retail_train_loader,
    num_classes=RETAIL_NUM_CLASSES,
    epochs=EPOCHS,
    device=device,
)

Batch: 500
Epoch: 0, Loss:  0.2820425033569336
Batch: 1000
Epoch: 0, Loss:  0.20312553644180298
Batch: 1500
Epoch: 0, Loss:  0.11390887200832367
Batch: 2000
Epoch: 0, Loss:  0.7667176723480225
Batch: 2500
Epoch: 0, Loss:  0.03477907180786133
Batch: 3000
Epoch: 0, Loss:  0.021700913086533546
Batch: 3500
Epoch: 0, Loss:  0.012278025969862938
Batch: 4000
Epoch: 0, Loss:  0.009723354130983353
Batch: 4500
Epoch: 0, Loss:  0.7970298528671265
Batch: 5000
Epoch: 0, Loss:  0.013870775699615479
Batch: 5500
Epoch: 0, Loss:  0.0152785275131464
Batch: 6000
Epoch: 0, Loss:  0.011775610037147999
Batch: 6500
Epoch: 0, Loss:  0.24865658581256866
Batch: 7000
Epoch: 0, Loss:  0.012934030964970589
Batch: 7500
Epoch: 0, Loss:  0.006149649620056152
Batch: 8000
Epoch: 0, Loss:  0.3348850905895233
Batch: 8500
Epoch: 0, Loss:  0.022820016369223595
Batch: 9000
Epoch: 0, Loss:  0.007082012947648764
Batch: 9500
Epoch: 0, Loss:  0.45168548822402954
Batch: 10000
Epoch: 0, Loss:  0.110031358897686
Batch: 10500
Epoch

In [7]:
# Evaluate the fine-tuned Retail model on the held-out split.
retail_test_result = bert_test(
    model=retail_deberta_model,
    loss_fn=loss_fn,
    validation_loader=retail_test_loader,
    device=device,
    num_classes=RETAIL_NUM_CLASSES,
)

Batch: 100
Batch: 200
Batch: 300
Batch: 400
Batch: 500
Batch: 600
Batch: 700
Batch: 800
Batch: 900
Batch: 1000
Batch: 1100
Batch: 1200
Batch: 1300
Batch: 1400
Batch: 1500
Batch: 1600
Batch: 1700
Batch: 1800
Batch: 1900
Batch: 2000
Batch: 2100
Batch: 2200
Batch: 2300
Batch: 2400
Batch: 2500
Batch: 2600
Batch: 2700
Batch: 2800
Batch: 2900
Batch: 3000
Batch: 3100
Batch: 3200
Batch: 3300
Batch: 3400
Batch: 3500
Batch: 3600
Batch: 3700


In [8]:
# Print the mean of every recorded test metric under its display label.
# Keys are spelled exactly as they are looked up in the bert_test result
# (including 'accuracy_adapated').
for label, key in [
    ("Accuracy (subset)", 'accuracy_subset'),
    ("Accuracy (ML)", 'accuracy_adapated'),
    ("Precision (macro)", 'macro_precision'),
    ("Precision (micro)", 'micro_precision'),
    ("Recall (macro)", 'macro_recall'),
    ("Recall (micro)", 'micro_recall'),
    ("Hamming loss", 'hamming_loss'),
]:
    print(f"{label}: {torch.tensor(retail_test_result[key]).mean()}")

Accuracy (subset): 0.8153846263885498
Accuracy (ML): 0.9667898844100218
Precision (macro): 0.9847209998239747
Precision (micro): 0.8437112010796222
Recall (macro): 0.9820688845860471
Recall (micro): 0.8200629779577148
Hamming loss: 0.033210113644599915


In [9]:
# Display the 'correct_predictions' array next to the number of classes
# (one entry per class in the recorded output).
retail_test_result['correct_predictions'], RETAIL_NUM_CLASSES

(array([3680., 3590., 3680., 3585., 3590., 3616., 3599., 3665., 3665.,
        3572., 3522., 3435., 3588., 3606., 3536., 3601., 3570., 3437.,
        3589., 3586., 3537., 3521., 3615.]),
 23)

In [11]:
# Optional: uncomment to save the fine-tuned Retail weights to disk.
# torch.save(retail_deberta_model.state_dict(), "retail_deberta_weights.bin")

## Luxury

In [12]:
# Split the prepared Luxury frame into train/test inputs and labels.
(
    luxury_X_train,
    luxury_X_test,
    luxury_y_train,
    luxury_y_test,
) = get_splitted_dataset(luxury_data)

# Every column but one is counted as a label (47 in the recorded run);
# presumably the excluded column is `description` -- TODO confirm.
LUXURY_NUM_CLASSES = luxury_data.shape[1] - 1

luxury_tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')

# Wrap both splits in BertDataset, sharing the tokenizer and MAX_LEN.
luxury_train_dataset = BertDataset(
    luxury_X_train, MAX_LEN, luxury_tokenizer, luxury_y_train
)
luxury_test_dataset = BertDataset(
    luxury_X_test, MAX_LEN, luxury_tokenizer, luxury_y_test
)

# Only the training loader shuffles; the test loader keeps dataset order.
luxury_train_loader = DataLoader(
    luxury_train_dataset, shuffle=True, batch_size=BATCH_SIZE
)
luxury_test_loader = DataLoader(
    luxury_test_dataset, shuffle=False, batch_size=BATCH_SIZE
)

# Build the classifier and move it to the selected device in one step.
luxury_deberta_model = DeBERTa(LUXURY_NUM_CLASSES).to(device)

luxury_optimizer = torch.optim.AdamW(
    luxury_deberta_model.parameters(),
    lr=LEARNING_RATE,
)

In [13]:
# Fine-tune the Luxury model; bert_train's return value is kept for later inspection.
luxury_train_result = bert_train(
    model=luxury_deberta_model,
    optimizer=luxury_optimizer,
    loss_fn=loss_fn,
    train_loader=luxury_train_loader,
    num_classes=LUXURY_NUM_CLASSES,
    epochs=EPOCHS,
    device=device,
)

Batch: 500
Epoch: 0, Loss:  0.19766664505004883
Batch: 1000
Epoch: 0, Loss:  0.09360241889953613
Batch: 1500
Epoch: 0, Loss:  0.11691860109567642
Batch: 2000
Epoch: 0, Loss:  0.0521564707159996
Batch: 2500
Epoch: 0, Loss:  0.04405416175723076
Batch: 3000
Epoch: 0, Loss:  0.034986503422260284
Batch: 3500
Epoch: 0, Loss:  0.2125275731086731


In [14]:
# Evaluate the fine-tuned Luxury model on the held-out split.
luxury_test_result = bert_test(
    model=luxury_deberta_model,
    loss_fn=loss_fn,
    validation_loader=luxury_test_loader,
    device=device,
    num_classes=LUXURY_NUM_CLASSES,
)

Batch: 100
Batch: 200
Batch: 300
Batch: 400
Batch: 500
Batch: 600
Batch: 700
Batch: 800
Batch: 900


In [15]:
# Print the mean of every recorded test metric under its display label.
# Keys are spelled exactly as they are looked up in the bert_test result
# (including 'accuracy_adapated').
for label, key in [
    ("Accuracy (subset)", 'accuracy_subset'),
    ("Accuracy (ML)", 'accuracy_adapated'),
    ("Precision (macro)", 'macro_precision'),
    ("Precision (micro)", 'micro_precision'),
    ("Recall (macro)", 'macro_recall'),
    ("Recall (micro)", 'micro_recall'),
    ("Hamming loss", 'hamming_loss'),
]:
    print(f"{label}: {torch.tensor(luxury_test_result[key]).mean()}")

Accuracy (subset): 0.7170010805130005
Accuracy (ML): 0.9894178705430363
Precision (macro): 0.9962928845851402
Precision (micro): 0.8970432946145723
Recall (macro): 0.9931249859578963
Recall (micro): 0.8299894403379092
Hamming loss: 0.01058212947100401


In [16]:
# Display the 'correct_predictions' array next to the number of classes
# (one entry per class in the recorded output).
luxury_test_result['correct_predictions'], LUXURY_NUM_CLASSES

(array([932., 930., 910., 942., 942., 933., 940., 945., 919., 943., 945.,
        932., 939., 945., 946., 940., 935., 923., 945., 942., 944., 940.,
        942., 920., 947., 944., 944., 944., 925., 941., 928., 939., 925.,
        929., 936., 944., 944., 942., 930., 917., 946., 944., 930., 936.,
        938., 945., 946.]),
 47)

In [18]:
# Optional: uncomment to save the fine-tuned Luxury weights to disk.
# torch.save(luxury_deberta_model.state_dict(), "luxury_deberta_weights.bin")

## Tech

In [5]:
# Split the prepared Tech frame into train/test inputs and labels.
(
    tech_X_train,
    tech_X_test,
    tech_y_train,
    tech_y_test,
) = get_splitted_dataset(tech_data)

# Every column but one is counted as a label;
# presumably the excluded column is `description` -- TODO confirm.
TECH_NUM_CLASSES = tech_data.shape[1] - 1

tech_tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')

# Wrap both splits in BertDataset, sharing the tokenizer and MAX_LEN.
tech_train_dataset = BertDataset(
    tech_X_train, MAX_LEN, tech_tokenizer, tech_y_train
)
tech_test_dataset = BertDataset(
    tech_X_test, MAX_LEN, tech_tokenizer, tech_y_test
)

# Only the training loader shuffles; the test loader keeps dataset order.
tech_train_loader = DataLoader(
    tech_train_dataset, shuffle=True, batch_size=BATCH_SIZE
)
tech_test_loader = DataLoader(
    tech_test_dataset, shuffle=False, batch_size=BATCH_SIZE
)

# Build the classifier and move it to the selected device in one step.
tech_deberta_model = DeBERTa(TECH_NUM_CLASSES).to(device)

tech_optimizer = torch.optim.AdamW(
    tech_deberta_model.parameters(),
    lr=LEARNING_RATE,
)

In [6]:
# Fine-tune the Tech model; bert_train's return value is kept for later inspection.
tech_train_result = bert_train(
    model=tech_deberta_model,
    optimizer=tech_optimizer,
    loss_fn=loss_fn,
    train_loader=tech_train_loader,
    num_classes=TECH_NUM_CLASSES,
    epochs=EPOCHS,
    device=device,
)

Batch: 500
Epoch: 0, Loss:  0.08800991624593735
Batch: 1000
Epoch: 0, Loss:  0.043663036078214645
Batch: 1500
Epoch: 0, Loss:  0.0391928106546402
Batch: 2000
Epoch: 0, Loss:  0.026107951998710632
Batch: 2500
Epoch: 0, Loss:  0.04666320979595184
Batch: 3000
Epoch: 0, Loss:  0.03347565978765488
Batch: 3500
Epoch: 0, Loss:  0.02845829911530018
Batch: 4000
Epoch: 0, Loss:  0.01946243643760681
Batch: 4500
Epoch: 0, Loss:  0.024576136842370033
Batch: 5000
Epoch: 0, Loss:  0.03866327553987503
Batch: 5500
Epoch: 0, Loss:  0.02505808137357235
Batch: 6000
Epoch: 0, Loss:  0.02172507531940937
Batch: 6500
Epoch: 0, Loss:  0.04038165509700775
Batch: 7000
Epoch: 0, Loss:  0.02792441099882126
Batch: 7500
Epoch: 0, Loss:  0.030322624370455742
Batch: 8000
Epoch: 0, Loss:  0.03737741336226463
Batch: 8500
Epoch: 0, Loss:  0.024512631818652153
Batch: 9000
Epoch: 0, Loss:  0.025153135880827904
Batch: 9500
Epoch: 0, Loss:  0.022564176470041275
Batch: 10000
Epoch: 0, Loss:  0.02405584417283535
Batch: 10500
E

In [7]:
# Evaluate the fine-tuned Tech model on the held-out split.
tech_test_result = bert_test(
    model=tech_deberta_model,
    loss_fn=loss_fn,
    validation_loader=tech_test_loader,
    device=device,
    num_classes=TECH_NUM_CLASSES,
)

Batch: 100
Batch: 200
Batch: 300
Batch: 400
Batch: 500
Batch: 600
Batch: 700
Batch: 800
Batch: 900
Batch: 1000
Batch: 1100
Batch: 1200
Batch: 1300
Batch: 1400
Batch: 1500
Batch: 1600
Batch: 1700
Batch: 1800
Batch: 1900
Batch: 2000
Batch: 2100
Batch: 2200
Batch: 2300
Batch: 2400
Batch: 2500
Batch: 2600
Batch: 2700
Batch: 2800
Batch: 2900
Batch: 3000
Batch: 3100
Batch: 3200
Batch: 3300
Batch: 3400
Batch: 3500
Batch: 3600
Batch: 3700
Batch: 3800
Batch: 3900
Batch: 4000
Batch: 4100
Batch: 4200
Batch: 4300
Batch: 4400
Batch: 4500


In [8]:
# Print the mean of every recorded test metric under its display label.
# Keys are spelled exactly as they are looked up in the bert_test result
# (including 'accuracy_adapated').
# NOTE(review): the recorded run shows subset accuracy ~0.0035 and micro
# recall ~0.086 next to macro recall ~0.995 -- worth confirming how bert_test
# aggregates the macro metrics before trusting them.
for label, key in [
    ("Accuracy (subset)", 'accuracy_subset'),
    ("Accuracy (ML)", 'accuracy_adapated'),
    ("Precision (macro)", 'macro_precision'),
    ("Precision (micro)", 'micro_precision'),
    ("Recall (macro)", 'macro_recall'),
    ("Recall (micro)", 'micro_recall'),
    ("Hamming loss", 'hamming_loss'),
]:
    print(f"{label}: {torch.tensor(tech_test_result[key]).mean()}")

Accuracy (subset): 0.0035257823765277863
Accuracy (ML): 0.9945563228705098
Precision (macro): 0.9998343977509889
Precision (micro): 0.9328632290289407
Recall (macro): 0.9947219251195205
Recall (micro): 0.08561459841759533
Hamming loss: 0.005443676840513945


In [9]:
# Display the 'correct_predictions' array next to the number of classes.
tech_test_result['correct_predictions'], TECH_NUM_CLASSES

(array([4509., 4535., 4476., 4516., 4533., 4523., 4515., 4537., 4494.,
        4522., 4532., 4533., 4530., 4534., 4537., 4533., 4524., 4538.,
        4499., 4488., 4538., 4516., 4504., 4521., 4516., 4533., 4522.,
        4538., 4528., 4536., 4538., 4537., 4534., 4538., 4499., 4536.,
        4511., 4522., 4529., 4494., 4536., 4529., 4537., 4519., 4538.,
        4508., 4530., 4524., 4490., 4535., 4457., 4534., 4536., 4528.,
        4528., 4530., 4534., 4532., 4525., 4502., 4532., 4529., 4524.,
        4536., 4534., 4537., 4531., 4499., 4534., 4462., 4535., 4538.,
        4527., 4537., 4533., 4528., 4530., 4534., 4525., 4474., 4534.,
        4534., 4531., 4533., 4507., 4323., 4492., 4537., 4470., 4535.,
        4531., 4529., 4518., 4529., 4529., 4479., 4537., 4538., 4503.,
        4535., 4530., 4537., 4530., 4448., 4524., 4529., 4534., 4538.,
        4534., 4397., 4533., 4525., 4538., 4445., 4525., 4313., 4536.,
        4482., 4536., 4510., 4538., 4516., 4354., 4521., 4538., 4518.,
      

In [11]:
# Optional: uncomment to save the fine-tuned Tech weights to disk.
# torch.save(tech_deberta_model.state_dict(), "tech_deberta_weights.bin")