# Data Injestion

In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [4]:
# storing data in pandas dataframes
aisles_df = pd.read_csv("data/aisles.csv")
departments_df = pd.read_csv("data/departments.csv")
order_products__prior_df = pd.read_csv("data/order_products__prior.csv")
order_products__train_df = pd.read_csv("data/order_products__train.csv")
orders_df = pd.read_csv("data/orders.csv")
products_df = pd.read_csv("data/products.csv")
sample_submission_df = pd.read_csv("data/sample_submission.csv")


In [5]:
# selecting top 1000 frequently purchased products
req_prod_ids = order_products__prior_df["product_id"].value_counts()[0:1000].index
top_product_df = products_df.iloc[req_prod_ids,:]

In [4]:
# filterings orders based on top 1000 products
order_products__prior_df = order_products__prior_df[order_products__prior_df["product_id"].isin(req_prod_ids)]
order_products__train_df= order_products__train_df[order_products__train_df["product_id"].isin(req_prod_ids)]
order_products__prior_df = order_products__prior_df.merge(products_df[["product_id", "product_name"]],on="product_id",how="left")

In [8]:
# grouing previous order products into a single list for all users
prior_order_prods = order_products__prior_df.groupby('order_id')['product_name'].apply(list).to_frame().reset_index()
prior_order_df = prior_order_prods.merge(orders_df,on="order_id",how="inner")
prior_orders = prior_order_df.groupby('user_id')['product_name'].apply(list).to_frame().reset_index()
prior_orders = prior_orders.rename(columns={"product_name": "prev_orders"})


In [17]:
# grouing current order products into a single list for all users
order_products__train_df = order_products__train_df.merge(products_df[["product_id", "product_name"]],on="product_id",how="left")
current_order_prods = order_products__train_df.groupby('order_id')['product_name'].apply(list).to_frame().reset_index()
current_order_df = orders_df.merge(current_order_prods,on="order_id",how="inner")
orders_df = prior_orders.merge(current_order_df[["user_id","product_name"]],on="user_id",how="inner")
orders_df = orders_df.rename(columns={"product_name": "current_order"})

# Data Pre-processing

In [3]:
import pickle
from sklearn.preprocessing import MultiLabelBinarizer

In [25]:
# converting user purchase history into comma seperated list
orders_df["prev_orders"] = orders_df.apply(lambda x: ', '.join(list(np.concatenate(x["prev_orders"]).flat)), axis=1)
orders_df = orders_df.set_index('user_id')
orders_df = orders_df.drop("current_order",axis=1)

In [42]:
# pickling and reloadings orders dtaframe
with open('/media/drb9/Elements/test_df', 'wb') as f:
    pickle.dump(test_df, f)
orders_df = pd.read_pickle("data/orders_df.pkl")

In [30]:
# converting current products into vetors using one-hot encoding method, this will be used as a label class in training the model 
mlb = MultiLabelBinarizer()
mlb = mlb.fit(list(orders_df["current_order"]))
labels = mlb.transform(list(orders_df["current_order"]))
labels = np.array(labels, dtype=np.int32)
orders_df["labels"] = list(labels)

In [None]:
# spliting input data into train, test, validation sets
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(orders_df["prev_orders"], orders_df["labels"], test_size=0.2)
X_test, X_val, y_test, y_val = train_test_split(test_df["prev_orders"], test_df["labels"], test_size=0.1)

In [12]:
# combining labels to previous orders of each data-point
val_df["labels"] = list(y_val)
val_df = X_val.to_frame()
train_df = x_train.to_frame()
train_df["labels"] = y_train
test_df = x_test.to_frame()
test_df["labels"] = y_test

In [6]:
train_df = pd.read_pickle("train_df")

In [4]:
test_df = pd.read_pickle("test_df")

In [5]:
test_df

Unnamed: 0_level_0,prev_orders,labels
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1
33390,"Soda, Large Lemon, Organic Russet Potato, Swee...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
118826,"Large Alfresco Eggs, Smoked Turkey Breast Slic...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
47739,"Vitamin D Whole Milk, Organic Blackberries, Fr...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
119075,"Organic Tomato Cluster, Bag of Organic Bananas...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
199294,"Beef Franks, Scoops! Tortilla Chips, Cola, Str...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...
57796,"Sweet Baguette, Banana, Sweet Baguette, Organi...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
81035,"Fridge Pack Cola, Red Vine Tomato, Dressing, J...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
18474,"Organic Milk, Organic Large Brown Grade AA Cag...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
72982,"Sweet Onion, Jalapeno Peppers, Organic Avocado...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


# Simpletransformer

In [7]:
from simpletransformers.classification import MultiLabelClassificationModel, MultiLabelClassificationArgs
import pandas as pd
import logging
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

In [13]:
train_df = train_df.rename(columns={"prev_orders": "text"})

In [10]:
test_df = test_df.rename(columns={"prev_orders": "text"})

In [15]:
val_df = test_df.rename(columns={"prev_orders": "text"})

In [None]:
# using wandb dashboard to track the training process
import wandb
import random

wandb.init(project="project-733")
wandb.finish()

In [15]:
mlb = pickle.load(open("data/mlb.pkl", 'rb'))

In [16]:
# defining RoBERTa model with tested hyper parameters
model = MultiLabelClassificationModel("roberta","roberta-base",num_labels=1000,args={'train_batch_size':4, 'learning_rate': 3e-5, 'num_train_epochs': 100, 'max_seq_length': 512,'logging_steps': 10000,'wandb_project': "project-733","evaluate_during_training":True,"evaluate_during_training_steps":23000,"eval_batch_size":2,"save_steps":-1,"evaluate_during_training_verbose":True})
# training the model using simple transformers library
model.train_model(train_df,eval_df=val_df)

result, model_outputs, wrong_predictions = model.eval_model(test_df)


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForMultiLabelSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForMultiLabelSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForMultiLabelSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForMultiLabelSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'cla

  0%|          | 0/95377 [00:00<?, ?it/s]

  labels = torch.tensor(labels, dtype=torch.long)
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_roberta_512_0_2


Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Initializing WandB run for training.
[34m[1mwandb[0m: Currently logged in as: [33mdiliprb1999[0m ([33mdilip_999[0m). Use [1m`wandb login --relogin`[0m to force relogin


Running Epoch 0 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.09374914258074564, 'eval_loss': 0.03282658541386826}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.09405315739699097, 'eval_loss': 0.03273936117862795}


Running Epoch 1 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.11887696870958575, 'eval_loss': 0.031552256833442295}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.12121481075053617, 'eval_loss': 0.03153569413317631}


Running Epoch 2 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.1363186243941157, 'eval_loss': 0.03061502682802627}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.13819535820677142, 'eval_loss': 0.030565956245063}


Running Epoch 3 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.15259410829496653, 'eval_loss': 0.0299878123820601}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.1539398793988906, 'eval_loss': 0.029900990143562192}


Running Epoch 4 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.16854183585517157, 'eval_loss': 0.029512105356989525}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.17287277881367719, 'eval_loss': 0.029429143252453285}


Running Epoch 5 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.18703472733372864, 'eval_loss': 0.029014414786766165}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.18960878894572653, 'eval_loss': 0.02888570766065138}


Running Epoch 6 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.09646552790079609, 'eval_loss': 0.032824707612758346}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.11102494548803175, 'eval_loss': 0.03164560667146255}


Running Epoch 7 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0807723521295778, 'eval_loss': 0.036660221570454884}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07565121107046992, 'eval_loss': 0.03846236899631664}


Running Epoch 8 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0760404296079129, 'eval_loss': 0.038097470629101696}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06575193727421419, 'eval_loss': 0.03733675830803732}


Running Epoch 9 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07605640117380505, 'eval_loss': 0.037694148787892595}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07553481749270925, 'eval_loss': 0.038748093338854}


Running Epoch 10 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0653400517513149, 'eval_loss': 0.03896177332907831}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07656564323868459, 'eval_loss': 0.038553354713536506}


Running Epoch 11 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0625400957186472, 'eval_loss': 0.03858881700323267}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0633950547793772, 'eval_loss': 0.03883741605276393}


Running Epoch 12 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07152677512075936, 'eval_loss': 0.038954906718784967}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07425024849878346, 'eval_loss': 0.03907212906860857}


Running Epoch 13 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07599628436923098, 'eval_loss': 0.03779121352075171}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07492250489962862, 'eval_loss': 0.03899035119723957}


Running Epoch 14 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0770001007793991, 'eval_loss': 0.03851147661600951}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07431099130646628, 'eval_loss': 0.039876915742554524}


Running Epoch 15 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06229406500849639, 'eval_loss': 0.03867097194565492}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06111219930343547, 'eval_loss': 0.0399775954596414}


Running Epoch 16 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.062472793261472294, 'eval_loss': 0.03751430674120571}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06331978083115898, 'eval_loss': 0.03937158534250348}


Running Epoch 17 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06173131144312793, 'eval_loss': 0.03897143619333894}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0762135805449998, 'eval_loss': 0.03872168612611984}


Running Epoch 18 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07615668392566195, 'eval_loss': 0.03806085271521332}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06219549562247596, 'eval_loss': 0.038060537640069086}


Running Epoch 19 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06211229567997368, 'eval_loss': 0.03954470576355231}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07579985468980974, 'eval_loss': 0.03805680647340873}


Running Epoch 20 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07697007168244774, 'eval_loss': 0.03841114103140149}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07715081917857863, 'eval_loss': 0.03808796965667625}


Running Epoch 21 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06280022791487574, 'eval_loss': 0.038380018981144105}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07562066071317489, 'eval_loss': 0.038770741085820806}


Running Epoch 22 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0600944609152483, 'eval_loss': 0.03925719261194429}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07489193096608508, 'eval_loss': 0.038523362298431785}


Running Epoch 23 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07470541287416758, 'eval_loss': 0.03822096834283541}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06211844966325493, 'eval_loss': 0.038090081689787905}


Running Epoch 24 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0631243313135179, 'eval_loss': 0.03849338478362902}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0762080103180852, 'eval_loss': 0.0392648717865381}


Running Epoch 25 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07514829661168701, 'eval_loss': 0.038256971177410774}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07493855972514205, 'eval_loss': 0.03788621909381338}


Running Epoch 26 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.062230295848479195, 'eval_loss': 0.03809072219573819}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07461388185113815, 'eval_loss': 0.03841895912795388}


Running Epoch 27 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06307526478119906, 'eval_loss': 0.03915586730877409}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07658654070880853, 'eval_loss': 0.0383803385912036}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07754961299331353, 'eval_loss': 0.03910970395596675}


Running Epoch 28 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07477443501945621, 'eval_loss': 0.0386155321869937}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07694684148078682, 'eval_loss': 0.03982324684248878}


Running Epoch 29 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07520363244282366, 'eval_loss': 0.03869884347071015}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07438834096477706, 'eval_loss': 0.039450215307873356}


Running Epoch 30 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07665205677222194, 'eval_loss': 0.03905119195999628}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.062332230422454, 'eval_loss': 0.03834706142110134}


Running Epoch 31 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06084584563056856, 'eval_loss': 0.03833060092143924}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07555850279668341, 'eval_loss': 0.040574035231328034}


Running Epoch 32 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07889327211862752, 'eval_loss': 0.03819096484965529}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07587640413666727, 'eval_loss': 0.03858817774253479}


Running Epoch 33 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07555091652623358, 'eval_loss': 0.03784064745016044}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.061537059294197026, 'eval_loss': 0.038571241046579705}


Running Epoch 34 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07677561000485, 'eval_loss': 0.03965775233489628}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07449823946306935, 'eval_loss': 0.03821189152590033}


Running Epoch 35 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.05945782266737202, 'eval_loss': 0.03813887191993395}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07570384817931763, 'eval_loss': 0.03913714008935869}


Running Epoch 36 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.076152565923709, 'eval_loss': 0.03860876209750974}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07572623616331729, 'eval_loss': 0.03864342109789564}


Running Epoch 37 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07372880717964009, 'eval_loss': 0.0386799486517012}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07731857300810087, 'eval_loss': 0.03927082170101257}


Running Epoch 38 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07439828604579253, 'eval_loss': 0.03870014218519928}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0736345508812486, 'eval_loss': 0.0392100974281741}


Running Epoch 39 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07515639083536693, 'eval_loss': 0.038706574753985395}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0606633908055108, 'eval_loss': 0.038817937867330864}


Running Epoch 40 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06156831008719416, 'eval_loss': 0.038401319675379054}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0757864257852559, 'eval_loss': 0.038214330838364084}


Running Epoch 41 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06319898188512892, 'eval_loss': 0.03893452542548487}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07554825437474566, 'eval_loss': 0.03775830993969045}


Running Epoch 42 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07599453372333469, 'eval_loss': 0.038683618219649156}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07495382059856193, 'eval_loss': 0.03979121430506045}


Running Epoch 43 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07584267421566408, 'eval_loss': 0.03912336091585704}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07602893083379698, 'eval_loss': 0.03930038336263258}


Running Epoch 44 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06317625410705877, 'eval_loss': 0.03928162685299332}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07511517332163291, 'eval_loss': 0.03843158231595889}


Running Epoch 45 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.05900800860542288, 'eval_loss': 0.03809029338211177}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07522635383877213, 'eval_loss': 0.038486840482574676}


Running Epoch 46 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07530531770564013, 'eval_loss': 0.038268875218167055}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07611426840014354, 'eval_loss': 0.039285676085684555}


Running Epoch 47 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06128685857583204, 'eval_loss': 0.03879838270507581}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.06039308504017763, 'eval_loss': 0.039287324891187664}


Running Epoch 48 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07451569751107752, 'eval_loss': 0.0384875786728822}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07547227517196219, 'eval_loss': 0.039407924214792146}


Running Epoch 49 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0754721999193504, 'eval_loss': 0.03876145450892146}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.0757039143885937, 'eval_loss': 0.03934623151687963}


Running Epoch 50 of 100:   0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/23845 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_512_0_2
INFO:simpletransformers.classification.classification_model:{'LRAP': 0.07437089961234973, 'eval_loss': 0.03896914545916397}


KeyboardInterrupt: 

In [18]:
model = MultiLabelClassificationModel("roberta","D:/jupyter/bd2/project/outputs/checkpoint-1001490-epoch-42", use_cuda ="True")

In [19]:
predictions, raw_outputs = model.predict([test_df["text"][14757]])

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

In [12]:
# user "14757" previous purchase
test_df["text"][14757]

'Cage Free Grade A Large Brown Eggs, Organic 2% Reduced Fat Milk, Pure Sparkling Water, 100% Whole Wheat Bread, Organic Strawberries, Original Veggie Straws, Organic Large Extra Fancy Fuji Apple, Almond Breeze Original Almond Milk, Uncured Slow Cooked Ham, Strawberries, 100% Whole Wheat Bread'

In [16]:
# converting one-hot ecoded label back to purchased products
mlb.inverse_transform(test_df["labels"][14757].reshape(1,1000))

[('Diced Tomatoes',
  'Organic Avocado',
  'Organic Cucumber',
  'Organic Free Range Chicken Broth',
  'Organic Grape Tomatoes',
  'Organic Orange Bell Pepper',
  'Organic Plain Greek Whole Milk Yogurt',
  'Organic Roasted Turkey Breast',
  'Organic Sticks Low Moisture Part Skim Mozzarella String Cheese',
  'Organic Strawberries',
  'Unsweetened Original Almond Breeze Almond Milk',
  'Yellow Onions')]

In [17]:
# get top 10 predictions from the model
def get_top_10_preds(output):
    indices = sorted(range(len(output)), key=lambda i: output[i], reverse=True)[:10]
    return mlb.classes_[indices]

get_top_10_preds(raw_outputs[0])

array(['Organic 2% Reduced Fat Milk',
       'Unsweetened Original Almond Breeze Almond Milk', 'Banana',
       '100% Whole Wheat Bread', 'Organic Strawberries',
       'Grade A Large White Eggs', 'Vanilla Almond Breeze Almond Milk',
       'Almond Breeze Original Almond Milk', 'Organic Fuji Apple',
       'Original Orange Juice'], dtype=object)

some test user indicies:

33390
18474
81035
72982
14757

# Testing:

There is no directing testing method for our use case so, we have used a custom testing strategy that is similar to IoU score used in object detection. In this method, score calculated as ratio between (num of intersected products between purchased and recommended product) and num of auction purchased products.

In [None]:
import math
rs_score = 0
for i in test_df.index:
    predictions, raw_outputs = model.predict([test_df["text"][i]])
    num_pp = len(set(mlb.inverse_transform(test_df["labels"][i].reshape(1,1000))[0]))
    num_rr = len(set(get_top_10_preds(raw_outputs[0])))
    int_len = len(set(mlb.inverse_transform(test_df["labels"][i].reshape(1,1000))[0]).intersection(set(get_top_10_preds(raw_outputs[0]))))
    rs_score = rs_score + max(int_len/num_rr,int_len/num_pp)
    
rs_score = match.ceil(rs_score/len(test_df))*100

In [66]:
rs_score

20

On a average there a is twenty percent of interesection between purchased products and recommeded products

# Inference

In [None]:
def get_top_10_preds(output):
    indices = sorted(range(len(output)), key=lambda i: output[i], reverse=True)[:10]
    return mlb.classes_[indices]

get_top_10_preds(raw_outputs[0])

In [None]:
from simpletransformers.classification import MultiLabelClassificationModel, MultiLabelClassificationArgs
import pickle
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer

test_df = pd.read_pickle("test_df")
model = MultiLabelClassificationModel("roberta","D:/jupyter/bd2/project/outputs/checkpoint-1001490-epoch-42")
mlb = pickle.load(open("mlb.pkl", 'rb'))

def recommend_products(user_index):
    _, raw_outputs = model.predict([test_df["text"][user_index]])
    cur_prod = mlb.inverse_transform(test_df["labels"][user_index].reshape(1,1000))[0]
    prev_prods = test_df["text"][user_index]
    rec_prods = get_top_10_preds(raw_outputs[0])
    return cur_prod, prev_prods, rec_prods