<a href="https://colab.research.google.com/github/guilherme-vieira/recipe_sorting/blob/main/recipe_classification_bert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import Libraries

In [2]:
!pip install git+https://github.com/huggingface/transformers
!pip install sentencepiece

import pandas as pd
import numpy as np
import tensorflow as tf
import torch
from torch.nn import BCEWithLogitsLoss, BCELoss
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix, f1_score, accuracy_score
import pickle
from transformers import *
from tqdm import tqdm, trange
from ast import literal_eval

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-woiemq_o
  Running command git clone -q https://github.com/huggingface/transformers /tmp/pip-req-build-woiemq_o
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)
[K     |████████████████████████████████| 901kB 3.9MB/s 
[?25hCollecting huggingface-hub==0.0.8
  Downloading https://files.pythonhosted.org/packages/a1/88/7b1e45720ecf59c6c6737ff332f41c955963090a18e72acbcbeac6b25e86/huggingface_hub-0.0.8-py3-none-any.whl
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/d4/e2/df3543e8ffdab68f5acc73f613de9c2b155ac47

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
torch.cuda.get_device_name(0)

'Tesla K80'

## Load and Preprocess Training Data

Dataset will be tokenized then split into training and validation sets. The validation set will be used to monitor training. For testing a separate test set will be loaded for analysis.

In [6]:
df = pd.read_csv('methods_train_plus.csv')
df.head()

Unnamed: 0,description,Measuring,Plating,Smoking,Toasting,Microwaving,Air Frying,Double Boiler,Bain Marie,Reducing,Water Bathing,Deglazing,Caramelising,Poaching,Simmering,Boiling,Steaming,Stir Frying,Deep Frying,Flambing,Braising,Searing,Sauteeing,Grilling,Pan Frying,Grating,Baking,Roasting,Squeezing,Mashing,Rehidrating,Drying,Kneading,Tenderizing,Whisking,Mixing,Blending,Refrigerating,Pickling,Curing,Freezing,Infusing,Marinating,Seasoning,Salting,Slicing,Chopping Fruits,Chopping Mushroom,Chopping Herbs,Mincing,Batonnet,Dicing,Roughly Chopping
0,Slowly pour in around one-third of the porcini...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Combine the chickpeas, lemon juice, garlic, cu...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
2,Add the drained pasta and chopped tomatoes to ...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Bring a large pan of salted water to the boil,...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Add the crushed garlic and half of the cumin s...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [7]:
print('Unique comments: ', df.description.nunique() == df.shape[0])
print('Null values: ', df.isnull().values.any())
# df[df.isna().any(axis=1)]

Unique comments:  True
Null values:  False


In [8]:
print('average sentence length: ', df.description.str.split().str.len().mean())
print('stdev sentence length: ', df.description.str.split().str.len().std())

average sentence length:  27.568181818181817
stdev sentence length:  8.901000015082515


In [9]:
cols = df.columns
label_cols = list(cols[1:])
num_labels = len(label_cols)
print('Label columns: ', label_cols)

Label columns:  ['Measuring', 'Plating', 'Smoking', 'Toasting', 'Microwaving', 'Air Frying', 'Double Boiler', 'Bain Marie', 'Reducing', 'Water Bathing', 'Deglazing', 'Caramelising', 'Poaching', 'Simmering', 'Boiling', 'Steaming', 'Stir Frying', 'Deep Frying', 'Flambing', 'Braising', 'Searing', 'Sauteeing', 'Grilling', 'Pan Frying', 'Grating', 'Baking', 'Roasting', 'Squeezing', 'Mashing', 'Rehidrating', 'Drying', 'Kneading', 'Tenderizing', 'Whisking', 'Mixing', 'Blending', 'Refrigerating', 'Pickling', 'Curing', 'Freezing', 'Infusing', 'Marinating', 'Seasoning', 'Salting', 'Slicing', 'Chopping Fruits', 'Chopping Mushroom', 'Chopping Herbs', 'Mincing', 'Batonnet', 'Dicing', 'Roughly Chopping']


In [10]:
print('Count of 1 per label: \n', df[label_cols].sum(), '\n') # Label counts, may need to downsample or upsample
print('Count of 0 per label: \n', df[label_cols].eq(0).sum())

Count of 1 per label: 
 Measuring             0
Plating               7
Smoking               0
Toasting              3
Microwaving           0
Air Frying            0
Double Boiler         0
Bain Marie            0
Reducing              0
Water Bathing         0
Deglazing             0
Caramelising          0
Poaching              0
Simmering             0
Boiling               8
Steaming              0
Stir Frying           3
Deep Frying           0
Flambing              0
Braising              0
Searing               0
Sauteeing             0
Grilling              0
Pan Frying            3
Grating               0
Baking                8
Roasting              0
Squeezing             0
Mashing               0
Rehidrating           0
Drying                0
Kneading              0
Tenderizing           0
Whisking              0
Mixing               16
Blending              7
Refrigerating         0
Pickling              0
Curing                0
Freezing              0
Infusing        

In [11]:
df = df.sample(frac=1).reset_index(drop=True) #shuffle rows

In [12]:
df['one_hot_labels'] = list(df[label_cols].values)
df.head()

Unnamed: 0,description,Measuring,Plating,Smoking,Toasting,Microwaving,Air Frying,Double Boiler,Bain Marie,Reducing,Water Bathing,Deglazing,Caramelising,Poaching,Simmering,Boiling,Steaming,Stir Frying,Deep Frying,Flambing,Braising,Searing,Sauteeing,Grilling,Pan Frying,Grating,Baking,Roasting,Squeezing,Mashing,Rehidrating,Drying,Kneading,Tenderizing,Whisking,Mixing,Blending,Refrigerating,Pickling,Curing,Freezing,Infusing,Marinating,Seasoning,Salting,Slicing,Chopping Fruits,Chopping Mushroom,Chopping Herbs,Mincing,Batonnet,Dicing,Roughly Chopping,one_hot_labels
0,"For a couple of minutes, toast the seeds in a...",0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,Add more ingredients to make it taste better....,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"Put the milk, water and sugar in a small sauc...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
3,"Put the milk, water and sugar in a small sauc...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
4,Add the garlic and half of the cumin seeds an...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [13]:
labels = list(df.one_hot_labels.values)
descriptions = list(df.description.values)

Load the pretrained tokenizer that corresponds to your choice in model. e.g.,

```
BERT:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) 

XLNet:
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased', do_lower_case=False) 

RoBERTa:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base', do_lower_case=False)
```


In order to avoid memory issues with Google Colab, I enforce a max_length of 100 tokens. Note that some sentences may not adequately represent each label because of this.

In [14]:
max_length = 100
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) # tokenizer
encodings = tokenizer.batch_encode_plus(descriptions,max_length=max_length,pad_to_max_length=True) # tokenizer's encoding method
print('tokenizer outputs: ', encodings.keys())

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.



tokenizer outputs:  dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])




In [15]:
input_ids = encodings['input_ids'] # tokenized and encoded sentences
token_type_ids = encodings['token_type_ids'] # token type ids
attention_masks = encodings['attention_mask'] # attention masks

In [16]:
# Identifying indices of 'one_hot_labels' entries that only occur once - this will allow us to stratify split our training data later
label_counts = df.one_hot_labels.astype(str).value_counts()
one_freq = label_counts[label_counts==1].keys()
one_freq_idxs = sorted(list(df[df.one_hot_labels.astype(str).isin(one_freq)].index), reverse=True)
print('df label indices with only one instance: ', one_freq_idxs)

df label indices with only one instance:  []


In [17]:
# Gathering single instance inputs to force into the training set after stratified split
one_freq_input_ids = [input_ids.pop(i) for i in one_freq_idxs]
one_freq_token_types = [token_type_ids.pop(i) for i in one_freq_idxs]
one_freq_attention_masks = [attention_masks.pop(i) for i in one_freq_idxs]
one_freq_labels = [labels.pop(i) for i in one_freq_idxs]

Be sure to handle all classes during validation using "stratify" during train/validation split:

In [18]:
# Use train_test_split to split our data into train and validation sets

train_inputs, validation_inputs, train_labels, validation_labels, train_token_types, validation_token_types, train_masks, validation_masks = train_test_split(input_ids, labels, token_type_ids,attention_masks,
                                                            random_state=2020, test_size=0.20, stratify = labels)

# Add one frequency data to train data
train_inputs.extend(one_freq_input_ids)
train_labels.extend(one_freq_labels)
train_masks.extend(one_freq_attention_masks)
train_token_types.extend(one_freq_token_types)

# Convert all of our data into torch tensors, the required datatype for our model
train_inputs = torch.tensor(train_inputs)
train_labels = torch.tensor(train_labels)
train_masks = torch.tensor(train_masks)
train_token_types = torch.tensor(train_token_types)

validation_inputs = torch.tensor(validation_inputs)
validation_labels = torch.tensor(validation_labels)
validation_masks = torch.tensor(validation_masks)
validation_token_types = torch.tensor(validation_token_types)

In [19]:
# Select a batch size for training. For fine-tuning with XLNet, the authors recommend a batch size of 32, 48, or 128. We will use 32 here to avoid memory issues.
batch_size = 32

# Create an iterator of our data with torch DataLoader. This helps save on memory during training because, unlike a for loop, 
# with an iterator the entire dataset does not need to be loaded into memory

train_data = TensorDataset(train_inputs, train_masks, train_labels, train_token_types)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels, validation_token_types)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)

In [20]:
torch.save(validation_dataloader,'validation_data_loader')
torch.save(train_dataloader,'train_data_loader')

## Load Model & Set Params

Load the appropriate model below, each model already contains a single dense layer for classification on top.



```
BERT:
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)

XLNet:
model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=num_labels)

RoBERTa:
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=num_labels)
```



In [21]:
# Load model, the pretrained model will include a single linear classification layer on top for classification. 
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)
model.cuda()

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=570.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

Setting custom optimization parameters for the AdamW optimizer https://huggingface.co/transformers/main_classes/optimizer_schedules.html

In [22]:
# setting custom optimization parameters. You may implement a scheduler here as well.
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.0}
]

In [23]:
optimizer = AdamW(optimizer_grouped_parameters,lr=2e-5,correct_bias=True)
# optimizer = AdamW(model.parameters(),lr=2e-5)  # Default optimization

## Train Model

In [24]:
# Store our loss and accuracy for plotting
train_loss_set = []

# Number of training epochs (authors recommend between 2 and 4)
epochs = 3

# trange is a tqdm wrapper around the normal python range
for _ in trange(epochs, desc="Epoch"):

  # Training
  
  # Set our model to training mode (as opposed to evaluation mode)
  model.train()

  # Tracking variables
  tr_loss = 0 #running loss
  nb_tr_examples, nb_tr_steps = 0, 0
  
  # Train the data for one epoch
  for step, batch in enumerate(train_dataloader):
    # Add batch to GPU
    batch = tuple(t.to(device) for t in batch)
    # Unpack the inputs from our dataloader
    b_input_ids, b_input_mask, b_labels, b_token_types = batch
    # Clear out the gradients (by default they accumulate)
    optimizer.zero_grad()

    # # Forward pass for multiclass classification
    # outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
    # loss = outputs[0]
    # logits = outputs[1]

    # Forward pass for multilabel classification
    outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
    logits = outputs[0]
    loss_func = BCEWithLogitsLoss() 
    loss = loss_func(logits.view(-1,num_labels),b_labels.type_as(logits).view(-1,num_labels)) #convert labels to float for calculation
    # loss_func = BCELoss() 
    # loss = loss_func(torch.sigmoid(logits.view(-1,num_labels)),b_labels.type_as(logits).view(-1,num_labels)) #convert labels to float for calculation
    train_loss_set.append(loss.item())    

    # Backward pass
    loss.backward()
    # Update parameters and take a step using the computed gradient
    optimizer.step()
    # scheduler.step()
    # Update tracking variables
    tr_loss += loss.item()
    nb_tr_examples += b_input_ids.size(0)
    nb_tr_steps += 1

  print("Train loss: {}".format(tr_loss/nb_tr_steps))

###############################################################################

  # Validation

  # Put model in evaluation mode to evaluate loss on the validation set
  model.eval()

  # Variables to gather full output
  logit_preds,true_labels,pred_labels,tokenized_texts = [],[],[],[]

  # Predict
  for i, batch in enumerate(validation_dataloader):
    batch = tuple(t.to(device) for t in batch)
    # Unpack the inputs from our dataloader
    b_input_ids, b_input_mask, b_labels, b_token_types = batch
    with torch.no_grad():
      # Forward pass
      outs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
      b_logit_pred = outs[0]
      pred_label = torch.sigmoid(b_logit_pred)

      b_logit_pred = b_logit_pred.detach().cpu().numpy()
      pred_label = pred_label.to('cpu').numpy()
      b_labels = b_labels.to('cpu').numpy()

    tokenized_texts.append(b_input_ids)
    logit_preds.append(b_logit_pred)
    true_labels.append(b_labels)
    pred_labels.append(pred_label)

  # Flatten outputs
  pred_labels = [item for sublist in pred_labels for item in sublist]
  true_labels = [item for sublist in true_labels for item in sublist]

  # Calculate Accuracy
  threshold = 0.50
  pred_bools = [pl>threshold for pl in pred_labels]
  true_bools = [tl==1 for tl in true_labels]
  val_f1_accuracy = f1_score(true_bools,pred_bools,average='micro')*100
  val_flat_accuracy = accuracy_score(true_bools, pred_bools)*100

  print('F1 Validation Accuracy: ', val_f1_accuracy)
  print('Flat Validation Accuracy: ', val_flat_accuracy)

Epoch:  33%|███▎      | 1/3 [00:01<00:03,  1.55s/it]

Train loss: 0.7189199924468994
F1 Validation Accuracy:  8.16326530612245
Flat Validation Accuracy:  0.0


Epoch:  67%|██████▋   | 2/3 [00:02<00:01,  1.50s/it]

Train loss: 0.6783455312252045
F1 Validation Accuracy:  6.639004149377594
Flat Validation Accuracy:  0.0


Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.44s/it]

Train loss: 0.6674087047576904
F1 Validation Accuracy:  9.836065573770492
Flat Validation Accuracy:  0.0





In [25]:
torch.save(model.state_dict(), 'bert_model_recipes')

## Load and Preprocess Test Data

In [26]:
test_df = pd.read_csv('methods_test.csv')
# test_labels_df = pd.read_csv('test_labels.csv')
# test_df = test_df.merge(test_labels_df, on='id', how='left')
test_label_cols = list(test_df.columns[1:])
print('Null values: ', test_df.isnull().values.any()) #should not be any null sentences or labels
print('Same columns between train and test: ', label_cols == test_label_cols) #columns should be the same
test_df.head()

Null values:  False
Same columns between train and test:  True


Unnamed: 0,description,Measuring,Plating,Smoking,Toasting,Microwaving,Air Frying,Double Boiler,Bain Marie,Reducing,Water Bathing,Deglazing,Caramelising,Poaching,Simmering,Boiling,Steaming,Stir Frying,Deep Frying,Flambing,Braising,Searing,Sauteeing,Grilling,Pan Frying,Grating,Baking,Roasting,Squeezing,Mashing,Rehidrating,Drying,Kneading,Tenderizing,Whisking,Mixing,Blending,Refrigerating,Pickling,Curing,Freezing,Infusing,Marinating,Seasoning,Salting,Slicing,Chopping Fruits,Chopping Mushroom,Chopping Herbs,Mincing,Batonnet,Dicing,Roughly Chopping
0,"Add the onion and cook, stirring occasionally,...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
1,"Add the salt and pepper, taste and add a littl...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Put the mushrooms, chard, oil, garlic, chilli,...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [27]:
#test_df = test_df[~test_df[test_label_cols].eq(-1).any(axis=1)] #remove irrelevant rows/comments with -1 values
test_df['one_hot_labels'] = list(test_df[test_label_cols].values)
test_df.head()

Unnamed: 0,description,Measuring,Plating,Smoking,Toasting,Microwaving,Air Frying,Double Boiler,Bain Marie,Reducing,Water Bathing,Deglazing,Caramelising,Poaching,Simmering,Boiling,Steaming,Stir Frying,Deep Frying,Flambing,Braising,Searing,Sauteeing,Grilling,Pan Frying,Grating,Baking,Roasting,Squeezing,Mashing,Rehidrating,Drying,Kneading,Tenderizing,Whisking,Mixing,Blending,Refrigerating,Pickling,Curing,Freezing,Infusing,Marinating,Seasoning,Salting,Slicing,Chopping Fruits,Chopping Mushroom,Chopping Herbs,Mincing,Batonnet,Dicing,Roughly Chopping,one_hot_labels
0,"Add the onion and cook, stirring occasionally,...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,"Add the salt and pepper, taste and add a littl...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"Put the mushrooms, chard, oil, garlic, chilli,...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [28]:
# Gathering input data
test_labels = list(test_df.one_hot_labels.values)
test_description = list(test_df.description.values)

In [29]:
# Encoding input data
test_encodings = tokenizer.batch_encode_plus(test_description,max_length=max_length,pad_to_max_length=True)
test_input_ids = test_encodings['input_ids']
test_token_type_ids = test_encodings['token_type_ids']
test_attention_masks = test_encodings['attention_mask']



In [30]:
# Make tensors out of data
test_inputs = torch.tensor(test_input_ids)
test_labels = torch.tensor(test_labels)
test_masks = torch.tensor(test_attention_masks)
test_token_types = torch.tensor(test_token_type_ids)
# Create test dataloader
test_data = TensorDataset(test_inputs, test_masks, test_labels, test_token_types)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)
# Save test dataloader
torch.save(test_dataloader,'test_data_loader')

## Prediction and Metics

In [31]:
# Test

# Put model in evaluation mode to evaluate loss on the validation set
model.eval()

#track variables
logit_preds,true_labels,pred_labels,tokenized_texts = [],[],[],[]

# Predict
for i, batch in enumerate(test_dataloader):
  batch = tuple(t.to(device) for t in batch)
  # Unpack the inputs from our dataloader
  b_input_ids, b_input_mask, b_labels, b_token_types = batch
  with torch.no_grad():
    # Forward pass
    outs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
    b_logit_pred = outs[0]
    pred_label = torch.sigmoid(b_logit_pred)

    b_logit_pred = b_logit_pred.detach().cpu().numpy()
    pred_label = pred_label.to('cpu').numpy()
    b_labels = b_labels.to('cpu').numpy()

  tokenized_texts.append(b_input_ids)
  logit_preds.append(b_logit_pred)
  true_labels.append(b_labels)
  pred_labels.append(pred_label)

# Flatten outputs
tokenized_texts = [item for sublist in tokenized_texts for item in sublist]
pred_labels = [item for sublist in pred_labels for item in sublist]
true_labels = [item for sublist in true_labels for item in sublist]
# Converting flattened binary values to boolean values
true_bools = [tl==1 for tl in true_labels]

We need to threshold our sigmoid function outputs which range from [0, 1]. Below I use 0.50 as a threshold.

In [32]:
pred_bools = [pl>0.50 for pl in pred_labels] #boolean output after thresholding

# Print and save classification report
print('Test F1 Accuracy: ', f1_score(true_bools, pred_bools,average='micro'))
print('Test Flat Accuracy: ', accuracy_score(true_bools, pred_bools),'\n')
clf_report = classification_report(true_bools,pred_bools,target_names=test_label_cols)
pickle.dump(clf_report, open('classification_report.txt','wb')) #save report
print(clf_report)

Test F1 Accuracy:  0.06060606060606061
Test Flat Accuracy:  0.0 

                   precision    recall  f1-score   support

        Measuring       0.00      0.00      0.00         0
          Plating       0.00      0.00      0.00         0
          Smoking       0.00      0.00      0.00         0
         Toasting       0.00      0.00      0.00         0
      Microwaving       0.00      0.00      0.00         0
       Air Frying       0.00      0.00      0.00         0
    Double Boiler       0.00      0.00      0.00         0
       Bain Marie       0.00      0.00      0.00         0
         Reducing       0.00      0.00      0.00         0
    Water Bathing       0.00      0.00      0.00         0
        Deglazing       0.00      0.00      0.00         0
     Caramelising       0.00      0.00      0.00         0
         Poaching       0.00      0.00      0.00         0
        Simmering       0.00      0.00      0.00         0
          Boiling       0.00      0.00      0.00

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Output Dataframe

In [33]:
idx2label = dict(zip(range(52),label_cols))
print(idx2label)

{0: 'Measuring', 1: 'Plating', 2: 'Smoking', 3: 'Toasting', 4: 'Microwaving', 5: 'Air Frying', 6: 'Double Boiler', 7: 'Bain Marie', 8: 'Reducing', 9: 'Water Bathing', 10: 'Deglazing', 11: 'Caramelising', 12: 'Poaching', 13: 'Simmering', 14: 'Boiling', 15: 'Steaming', 16: 'Stir Frying', 17: 'Deep Frying', 18: 'Flambing', 19: 'Braising', 20: 'Searing', 21: 'Sauteeing', 22: 'Grilling', 23: 'Pan Frying', 24: 'Grating', 25: 'Baking', 26: 'Roasting', 27: 'Squeezing', 28: 'Mashing', 29: 'Rehidrating', 30: 'Drying', 31: 'Kneading', 32: 'Tenderizing', 33: 'Whisking', 34: 'Mixing', 35: 'Blending', 36: 'Refrigerating', 37: 'Pickling', 38: 'Curing', 39: 'Freezing', 40: 'Infusing', 41: 'Marinating', 42: 'Seasoning', 43: 'Salting', 44: 'Slicing', 45: 'Chopping Fruits', 46: 'Chopping Mushroom', 47: 'Chopping Herbs', 48: 'Mincing', 49: 'Batonnet', 50: 'Dicing', 51: 'Roughly Chopping'}


In [34]:
# Getting indices of where boolean one hot vector true_bools is True so we can use idx2label to gather label names
true_label_idxs, pred_label_idxs=[],[]
for vals in true_bools:
  true_label_idxs.append(np.where(vals)[0].flatten().tolist())
for vals in pred_bools:
  pred_label_idxs.append(np.where(vals)[0].flatten().tolist())

In [35]:
# Gathering vectors of label names using idx2label
true_label_texts, pred_label_texts = [], []
for vals in true_label_idxs:
  if vals:
    true_label_texts.append([idx2label[val] for val in vals])
  else:
    true_label_texts.append(vals)

for vals in pred_label_idxs:
  if vals:
    pred_label_texts.append([idx2label[val] for val in vals])
  else:
    pred_label_texts.append(vals)

In [36]:
# Decoding input ids to comment text
description_texts = [tokenizer.decode(text,skip_special_tokens=True,clean_up_tokenization_spaces=False) for text in tokenized_texts]

In [39]:
# Converting lists to df
comparisons_df = pd.DataFrame({'description_text': description_texts, 'true_labels': true_label_texts, 'pred_labels':pred_label_texts})
comparisons_df.to_csv('comparisons.csv')
comparisons_df

Unnamed: 0,description_text,true_labels,pred_labels
0,"add the onion and cook , stirring occasionally...","[Whisking, Slicing]","[Toasting, Microwaving, Air Frying, Double Boi..."
1,"add the salt and pepper , taste and add a litt...",[Grilling],"[Microwaving, Air Frying, Double Boiler, Reduc..."
2,"put the mushrooms , chard , oil , garlic , chi...",[Baking],"[Microwaving, Air Frying, Double Boiler, Reduc..."


# Exporting model

In [44]:
torch.save(model.state_dict(), 'BERT_model')