# BERT Classifier

## Imports

In [2]:
import numpy as np
import pandas as pd
import random
import torch

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer
from transformers import BertForSequenceClassification
from transformers import AdamW, get_linear_schedule_with_warmup
from tqdm.notebook import tqdm

## Data Preparation

In [3]:
# load the preprocessed dataset
df = pd.read_csv('preprocessed.csv')

# encode the labels (categories) in the dataframe:
# build a {category: integer id} dictionary, numbering the categories in
# order of first appearance in the dataframe
cat_dict = {category: idx for idx, category in enumerate(df.primary_category.unique())}

# look up the id of every row's category; each category is a key of cat_dict
# (it was built from this very column), so `map` assigns the same ids the
# previous `replace` call did, without depending on replace's implicit
# object -> integer downcasting
df['label'] = df['primary_category'].map(cat_dict)
df.head()

Unnamed: 0,primary_category,description,label
0,Clothing,key features alisha solid womens cycling short...,0
1,Footwear,key features aw bellies sandals wedges heel ca...,1
2,Clothing,key features alisha solid womens cycling short...,0
3,Clothing,key features alisha solid womens cycling short...,0
4,Footwear,key features dilli bazaaar bellies corporate c...,1


In [4]:
# Split the row indices 80/20 into train and test.
# The dataset is imbalanced (category sizes differ a lot), so the split is
# stratified on the category column.
x_train, x_test, y_train, y_test = train_test_split(
    df.index.values,
    df.primary_category.values,
    test_size=0.2,
    random_state=42,
    stratify=df.primary_category.values,
)

# tag every row with the split it belongs to; 'no' is the placeholder that
# rows would keep if they ended up in neither split
df['split'] = 'no'
df.loc[x_train, 'split'] = 'train'
df.loc[x_test, 'split'] = 'test'

# row counts per (category, split) pair
(
    df.groupby(['primary_category', 'split'])
    .count()
    .sort_values(by='primary_category')
)

Unnamed: 0_level_0,Unnamed: 1_level_0,description,label
primary_category,split,Unnamed: 2_level_1,Unnamed: 3_level_1
Automotive,test,202,202
Automotive,train,810,810
Baby Care,test,97,97
Baby Care,train,386,386
Beauty and Personal Care,test,142,142
Beauty and Personal Care,train,568,568
Clothing,test,1240,1240
Clothing,train,4957,4957
Computers,test,116,116
Computers,train,462,462


In [5]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                          do_lower_case=True)


def _encode_descriptions(descriptions, max_length=256):
    """Tokenize product descriptions into fixed-length inputs.

    Args:
        descriptions: iterable of description strings.
        max_length: number of tokens per encoded sequence; longer inputs are
            truncated and shorter ones padded up to this length.

    Returns:
        The encoding produced by ``tokenizer.batch_encode_plus``; its
        'input_ids' and 'attention_mask' entries are read below.
    """
    return tokenizer.batch_encode_plus(
        list(descriptions),  # hand the tokenizer a plain list rather than a numpy array
        add_special_tokens=True,
        return_attention_mask=True,
        padding='max_length',
        max_length=max_length,
        return_tensors='pt',
        truncation=True,
    )


# train and test go through exactly the same encoding settings
encoded_data_train = _encode_descriptions(df[df.split=='train'].description.values)
encoded_data_test = _encode_descriptions(df[df.split=='test'].description.values)

input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']
labels_train = torch.tensor(df[df.split=='train'].label.values)

input_ids_test = encoded_data_test['input_ids']
attention_masks_test = encoded_data_test['attention_mask']
labels_test = torch.tensor(df[df.split=='test'].label.values)

# each dataset item is the triple (input_ids, attention_mask, label)
dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
dataset_test = TensorDataset(input_ids_test, attention_masks_test, labels_test)

In [6]:
# pretrained BERT (base, uncased) with a classification head that has one
# output per entry of cat_dict
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(cat_dict),
    output_attentions=False,
    output_hidden_states=False,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [7]:
batch_size = 32

# training batches are drawn through a random sampler
dataloader_train = DataLoader(
    dataset_train,
    sampler=RandomSampler(dataset_train),
    batch_size=batch_size,
)

# test batches are drawn through a sequential sampler
dataloader_test = DataLoader(
    dataset_test,
    sampler=SequentialSampler(dataset_test),
    batch_size=batch_size,
)

In [8]:
epochs = 1

optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)

# linear schedule without warmup; its length is one step per training batch
# for every epoch
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=len(dataloader_train)*epochs,
)

In [13]:
def f1_score_func(preds, labels):
    """Return the weighted F1 score of the arg-max predictions.

    Args:
        preds: 2-D array of per-class scores, one row per sample; the
            predicted class of a row is the index of its largest score.
        labels: array of true class ids (flattened before scoring).
    """
    predicted = np.argmax(preds, axis=1).flatten()
    return f1_score(labels.flatten(), predicted, average='weighted')

def accuracy_per_class(preds, labels):
    """Print, for each class present in `labels`, correct predictions / samples.

    Args:
        preds: 2-D array of per-class scores, one row per sample.
        labels: array of true class ids (flattened before use).

    Class names are looked up by inverting the global `cat_dict`.
    """
    id_to_name = {idx: name for name, idx in cat_dict.items()}

    predicted = np.argmax(preds, axis=1).flatten()
    actual = labels.flatten()

    for class_id in np.unique(actual):
        # boolean mask of the samples whose true class is class_id
        in_class = actual == class_id
        hits = int(np.sum(predicted[in_class] == class_id))
        print(f'Class: {id_to_name[class_id]}')
        print(f'Accuracy: {hits}/{int(np.sum(in_class))}\n')

def mean_accuracy(preds, labels):
    """Print the overall accuracy of the arg-max predictions as a percentage.

    The value is (number of samples predicted correctly) / (number of
    samples). That is the same figure as summing the per-class correct and
    total counts over every class present in `labels`, so the counts are
    taken directly.

    Args:
        preds: 2-D array of per-class scores, one row per sample.
        labels: array of true class ids (flattened before use).

    Returns:
        None; the result is printed as 'Mean Accuracy: <percent>' with three
        decimals, followed by a blank line.

    Raises:
        ZeroDivisionError: if `labels` is empty.
    """
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()

    # plain Python ints keep the division (and its failure on empty input)
    # identical to summing the per-class counts
    total_true = int(np.sum(preds_flat == labels_flat))
    total = int(labels_flat.size)

    print(f'Mean Accuracy: {total_true/total * 100:.3f}\n')

In [10]:
# seed every random number generator in use with the same value
# NOTE(review): `model` was instantiated in an earlier cell, before these
# seeds are set, so any random initialisation done there is not covered by
# this seed - confirm whether that matters for reproducibility
seed_val = 42
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)
np.random.seed(seed_val)
random.seed(seed_val)

# run on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

# fine tune bert
def evaluate(dataloader_test):
    """Run the global `model` over a dataloader without tracking gradients.

    Args:
        dataloader_test: iterable of batches whose first three tensors are
            (input_ids, attention_mask, labels).

    Returns:
        A tuple (average loss per batch, logits of all batches concatenated
        along axis 0, true labels of all batches concatenated along axis 0);
        the last two are numpy arrays.
    """
    model.eval()

    running_loss = 0
    logit_chunks = []
    label_chunks = []

    for batch in dataloader_test:
        # move every tensor of the batch to the device the model lives on
        tensors = [b.to(device) for b in batch]

        with torch.no_grad():
            outputs = model(input_ids=tensors[0],
                            attention_mask=tensors[1],
                            labels=tensors[2])

        # first output is the loss, second the logits
        loss, logits = outputs[0], outputs[1]
        running_loss += loss.item()

        logit_chunks.append(logits.detach().cpu().numpy())
        label_chunks.append(tensors[2].cpu().numpy())

    return (
        running_loss/len(dataloader_test),
        np.concatenate(logit_chunks, axis=0),
        np.concatenate(label_chunks, axis=0),
    )
    
# Fine-tune for `epochs` passes over the training data; after every pass the
# model is evaluated on the test split and the metrics are written out.
for epoch in tqdm(range(1, epochs+1)):
    
    model.train()
    
    # sum of the per-batch losses; averaged over the batches after the epoch
    loss_train_total = 0

    progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
    for batch in progress_bar:

        model.zero_grad()
        
        # batch is the triple (input_ids, attention_mask, labels), in the
        # order the TensorDataset was built with
        batch = tuple(b.to(device) for b in batch)
        
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }       

        outputs = model(**inputs)
        
        # the first output is the loss for this batch
        loss = outputs[0]
        batch_loss = loss.item()
        loss_train_total += batch_loss
        loss.backward()

        # clip the gradient norm at 1.0 before the parameter update
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()
        
        # Show the batch loss as it is. It used to be divided by len(batch),
        # but `batch` is the 3-tuple of tensors, so that divided the displayed
        # value by 3 and made it disagree with the epoch average below.
        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(batch_loss)})
         
    tqdm.write(f'\nEpoch {epoch}')
    
    loss_train_avg = loss_train_total/len(dataloader_train)            
    tqdm.write(f'Training loss: {loss_train_avg}')
    
    test_loss, predictions, true_test = evaluate(dataloader_test)
    test_f1 = f1_score_func(predictions, true_test)
    tqdm.write(f'Test loss: {test_loss}')
    tqdm.write(f'F1 Score (Weighted): {test_f1}')

# persist the fine-tuned weights so a later cell can reload them
torch.save(model.state_dict(), 'finetuned_BERT.model')

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='Epoch 1', max=467.0, style=ProgressStyle(description_widt…


Epoch 1
Training loss: 1.0051120670245086
Test loss: 0.46043157873627466
F1 Score (Weighted): 0.93660682315436



In [14]:
# rebuild the same architecture, then overwrite its weights with the
# fine-tuned ones stored in 'finetuned_BERT.model'
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(cat_dict),
    output_attentions=False,
    output_hidden_states=False,
)

model.to(device)

model.load_state_dict(
    torch.load('finetuned_BERT.model', map_location=torch.device('cpu'))
)

# per-class hit counts on the test split; the loss is not needed here
_, predictions, true_test = evaluate(dataloader_test)
accuracy_per_class(predictions, true_test)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Class: Clothing
Accuracy: 1232/1240

Class: Footwear
Accuracy: 241/245

Class: Pens & Stationery
Accuracy: 14/63

Class: Beauty and Personal Care
Accuracy: 119/142

Class: Home Decor & Festive Needs
Accuracy: 175/186

Class: Automotive
Accuracy: 197/202

Class: Tools & Hardware
Accuracy: 66/78

Class: Home Furnishing
Accuracy: 127/140

Class: Baby Care
Accuracy: 63/97

Class: Mobiles & Accessories
Accuracy: 204/220

Class: Watches
Accuracy: 106/106

Class: Toys & School Supplies
Accuracy: 53/66

Class: Jewellery
Accuracy: 706/706

Class: Kitchen & Dining
Accuracy: 111/129

Class: Computers
Accuracy: 100/116



In [15]:
# overall accuracy on the test split, from the predictions of the last evaluate() call
mean_accuracy(predictions, true_test)

Mean Accuracy: 94.058

