# BERT

We start by loading the necessary libraries

In [1]:
import tensorflow 
import numpy as np
import time
import matplotlib
import matplotlib.image as mpimg
import pandas as pd
from torch import optim
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
nltk.download('wordnet')
from sklearn.feature_extraction.text import TfidfTransformer
from nltk.corpus import stopwords
from nltk import word_tokenize
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('punkt')
from sklearn.feature_extraction.text import CountVectorizer
import re
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.text import Tokenizer
import spacy
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout, SpatialDropout1D, Conv1D, MaxPooling1D, GRU, BatchNormalization
from tensorflow.keras.layers import Input, Bidirectional, GlobalAveragePooling1D, concatenate, LeakyReLU, GlobalMaxPooling1D, Flatten
from tensorflow.keras import regularizers
from tensorflow.keras import backend as K
import torch
from torch.utils.data import TensorDataset, DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import os
import pandas as pd

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Also we install required packages

In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 11.5 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 45.4 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)
[K     |████████████████████████████████| 61 kB 397 kB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 38.7 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 48.2 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting

We confirm if the GPU is detected

In [3]:
import tensorflow as tf

# Device name
device_name = tf.test.gpu_device_name()

# Check if the GPU is detected
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


Identify the GPU

In [4]:
import torch


if torch.cuda.is_available():    

       
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

    
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: Tesla K80


Load the data

In [5]:
# Mount drive

from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [6]:
# Train data 
traindata_all = pd.read_csv('/content/gdrive/MyDrive/TFM/multimodal_train.tsv',sep='\t')
# Validation data 
validata_all = pd.read_csv('/content/gdrive/MyDrive/TFM/multimodal_validate.tsv',sep='\t')
# Test data 
testdata_all = pd.read_csv('/content/gdrive/MyDrive/TFM/multimodal_test_public.tsv',sep='\t')

We select a subset of the dataframe with no missing values in the 'clean_title' column

In [7]:
# Train data with no missing values
train_data = traindata_all[traindata_all['clean_title'].notnull().to_numpy()]
# Validation data with no missing values
valid_data = validata_all[validata_all['clean_title'].notnull().to_numpy()]
# Test data with no missing values
test_data = testdata_all[testdata_all['clean_title'].notnull().to_numpy()]

We separate the datasets into text and labels.

In [8]:
## Train data
train_news = list(train_data['clean_title'])
train_labels = list(train_data['6_way_label'])
## Valid data
valid_news = list(valid_data['clean_title'])
valid_labels = list(valid_data['6_way_label'])
## Test data
test_news = list(test_data['clean_title'])
test_labels = list(test_data['6_way_label'])

## Preprocessing

We will remove multiple spaces, punctuations and numbers.

In [9]:
# Funtion to clean the data

def preprocess_text(sen):
    #Remove punctuations and numbers
    sentence = re.sub('[^a-zA-Z]', ' ', sen)

    # Removing multiple spaces
    sentence = re.sub(r'\s+', ' ', sentence)

    return sentence

In [10]:
# Remove puntuations and numbers and multiple spaces

train_news_clean_1 = []
valid_news_clean_1 = []
test_news_clean_1 = []
# Train
for new in train_news:
    train_news_clean_1.append(preprocess_text(new))
# Validation
for new in valid_news:
    valid_news_clean_1.append(preprocess_text(new))
# Test
for new in test_news:
    test_news_clean_1.append(preprocess_text(new))

We define a function to remove stop words and perform lemmatization.

In [11]:
# Initialize  lemmatizer and  stop_words

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english')) 

# Function to remove stopwords
def remove_stopwords_lem(text):
    text = word_tokenize(text)
    # Remove stopwords
    text = [word for word in text if word not in stop_words]
    # Lematization
    lemmatized_text = []
    for word in text:
        word1 = lemmatizer.lemmatize(word, pos = "n")
        word2 = lemmatizer.lemmatize(word1, pos = "v")
        word3 = lemmatizer.lemmatize(word2, pos = ("a"))
        lemmatized_text.append(word3)
        
    text_done = ' '.join(lemmatized_text)
    return text_done

In [12]:
# Remove stop words and perform lemmatization
train_stwrd_lem = []
valid_stwrd_lem = []
test_stwrd_lem = []

# Train
for new in train_news_clean_1:
    train_stwrd_lem.append(remove_stopwords_lem(new))
# Validation
for new in valid_news_clean_1:
    valid_stwrd_lem.append(remove_stopwords_lem(new))
# Test
for new in test_news_clean_1:
    test_stwrd_lem.append(remove_stopwords_lem(new))

Now we tokenize the news. In order to do this we load the tokenizer which is included with BERT. We will use the 'uncased' version.

In [13]:
from transformers import BertTokenizer

# Load the BERT tokenizer.
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

Loading BERT tokenizer...


Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

We define a function that will tokenize each sentence and map the tokens to their corresponding IDs in the tokenizer vocabulary.

In [14]:
# Tokenize the sentence and add the '[CLS]' and '[SEP]' tokens.

def bert_tokenize(text):

    encoded_text = tokenizer.encode(
                        text,                      
                        add_special_tokens = True 

                       )
    return encoded_text

Now we can tokenize each new.

In [15]:
# Tokenize news

# Train
train_tokenized = [bert_tokenize(new) for new in train_stwrd_lem]
# Validation
valid_tokenized = [bert_tokenize(new) for new in valid_stwrd_lem]
# Test
test_tokenized = [bert_tokenize(new) for new in test_stwrd_lem]

Once we have tokenized each new we need to pad the sequences so that all have the same length. The maximum length allowed by BERT is 512. In order to choose the appropiate length to pad the sequences we will take a look at what % of news have a length smaller than a given number.

In [16]:
lengths_train = np.array([len(new) for new in train_tokenized])
lengths_valid = np.array([len(new) for new in valid_tokenized])
lengths_test = np.array([len(new) for new in test_tokenized])                        

length = 15
# Train
print("The ",sum(lengths_train < length)/len(lengths_train),"% of the tokenized news in the train partition has length smaller than ", length )
# Validation
print("The ",sum(lengths_valid < length)/len(lengths_valid),"% of the tokenized news in the validation partition has length smaller than ", length )
# Test
print("The ",sum(lengths_test < length)/len(lengths_test),"% of the tokenized news in the test partition has length smaller than ", length )

The  0.9383705673758865 % of the tokenized news in the train partition has length smaller than  15
The  0.9391493377371845 % of the tokenized news in the validation partition has length smaller than  15
The  0.936698191136061 % of the tokenized news in the test partition has length smaller than  15


As we can see in all partitions the **%** of news that has length smaller than **15** is above the **90 %** therefore by padding the tokenized news with this length we are not losing much information.


In [17]:
# Pad the sequences
from keras.preprocessing.sequence import pad_sequences
MAX_LEN = 15

# Train
train_padded = pad_sequences(train_tokenized, maxlen=MAX_LEN, dtype="long", 
                          value=0, truncating="post", padding="post")
# Validation
valid_padded = pad_sequences(valid_tokenized, maxlen=MAX_LEN, dtype="long", 
                          value=0, truncating="post", padding="post")
# Test
test_padded = pad_sequences(test_tokenized, maxlen=MAX_LEN, dtype="long", 
                          value=0, truncating="post", padding="post")

Now we need to create the attention mask for each sentence. The attention mask consists on a sequence of 0's and 1's indicating wheter the corresponding token is a padding token or not.

In [18]:
# Function to obtain the attention mask of each sequence

def attention_mask(text):
  mask = [int(token_id > 0) for token_id in text]
  return mask

Now we create the masks for each sentence.

In [19]:
# Create masks

# Train
train_mask = [attention_mask(new) for new in train_padded]
# Validation
valid_mask = [attention_mask(new) for new in valid_padded]
# Test
test_mask = [attention_mask(new) for new in test_padded]

Now we concatenate the train and validation padded sequeces, masks and labels.

In [20]:
# Concatenate train and validation padded sequences
arrays = [train_padded, valid_padded]
train_valid_padded = np.concatenate(arrays, axis = 0)
# Concatenate train and validation attention masks
train_valid_mask = train_mask + valid_mask
# Concatenate train and validation labels
train_valid_labels = train_labels + valid_labels


Now we convert our inputs to torch tensors

In [21]:
# Convert inputs to torch tensor

# Train
train_inputs_tensor = torch.tensor(train_padded)
train_masks_tensor = torch.tensor(train_mask)
train_labels_tensor = torch.tensor(train_labels)

# Validation
valid_inputs_tensor = torch.tensor(valid_padded)
valid_masks_tensor = torch.tensor(valid_mask)
valid_labels_tensor = torch.tensor(valid_labels)

# Train + Validation
train_valid_inputs_tensor = torch.tensor(train_valid_padded)
train_valid_masks_tensor = torch.tensor(train_valid_mask)
train_valid_labels_tensor = torch.tensor(train_valid_labels)
# Test
test_inputs_tensor = torch.tensor(test_padded)
test_masks_tensor = torch.tensor(test_mask)
test_labels_tensor = torch.tensor(test_labels)

Now we create dataloader objects

In [22]:
# Create DataLoader objects
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
batch_size = 32

# Train
train_dataload = TensorDataset(train_inputs_tensor, train_masks_tensor, train_labels_tensor)
train_sampler = RandomSampler(train_dataload)
trainloader = DataLoader(train_dataload, sampler=train_sampler, batch_size=batch_size)

# Validation
valid_dataload = TensorDataset(valid_inputs_tensor, valid_masks_tensor, valid_labels_tensor)
valid_sampler = RandomSampler(valid_dataload)
validloader = DataLoader(valid_dataload, sampler=valid_sampler, batch_size=batch_size)

# Train + Validation
train_valid_dataload = TensorDataset(train_valid_inputs_tensor, train_valid_masks_tensor, train_valid_labels_tensor)
train_valid_sampler = RandomSampler(train_valid_dataload)
train_valid_loader = DataLoader(train_valid_dataload, sampler=train_valid_sampler, batch_size=batch_size)

# Test
test_dataload = TensorDataset(test_inputs_tensor, test_masks_tensor, test_labels_tensor)
test_sampler = RandomSampler(test_dataload)
testloader = DataLoader(test_dataload, sampler=test_sampler, batch_size=batch_size)


### Training the model

We start by defining the model

In [23]:
from transformers import BertForSequenceClassification, AdamW, BertConfig

model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased", 
    num_labels = 6,
                       
    output_attentions = False, 
    output_hidden_states = False,
)

# Run the model on the GPU
model.cuda()

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

We define the optimizer

In [24]:
optimizer = AdamW(model.parameters(),
                  lr = 2e-5, 
                  eps = 1e-8 
                )

We train the model with **1**, **2** and **3** epochs and we select the number of epochs that delivers the highest accuracy over the validation set.

In [29]:
from transformers import get_linear_schedule_with_warmup

# Number of training epochs 
epochs = 3

# Total number of training steps (batches * number of epochs).
total_steps = len(train_valid_loader) * epochs

# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0, 
                                            num_training_steps = total_steps)

We also define a function to obtain the accuracy

In [30]:
import numpy as np

# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

And a function to obtain the elapsed times.

In [31]:
import time
import datetime

def format_time(elapsed):
    
    elapsed_rounded = int(round((elapsed)))
    
    return str(datetime.timedelta(seconds=elapsed_rounded))

### Tuning the optimal number of epochs



In [None]:
import random


# Set seed for reproducibility.
seed_val = 42

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# Store the average loss after each epoch so we can plot them.
loss_values = []

# For each epoch...
for epoch_i in range(0, epochs):
    
    ############ Training #############

    
    # Perform one full pass over the training set.

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Measure time for each epoch.
    t0 = time.time()

    # Reset loss 
    total_loss = 0

    # Training mode
    model.train()

    
    for step, batch in enumerate(trainloader):

        if step % 40 == 0 and not step == 0:
            # Calculate elapsed time
            elapsed = format_time(time.time() - t0)
            
            
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(trainloader), elapsed))

        # Unpack the training batch
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)


        # Clear gradients
        model.zero_grad()
        
        # Forward pass
        outputs = model(b_input_ids, 
                    token_type_ids=None, 
                    attention_mask=b_input_mask, 
                    labels=b_labels)
        
        # Get loss value
        loss = outputs[0]

        # Store the loss
        total_loss += loss.item()

        # Calculate gradients (backward pass)
        loss.backward()

        # Clip gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Update parameters 
        optimizer.step()

        # Update learning rate
        scheduler.step()

    # Average loss
    avg_train_loss = total_loss / len(trainloader)            
    
    # Store the loss value.
    loss_values.append(avg_train_loss)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epcoh took: {:}".format(format_time(time.time() - t0)))
     
    # After each training epoch, obtain validation accuracy
    accuracy_valid = 0
    repetitions_valid = 0

    print("")
    print("Running Validation...")

    t0 = time.time()

    # Evaluation mode
    model.eval()

    # Tracking variables 
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0

    # Evaluate data for one epoch
    for batch in validloader:

         repetitions_valid += 1
        
         # Add batch to GPU
         batch = tuple(t.to(device) for t in batch)
        
         # Unpack the inputs from our dataloader
         b_input_ids = batch[0]
         b_input_mask = batch[1]
         b_labels = batch[2]
        

         with torch.no_grad():        

             # Forward pass
             outputs = model(b_input_ids, 
                        token_type_ids=None, 
                        attention_mask=b_input_mask)
        
             # Get the logits
             logits = outputs[0]

             # Move logits and labels to CPU
             logits = logits.detach().cpu().numpy()
             label_ids = b_labels.to('cpu').numpy()
             # Compute accuracy
             accuracy_valid += flat_accuracy(logits, label_ids)

        

    print("Validation accuracy:", accuracy_valid/repetitions_valid)   
        


print("")
print("Training completed")



The following table shows the accuracy that we obtain for each number of epochs.

| Epochs | Accuracy |
|--------|----------|
|  1     | 0.76988  |
|  2     | 0.77556  |
|  3     | 0.76935  |

The model obtains the highest accuracy over the validation set when it is trained for **2** epochs therefore this is the number of epochs that we will use.

## Testing performance

In order to test the performance we train the model with the **train** and **validation** partitions and we evaluate it with the **test** set. We train the model for **2** epochs.

In [None]:
from transformers import get_linear_schedule_with_warmup

# Select the number of epochs
epochs = 2

# Total number of training steps (batches * number of epochs).
total_steps = len(train_valid_loader) * epochs

# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0, 
                                            num_training_steps = total_steps)

In [None]:
import random

# Set seed for reproducibility
seed_val = 42

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)


# For each epoch...
for epoch_i in range(0, epochs):
    
    ############ Training #############
    
    # Perform one full pass over the training set.

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Measure how long the training epoch takes.
    t0 = time.time()

    # Reset the loss
    total_loss = 0

    # Training mode
    model.train()

    # For each batch of training data...
    for step, batch in enumerate(train_valid_loader):

        # Progress update every 40 batches.
        if step % 40 == 0 and not step == 0:
            # Elapsed time
            elapsed = format_time(time.time() - t0)
            
            # Report progress
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_valid_loader), elapsed))

        # Unpack the training batch and copy each tensor to the GPU. 
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        # Clear gradients
        model.zero_grad()        

        # Forward pass
        outputs = model(b_input_ids, 
                    token_type_ids=None, 
                    attention_mask=b_input_mask, 
                    labels=b_labels)
        
        # Get the loss value
        loss = outputs[0]

        # Accumulate the training loss
        total_loss += loss.item()

        # Perform a backward pass to calculate the gradients.
        loss.backward()

        # Clip gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Update parameters
        optimizer.step()

        # Update learning rate
        scheduler.step()

    # Calculate the average loss
    avg_train_loss = total_loss / len(train_valid_loader)            
    

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epcoh took: {:}".format(format_time(time.time() - t0)))
    
# Save the model
torch.save(model.state_dict(), '/content/gdrive/MyDrive/TFM/BERT_model.pt')      

print("")
print("Training completed")



And we evaluate the model.

In [None]:
############ Validation #############

# Obtain performance over the validation set

# Load model
model.load_state_dict(torch.load('/content/gdrive/MyDrive/TFM/BERT_model.pt'))

predictions = np.array([], dtype= int)
labels_test  = np.array([], dtype = int)

print("")
print("Running Validation...")

t0 = time.time()

# Evaluation mode
model.eval()

# Tracking variables 
eval_loss, eval_accuracy = 0, 0
nb_eval_steps, nb_eval_examples = 0, 0

# Evaluate data for one epoch
for batch in testloader:
        
    # Add batch to GPU
    batch = tuple(t.to(device) for t in batch)
        
    # Unpack the inputs from our dataloader
    b_input_ids = batch[0]
    b_input_mask = batch[1]
    b_labels = batch[2]
        
    # Don't compute and store gradients
    with torch.no_grad():        

        # Forward pass
        outputs = model(b_input_ids, 
                        token_type_ids=None, 
                        attention_mask=b_input_mask)
        
        # Get the "logits"
        logits = outputs[0]

        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        # Obtain predictions
        pred_flat = np.argmax(logits, axis=1).flatten()

        # Store labels and predictions
        predictions = np.concatenate((predictions, pred_flat))
        labels_test = np.concatenate((labels_test, label_ids))

        

In [None]:
print(classification_report(test_labs, predictions))

              precision    recall  f1-score   support

           0       0.81      0.86      0.83     23507
           1       0.70      0.53      0.61      3514
           2       0.72      0.64      0.68     11297
           3       0.61      0.28      0.38      1224
           4       0.80      0.86      0.83     17472
           5       0.77      0.70      0.73      2305

    accuracy                           0.78     59319
   macro avg       0.73      0.65      0.68     59319
weighted avg       0.78      0.78      0.78     59319



In [None]:
# Classification report without 'true' label
print(classification_report(np.array(test_labs).reshape(len(test_labs),1), predictions, labels = [1,2,3,4,5]))

              precision    recall  f1-score   support

           1       0.70      0.53      0.61      3514
           2       0.72      0.64      0.68     11297
           3       0.61      0.28      0.38      1224
           4       0.80      0.86      0.83     17472
           5       0.77      0.70      0.73      2305

   micro avg       0.76      0.73      0.74     35812
   macro avg       0.72      0.60      0.65     35812
weighted avg       0.76      0.73      0.74     35812



In [None]:
# Confusion matrix
print(confusion_matrix(np.array(labels_test).reshape(len(labels_test),1), predictions))

[[37963   351  1914   179  1602   317]
 [ 1028  2491   215    38   599    75]
 [ 2809   165  9941    54  1871    88]
 [  679    41   123  1351   219    58]
 [ 1439   263  1103    87 14544    36]
 [  441    45   114    35   132  2071]]
