# NLP Text Classification

In this project we look at several ways of classifying texts:
- Naive Bayes
- Logistic Regression
- Multinomial Regression

We will use two datasets for binary label classification (sentiment analysis) and multinomial classification (topic analysis):
- [IMDb movie review sentiment](http://ai.stanford.edu/~amaas/data/sentiment/)
- [AG News topics](https://huggingface.co/datasets/ag_news)

# Set up

Import packages.

In [1]:
import numpy as np
import os
import pandas as pd
import re
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import classification_report
import sys
import traceback
import nltk

# Functions for cleaning up raw texts and tokenizing the corpus

We perform text preprocessing that includes: removing HTML tags, making text lower case, stemming, and disposing of stopwords.
In the end, we will split the entire dataset into training, validation and test sets.

In [2]:
# Stemming the text
def simple_stemmer(text):
    ps=nltk.porter.PorterStemmer()
    text= [ps.stem(word) for word in text]
    return text

In [None]:
stopwords_english = ["i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"]
# print(stopwords_english)

#removing the stopwords
def remove_stopwords(text, stopword_list):
    tokens = [token.strip() for token in text]
    filtered_tokens = [token for token in tokens if token.lower() not in stopword_list]
    return filtered_tokens

In [4]:
def tokenize_and_clean(line, stem_and_remove_stop_words = True):

    line = re.sub(r"<.*?>", "", line).strip() # remove all HTML tags
    line = re.sub(r'[^a-zA-Z0-9]', ' ', line) # remove punc
    line = line.lower().split()  # lower case
    if stem_and_remove_stop_words:
        line = remove_stopwords(line, stopwords_english)
        line = simple_stemmer(line)

    return line

# Download and unpack the sentiment data



We are using IMDb Dataset for binary sentiment classification that provides a set of 25K highly polar reviews for training, and 25K for testing
(each set contains an equal number of positive and negative examples).

Dataset folder structure is as follows:

dataset/ \
├── test/ \
│     ├── pos/ \
│     ├── neg/ \
├── train/ \
      ├── pos/ \
      └── neg/

In [None]:
# check if dataset is downloaded
if not os.path.isfile('aclImdb_v1.tar'):
    print("Downloading dataset...")
    !wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
    !gunzip aclImdb_v1.tar.gz
    !tar -xvf aclImdb_v1.tar

Load in the text from the folders.

In [5]:
def load_text_from_folders(path, file_list, dataset, samples = 25000, stem_and_remove_stop_words = True):
    """Read set of files from given directory and save returned lines to list.

    Parameters
    ----------
    path : str
        Absolute or relative path to given file (or set of files).
    file_list: list
        List of files names to read.
    dataset: list
        List that stores read lines.
    samples: int
        Number of samples in the output
    """
    for i, file in enumerate(file_list):
        if i >= samples:
            break
        with open(os.path.join(path, file), 'r', encoding='utf8') as text:
            contents = text.read()
            contents_tokenized = tokenize_and_clean(contents, stem_and_remove_stop_words=stem_and_remove_stop_words)
            dataset.append(contents_tokenized)

# Creating training and test sets

This creates four arrays:


*   ```train_pos``` -- instances in the training set with positive sentiment labels
*   ```train_neg``` -- instances in the training set with negative sentiment labels
*   ```test_pos``` -- instances in the testing set with positive sentiment labels
*   ```test_neg``` -- instances in the testing set with negative sentiment labels





In [8]:
# Path to dataset location
path = 'aclImdb/'

# Create lists that will contain read lines
train_pos, train_neg, test_pos, test_neg = [], [], [], []

# Create a dictionary of paths and lists that store lines (key: value = path: list)
sets_dict = {'train/pos/': train_pos, 'train/neg/': train_neg,
             'test/pos/': test_pos, 'test/neg/': test_neg}

# Load the data
for dataset in sets_dict:
  file_list = [f for f in sorted(os.listdir(os.path.join(path, dataset))) if f.endswith('.txt')]
  load_text_from_folders(os.path.join(path, dataset), file_list, sets_dict[dataset])

Convert into Pandas dataframes. Pandas is a virtual spreadsheet with a programmatic API. A ```DataFrame``` is a spreadsheet. We will make a spreadsheet of training data and one for testing data and one with everything together.

In [9]:
# Concatenate training and testing examples into one dataset
TRAIN = pd.concat([pd.DataFrame({'review': train_pos, 'label':1}),
                     pd.DataFrame({'review': train_neg, 'label':0})],
                     axis=0, ignore_index=True)

TEST = pd.concat([pd.DataFrame({'review': test_pos, 'label':1}),
                    pd.DataFrame({'review': test_neg, 'label':0})],
                    axis=0, ignore_index=True)

ALL = pd.concat([TRAIN, TEST])

In [10]:
TRAIN.label.value_counts()

label
1    17505
0    17411
Name: count, dtype: int64

In [11]:
TRAIN.head()

Unnamed: 0,review,label
0,"[bromwel, high, cartoon, comedi, ran, time, pr...",1
1,"[bromwel, high, cartoon, comedi, ran, time, pr...",1
2,"[homeless, houseless, georg, carlin, state, is...",1
3,"[brilliant, act, lesley, ann, warren, best, dr...",1
4,"[easili, underr, film, inn, brook, cannon, sur...",1


# Creating a vocabulary file

Next, we have to build a vocabulary. This is effectively a look-up table where every unique word in your data set has a corresponding index (an integer).
We do this as our machine learning model cannot operate on strings, but only numbers. Each index is used to construct a one-hot vector for each word.

In [12]:
class Vocab:
    def __init__(self, name):
        self.name = name
        self._word2index = {}
        self._word2count = {}
        self._index2word = {}
        self._n_words = 0

    def get_words(self):
      return list(self._word2count.keys())

    def num_words(self):
      return self._n_words

    def word2index(self, word):
      return self._word2index[word]

    def index2word(self, word):
      return self._index2word[word]

    def word2count(self, word):
      return self._word2count[word]

    def add_sentence(self, sentence):
        for word in sentence.split(' '):
            self.add_word(word)

    def add_word(self, word):
        if word not in self._word2index:
            self._word2index[word] = self._n_words
            self._word2count[word] = 1
            self._index2word[self._n_words] = word
            self._n_words += 1
        else:
            self._word2count[word] += 1

Make a vocab object.

In [13]:
VOCAB = Vocab("imdb")
VOCAB_SIZE = 1000
NUM_LABELS = 2

Load the first ```n``` frequent words in the vocabulary. Do this by sorting by frequency and then truncating.

In [14]:
# Get word frequency counts
word_freq_dict = {}   # key = word, value = frequency
for review in ALL['review']:
  for word in review:
    if word in word_freq_dict:
      word_freq_dict[word] += 1
    else:
      word_freq_dict[word] = 1

# Get a list of (word, freq) tuples sorted by frequency
kv_list = []  # list of word-freq tuples so can sort
for (k,v) in word_freq_dict.items():
  kv_list.append((k,v))
sorted_kv_list = sorted(kv_list, key=lambda x: x[1], reverse=True)

# Load top n words in to vocab object
for word, freq in sorted_kv_list[:VOCAB_SIZE]:
  VOCAB.add_word(word)

# Naive Bayes
Naive Bayes Algorithm is based on the Bayes Rule which describes the probability of an event,
based on prior knowledge of conditions that might be related to the event.

According to Bayes theorem:


```Posterior = likelihood * proposition/evidence```

or

```P(A|B) = P(B|A) * P(A)/P(B)```


Using word presence as features, create an array of features for each review. Each review will thus be an array of size ```len(vocab)``` where each index in the array is a token number and the value in that position is whether the token is present in the review. There will be ```num_rows``` arrays, making a ```num_rows x len(vocab)``` 2D array.

This function creates a bag of words. It returns a vector where each element is a count of the words in the sentence corresponding to the word index.

In [15]:
def make_bow(sentence):
    vec = torch.zeros(VOCAB_SIZE, dtype=torch.float64)
    for word in sentence:
        if word not in VOCAB.get_words():
            continue
        vec[VOCAB.word2index(word)] += 1
    return vec.view(1, -1)

Prepare data ```X_TRAIN``` is a 2D array of size ```num_reviews x vocab_size``` that contains training data. Each row will be a bag of words, except each index contains a 1 or 0 based on word presence in the example. Each row is a vector of features $\phi_1 ... \phi_{|V|}$ assumed to be independent, where $|V|$ is size of the vocabulary. We don't need to know what the features are, only whether they are present in each example in the training set.

```X_TEST``` is the same as above but containing testing data.



In [16]:
# Vectorize text reviews to numbers
# Make empty vectors
X_TRAIN = np.zeros((len(TRAIN), VOCAB_SIZE))
X_TEST = np.zeros((len(TEST), VOCAB_SIZE))

# Load in frequency counts
for i, row in TRAIN.iterrows():
    X_TRAIN[i] = np.array(make_bow(row['review'])) > 0 # The > 0 converts to presence instead of counts

for i, row in TEST.iterrows():
    X_TEST[i] = np.array(make_bow(row['review'])) > 0 # The > 0 converts to presence instead of counts

# The labels
Y_TRAIN = np.array(TRAIN['label'])
Y_TEST = np.array(TEST['label'])

We will compute probabilities over the training data and then apply those probabilities to the testing examples. Use the Bayes formula to compute $P_{\rm test}(L_{+}|\phi_{0:|V|})$ and $P_{\rm test}(L_{-}|\phi_{0:|V|})$ for each review. Classify examples based on whether one probability is higher than another. That is, $sign(P_{\rm test}(L_{+}|\phi_{0:|V|}) - P_{\rm test}(L_{-}|\phi_{0:|V|}))$ indicates a positive review when greater than 0 and a negative review when less than 0.

In [17]:
X_TRAIN.shape

(34916, 1000)

In [18]:
Y_TRAIN.shape

(34916,)

Step 1: Compute the positive label condition:
$P(L_{+}|\phi_{0:|V|}) = P(\phi_{0:|V|}|L_{+})P(L_{+}) / P(\phi_{0:|V|})$ 
and the negative label condition:
$P(L_{-}|\phi_{0:|V|}) = P(\phi_{0:|V|}|L_{-})P(L_{-}) / P(\phi_{0:|V|})$

In [19]:
def prob_given_features(x_train, y_train):
  log_probs = np.array([0] * x_train.shape[1])
  # Likelihood and prior probability for positive labels
  num_pos = np.sum(y_train==1)
  x_train_pos = x_train[y_train==1]
  likelihood_pos = (1+np.sum(x_train_pos,axis=0))/(1+num_pos)
  prior_pos = np.mean(y_train==1)

  # Likelihood and prior probability for negative labels
  num_neg = np.sum(y_train==0)
  x_train_neg = x_train[y_train==0]
  likelihood_neg = (1+np.sum(x_train_neg,axis=0))/(1+num_neg)
  prior_neg = np.mean(y_train==0)
  
  # calculate the frequency of each feature (the sum of each column)
  col_sums = np.sum(x_train, axis=0)
  # total number of features and then add 1 to smooth
  total = np.sum(x_train)
  # denominator: probability of each feature, add 1 to smooth
  evidence = (1+col_sums) / (1+total)
    
  # log scale posterior probs
  log_pos_probs = np.log(likelihood_pos * prior_pos / evidence)
  log_neg_probs = np.log(likelihood_neg * prior_neg / evidence)
  return log_pos_probs, log_neg_probs

In [21]:
pos_probs, neg_probs = prob_given_features(X_TRAIN, Y_TRAIN)

Step 2: Make a label prediction. Subtract (in log scale) the positive from the negative. If the result is greater than zero then it is a prediction of `+` label. If the result is less thn zero then we make a prediction of `-` label.

In [23]:
def naive_bayes(x, pos_probs, neg_probs):
  label = 0
  x_pos_probs = np.dot(x, pos_probs)
  x_neg_probs = np.dot(x, neg_probs)
  # Subtract the positive from the negative
  x_probs = np.sum(x_pos_probs) - np.sum(x_neg_probs)
  if x_probs > 0:
      label = 1
  else:
      lable = 0
  return label

In [26]:
def test_naive_bayes(x_train, y_train, x_test, y_test):
    try:
        # Get the positive and negative feature probabilities
        pos_probs, neg_probs  = prob_given_features(x_train, y_train)
        correct = 0  # How many tests are correct
        # Iterate through the test set
        for x, y in zip(x_test, y_test):
            # Get the naive_bayes label
            label = naive_bayes(x, pos_probs, neg_probs)
            # Compare the label against the true label
            correct = correct + int(label == y)
        print('Accuracy: ', correct / x_test.shape[0])
    except Exception as e:
        print('Error during execution of Test:')
        # print traceback
        traceback.print_exc()
    return

In [27]:
# Naive Bayes Test
test_naive_bayes(X_TRAIN, Y_TRAIN, X_TEST, Y_TEST)

Accuracy:  0.8328486681400489


# Logistic Regression

Reload the data, but use word counts instead of word presence.

In [28]:
# Randomize the data
TRAIN = TRAIN.sample(frac=1).reset_index(drop=True)
TEST = TEST.sample(frac=1).reset_index(drop=True)

# Vectorize text reviews to numbers
X_TRAIN = np.zeros((len(TRAIN), VOCAB_SIZE))
X_TEST = np.zeros((len(TEST), VOCAB_SIZE))

for i, row in TRAIN.iterrows():
  X_TRAIN[i] = np.array(make_bow(row['review']))

for i, row in TEST.iterrows():
  X_TEST[i] = np.array(make_bow(row['review']))

Y_TRAIN = np.array(TRAIN['label'])
Y_TEST = np.array(TEST['label'])

Make a logistic classifier torch neural network.

The net will take an arbitrary number of outputs, but for binary logistic regression, only one is needed because the single output neuron can take a value that is between 0 and 1, with 0 meaning negative sentiment and 1 meaning positive sentiment. There should only be as many parameters as ```num_features x (num_labels-1)``` in binary logistic regression and ```num_features x num_labels``` for multinomial logistic regression.

The input will be a one-hot vector of size `vocab_size`.

# Logistic Regression - The model

In [29]:
# Defining neural network structure
class BoWClassifier(nn.Module):  # inheriting from nn.Module!

  def __init__(self, num_labels, vocab_size):
    super(BoWClassifier, self).__init__()
    # initialize one linear layer
    # input dimension is vocab_size and the output is num_labels
    self.linear = nn.Linear(vocab_size, num_labels)

  def forward(self, bow_vec):
    # Pass the input through the linear layer, then pass that through sigmoid (for non-linearity).
    out = nn.functional.sigmoid(self.linear(bow_vec))

    return out

In [30]:
# Initialize the model
# Use one label because the head can signify a 1 or 0 because of the sigmoid.
bow_nn_model = BoWClassifier(NUM_LABELS-1, VOCAB_SIZE)

This function should return two tensors. The first, containing training data, shoud be of size ```batch_size x vocab_size``` for the ```i```th batch. The second should be a list of labels of size ```batch_size```. Both tensors should be of type ```dtype=torch.float```.

In [31]:
def get_batch(i, batch_size, x_data, y_data):
  x = torch.tensor(x_data[batch_size * i : batch_size * (i+1)], dtype=torch.float)
  y = torch.tensor(y_data[batch_size * i : batch_size * (i+1)], dtype=torch.float)
  return x, y

In [32]:
# Train the model
def train(model, train_data, test_data, epochs, batch_size):
  n_iter = len(train_data) // batch_size
  print(n_iter, 'batches per epoch')
  # Loss Function
  loss_function = nn.BCELoss()
  # Optimizer initlialization
  optimizer = optim.SGD(bow_nn_model.parameters(), lr=0.1)

  for epoch in range(epochs):
    # Make BOW vector for input features and target label
    for i in range(n_iter):
      x, y = get_batch(i, batch_size, train_data, test_data)

      # Step 3. Run the forward pass.
      y_hat = model(x)
      y_hat = y_hat.reshape(-1)

      # Step 4. Compute the loss, gradients, and update the parameters by
      loss = loss_function(y_hat,y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      if (epoch+1)%10 == 0 and (i+1) == n_iter:
        print('epoch:', epoch+1,',loss =',loss.item(), ', training accuracy =',(torch.round(y_hat)==y).float().mean())
  return model

In [33]:
# It's ok to modify this cell.
BATCH_SIZE = 100
N_EPOCHS = 1000

In [None]:
try:
    bow_nn_model = train(bow_nn_model, X_TRAIN, Y_TRAIN, N_EPOCHS, BATCH_SIZE)
except:
    print("Training failed. Please check your code.")

In [37]:
def test_model_accuracy_lr(TEST, bow_nn_model):
    try:
        bow_nn_predictions = []
        with torch.no_grad():
            for index, row in TEST.iterrows():
                bow_vec = make_bow(row['review'])
                probs = bow_nn_model(bow_vec.float())
                pred = 1 if probs[0][0] > 0.5 else 0
                bow_nn_predictions.append(pred)
        accuracy = round((bow_nn_predictions == TEST['label']).mean(), 1)
        print(classification_report(TEST['label'], bow_nn_predictions))
        print('Accuracy: ', accuracy)
    except Exception as e:
        print('Error during execution of Test:')
        # print traceback
        traceback.print_exc()
        
    return

In [38]:
# Test the model
test_model_accuracy_lr(TEST, bow_nn_model)

              precision    recall  f1-score   support

           0       0.86      0.86      0.86     16694
           1       0.87      0.86      0.87     17694

    accuracy                           0.86     34388
   macro avg       0.86      0.86      0.86     34388
weighted avg       0.86      0.86      0.86     34388

Accuracy:  0.9


# Multinomial Regression

Load data.

In [None]:
!pip install datasets

In [39]:
from datasets import load_dataset

Unlike earlier, we will use a pre-defined set of embeddings, called [GLoVe](https://nlp.stanford.edu/projects/glove/). GLoVe replaces every word with a 100-dimensional vector of floating point values. The advantage of this is that words with similar semantic meanings will have similar vectors. This is important because the vocabulary size of the corpus we will use is 400,000.

For the assigment, instead of getting a one-hot vector for each word, the neural network will get a `batch_size x num_words x 100` tensor containing floating point values.

Download the GLoVe embedding vectors.

In [40]:
import gensim.downloader

In [41]:
glove_vectors = gensim.downloader.load('glove-wiki-gigaword-100')
VOCAB_SIZE = len(glove_vectors.vectors)
EMBEDDING_DIM = 100

In [42]:
news_data_train = load_dataset("ag_news", split="train").shuffle()
news_data_test = load_dataset("ag_news", split="test").shuffle()
NEWS_TRAIN = pd.DataFrame(news_data_train)
NEWS_TEST = pd.DataFrame(news_data_test)
NUM_LABELS = 4

In [43]:
NEWS_TEST.head()

Unnamed: 0,text,label
0,"PeopleSoft, SAP make bid for manufacturing dol...",3
1,Socialites unite dolphin groups Dolphin groups...,3
2,"Israel Destroys Refugee Homes, Kills One GAZA ...",0
3,EU set to launch 'transit camps' EU ministers ...,0
4,Earthquakes Shake Central Japan; Bullet Train ...,0


Train/Test Sets using GloVe embeddings.

In [44]:
news_data_test.shape

(7600, 2)

In [37]:
NEWS_TRAIN = pd.DataFrame(news_data_train)[:10000]
NEWS_TEST = pd.DataFrame(news_data_test)[:10000]

In [45]:
# pad dataset to a maximum review length in words
MAX_LEN = 200

This function will embed the dataset into sequences of 100-dimension vectors.

In [46]:
def get_glove_seq(review, max_len):
  seq = np.zeros((max_len, 100))
  for i, word in enumerate(review):
    if i < max_len and word in glove_vectors:
      seq[i] = glove_vectors[word]
  return seq

In [47]:
# Vectorize text reviews to numbers
X_NEWS_TRAIN = np.zeros((len(NEWS_TRAIN), MAX_LEN, 100))
X_NEWS_TEST = np.zeros((len(NEWS_TEST), MAX_LEN, 100))

for i, row in NEWS_TRAIN.iterrows():
  X_NEWS_TRAIN[i] = get_glove_seq(tokenize_and_clean(row['text'], stem_and_remove_stop_words=False), MAX_LEN)

for i, row in NEWS_TEST.iterrows():
  X_NEWS_TEST[i] = get_glove_seq(tokenize_and_clean(row['text'], stem_and_remove_stop_words=False), MAX_LEN)

Y_NEWS_TRAIN = np.array(NEWS_TRAIN['label'])
Y_NEWS_TEST = np.array(NEWS_TEST['label'])
NUM_LABELS = 4

# Multinomial Regression - The model 

In [69]:
# Defining neural network structure
class MultinomialBoWClassifier(nn.Module):  # inheriting from nn.Module!
  def __init__(self, max_word_len, embedding_dim, num_labels):
    super(MultinomialBoWClassifier, self).__init__()
    self.max_word_len = max_word_len
    self.embedding_dim = embedding_dim
    self.num_labels = num_labels

    self.linear = nn.Linear(max_word_len * embedding_dim, num_labels)

  def forward(self, x):
    out = None
    # flatten each individual sample while keeping the batch size dimension
    x = x.view(x.size(0), -1)
    out = self.linear(x)
    out = nn.functional.softmax(out, dim=1)
    return out

In [70]:
multibow_model = MultinomialBoWClassifier(max_word_len=MAX_LEN, embedding_dim=EMBEDDING_DIM, num_labels=NUM_LABELS)

In [71]:
# Train the model
def train(model, x_train_data, y_train_data, epochs, batch_size, lr, weight_decay):
  print('Training Started!')
  optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
  criterion = nn.CrossEntropyLoss()
  n_iter = len(x_train_data) // batch_size
  print(n_iter, 'batches per epoch')

  for epoch in range(epochs):
    num_correct = 0
    total_loss = 0.0
    model.train()

    for i in range(n_iter):
      x, y = get_batch(i, batch_size, x_train_data, y_train_data)
      x = x
      y = y.long()

      y_hat = model(x)
      loss = criterion(y_hat, y)
      total_loss += loss
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      if (epoch+1)%10 == 0 and (i+1) == n_iter:
        print('epoch:', epoch+1,',loss =',loss.item(), ', training accuracy =',(y_hat.argmax(dim=1)==y).float().mean().item())

In [72]:
BATCH_SIZE = 1000
N_EPOCHS = 100
LEARNING_RATE = 2e-3
WEIGHT_DECAY = 1e-2

In [None]:
try:
    train(multibow_model, X_NEWS_TRAIN, Y_NEWS_TRAIN, N_EPOCHS, BATCH_SIZE, lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
except:
    print("Training failed. Please check your code.")

In [74]:
def test_model_accuracy_mr(X_NEWS_TEST, Y_NEWS_TEST, multibow_model):
    try:
        multibow_model.eval()
        with torch.no_grad():
            text_vec = torch.tensor(X_NEWS_TEST, dtype=torch.float)
            probs = multibow_model(text_vec)
            pred = probs.argmax(dim=1)
        targets = torch.tensor(Y_NEWS_TEST)
        accuracy = (pred == targets).float().mean().item()
        print(classification_report(targets, pred))
        print('Accuracy: ', accuracy)
    except Exception as e:
        print('Error during execution of Test:')
        # print traceback
        traceback.print_exc()
        
    return

In [75]:
# Test the model
test_model_accuracy_mr(X_NEWS_TEST, Y_NEWS_TEST, multibow_model)

              precision    recall  f1-score   support

           0       0.90      0.88      0.89      1900
           1       0.93      0.97      0.95      1900
           2       0.87      0.79      0.83      1900
           3       0.83      0.88      0.85      1900

    accuracy                           0.88      7600
   macro avg       0.88      0.88      0.88      7600
weighted avg       0.88      0.88      0.88      7600

Accuracy:  0.8802631497383118
