The provided datasets were sub-sampled from https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews
The data was collected from IMDb movie reviews by web scraping text data from the website. 
(200 reviews, 54599 tokens in dev set, 1600 reviews, 425345 tokens in train set)
(i.e. newswire, tweets, books, blogs, etc) Movie reviews
Distribution of labels in the data (1: 105, 0: 95 in dev set, 1: 804, 0: 796 in train set)
(8953 words in dev set, 30705 words in train set)
6574 words overlap between train and dev set

Logistic Regression Model

In [9]:
import nltk
#nltk.download('punkt')
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
# https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import f1_score
from collections import Counter
import time
from nltk.corpus import stopwords

from scipy.sparse import lil_matrix
from sklearn.preprocessing import normalize
from nltk.util import ngrams
from sklearn.metrics import accuracy_score

#nltk.download('stopwords')
stopwords = stopwords.words('english')

In [10]:
def get_lists(input_file):
    """Load a tab-separated review file into parallel text and label collections.

    Every line is split on tab characters. The first field is discarded, the
    second field is taken as the review text and the third field is parsed as
    an integer label.

    Args:
        input_file: Path of the tab-separated file to read.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a list of review strings and ``y`` is
        a NumPy array holding the integer label of each review, in file order.

    Raises:
        IndexError: If a line has fewer than three tab-separated fields.
        ValueError: If the label field cannot be parsed as an integer.
    """
    # The context manager guarantees the handle is closed, even when parsing fails
    with open(input_file, 'r') as f:
        lines = [line.split('\t')[1:] for line in f]
    X = [row[0] for row in lines]
    # int() tolerates the trailing newline still attached to the last field
    y = np.array([int(row[1]) for row in lines])
    return X, y

def filter_tokens(list, stopwords):
    """Lower-case a text, split it on whitespace and drop unwanted tokens.

    Args:
        list: The raw text to tokenize. Despite its name this is a string; the
            name (which shadows the builtin) is kept for caller compatibility.
        stopwords: Container of tokens to exclude.

    Returns:
        A list of the remaining tokens, in their original order. A token is
        kept only when it is longer than one character and not a stop word.
    """
    kept = []
    for word in list.lower().split():
        if word in stopwords or len(word) <= 1:
            continue
        kept.append(word)
    return kept

def generate_ngrams(tokens):
    """Lazily yield the space-joined n-grams of ``tokens``.

    The n-gram order runs over ``range(1, 2)``, so only unigrams (n = 1) are
    produced.

    Args:
        tokens: Sequence of string tokens.

    Yields:
        Each n-gram as a single string whose tokens are joined by a space.
    """
    for order in range(1, 2):
        for gram in ngrams(tokens, order):
            yield ' '.join(gram)

def apply_tfidf(tf_matrix):
    """Turn a sparse term-frequency matrix into an L2-normalized TF-IDF matrix.

    Args:
        tf_matrix: Sparse matrix of term counts with one row per document and
            one column per vocabulary entry.

    Returns:
        The result of ``normalize(..., norm='l2', axis=1)`` applied to the
        IDF-weighted matrix in CSR format.
    """
    n_docs, n_terms = tf_matrix.shape
    # Column index of every non-zero entry; counting them per column gives the document frequency
    _, nonzero_columns = tf_matrix.nonzero()
    doc_freq = np.bincount(nonzero_columns, minlength=n_terms)
    # Smoothed inverse document frequency: log((1 + N) / (1 + df)) + 1
    smoothed_idf = np.log((1 + n_docs) / (1 + doc_freq)) + 1
    # Scale every column by its IDF while staying sparse
    weighted = tf_matrix.multiply(smoothed_idf).tocsr()
    # Rescale every row (document) to unit L2 norm
    return normalize(weighted, norm='l2', axis=1)

def get_tfidf_vectors(token_lists, max_features=None, vocabulary=None):
    """Vectorize raw documents into a dense TF-IDF array.

    Each document is lower-cased, whitespace-tokenized and stripped of stop
    words and one-character tokens (via ``filter_tokens``), converted to
    n-grams (via ``generate_ngrams``), counted, and finally weighted and
    normalized by ``apply_tfidf``.

    Note that the IDF weights are always computed from the documents passed to
    this call. When a ``vocabulary`` learned on another corpus is supplied,
    only the column layout is reused, not that corpus's IDF weights.

    Args:
        token_lists: Sequence of raw document strings (each element is passed
            to ``filter_tokens``, which lower-cases and splits it).
        max_features: If given and no ``vocabulary`` is supplied, keep only
            this many most frequent n-grams of the corpus.
        vocabulary: Optional mapping from n-gram to column index. When
            provided, no new vocabulary is built and n-grams missing from it
            are ignored.

    Returns:
        A tuple ``(features, vocabulary)`` where ``features`` is a dense array
        of shape (number of documents, vocabulary size) and ``vocabulary`` is
        the mapping that was used or built.
    """
    stopwords_set = set(stopwords)
    # Tokenize and filter every document exactly once; both the vocabulary
    # construction and the matrix fill below reuse these token lists.
    filtered_token_lists = [filter_tokens(text, stopwords_set) for text in token_lists]

    if vocabulary is None:
        # No pre-existing vocabulary supplied: count every n-gram of the corpus
        ngrams_list = (ngram for tokens in filtered_token_lists for ngram in generate_ngrams(tokens))
        vocabulary_counter = Counter(ngrams_list)

        if max_features is not None:
            # Keep only the 'max_features' most common n-grams
            high_freq_ngrams = vocabulary_counter.most_common(max_features)
        else:
            # No limit: use every unique n-gram
            high_freq_ngrams = vocabulary_counter.items()
        # Alphabetical order makes the column assignment deterministic
        sorted_vocab = sorted(ngram for ngram, _ in high_freq_ngrams)
        vocabulary = {ngram: i for i, ngram in enumerate(sorted_vocab)}

    # Term-frequency matrix (number of documents x vocabulary size)
    matrix = lil_matrix((len(filtered_token_lists), len(vocabulary)), dtype=np.float64)

    for i, tokens in enumerate(filtered_token_lists):
        token_counts = Counter(generate_ngrams(tokens))
        for token, count in token_counts.items():
            # Column index of the n-gram, or None when it is out of vocabulary
            j = vocabulary.get(token)
            if j is not None:
                matrix[i, j] = count

    # Convert the TF matrix to TF-IDF and densify for the callers
    tfidf_matrix = apply_tfidf(matrix.tocsr())
    return tfidf_matrix.toarray(), vocabulary


In [11]:
# define constants for the files we are using
TRAIN_FILE = "movie_reviews_train.txt"
TEST_FILE = "movie_reviews_test.txt"

train_corpus, y_train = get_lists(TRAIN_FILE)

# Time the custom vectorizer. time.perf_counter() is used instead of time.time()
# because it is a monotonic, high-resolution clock meant for measuring short
# durations, whereas the wall clock can be adjusted while the cell is running.

print("Custom TF-IDF Vectorizer")
start = time.perf_counter()
custom_features, custom_vocabulary = get_tfidf_vectors(train_corpus)
end = time.perf_counter()
print("Time taken: ", end-start, " seconds")

# Time sklearn's TfidfVectorizer on the same corpus with the same stop-word list.

print("\nsklearn's TF-IDF Vectorizer")
start = time.perf_counter()
sklearn_vectorizer = TfidfVectorizer(stop_words=stopwords)
sklearn_features = sklearn_vectorizer.fit_transform(train_corpus)
sklearn_vocabulary = sklearn_vectorizer.vocabulary_
end = time.perf_counter()
print("Time taken: ", end-start, " seconds")

Custom TF-IDF Vectorizer
Time taken:  0.3628091812133789  seconds

sklearn's TF-IDF Vectorizer
Time taken:  0.12508106231689453  seconds


In [12]:
# 1. Number of distinct terms in the vocabulary built by the custom vectorizer
print(f"Custom TF-IDF Vectorizer vocabulary size: {len(custom_vocabulary)}")

# 2. Number of distinct terms in the vocabulary learned by sklearn's TfidfVectorizer
print(f"sklearn TF-IDF Vectorizer vocabulary size: {len(sklearn_vocabulary)}")

# 5. Sparsity of the custom features: percentage of entries that are exactly zero
custom_nz = np.count_nonzero(custom_features)
custom_total = custom_features.shape[0] * custom_features.shape[1]
custom_sparsity = (1 - custom_nz / custom_total) * 100
print("Custom TF-IDF Features Sparsity (% of zeros):", custom_sparsity)

# 6. Sparsity of the sklearn features, computed the same way from the matrix's nnz attribute
sklearn_nz = sklearn_features.nnz
sklearn_total = sklearn_features.shape[0] * sklearn_features.shape[1]
sklearn_sparsity = (1 - sklearn_nz / sklearn_total) * 100
print("Sklearn TF-IDF Features Sparsity (% of zeros):", sklearn_sparsity)


Custom TF-IDF Vectorizer vocabulary size: 43822
sklearn TF-IDF Vectorizer vocabulary size: 22460
Custom TF-IDF Features Sparsity (% of zeros): 99.75368821596459
Sklearn TF-IDF Features Sparsity (% of zeros): 99.562071460374


The vocabulary generated by the custom vectorizer is larger than the vocabulary generated by the sklearn TfidfVectorizer. The custom vectorizer implementation includes generating n-grams up to 2, encompassing both unigrams and bigrams, which utilizes individual words as well as combinations of words. In comparison, the default setting of the sklearn TfidfVectorizer only utilizes unigrams. The TFIDF implementation may also lead to a larger vocabulary produced by the custom vectorizer because its IDF computation adds smoothing terms to the log and an additional +1 to handle division by zero scenarios (np.log((1 + num_docs) / (1 + df)) + 1). This adjustment may affect the scaling of IDF values, increasing them and consequently enhancing the importance of all terms in general. In comparison, the sklearn TfidfVectorizer's IDF computation only adds a smoothing term inside the logarithm, effectively handling the importance of terms.

N-grams were utilized because they capture more contextual information compared to individual tokens or unigrams, which assists in context-specific information tasks such as sentiment analysis. A prebuilt NLTK n-gram generator is used to reduce memory consumption and increase processing efficiency. SciPy lil_matrix is utilized for constructing the TF matrix as it allows for more efficient additions of terms where the matrix dimensions are not predefined and for non-sequential updates. For the TFIDF calculation, SciPy sparse is utilized for directly multiplying the IDF by the TF, without converting the sparse matrix into a dense format. L2 normalization is utilized for handling variable lengths in similarity computations as it scales each term by the overall length of the token list, enabling a more accurate comparison of variable-length token lists.

Logistic Regression

In [13]:
# Logistic regression on the custom TF-IDF features (full vocabulary)
custom_classifier = LogisticRegression()

# Load the train and test splits
TRAIN_FILE = "movie_reviews_train.txt"
TEST_FILE = "movie_reviews_test.txt"
train_corpus, y_train = get_lists(TRAIN_FILE)
test_corpus, y_test = get_lists(TEST_FILE)

# Build the vocabulary on the training corpus and reuse it for the test corpus
# so that both feature matrices share the same column layout
custom_features, custom_vocabulary = get_tfidf_vectors(train_corpus)
custom_test_features, _ = get_tfidf_vectors(test_corpus, vocabulary=custom_vocabulary)

custom_classifier.fit(custom_features, y_train)
custom_predictions = custom_classifier.predict(custom_test_features)


# Accuracy and F1 of the custom-feature model on the test data
custom_accuracy = accuracy_score(y_test, custom_predictions)
f1_custom = f1_score(y_test, custom_predictions)

print(f"Custom TFIDF Vectorizer - Accuracy: {custom_accuracy:.4f}, F1 Score: {f1_custom:.4f}")

# Same pipeline, with features extracted by sklearn's TfidfVectorizer
sklearn_classifier = LogisticRegression()

sklearn_vectorizer = TfidfVectorizer(stop_words=stopwords)
sklearn_features = sklearn_vectorizer.fit_transform(train_corpus)
sklearn_test_features = sklearn_vectorizer.transform(test_corpus)

sklearn_classifier.fit(sklearn_features, y_train)
sklearn_predictions = sklearn_classifier.predict(sklearn_test_features)

sklearn_accuracy = accuracy_score(y_test, sklearn_predictions)
sklearn_f1 = f1_score(y_test, sklearn_predictions)

# Label spelling corrected ("Sklean" -> "Sklearn")
print(f"Sklearn TFIDF Vectorizer - Accuracy: {sklearn_accuracy:.4f}, F1 Score: {sklearn_f1:.4f}")


Custom TFIDF Vectorizer - Accuracy: 0.8200, F1 Score: 0.8302
Sklean TFIDF Vectorizer - Accuracy: 0.8150, F1 Score: 0.8295


In [14]:
# Logistic regression on the custom TF-IDF features, restricted to 1000 features
custom_classifier = LogisticRegression()

# Load the train and test splits
TRAIN_FILE = "movie_reviews_train.txt"
TEST_FILE = "movie_reviews_test.txt"
train_corpus, y_train = get_lists(TRAIN_FILE)
test_corpus, y_test = get_lists(TEST_FILE)

# Keep only the 1000 most frequent training n-grams and reuse that vocabulary
# for the test corpus so that both feature matrices share the same columns
custom_features, custom_vocabulary = get_tfidf_vectors(train_corpus, max_features=1000)
custom_test_features, _ = get_tfidf_vectors(test_corpus, vocabulary=custom_vocabulary)

custom_classifier.fit(custom_features, y_train)
custom_predictions = custom_classifier.predict(custom_test_features)


# Accuracy and F1 of the custom-feature model on the test data
custom_accuracy = accuracy_score(y_test, custom_predictions)
f1_custom = f1_score(y_test, custom_predictions)

print(f"Custom TFIDF Vectorizer - Accuracy: {custom_accuracy:.4f}, F1 Score: {f1_custom:.4f}")

# Same pipeline, with sklearn's TfidfVectorizer limited to 1000 features
sklearn_classifier = LogisticRegression()

sklearn_vectorizer = TfidfVectorizer(stop_words=stopwords, max_features=1000)
sklearn_features = sklearn_vectorizer.fit_transform(train_corpus)
sklearn_test_features = sklearn_vectorizer.transform(test_corpus)

sklearn_classifier.fit(sklearn_features, y_train)
sklearn_predictions = sklearn_classifier.predict(sklearn_test_features)

sklearn_accuracy = accuracy_score(y_test, sklearn_predictions)
sklearn_f1 = f1_score(y_test, sklearn_predictions)

# Label spelling corrected ("Sklean" -> "Sklearn")
print(f"Sklearn TFIDF Vectorizer - Accuracy: {sklearn_accuracy:.4f}, F1 Score: {sklearn_f1:.4f}")


Custom TFIDF Vectorizer - Accuracy: 0.7850, F1 Score: 0.8037
Sklean TFIDF Vectorizer - Accuracy: 0.8050, F1 Score: 0.8219


The difference between the custom TFIDF vectorizer and the sklearn TFIDF vectorizer results is minimal indicating that both vectorizers are similarly effective when selecting the 1000 most relevant terms from the data.

According to sklearn's documentation for the TfidfVectorizer, the smoothing parameter smooth_idf (default behavior True) essentially increases the document frequency by 1, preventing division by zero and reducing variance in IDF values, consequently allowing the vectorizer to not be biased towards high-frequency terms and to pick up outlier or uncommon terms, making the vectorization process more robust. As high-frequency terms do not bias or skew the feature weights, classification performance is more accurate.

Feedforward Neural Network Model

In [32]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, TensorDataset
from scipy.sparse import issparse
# Module-level ReLU activation instance
relu = nn.ReLU()

# Run on CUDA when it is available and fall back to the CPU otherwise.
# (Selecting Apple's MPS backend via torch.backends.mps.is_available() is
# intentionally left disabled.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [33]:
class feedforward(nn.Module):
    """Feed-forward binary classifier that emits one logit per example.

    Architecture, in the order applied by ``forward``:
    Linear(input_dim, 512) -> BatchNorm -> LeakyReLU -> Dropout(0.3) ->
    Linear(512, 256) -> BatchNorm -> ReLU -> Dropout(0.3) -> Linear(256, 1).

    Args:
        input_dim: Number of input features per example. Defaults to 10000,
            the width that was previously hard-coded.
    """

    def __init__(self, input_dim=10000):
        super().__init__()
        # First fully connected layer: input_dim -> 512
        self.fc1 = nn.Linear(input_dim, 512)
        # Leaky activation for the first hidden layer (small slope for negative inputs)
        self.act1 = nn.LeakyReLU(negative_slope=0.01)
        # Batch normalization of the first layer's pre-activations
        self.bn1 = nn.BatchNorm1d(512)
        # Dropout to reduce overfitting
        self.dropout1 = nn.Dropout(0.3)
        # Second fully connected layer: 512 -> 256
        self.fc2 = nn.Linear(512, 256)
        # Batch normalization of the second layer's pre-activations
        self.bn2 = nn.BatchNorm1d(256)
        # Dropout after the second hidden layer
        self.dropout2 = nn.Dropout(0.3)
        # Output layer producing a single logit
        self.fc3 = nn.Linear(256, 1)
        # The second activation is owned by the module rather than read from a
        # notebook-level global, so the class works on its own. ReLU has no
        # parameters, so the state_dict layout is unchanged.
        self.act2 = nn.ReLU()

    def forward(self, X):
        """Return the raw logits of shape (batch, 1) for input ``X``."""
        # First hidden block: linear -> batch norm -> LeakyReLU -> dropout
        X = self.fc1(X)
        X = self.bn1(X)
        X = self.act1(X)
        X = self.dropout1(X)
        # Second hidden block: linear -> batch norm -> ReLU -> dropout
        X = self.fc2(X)
        X = self.bn2(X)
        X = self.act2(X)
        X = self.dropout2(X)
        # Output layer: no activation here, callers receive logits
        X = self.fc3(X)
        return X

    def predict(self, X):
        """Return sigmoid probabilities for input ``X``.

        This only applies a sigmoid to ``forward``; it does not switch the
        module to evaluation mode or disable gradient tracking.
        """
        X = self.forward(X)
        return torch.sigmoid(X)

In [34]:
# Load the train / test / validation splits and vectorize them with both the
# custom and the sklearn TF-IDF vectorizers, limited to 10000 features each.

TRAIN_FILE = "movie_reviews_train.txt"
TEST_FILE = "movie_reviews_test.txt"
VALIDATION_FILE = "movie_reviews_dev.txt"

train_corpus, y_train =  get_lists(TRAIN_FILE)
test_corpus, y_test = get_lists(TEST_FILE)
validation_corpus, y_validation = get_lists(VALIDATION_FILE)

# Custom vectorizer: the vocabulary is built on the training corpus and reused
# for the validation and test corpora so all three matrices share their columns.
custom_features, custom_vocabulary = get_tfidf_vectors(train_corpus, max_features=10000)
custom_features_validation, _ = get_tfidf_vectors(validation_corpus, vocabulary=custom_vocabulary)
custom_features_test, _ = get_tfidf_vectors(test_corpus, vocabulary=custom_vocabulary)

# sklearn vectorizer: fitted on the training corpus only, then applied to the other splits
sklearn_vectorizer = TfidfVectorizer(stop_words=stopwords, max_features=10000)
sklearn_features = sklearn_vectorizer.fit_transform(train_corpus)
sklearn_features_validation = sklearn_vectorizer.transform(validation_corpus)
sklearn_features_test = sklearn_vectorizer.transform(test_corpus)
sklearn_vocabulary = sklearn_vectorizer.vocabulary_


In [35]:
# Model, loss, optimizer and LR scheduler for the custom TF-IDF features.
# BCEWithLogitsLoss is applied to the network output directly; the scheduler
# is stepped with the validation loss ('min' mode) inside train_model.
# NOTE(review): the `verbose` argument of ReduceLROnPlateau is reported as
# deprecated in newer PyTorch releases — confirm against the installed version.
custom_model = feedforward()
custom_model_loss_fn = nn.BCEWithLogitsLoss()
custom_model_optimizer = optim.Adam(custom_model.parameters(), lr=1e-3, weight_decay=1e-4)
custom_model_scheduler = ReduceLROnPlateau(custom_model_optimizer, 'min', patience=5, factor=0.5, verbose=True)

# Identical, independent setup for the sklearn TF-IDF features
sklearn_model = feedforward()
sklearn_model_loss_fn = nn.BCEWithLogitsLoss()
sklearn_model_optimizer = optim.Adam(sklearn_model.parameters(), lr=1e-3, weight_decay=1e-4)
sklearn_model_scheduler = ReduceLROnPlateau(sklearn_model_optimizer, 'min', patience=5, factor=0.5, verbose=True)


In [40]:
# Train the model for 50 epochs on both custom and sklearn vectors
def create_data_loader(features, labels, batch_size, shuffle, num_workers=4):
    """Wrap features and labels in a ``DataLoader`` over float32 tensors.

    Args:
        features: Array-like of feature rows, converted with ``torch.tensor``.
        labels: Array-like of labels; converted to float32 and given a
            trailing dimension of size one via ``unsqueeze(1)``.
        batch_size: Number of examples per batch.
        shuffle: Whether the loader reshuffles the data.
        num_workers: Number of loader worker processes.

    Returns:
        A ``DataLoader`` iterating over ``(features, labels)`` batches.
    """
    feature_tensor = torch.tensor(features, dtype=torch.float32)
    label_column = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)
    return DataLoader(
        TensorDataset(feature_tensor, label_column),
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
    )

def sparse_matrix_to_tensor(matrix):
    """Convert a SciPy sparse matrix or a dense array into a float32 tensor.

    Args:
        matrix: A SciPy sparse matrix (densified with ``toarray()``) or any
            input accepted by ``torch.tensor``.

    Returns:
        A dense ``torch.float32`` tensor with the same values.
    """
    # Sparse inputs are densified first; everything else is passed through as is
    dense = matrix.toarray() if issparse(matrix) else matrix
    return torch.tensor(dense, dtype=torch.float32)

def train_model(train_loader, validation_loader, model, loss_fn, optimizer, device, scheduler, epochs, patience):
    """Train ``model`` with per-epoch validation, LR scheduling and early stopping.

    Args:
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        validation_loader: Iterable of ``(inputs, labels)`` validation batches.
        model: The module to train; it is moved to ``device`` in place.
        loss_fn: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer updating the model parameters.
        device: Device the model and every batch are moved to.
        scheduler: Object whose ``step(validation_loss)`` is called once per epoch.
        epochs: Maximum number of epochs.
        patience: Number of consecutive epochs without a new best validation
            loss after which training stops.

    Returns:
        The same ``model`` object, in the state reached after the last epoch
        that was run (the best-scoring weights are not restored).
    """

    def _mean_training_loss():
        # One optimization pass over the training batches; returns the mean batch loss
        model.train()
        running = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            batch_loss = loss_fn(model(inputs), labels)
            batch_loss.backward()
            optimizer.step()
            running += batch_loss.item()
        return running / len(train_loader)

    def _mean_validation_loss():
        # Gradient-free pass over the validation batches; returns the mean batch loss
        model.eval()
        running = 0
        with torch.no_grad():
            for inputs, labels in validation_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                running += loss_fn(model(inputs), labels).item()
        return running / len(validation_loader)

    model.to(device)

    best_validation_loss = float('inf')  # Lowest validation loss seen so far
    epochs_without_improvement = 0  # Consecutive epochs that failed to beat it

    for epoch in range(epochs):
        train_loss = _mean_training_loss()
        print(f'Epoch {epoch+1}: Training Loss = {train_loss}')

        validation_loss = _mean_validation_loss()
        print(f'Epoch {epoch+1}: Validation Loss = {validation_loss}')

        # Let the scheduler react to the validation loss
        scheduler.step(validation_loss)

        # Only a strictly lower loss counts as an improvement
        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs.")
            break

    return model

# --- Custom TF-IDF features: tensors -> datasets -> loaders ---
# Convert each split's feature matrix into a dense float32 tensor
custom_features_tensor = sparse_matrix_to_tensor(custom_features)
custom_features_validation_tensor = sparse_matrix_to_tensor(custom_features_validation)
custom_features_test_tensor = sparse_matrix_to_tensor(custom_features_test)

# Pair the features with float32 labels reshaped to a column via unsqueeze(1)
custom_train_dataset = TensorDataset(custom_features_tensor, torch.tensor(y_train, dtype=torch.float32).unsqueeze(1))
custom_validation_dataset = TensorDataset(custom_features_validation_tensor, torch.tensor(y_validation, dtype=torch.float32).unsqueeze(1))
custom_test_dataset = TensorDataset(custom_features_test_tensor, torch.tensor(y_test, dtype=torch.float32).unsqueeze(1))

# Only the training loader shuffles; validation and test keep their order
custom_train_loader = DataLoader(custom_train_dataset, batch_size=32, shuffle=True, num_workers=4)
custom_validation_loader = DataLoader(custom_validation_dataset, batch_size=32, shuffle=False, num_workers=4)
custom_test_loader = DataLoader(custom_test_dataset, batch_size=32, shuffle=False, num_workers=4)

# --- sklearn TF-IDF features: same steps (sparse matrices are densified by the helper) ---
sklearn_features_tensor = sparse_matrix_to_tensor(sklearn_features)
sklearn_features_validation_tensor = sparse_matrix_to_tensor(sklearn_features_validation)
sklearn_features_test_tensor = sparse_matrix_to_tensor(sklearn_features_test)

sklearn_train_dataset = TensorDataset(sklearn_features_tensor, torch.tensor(y_train, dtype=torch.float32).unsqueeze(1))
sklearn_validation_dataset = TensorDataset(sklearn_features_validation_tensor, torch.tensor(y_validation, dtype=torch.float32).unsqueeze(1))
sklearn_test_dataset = TensorDataset(sklearn_features_test_tensor, torch.tensor(y_test, dtype=torch.float32).unsqueeze(1))

sklearn_train_loader = DataLoader(sklearn_train_dataset, batch_size=32, shuffle=True, num_workers=4)
sklearn_validation_loader = DataLoader(sklearn_validation_dataset, batch_size=32, shuffle=False, num_workers=4)
sklearn_test_loader = DataLoader(sklearn_test_dataset, batch_size=32, shuffle=False, num_workers=4)

# Train on the custom features: at most 50 epochs, early stopping after 5
# epochs without a new best validation loss.
# NOTE(review): custom_model and its optimizer/scheduler are created in an
# earlier cell, so re-running only this cell continues training the same
# objects instead of starting from fresh weights.
trained_custom_model = train_model(
    train_loader=custom_train_loader,
    validation_loader=custom_validation_loader,
    model=custom_model,
    loss_fn=custom_model_loss_fn,
    optimizer=custom_model_optimizer,
    device=device,
    scheduler=custom_model_scheduler,
    epochs=50,
    patience=5
)

# Train on the sklearn features with the same settings
trained_sklearn_model = train_model(
    train_loader=sklearn_train_loader,
    validation_loader=sklearn_validation_loader,
    model=sklearn_model,
    loss_fn=sklearn_model_loss_fn,
    optimizer=sklearn_model_optimizer,
    device=device,
    scheduler=sklearn_model_scheduler,
    epochs=50,
    patience=5
)

Epoch 1: Training Loss = 0.0009568023291649297
Epoch 1: Validation Loss = 0.608829357794353
Epoch 00014: reducing learning rate of group 0 to 2.5000e-04.
Epoch 2: Training Loss = 0.001053819251828827
Epoch 2: Validation Loss = 0.5844955359186444
Epoch 3: Training Loss = 0.0016933093522675335
Epoch 3: Validation Loss = 0.5978603192738124
Epoch 4: Training Loss = 0.000992202702909708
Epoch 4: Validation Loss = 0.6012100534779685
Epoch 5: Training Loss = 0.0010565883934032171
Epoch 5: Validation Loss = 0.596877498286111
Epoch 6: Training Loss = 0.0008121662749908864
Epoch 6: Validation Loss = 0.5838739063058581
Epoch 7: Training Loss = 0.0009749176539480686
Epoch 7: Validation Loss = 0.6092507881777627
Epoch 00020: reducing learning rate of group 0 to 1.2500e-04.
Epoch 8: Training Loss = 0.0010520043026190252
Epoch 8: Validation Loss = 0.6309189328125545
Epoch 9: Training Loss = 0.0007125991614884697
Epoch 9: Validation Loss = 0.6017114009175982
Epoch 10: Training Loss = 0.000775118314195

In [41]:
!pip install torcheval

from torcheval.metrics.functional import binary_f1_score
from torcheval.metrics import BinaryAUROC, BinaryAccuracy

def evaluate_model(model, data_loader, device):
    """Compute F1, AUROC and accuracy of a binary classifier on a data loader.

    The model output is passed through a sigmoid; the rounded probabilities
    are used as hard labels for F1 and accuracy, while the probabilities
    themselves feed the AUROC metric.

    Args:
        model: Module returning one logit per example with shape (batch, 1).
        data_loader: Iterable of ``(inputs, labels)`` batches where labels
            have shape (batch, 1).
        device: Device every batch is moved to before the forward pass.

    Returns:
        A tuple ``(f1, auroc, accuracy)`` of Python floats.
    """
    model.eval()
    accuracy_metric = BinaryAccuracy()
    auroc_metric = BinaryAUROC()
    all_predictions = []
    all_targets = []

    with torch.no_grad():
        for batch_inputs, batch_targets in data_loader:
            batch_inputs = batch_inputs.to(device)
            # Drop the trailing singleton dimension so targets are 1-D
            batch_targets = batch_targets.to(device).squeeze(1)
            scores = torch.sigmoid(model(batch_inputs)).squeeze(1)
            hard_labels = torch.round(scores)

            # Keep every batch's results for the F1 computation at the end
            all_predictions += list(hard_labels.cpu().numpy())
            all_targets += list(batch_targets.cpu().numpy())
            # The streaming metrics are updated batch by batch
            accuracy_metric.update(hard_labels, batch_targets)
            auroc_metric.update(scores, batch_targets)

    f1_value = binary_f1_score(torch.tensor(all_predictions), torch.tensor(all_targets))
    auroc_value = auroc_metric.compute()
    accuracy_value = accuracy_metric.compute()

    return f1_value.item(), auroc_value.item(), accuracy_value.item()

# Evaluate both trained networks on their respective test loaders.
# Note: f1_custom reuses (and overwrites) the name assigned in the logistic-regression cells.
f1_custom, auroc_custom, accuracy_custom = evaluate_model(trained_custom_model, custom_test_loader, device)
f1_sklearn, auroc_sklearn, accuracy_sklearn = evaluate_model(trained_sklearn_model, sklearn_test_loader, device)

# Label spelling corrected ("Sklean" -> "Sklearn")
print(f"Custom TFIDF Vectorizer - F1 Score: {f1_custom:.4f}, AUROC: {auroc_custom:.4f}, Accuracy: {accuracy_custom:.4f}")
print(f"Sklearn TFIDF Vectorizer - F1 Score: {f1_sklearn:.4f}, AUROC: {auroc_sklearn:.4f}, Accuracy: {accuracy_sklearn:.4f}")

Defaulting to user installation because normal site-packages is not writeable
Custom TFIDF Vectorizer - F1 Score: 0.8169, AUROC: 0.8911, Accuracy: 0.8050
Sklean TFIDF Vectorizer - F1 Score: 0.8195, AUROC: 0.8903, Accuracy: 0.8150
