# Overview

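This notebook loads the saved BERT sequence-classification model, its tokenizer, and its label encoder from disk, then uses them to predict class probabilities for new text.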

# Hyperparameters

In [16]:
# Folder containing the saved model artifacts
save_directory: str = './models/bert3'

# Maximum length of input sequences
max_len: int = 100

# Cut-off used when predicting: classes whose probability is not above this
# value are treated as near-zero and dropped from the printed result
threshold: float = 0.01

# Load Dependencies

In [2]:
from transformers import BertForSequenceClassification, BertTokenizer
import torch
import pickle
import numpy as np
import json

# Load Model and Objects



In [3]:
# Restore the tokenizer and the classifier from the artifacts folder
tokenizer = BertTokenizer.from_pretrained(save_directory)
model = BertForSequenceClassification.from_pretrained(save_directory)

# Put the model in evaluation mode, since it is only used for inference here
model.eval()

# Restore the pickled label encoder stored next to the model; predict() reads
# its `classes_` to name each output probability
label_encoder_path = f'{save_directory}/label_encoder.pkl'
with open(label_encoder_path, 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)

# Function to make predictions with probabilities and a threshold
def predict(text, threshold=0.001, max_length=128):
    """Classify ``text`` and return the class probabilities as display text.

    Relies on the notebook-level ``tokenizer``, ``model`` and
    ``label_encoder`` objects.

    Args:
        text: A single input string to classify.
        threshold: Only classes whose probability is strictly greater than
            this value are included in the result.
        max_length: Maximum tokenized length; longer inputs are truncated.
            Defaults to 128, the value this function has always used.
            NOTE(review): the notebook's ``max_len`` hyperparameter is 100 and
            is not applied here -- confirm which value matches training.

    Returns:
        One ``"label: probability"`` line per retained class (probabilities
        shown with four decimals), ordered from most to least likely and
        joined with newlines. An empty string if no class clears
        ``threshold``.
    """
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=max_length,
    )
    # Keep the inputs on the same device as the model so the forward pass
    # also works when the model has been moved off the CPU.
    inputs = inputs.to(model.device)

    with torch.no_grad():
        logits = model(**inputs).logits

    # Take the first (only) row explicitly instead of squeeze(): squeeze()
    # would collapse a single-label output to a 0-d array that cannot be
    # zipped. Move to CPU before converting to NumPy.
    probabilities = torch.nn.functional.softmax(logits, dim=-1)[0].cpu().numpy()

    # Pair each probability with its class label, dropping near-zero entries
    kept = [
        (label, float(prob))
        for label, prob in zip(label_encoder.classes_, probabilities)
        if prob > threshold
    ]

    # Most likely class first
    kept.sort(key=lambda item: item[1], reverse=True)

    return "\n".join(f"{label}: {prob:.4f}" for label, prob in kept)


# Prediction

In [20]:
# Example: classify one sample post and print the classes above the threshold
text = "When I see green, I sell. Simple. My issue is when I see red, I hold and panic and cry and scream and run and drink whisky."
prediction = predict(text, threshold)
print(prediction)

Clean: 0.9999
