<a href="https://colab.research.google.com/github/adc257/AmEx-Project/blob/Ye_branch/LSTM_implementation__Noise_DE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LSTM Implementation

In [7]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from transformers import TextClassificationPipeline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import BertForSequenceClassification, BertTokenizer, AdamW, Trainer, TrainingArguments
import torch
import torch.nn as nn
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiStepLR
from tqdm import tqdm

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Load Data

In [9]:
# Import train and test data
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Separate train labels and text
train_labels = train['category']
train_text = train['text']
train_labels_list = train_labels.tolist()

# Separate test labels and text
test_labels = test['category']
test_text = test['text']
test_labels_list = test_labels.tolist()

### Randomly select 10% of the dataset to be noised

In [10]:
# Set the random seed for reproducibility
np.random.seed(42)

# Calculate the number of samples for 10% of the dataset
sample_size = int(len(train) * 0.1)

# Conduct sampling
sampled_data = train.sample(n=sample_size, replace=False)

sampled_data

Unnamed: 0,text,category
6883,Is it possible for me to change my PIN number?,change_pin
5836,I'm not sure why my card didn't work,declined_card_payment
8601,I don't think my top up worked,top_up_failed
2545,Can you explain why my payment was charged a fee?,card_payment_fee_charged
8697,How long does a transfer from a UK account tak...,balance_not_updated_after_bank_transfer
...,...,...
7089,I have been trying to exchange this for crypto...,beneficiary_not_allowed
4238,I want to transfer money using my credit card.,topping_up_by_card
4901,I can't remember what my password is,passcode_forgotten
3458,My cheque is taking a while,balance_not_updated_after_cheque_or_cash_deposit


### Shuffle 10% of Labels in Dn to Create Noise

In [11]:
# Randomly sample 10% of the rows from the Dn
sampled_rows = sampled_data.sample(frac=0.1, random_state=42)

# Add new column 'k' and initialize with 0
sampled_data['k'] = 0

# Store the original labels ('l') before shuffling the 'category' column
sampled_data['l'] = sampled_data['category']  # Initialize 'l' column with original category values
sampled_data.loc[sampled_rows.index, 'l'] = sampled_data.loc[sampled_rows.index, 'category']

# Convert the selected rows' 'category' column to a numpy array
sampled_category_values = np.array(sampled_rows['category'])

# Shuffle the array
np.random.shuffle(sampled_category_values)

# Update the 'category' column with the shuffled values in the original DataFrame
sampled_data.loc[sampled_rows.index, 'category'] = sampled_category_values

# Update 'k' to 1 for the randomly selected 10% of rows
sampled_data.loc[sampled_rows.index, 'k'] = 1

# Update 'l\'' for the shuffled rows
sampled_data.loc[sampled_rows.index, 'l\''] = sampled_category_values

# Display the DataFrame to verify the changes
sampled_data.head(25)


Unnamed: 0,text,category,k,l,l'
6883,Is it possible for me to change my PIN number?,change_pin,0,change_pin,
5836,I'm not sure why my card didn't work,declined_card_payment,0,declined_card_payment,
8601,I don't think my top up worked,top_up_failed,0,top_up_failed,
2545,Can you explain why my payment was charged a fee?,card_payment_fee_charged,0,card_payment_fee_charged,
8697,How long does a transfer from a UK account tak...,balance_not_updated_after_bank_transfer,0,balance_not_updated_after_bank_transfer,
5573,Why am I getting declines when trying to make ...,declined_transfer,0,declined_transfer,
576,What is the $1 transaction on my account?,extra_charge_on_statement,0,extra_charge_on_statement,
6832,It looks like my card payment was sent back.,reverted_card_payment?,0,reverted_card_payment?,
7111,Why am I unable to transfer money when I was a...,beneficiary_not_allowed,0,beneficiary_not_allowed,
439,What if there is an error on the exchange rate?,card_payment_wrong_exchange_rate,0,card_payment_wrong_exchange_rate,


## Step 1: Define and Train Deep Model

In [12]:
device_name = 'cuda'
model_name = 'bert-base-uncased'

# Input Text
input_text = np.array(sampled_data['text'])
labels = np.array(sampled_data['category'])

# Load tokenizer
tokenizer = BertTokenizer.from_pretrained(model_name)

# Define labels
unique_labels = set(label for label in labels)
label2id = {label: id for id, label in enumerate(unique_labels)}
id2label = {id: label for label, id in label2id.items()}

# Tokenize input text
input_ids = tokenizer.batch_encode_plus(input_text, padding=True, truncation=True, return_tensors='pt')['input_ids']
attention_mask = tokenizer.batch_encode_plus(input_text, padding=True, truncation=True, return_tensors='pt')['attention_mask']

# Tokenize input labels
labels_encoded = [label2id[label] for label in labels]
labels_tensor = torch.tensor(labels_encoded)


# labels_tensor with DataLoader
dataset = torch.utils.data.TensorDataset(input_ids, attention_mask, labels_tensor)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [13]:
# Load pre-trained model
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=77).to(device_name)

# Define optimizer with SGD
optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

# Define learning rate scheduler
scheduler = MultiStepLR(optimizer, milestones=[100, 150], gamma=0.1)

# Define training parameters
num_epochs = 100
batch_size = 50

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch in tqdm(dataloader, desc=f"Epoch {epoch+1}"):
        input_ids, attention_mask, labels = batch
        input_ids, attention_mask, labels = input_ids.to(device_name), attention_mask.to(device_name), labels.to(device_name)

        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    # Update learning rate
    scheduler.step()

    print(f"Epoch {epoch+1}, Loss: {total_loss}")

# Save the trained model
model.save_pretrained("path_to_save_model")

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 500/500 [00:29<00:00, 17.20it/s]


Epoch 1, Loss: 135882.14225816727


Epoch 2: 100%|██████████| 500/500 [00:27<00:00, 18.00it/s]


Epoch 2, Loss: 159696.84820365906


Epoch 3: 100%|██████████| 500/500 [00:27<00:00, 18.01it/s]


Epoch 3, Loss: 158685.65904300846


Epoch 4: 100%|██████████| 500/500 [00:27<00:00, 18.04it/s]


Epoch 4, Loss: 165535.74538046122


Epoch 5: 100%|██████████| 500/500 [00:27<00:00, 18.07it/s]


Epoch 5, Loss: 159486.94065856934


Epoch 6: 100%|██████████| 500/500 [00:27<00:00, 17.90it/s]


Epoch 6, Loss: 168385.94680023193


Epoch 7: 100%|██████████| 500/500 [00:28<00:00, 17.78it/s]


Epoch 7, Loss: 165065.66522216797


Epoch 8: 100%|██████████| 500/500 [00:28<00:00, 17.73it/s]


Epoch 8, Loss: 174554.4289636612


Epoch 9: 100%|██████████| 500/500 [00:28<00:00, 17.36it/s]


Epoch 9, Loss: 171601.96059799194


Epoch 10: 100%|██████████| 500/500 [00:28<00:00, 17.74it/s]


Epoch 10, Loss: 166311.5939731598


Epoch 11: 100%|██████████| 500/500 [00:27<00:00, 17.95it/s]


Epoch 11, Loss: 164962.24010753632


Epoch 12: 100%|██████████| 500/500 [00:27<00:00, 17.87it/s]


Epoch 12, Loss: 163684.98950004578


Epoch 13: 100%|██████████| 500/500 [00:34<00:00, 14.64it/s]


Epoch 13, Loss: 169049.0961303711


Epoch 14: 100%|██████████| 500/500 [00:28<00:00, 17.71it/s]


Epoch 14, Loss: 168253.52703475952


Epoch 15: 100%|██████████| 500/500 [00:27<00:00, 17.91it/s]


Epoch 15, Loss: 172254.82001113892


Epoch 16: 100%|██████████| 500/500 [00:29<00:00, 17.03it/s]


Epoch 16, Loss: 166394.5466594696


Epoch 17: 100%|██████████| 500/500 [00:27<00:00, 17.93it/s]


Epoch 17, Loss: 168593.4414577484


Epoch 18: 100%|██████████| 500/500 [00:27<00:00, 17.87it/s]


Epoch 18, Loss: 168246.68621826172


Epoch 19: 100%|██████████| 500/500 [00:27<00:00, 17.98it/s]


Epoch 19, Loss: 170426.86190199852


Epoch 20: 100%|██████████| 500/500 [00:27<00:00, 17.90it/s]


Epoch 20, Loss: 168699.17707061768


Epoch 21: 100%|██████████| 500/500 [00:27<00:00, 17.93it/s]


Epoch 21, Loss: 172906.2579345703


Epoch 22: 100%|██████████| 500/500 [00:28<00:00, 17.82it/s]


Epoch 22, Loss: 173439.89863967896


Epoch 23: 100%|██████████| 500/500 [00:28<00:00, 17.77it/s]


Epoch 23, Loss: 167520.81421339512


Epoch 24: 100%|██████████| 500/500 [00:28<00:00, 17.77it/s]


Epoch 24, Loss: 171987.4768562317


Epoch 25: 100%|██████████| 500/500 [00:27<00:00, 18.01it/s]


Epoch 25, Loss: 170706.38022387028


Epoch 26: 100%|██████████| 500/500 [00:27<00:00, 17.94it/s]


Epoch 26, Loss: 169598.26949310303


Epoch 27: 100%|██████████| 500/500 [00:27<00:00, 17.93it/s]


Epoch 27, Loss: 167340.26289367676


Epoch 28: 100%|██████████| 500/500 [00:27<00:00, 18.02it/s]


Epoch 28, Loss: 167461.28379821777


Epoch 29: 100%|██████████| 500/500 [00:27<00:00, 17.97it/s]


Epoch 29, Loss: 173399.42975234985


Epoch 30: 100%|██████████| 500/500 [00:27<00:00, 17.98it/s]


Epoch 30, Loss: 165096.9937019348


Epoch 31: 100%|██████████| 500/500 [00:27<00:00, 17.89it/s]


Epoch 31, Loss: 168497.20964050293


Epoch 32: 100%|██████████| 500/500 [00:27<00:00, 17.94it/s]


Epoch 32, Loss: 172433.14237594604


Epoch 33: 100%|██████████| 500/500 [00:28<00:00, 17.84it/s]


Epoch 33, Loss: 170231.92121124268


Epoch 34: 100%|██████████| 500/500 [00:27<00:00, 17.91it/s]


Epoch 34, Loss: 172716.5365540022


Epoch 35: 100%|██████████| 500/500 [00:28<00:00, 17.79it/s]


Epoch 35, Loss: 165548.472407341


Epoch 36: 100%|██████████| 500/500 [00:27<00:00, 17.98it/s]


Epoch 36, Loss: 170839.52249145508


Epoch 37: 100%|██████████| 500/500 [00:27<00:00, 18.06it/s]


Epoch 37, Loss: 170530.05793380737


Epoch 38: 100%|██████████| 500/500 [00:27<00:00, 18.05it/s]


Epoch 38, Loss: 166152.53896093369


Epoch 39: 100%|██████████| 500/500 [00:27<00:00, 18.02it/s]


Epoch 39, Loss: 172025.16402435303


Epoch 40: 100%|██████████| 500/500 [00:27<00:00, 18.02it/s]


Epoch 40, Loss: 163099.47993469238


Epoch 41: 100%|██████████| 500/500 [00:27<00:00, 18.12it/s]


Epoch 41, Loss: 170289.93268871307


Epoch 42: 100%|██████████| 500/500 [00:27<00:00, 18.04it/s]


Epoch 42, Loss: 171580.57163273776


Epoch 43: 100%|██████████| 500/500 [00:27<00:00, 17.99it/s]


Epoch 43, Loss: 165810.70091629028


Epoch 44: 100%|██████████| 500/500 [00:27<00:00, 17.87it/s]


Epoch 44, Loss: 166641.25768089294


Epoch 45: 100%|██████████| 500/500 [00:27<00:00, 17.88it/s]


Epoch 45, Loss: 168450.23046326637


Epoch 46: 100%|██████████| 500/500 [00:28<00:00, 17.79it/s]


Epoch 46, Loss: 164639.26379466057


Epoch 47: 100%|██████████| 500/500 [00:27<00:00, 17.93it/s]


Epoch 47, Loss: 168434.36044883728


Epoch 48: 100%|██████████| 500/500 [00:27<00:00, 17.97it/s]


Epoch 48, Loss: 169781.57149648666


Epoch 49: 100%|██████████| 500/500 [00:28<00:00, 17.45it/s]


Epoch 49, Loss: 172175.8016986847


Epoch 50: 100%|██████████| 500/500 [00:28<00:00, 17.74it/s]


Epoch 50, Loss: 170994.36574983597


Epoch 51: 100%|██████████| 500/500 [00:27<00:00, 18.09it/s]


Epoch 51, Loss: 166446.96229171753


Epoch 52: 100%|██████████| 500/500 [00:27<00:00, 18.10it/s]


Epoch 52, Loss: 166252.3349018097


Epoch 53: 100%|██████████| 500/500 [00:27<00:00, 17.96it/s]


Epoch 53, Loss: 168955.7211186029


Epoch 54: 100%|██████████| 500/500 [00:27<00:00, 18.13it/s]


Epoch 54, Loss: 168934.42636108398


Epoch 55: 100%|██████████| 500/500 [00:27<00:00, 17.96it/s]


Epoch 55, Loss: 169859.80139923096


Epoch 56: 100%|██████████| 500/500 [00:28<00:00, 17.78it/s]


Epoch 56, Loss: 174092.20697402954


Epoch 57: 100%|██████████| 500/500 [00:28<00:00, 17.75it/s]


Epoch 57, Loss: 164463.9260406494


Epoch 58: 100%|██████████| 500/500 [00:27<00:00, 18.09it/s]


Epoch 58, Loss: 168318.6302509308


Epoch 59: 100%|██████████| 500/500 [00:27<00:00, 18.04it/s]


Epoch 59, Loss: 168817.62939071655


Epoch 60: 100%|██████████| 500/500 [00:27<00:00, 17.99it/s]


Epoch 60, Loss: 172291.8719482422


Epoch 61: 100%|██████████| 500/500 [00:27<00:00, 18.10it/s]


Epoch 61, Loss: 172459.9544582367


Epoch 62: 100%|██████████| 500/500 [00:27<00:00, 18.01it/s]


Epoch 62, Loss: 174863.25401878357


Epoch 63: 100%|██████████| 500/500 [00:27<00:00, 17.97it/s]


Epoch 63, Loss: 160813.07939887047


Epoch 64: 100%|██████████| 500/500 [00:27<00:00, 18.07it/s]


Epoch 64, Loss: 171083.98214304447


Epoch 65: 100%|██████████| 500/500 [00:27<00:00, 18.10it/s]


Epoch 65, Loss: 171576.51558685303


Epoch 66: 100%|██████████| 500/500 [00:27<00:00, 18.16it/s]


Epoch 66, Loss: 164400.50782585144


Epoch 67: 100%|██████████| 500/500 [00:27<00:00, 18.00it/s]


Epoch 67, Loss: 167862.67500305176


Epoch 68: 100%|██████████| 500/500 [00:27<00:00, 17.99it/s]


Epoch 68, Loss: 166125.7930650711


Epoch 69: 100%|██████████| 500/500 [00:27<00:00, 18.01it/s]


Epoch 69, Loss: 164963.63619232178


Epoch 70: 100%|██████████| 500/500 [00:27<00:00, 18.03it/s]


Epoch 70, Loss: 170641.3184261322


Epoch 71: 100%|██████████| 500/500 [00:27<00:00, 18.11it/s]


Epoch 71, Loss: 166198.6039505005


Epoch 72: 100%|██████████| 500/500 [00:27<00:00, 18.17it/s]


Epoch 72, Loss: 168059.20245552063


Epoch 73: 100%|██████████| 500/500 [00:27<00:00, 18.15it/s]


Epoch 73, Loss: 165303.70518112183


Epoch 74: 100%|██████████| 500/500 [00:27<00:00, 18.11it/s]


Epoch 74, Loss: 170996.82860565186


Epoch 75: 100%|██████████| 500/500 [00:27<00:00, 18.08it/s]


Epoch 75, Loss: 171481.3138961792


Epoch 76: 100%|██████████| 500/500 [00:27<00:00, 18.13it/s]


Epoch 76, Loss: 173712.34839630127


Epoch 77: 100%|██████████| 500/500 [00:32<00:00, 15.38it/s]


Epoch 77, Loss: 177047.20823860168


Epoch 78: 100%|██████████| 500/500 [00:27<00:00, 18.04it/s]


Epoch 78, Loss: 168445.62436676025


Epoch 79: 100%|██████████| 500/500 [00:27<00:00, 18.05it/s]


Epoch 79, Loss: 175383.97985076904


Epoch 80: 100%|██████████| 500/500 [00:28<00:00, 17.27it/s]


Epoch 80, Loss: 169606.91276359558


Epoch 81: 100%|██████████| 500/500 [00:27<00:00, 18.16it/s]


Epoch 81, Loss: 169451.0635547638


Epoch 82: 100%|██████████| 500/500 [00:27<00:00, 18.15it/s]


Epoch 82, Loss: 166447.6078696251


Epoch 83: 100%|██████████| 500/500 [00:27<00:00, 18.07it/s]


Epoch 83, Loss: 169084.3870382309


Epoch 84: 100%|██████████| 500/500 [00:27<00:00, 18.05it/s]


Epoch 84, Loss: 173791.02991509438


Epoch 85: 100%|██████████| 500/500 [00:27<00:00, 18.12it/s]


Epoch 85, Loss: 175667.66849899292


Epoch 86: 100%|██████████| 500/500 [00:27<00:00, 18.02it/s]


Epoch 86, Loss: 177236.73831176758


Epoch 87: 100%|██████████| 500/500 [00:27<00:00, 18.20it/s]


Epoch 87, Loss: 165348.83143995702


Epoch 88: 100%|██████████| 500/500 [00:27<00:00, 18.01it/s]


Epoch 88, Loss: 167809.55675411224


Epoch 89: 100%|██████████| 500/500 [00:27<00:00, 18.08it/s]


Epoch 89, Loss: 163691.43366527557


Epoch 90: 100%|██████████| 500/500 [00:27<00:00, 17.97it/s]


Epoch 90, Loss: 169979.633518219


Epoch 91: 100%|██████████| 500/500 [00:27<00:00, 18.20it/s]


Epoch 91, Loss: 168124.07614135742


Epoch 92: 100%|██████████| 500/500 [00:27<00:00, 18.26it/s]


Epoch 92, Loss: 166688.58967590332


Epoch 93: 100%|██████████| 500/500 [00:27<00:00, 18.15it/s]


Epoch 93, Loss: 168789.3540802002


Epoch 94: 100%|██████████| 500/500 [00:27<00:00, 18.13it/s]


Epoch 94, Loss: 176751.67847412563


Epoch 95: 100%|██████████| 500/500 [00:27<00:00, 18.17it/s]


Epoch 95, Loss: 164316.020860672


Epoch 96: 100%|██████████| 500/500 [00:27<00:00, 18.23it/s]


Epoch 96, Loss: 173061.5731496811


Epoch 97: 100%|██████████| 500/500 [00:27<00:00, 18.28it/s]


Epoch 97, Loss: 166442.23373794556


Epoch 98: 100%|██████████| 500/500 [00:27<00:00, 18.14it/s]


Epoch 98, Loss: 168034.5842552185


Epoch 99: 100%|██████████| 500/500 [00:27<00:00, 18.26it/s]


Epoch 99, Loss: 163481.53737068176


Epoch 100: 100%|██████████| 500/500 [00:27<00:00, 18.19it/s]


Epoch 100, Loss: 175849.05043029785


In [28]:
import torch
from torch.utils.data import DataLoader, Dataset
from torch import nn
import torch.optim as optim
from transformers import BertForSequenceClassification, BertTokenizer, AdamW


class NoiseDataset(Dataset):
    """ Dataset for loading softmax outputs and noise labels """
    def __init__(self, softmax_outputs, labels):
        self.softmax_outputs = softmax_outputs
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.softmax_outputs[idx], self.labels[idx]

# Placeholder for softmax probability extraction method
def extract_softmax_probabilities(model, dataloader, device):
    model.eval()
    softmax_probs = []
    with torch.no_grad():
        for batch in dataloader:
            batch = tuple(item.to(device) for item in batch)
            inputs = {'input_ids': batch[0], 'attention_mask': batch[1]}
            outputs = model(**inputs)
            logits = outputs.logits
            probs = torch.nn.functional.softmax(logits, dim=-1)
            softmax_probs.append(probs.cpu().numpy())
    return np.vstack(softmax_probs)

# Assuming model_name, tokenizer, input_ids, attention_mask, labels_tensor, and device_name are defined
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=len(unique_labels))
model.to(device_name)

# Then, extract softmax probabilities from the trained model for Dn
softmax_probabilities = extract_softmax_probabilities(model, dataloader, device_name)

# Convert to tensor for NoiseDataset
softmax_probabilities_tensor = torch.tensor(softmax_probabilities, dtype=torch.float32)
noise_labels_tensor = torch.tensor(sampled_data['k'].values, dtype=torch.long)  # Assuming 'k' marks noise

# Prepare DataLoader for noise detector training
noise_dataset = NoiseDataset(softmax_probabilities_tensor, noise_labels_tensor)
noise_loader = DataLoader(noise_dataset, batch_size=64, shuffle=True)

# Noise Detector Model (simplified version)
class NoiseDetector(nn.Module):
    def __init__(self, input_size=77, hidden_size=50):
        super(NoiseDetector, self).__init__()
        self.fc1 = nn.Linear(input_size,hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, 1)


    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize noise detector
number_of_classes = len(set(train_labels))
detector_model = NoiseDetector(input_size=number_of_classes)
detector_model.to(device_name)

# Training the noise detector
detector_optimizer = AdamW(detector_model.parameters(), lr=0.3)  # Use lr=0.03 for fine-tuning
criterion = nn.BCEWithLogitsLoss()

for epoch in range(10):
    for inputs, labels in noise_loader:
        inputs, labels = inputs.to(device_name), labels.to(device_name).float().unsqueeze(1)
        outputs = detector_model(inputs)
        loss = criterion(outputs, labels)

        detector_optimizer.zero_grad()
        loss.backward()
        detector_optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.4100641906261444
Epoch 2, Loss: 0.11801809072494507
Epoch 3, Loss: 0.20146818459033966
Epoch 4, Loss: 0.2693168818950653
Epoch 5, Loss: 0.3785228431224823
Epoch 6, Loss: 0.32573503255844116
Epoch 7, Loss: 0.38005587458610535
Epoch 8, Loss: 0.2172146886587143
Epoch 9, Loss: 0.3251130282878876
Epoch 10, Loss: 0.16406142711639404
