# BERT Model

Fine-tune a BERT model for three-class sentiment classification.

-----
```
Date: 25.05.24
Author: Zach Wolpe
Contact: zachcolinwolpe@gmail.com
```
-----

In [1]:
# importing necessary libraries
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, BertPreTrainedModel, BertModel,AdamW
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import seaborn as sns
import pandas as pd 
import numpy as np
import torch
import nltk
nltk.download('stopwords')

from sklearn.feature_extraction.text import TfidfVectorizer
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from collections import Counter
import sklearn


from nltk.corpus import stopwords
import argparse
import logging
import string
import re
import os

from sklearn.model_selection import train_test_split, KFold
import torch.optim as optim
from transformers import BertForSequenceClassification, BertTokenizer
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
from transformers import BertModel
import os


  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/zachwolpe/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
from Bert_classifier import (BertWithActivationAndRegularization, Bert_tokenize)
from ML_training_code import (generate_K_Fold_data, torch_tensorize, plot_training_validation, training_loop)


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/zachwolpe/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


---
## Config
---

In [3]:
# Config ---------------------------------------->>
# Everything a reader may want to tune lives in this cell.
PATH_TO_DATA = '../data/train_test/'
SAVE_LOC = '../model-artifacts/'
BATCH_SIZE = 10
DEBUG_MODE = True          # True -> train on a tiny subset as a quick smoke test
DEBUG_N_SAMPLES = 10       # number of training rows kept when DEBUG_MODE is on
EPOCHS = 1
CROSS_VALIDATION = True    # True -> run the K-fold cell; False -> run the single-fold cells
K_FOLDS = 5
SEED = 42
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Config ---------------------------------------->>

# Seed the RNGs up front so weight initialisation and shuffling are repeatable
# under Restart Kernel -> Run All.
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load data
X_train = pd.read_csv(os.path.join(PATH_TO_DATA, 'X_train.csv'))
y_train = pd.read_csv(os.path.join(PATH_TO_DATA, 'y_train.csv'))
X_test = pd.read_csv(os.path.join(PATH_TO_DATA, 'X_test.csv'))
y_test = pd.read_csv(os.path.join(PATH_TO_DATA, 'y_test.csv'))

# Features and labels are stored in separate files; fail early if they are misaligned.
assert len(X_train) == len(y_train), 'X_train and y_train have different row counts'
assert len(X_test) == len(y_test), 'X_test and y_test have different row counts'


# Downsample for testing
if DEBUG_MODE:
    X_train = X_train[:DEBUG_N_SAMPLES]
    y_train = y_train[:DEBUG_N_SAMPLES]


---
## Instantiate Bert
---

In [4]:
# Pretrained BERT checkpoint to fine-tune, and the size of the classification output.
model_name = 'bert-base-uncased'
NUM_LABELS = 3

# Create an instance of the model. The checkpoint is taken from `model_name`
# so that changing the variable above actually changes the loaded model.
model = BertWithActivationAndRegularization(pretrained_model_name=model_name, num_labels=NUM_LABELS)

# Move the model to GPU if available
model.to(device)

BertWithActivationAndRegularization(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), 

----
## Training Runtime Hyperparameters
----

In [5]:
# Loss function and optimiser used by every training run below.
criterion = nn.CrossEntropyLoss()

# AdamW hyperparameters
learning_rate = 2e-6
weight_decay = 1e-4
optimizer = optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)

---
## Run without K-Fold Cross-Validation
----

In [6]:
# Single-split path: only runs when cross-validation is switched off.
# Takes just the first split produced by the K-fold generator.
if not CROSS_VALIDATION:
    gkfd = generate_K_Fold_data(X_train, y_train, num_splits=K_FOLDS)
    (
        X_train_fold,
        y_train_fold,
        X_val_fold,
        y_val_fold,
        train_index,
        val_index,
    ) = next(gkfd)

---
## Tokenize
---

In [7]:
# Tokenize the single train/validation split.
# The third value returned for the training split is bound to `y_train_tok`
# rather than `y_train`, so the label frame loaded in the config cell is not
# overwritten and cells that rebuild folds from `y_train` stay re-runnable.
if not CROSS_VALIDATION:
    input_ids_train, attention_masks_train, y_train_tok = Bert_tokenize(X_train_fold, y_train_fold)
    input_ids_val, attention_masks_val, y_val = Bert_tokenize(X_val_fold, y_val_fold)


---
## Torch Tensors
---

In [8]:
# Wrap the tokenized split in torch datasets / dataloaders.
# The first target was previously a second `train_dataloader`, which silently
# discarded the first returned element.
# NOTE(review): assumes torch_tensorize returns
# (train_dataset, train_dataloader, val_dataset, val_dataloader) - confirm in ML_training_code.
if not CROSS_VALIDATION:
    (train_dataset,
     train_dataloader,
     val_dataset,
     val_dataloader) = torch_tensorize(input_ids_train,
                                       attention_masks_train,
                                       y_train_fold,
                                       input_ids_val,
                                       attention_masks_val,
                                       y_val_fold,
                                       BATCH_SIZE=BATCH_SIZE)

---
## Training Loop
---

In [9]:
# Train on the single split and keep the loss / accuracy histories it returns.
if not CROSS_VALIDATION:
    single_run_history = training_loop(
        model,
        optimizer,
        criterion,
        train_dataloader,
        val_dataloader,
        device,
        epochs=EPOCHS,
    )
    train_losses_fold, valid_losses_fold, accuracies_fold = single_run_history


---
## Plot training and validation loss
---

In [10]:
# Loss curves for the single-split run (skipped when cross-validating).
if not CROSS_VALIDATION:
    plot_training_validation(
        train_losses_fold,
        valid_losses_fold,
    )

---
## K-Fold Cross-Validation
---

In [11]:
# K-fold cross-validation: tokenize, tensorize and train once per fold,
# collecting the loss / accuracy histories returned by `training_loop`.
if CROSS_VALIDATION:

    # Build generator
    gkfd = generate_K_Fold_data(X_train, y_train, num_splits=K_FOLDS)

    # Metric histories, one entry per fold. Created once, before the loop, so
    # they accumulate across folds instead of being reset on every iteration.
    train_losses_per_fold = []
    valid_losses_per_fold = []
    accuracies_per_fold = []
    results = []

    # NOTE(review): `model` and `optimizer` are created once in earlier cells and
    # shared by every fold, so weights carry over from fold to fold and later
    # folds validate on rows the model has already been trained on. Per-fold
    # scores are therefore not independent estimates. Re-initialising both per
    # fold would fix that but changes which weights the save cell writes -
    # decide which behaviour is intended.
    for fold_number, fold_data in enumerate(gkfd, start=1):
        print(f'Fold {fold_number}/{K_FOLDS}')
        X_train_fold, y_train_fold, X_val_fold, y_val_fold, train_index, val_index = fold_data

        # Tokenize. The third return value goes to `y_train_tok` so the
        # `y_train` frame from the config cell is not overwritten, which keeps
        # this cell re-runnable.
        input_ids_train, attention_masks_train, y_train_tok = Bert_tokenize(X_train_fold, y_train_fold)
        input_ids_val, attention_masks_val, y_val = Bert_tokenize(X_val_fold, y_val_fold)

        # Create Torch Tensors
        # NOTE(review): assumes torch_tensorize returns
        # (train_dataset, train_dataloader, val_dataset, val_dataloader) - confirm.
        (train_dataset,
         train_dataloader,
         val_dataset,
         val_dataloader) = torch_tensorize(input_ids_train,
                                           attention_masks_train,
                                           y_train_fold,
                                           input_ids_val,
                                           attention_masks_val,
                                           y_val_fold,
                                           BATCH_SIZE=BATCH_SIZE)

        # Training loop
        train_losses_fold, valid_losses_fold, accuracies_fold = training_loop(model,
            optimizer,
            criterion,
            train_dataloader,
            val_dataloader,
            device,
            epochs=EPOCHS)

        # Save metrics for this fold
        train_losses_per_fold.append(train_losses_fold)
        valid_losses_per_fold.append(valid_losses_fold)
        accuracies_per_fold.append(accuracies_fold)

        # save results
        results.append({
            'train_losses': train_losses_fold,
            'valid_losses': valid_losses_fold,
            'accuracies': accuracies_fold
        })

Fold 1/5
Training the model...
Epoch 1/1 - Training Loss: 1.2094 - Validation Loss: 1.0610 - Training Accuracy: 0.2500 - Validation Accuracy: 0.5000
Finished Training.
Fold 2/5
Training the model...
Epoch 1/1 - Training Loss: 1.2460 - Validation Loss: 1.3418 - Training Accuracy: 0.0000 - Validation Accuracy: 0.0000
Finished Training.
Fold 3/5
Training the model...
Epoch 1/1 - Training Loss: 1.2759 - Validation Loss: 1.1019 - Training Accuracy: 0.1250 - Validation Accuracy: 0.5000
Finished Training.
Fold 4/5
Training the model...
Epoch 1/1 - Training Loss: 1.2752 - Validation Loss: 1.1393 - Training Accuracy: 0.2500 - Validation Accuracy: 0.5000
Finished Training.
Fold 5/5
Training the model...
Epoch 1/1 - Training Loss: 1.1027 - Validation Loss: 1.3828 - Training Accuracy: 0.3750 - Validation Accuracy: 0.0000
Finished Training.


---
## Save Model
---

In [13]:
# Save BERT model weights.
# Debug runs are written to a separate file so they cannot overwrite the
# weights from a full training run.
model_filename = 'bert_model_debug_model.pth' if DEBUG_MODE else 'bert_model.pth'

# torch.save does not create missing directories, so make sure the target exists.
os.makedirs(SAVE_LOC, exist_ok=True)

# os.path.join gives the same path whether or not SAVE_LOC ends with a slash.
torch.save(model.state_dict(), os.path.join(SAVE_LOC, model_filename))