In [1]:
import torch
torch.backends.cuda.matmul.allow_tf32 = True
import torch.optim as optim
import random
from tqdm import tqdm
import re
from accelerate import Accelerator
accelerator = Accelerator() 


[2024-02-05 14:27:46,046] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [2]:
import os
import wandb

wandb.login(key = "5c0f1505d0af16a0dda3f3d031310d45e9a3f07b")

[34m[1mwandb[0m: Currently logged in as: [33mtoibazar903[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/toibazd/.netrc


True

In [3]:
from pynvml import *


def print_gpu_utilization():
    nvmlInit()
    handle = nvmlDeviceGetHandleByIndex(0)
    info = nvmlDeviceGetMemoryInfo(handle)
    print(f"GPU memory occupied: {info.used//1024**2} MB.")
import psutil

# Function to get free CPU memory
def get_free_memory():
    memory = psutil.virtual_memory()
    return memory.available / (1024.0 ** 3)  # Convert bytes to gigabytes

# Display free CPU memory
print(f"Free CPU Memory: {get_free_memory():.2f} GB")

Free CPU Memory: 684.84 GB


In [4]:
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.pre_tokenizers import WhitespaceSplit

tokenizer = Tokenizer(WordLevel(unk_token="[UNK]"))
tokenizer.pre_tokenizer = WhitespaceSplit()

In [5]:
tokenizer_path = "WordLevel_tokenizer_trained_InterPro.json"
tokenizer = tokenizer.from_file(tokenizer_path)
tokenizer.enable_truncation(512)

In [6]:
from datasets import Dataset
train_dataset = Dataset.load_from_disk('BERT_train_dataset')
val_dataset = Dataset.load_from_disk('BERT_val_dataset')

In [7]:
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False)
accelerator.print(len(train_dataloader))

60981


In [8]:
wandb.init(
    # set the wandb project where this run will be logged
    project="InterPro_BERT_training_final",
    
    # track hyperparameters and run metadata
    config={
    "learning_rate": 1e-3,
    "architecture": "BERT",
    "dataset": "InterPro_genomes",
    "epochs": 10,
    }
)

In [9]:
from transformers import AutoModelForMaskedLM, BertConfig

config = BertConfig(vocab_size = tokenizer.get_vocab_size(), hidden_size = 256, num_hidden_layers = 3, num_attention_heads = 8, intermediate_size = 256)
model = AutoModelForMaskedLM.from_config(config)

In [10]:
from transformers import get_linear_schedule_with_warmup

epochs = 5
optimizer = optim.AdamW(model.parameters(),lr=1e-3, weight_decay=2e-5)

num_training_steps = epochs * len(train_dataloader) 
num_warmup_steps = int(num_training_steps*0.05)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps)

2024-02-05 14:28:12.003282: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-05 14:28:12.422719: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-05 14:28:12.503794: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-05 14:28:14.373634: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

In [11]:
accelerator.print("LOADING MODEL")
model, optimizer, scheduler, train_dataloader, val_dataloader = accelerator.prepare(model,optimizer, scheduler, train_dataloader, val_dataloader)

LOADING MODEL


In [12]:
print_gpu_utilization()

GPU memory occupied: 1300 MB.


In [15]:
accelerator.print("NOW WILL START TRAINING")
training_loss = []
validation_loss = []
val_acc = []

# best_val_loss = float('inf')  
# patience = 3 
for epoch in tqdm(range(epochs)):
    if accelerator.is_main_process:
        total_correct = 0
        total_tokens = 0
        train_loss = 0
        val_loss = 0
    model.train()
    accelerator.print(f"training epoch {epoch}")
    for step, batch in enumerate(train_dataloader):
        optimizer.zero_grad()
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']
        
        outputs = model(input_ids,attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        train_loss+=loss.item()
        accelerator.backward(loss)
        optimizer.step()
        scheduler.step()
        wandb.log({"train_loss": loss.item()})
    print_gpu_utilization()
    accelerator.print(f"evaluation epoch {epoch}")
    model.eval()
    for step, batch in enumerate(val_dataloader):
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']
        with torch.no_grad():
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        logits = outputs.logits
        predicted_labels = torch.argmax(logits, dim=-1)
        # Mask out labels where input_ids != 4
        mask = (input_ids == 4)
        masked_labels = labels[mask]
        masked_predicted_labels = predicted_labels[mask]

        correct = torch.sum(masked_predicted_labels == masked_labels).item()
        total_correct += correct
        total_tokens += masked_labels.numel()

        val_loss += loss.item()
        wandb.log({"val_loss":loss.item(), "val_acc":correct/masked_labels.numel()})
    accuracy = total_correct / total_tokens if total_tokens > 0 else 0.0

    avg_train_loss = train_loss / len(train_dataloader)
    avg_val_loss = val_loss / len(val_dataloader)
    
    training_loss.append(avg_train_loss)
    validation_loss.append(avg_val_loss)

    accelerator.print("Train loss:", avg_train_loss)
    accelerator.print("Val loss:", avg_val_loss)
    accelerator.print("\n\n")
    
#     if avg_val_loss < best_val_loss:
#         best_val_loss = avg_val_loss
#         torch.save(model.state_dict(), '/BERT_context_pretrained_10K/BERT_best.pth')  # Save the best model
        
#     else:
#         patience -=1
#         if patience== 0:
#             # Stop training if validation loss doesn't improve after patience epochs
#             print(f"Stopping early as validation loss didn't improve for {patience} epochs.")
#             break  # Break out of the training loop


wandb.finish()

NOW WILL START TRAINING


  0%|          | 0/5 [00:00<?, ?it/s]

training epoch 0


  0%|          | 0/5 [00:01<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 33.27 GiB (GPU 0; 31.74 GiB total capacity; 1.95 GiB already allocated; 29.28 GiB free; 1.99 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(
    'BERT_context_pretrained_InterPro_final',
    is_main_process=accelerator.is_main_process,
    save_function=accelerator.save)
print("Saved pre_trained model here: BERT_context_pretrained_InterPro_final")

In [None]:
import matplotlib.pyplot as plt


plt.plot(list(range(len(training_loss))), training_loss, linestyle='dotted', label='Training Loss')
plt.plot(list(range(len(validation_loss))), validation_loss, marker='o', linestyle='solid', label='Validation Loss')

plt.title('Training and Validation Loss Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()


plt.savefig('loss_plot_final.png', dpi=300) 

