In [None]:
import numpy as np
import seaborn as sns
from tqdm import tqdm
import numpy as np
import pandas as pd
import os
import json
import time
import wandb
import torch
from torch import cuda
from torch.utils.data import DataLoader
from sklearn.manifold import TSNE
import seaborn as sns
import matplotlib.pyplot as plt

from utils.classes import (
    get_AGNews_datasets,
    train, test, accuracy,
    dynamic_masking,
    RobertaMLM_with_classifier,
    visualize_layers
)
%env WANDB_PROJECT=TAPT_roberta
%env WANDB_LOG_MODEL='end'

# Import relevant models, tokenizers, and related libs
from transformers import AutoModelForMaskedLM, AutoTokenizer, pipeline
import datasets

# Notebook-wide constants
SEED = 69

# Seeded torch RNG handed to the dataset helper in the cells below.
SEEDED_GEN = torch.Generator()
SEEDED_GEN.manual_seed(SEED)

# Use the GPU when torch can see one, otherwise run on the CPU.
# (Assign DEVICE = 'cpu' here instead to force CPU execution.)
if cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

# Sanity check: prints "cuda" when a GPU is available
print(DEVICE)

In [None]:
# Settings for the TAPT stage. Not read by any other cell visible in this notebook;
# kept for reference alongside the classifier settings below.
hyperparams_TAPT = dict(
    EPOCHS=20,
    MASK_PROB=0.1,
    TRAINING_BATCH_SIZE=32,
    MAX_LEN=77,
)

# Settings for the classifier run. The cells below read MAX_LEN, TRAIN_PCT,
# TEST_BATCH_SIZE, FC_HIDDEN and FC_DROPOUT from this dict.
hyperparams = dict(
    # data split / batching
    TRAIN_PCT=0.9,
    TRAIN_BATCH_SIZE=200,
    VALID_BATCH_SIZE=200,
    TEST_BATCH_SIZE=200,
    MAX_LEN=77,
    # optimisation
    EPOCHS=25,
    LR=0.005,
    L2_REG=0.0,
    ADAM_BETAS=(0.87, 0.98),
    ADAM_EPS=1e-6,
    # classifier head
    FC_HIDDEN=768,
    FC_DROPOUT=0.05,
    # scheduler / run bookkeeping
    SCH_ENDFACTOR=0.1,
    RUN_SUFFIX="6",
)

# Choose either 1) fine-tuned or 2) pre-trained MLM Model

## 1) Load the fine-tuned model (saved PyTorch state dict)
### Load model and data

In [None]:
# Rebuild the classifier around a freshly downloaded distilroberta encoder, then
# restore the fine-tuned weights from the saved state dict.
model_type = "distilroberta-base"
PATH = "models/distilroberta-base_base_finetuned_1682486148.pt"
MLM_layers = AutoModelForMaskedLM.from_pretrained(model_type).roberta
tokenizer = AutoTokenizer.from_pretrained(model_type)
lazarus_model = RobertaMLM_with_classifier(
    MLM_layers,
    fc_hidden=hyperparams['FC_HIDDEN'],
    fc_dropout=hyperparams['FC_DROPOUT'],
)
# map_location remaps the stored tensors onto DEVICE, so a checkpoint that was saved
# on a GPU can still be loaded when DEVICE has fallen back to 'cpu'.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
lazarus_model.load_state_dict(torch.load(PATH, map_location=DEVICE))

# Inference only: move to DEVICE, freeze every parameter, switch to eval mode.
lazarus_model.to(DEVICE)
for param in lazarus_model.parameters():
    param.requires_grad = False
lazarus_model.eval()

# A fresh generator seeded with SEED is created here instead of passing the shared
# SEEDED_GEN: a generator is stateful, so reusing one object would make the result
# depend on how many times (and in which order) the loading cells were executed.
# On a fresh kernel this is equivalent to the first use of SEEDED_GEN.
_, _, test_dataset = get_AGNews_datasets(
    tokenizer,
    DEVICE,
    max_length=hyperparams['MAX_LEN'],
    train_pct=hyperparams['TRAIN_PCT'],
    generator=torch.Generator().manual_seed(SEED)
)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=hyperparams['TEST_BATCH_SIZE'], shuffle=True)
# 2-component t-SNE; not referenced by the other cells visible in this notebook.
t_sne = TSNE(2)

### Visualize layers

In [None]:
# Run the layer visualisation on the encoder stored in the classifier's `.mlm`
# attribute, for layer indices 0 through 6, tagged with the name "baseline".
vised_layers = visualize_layers(
    lazarus_model.mlm,
    test_dataloader,
    hyperparams['MAX_LEN'],
    DEVICE,
    layers=range(7),
    saved_model_name="baseline",
)

## 2) Load the pre-trained MLM checkpoint
### Load model and data

In [None]:
# Load the encoder (`.roberta`) of the masked-LM checkpoint stored under
# `huggingmodel`; the tokenizer comes from the base `model_type`.
model_type = "distilroberta-base"
huggingmodel = "checkpoints/TAPT_Roberta_DAPT_TAPT/checkpoint-70000"
lazarus_model = AutoModelForMaskedLM.from_pretrained(huggingmodel).roberta
tokenizer = AutoTokenizer.from_pretrained(model_type)

# Inference only: move to DEVICE, freeze every parameter, switch to eval mode.
lazarus_model.to(DEVICE)
for param in lazarus_model.parameters():
    param.requires_grad = False
lazarus_model.eval()

# A fresh generator seeded with SEED is created here instead of passing the shared
# SEEDED_GEN: a generator is stateful, so reusing one object would make the result
# depend on whether the other loading cell (or this one) had already been run.
# On a fresh kernel this is equivalent to the first use of SEEDED_GEN.
_, _, test_dataset = get_AGNews_datasets(
    tokenizer,
    DEVICE,
    max_length=hyperparams['MAX_LEN'],
    train_pct=hyperparams['TRAIN_PCT'],
    generator=torch.Generator().manual_seed(SEED)
)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=hyperparams['TEST_BATCH_SIZE'], shuffle=True)
# 2-component t-SNE; not referenced by the other cells visible in this notebook.
t_sne = TSNE(2)

### Visualize layers

In [None]:
# Run the layer visualisation directly on the loaded encoder, for layer indices
# 0 through 6, tagged with the checkpoint-specific name below.
vised_layers = visualize_layers(
    lazarus_model,
    test_dataloader,
    hyperparams['MAX_LEN'],
    DEVICE,
    layers=range(7),
    saved_model_name="TAPT_Roberta_DAPT_TAPT_ckpt_7",
)