## Freezing layers

In this notebook we freeze the first encoder layers of a multilingual BERT classifier and check whether that improves performance.

In [1]:
%load_ext autoreload
%autoreload 2
import os
from datetime import datetime
import fire
import torch
import pandas as pd
from torchtext import data
import torch.nn as nn
from transformers import (
    AdamW, BertForSequenceClassification, BertTokenizer,
    get_constant_schedule_with_warmup
)

from offenseval.nn import (
    Tokenizer,
    train, evaluate, train_cycle, save_model, load_model, evaluate_dataset
)
from offenseval.datasets import datasets

# Raise the pandas display limits: up to 200 rows and 300 characters per column.
pd.set_option("display.max_rows", 200)
pd.set_option("display.max_colwidth", 300)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")


Create fields and some other boilerplate

In [2]:
from offenseval.datasets import datasets, build_dataset
from offenseval.nn import create_bert_fields
from transformers import BertModel, BertTokenizer

# Name of the pretrained checkpoint shared by the tokenizer and the encoder.
bert_name = "bert-base-multilingual-cased"

# Both objects are loaded from the same checkpoint name so they stay in sync.
bert_tokenizer = BertTokenizer.from_pretrained(bert_name)
bert_model = BertModel.from_pretrained(bert_name)

# create_bert_fields is a project helper; it is given the tokenizer and its
# result is unpacked into the id, label and text fields used below.
bert_fields = create_bert_fields(bert_tokenizer)
ID, SUBTASK_A, TEXT = bert_fields

Build the train, dev and test datasets

In [None]:
from offenseval.datasets import datasets, build_datasets, build_examples

# Each entry pairs a key with an (attribute name, field) tuple.
# NOTE(review): presumably the keys are the column names of the raw data files
# and the first tuple element is the attribute set on each example -- confirm
# against build_datasets.
fields = dict(
    id=("id", ID),
    text=("text", TEXT),
    subtask_a=("subtask_a", SUBTASK_A),
)

# lang="all" requests every available language; the three results are the
# train, dev and test splits, in that order.
splits = build_datasets(fields, lang="all")
train_dataset, dev_dataset, test_dataset = splits


Building from langs olid danish turkish arabic greek


HBox(children=(FloatProgress(value=0.0, max=13240.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2368.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=25021.0), HTML(value='')))

In [None]:
# The label vocabulary is built from the dev split only. The assertion pins the
# index order (0 -> "NOT", 1 -> "OFF") so that a different ordering fails loudly
# here instead of silently flipping the labels later.
EXPECTED_LABELS = ["NOT", "OFF"]
SUBTASK_A.build_vocab(dev_dataset)
assert SUBTASK_A.vocab.itos == EXPECTED_LABELS

In [None]:
print("Building iterators")

BATCH_SIZE = 64


def _text_length(example):
    """Sort key for bucketing: the length of the example's `text` attribute."""
    return len(example.text)


# One bucket iterator per split, both using the same batch size, device and
# length-based sort key; examples are also sorted by that key inside each batch.
train_it, dev_it = data.BucketIterator.splits(
    (train_dataset, dev_dataset),
    batch_size=BATCH_SIZE,
    device=device,
    sort_key=_text_length,
    sort_within_batch=True,
)

In [None]:
from offenseval.nn import create_criterion
from offenseval.nn.models import BertSeqModel
from transformers import get_linear_schedule_with_warmup

epochs = 10

# Wrap the pretrained encoder in the project's sequence model and move it to the
# selected device.
model = BertSeqModel(bert_model, dropout=0.10).to(device)

# The weight_with=train_dataset argument is left disabled, as in the original call.
criterion = create_criterion(device)# weight_with=train_dataset)
optimizer = AdamW(model.parameters(), lr=5e-5)

# Total scheduler steps are epochs times the number of training batches; the
# first tenth (integer division) is used as warmup.
num_training_steps = len(train_it) * epochs
num_warmup_steps = num_training_steps // 10
# Roughly 0.1; the integer division above may round the warmup steps down.
warmup_proportion = float(num_warmup_steps) / float(num_training_steps)

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps,
)

Freeze the first three encoder layers

In [None]:
# Alternative left disabled: also freeze the embedding parameters.
#for param in model.bert.embeddings.parameters():
#    param.requires_grad = False

# Turn off gradient tracking for every parameter of encoder layers 0, 1 and 2.
for layer_idx in (0, 1, 2):
    encoder_block = model.bert.encoder.layer[layer_idx]
    for weight in encoder_block.parameters():
        weight.requires_grad_(False)

Train the model, saving the best checkpoint

In [None]:
from offenseval.nn import train_cycle

def get_target(batch):
    """Return the batch's `subtask_a` labels converted to a float64 tensor."""
    labels = batch.subtask_a
    return labels.to(dtype=torch.float64)

# Path handed to train_cycle as model_path and read back by the later load cell.
output_path = "../../models/bert_cased.all.freeze.pt"

# Train for up to `epochs` epochs on train_it with dev_it as the second iterator.
# NOTE(review): presumably train_cycle tracks dev F1 (monitor="f1"), stops after
# 5 epochs without improvement and writes the best weights to model_path --
# confirm in offenseval.nn.
train_cycle(
    model,
    optimizer,
    criterion,
    scheduler,
    train_it,
    dev_it,
    epochs,
    get_target=get_target,
    monitor="f1",
    model_path=output_path,
    early_stopping_tolerance=5,
    ncols=700,
)

In [None]:
# Reload the checkpoint stored at output_path into the model.
# map_location=device remaps the stored tensors onto the device chosen at the
# top of the notebook, so a checkpoint written from a CUDA session can still be
# restored in a CPU-only session.
# NOTE(review): this assumes the file at output_path holds a plain state dict --
# confirm against what train_cycle writes to model_path.
state_dict = torch.load(output_path, map_location=device)
model.load_state_dict(state_dict)


In [None]:
# Score the reloaded model on the dev iterator with the same criterion used for
# training. The target is cast to double, matching the target function that was
# passed to train_cycle, so the criterion sees the same dtype as during training.
# The cast is written inline so this cell also works when the training cell was
# skipped and only the checkpoint was loaded.
report = evaluate(
    model,
    dev_it,
    criterion,
    get_target=lambda batch: batch.subtask_a.double())

print(f'Val {report}')


In [None]:
# Persist the model together with the TEXT field via the project's save_model helper.
# NOTE(review): this writes to the same output_path that train_cycle received as
# model_path and that the load cell reads with torch.load. If save_model stores
# a different format than a plain state dict, re-running the load cell afterwards
# will fail -- confirm the formats match or use a separate path.
save_model(model, TEXT, output_path)