In [34]:
# Pickle
import pickle
# Pandas
import pandas as pd
# Hugging Face
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# PyTorch
import torch 
import torch.optim as optim
from torch.utils.data import DataLoader
# SkLearn
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split

In [37]:
## Datasets 
# Hate Xplain
hate_xplain = pd.read_csv(r'data\hate_xplain.csv')

# Implicit Hate 
implicit_hate = pd.read_csv(r'data\implicit_hate_v1_stg2_posts.tsv', delimiter='\t')
label_map = {
    'white_grievance': 0, 'incitement': 1, 'inferiority': 2,
    'irony': 3, 'stereotypical': 4, 'threatening': 5, 'other': 6
}
implicit_hate['class_label'] = implicit_hate['implicit_class'].map(label_map)
implicit_hate.drop("extra_implicit_class", axis=1, inplace=True)

# hate_xplain = hate_xplain.sample(n=1000, random_state=42)
hate_xplain

Unnamed: 0.1,Unnamed: 0,count,hate_speech,offensive_language,neither,class,tweet
0,0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't...
1,1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2,2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3,3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4,4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...
...,...,...,...,...,...,...,...
24778,25291,3,0,2,1,1,you's a muthaf***in lie &#8220;@LifeAsKing: @2...
24779,25292,3,0,1,2,2,"you've gone and broke the wrong heart baby, an..."
24780,25294,3,0,3,0,1,young buck wanna eat!!.. dat nigguh like I ain...
24781,25295,6,0,6,0,1,youu got wild bitches tellin you lies


In [38]:
def tokenize_data(texts, labels, tokenizer, max_length):
    if isinstance(texts, pd.Series):
        texts = texts.tolist()
    texts = [str(text) for text in texts] 

    if isinstance(labels, pd.Series):
        labels = labels.tolist()
    labels = torch.tensor(labels, dtype=torch.long)
    
    encodings = tokenizer(texts, truncation=True, padding='max_length', max_length=max_length, return_tensors="pt")
    dataset = torch.utils.data.TensorDataset(encodings["input_ids"], encodings["attention_mask"], labels)
    return dataset

def train(model, data_loader, optimizer, epochs, device=None):
    device = device or ('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()

    for epoch in range(epochs):
        for input_ids, attention_mask, labels in data_loader:
            input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
        print(f"Epoch {epoch+1} Loss: {loss.item()}")

def evaluate(model, data_loader, device=None):
    model.eval()
    model.to(device)

    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in data_loader:
            input_ids, attention_mask, labels = tuple(t.to(device) for t in batch)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    return all_preds, all_labels

def freeze_bert_layers(model, layers: list):
    for name, param in model.named_parameters():
        # only unfreeze the layers in the list
        for layer in layers:
            if layer in name:
                param.requires_grad = True
            else:
                param.requires_grad = False
    return model

In [39]:
bert = 'bert-base-uncased'
new_bert = AutoModelForSequenceClassification.from_pretrained(bert, num_labels=3)
# with open('model_str.txt', 'w') as f:
#     f.write(str(new_bert))


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [40]:
new_bert = freeze_bert_layers(new_bert, ['encoder.layer.11'])

In [41]:
hx_train_text, hx_test_text, hx_train_labels, hx_test_labels = train_test_split(hate_xplain['tweet'], hate_xplain['class'], test_size=0.2)

tokenizer = AutoTokenizer.from_pretrained(bert, clean_up_tokenization_spaces=True)
hx_train = tokenize_data(hx_train_text, hx_train_labels, tokenizer, 512)
hx_test = tokenize_data(hx_test_text, hx_test_labels, tokenizer, 512)

hx_train_loader = DataLoader(hx_train, batch_size=16, shuffle=True)
hx_test_loader = DataLoader(hx_test, batch_size=16, shuffle=True)

hx_train_loader.__len__(), hx_test_loader.__len__()

(1240, 310)

In [42]:
optimizer = optim.AdamW(new_bert.parameters(), lr=5e-5)
train(new_bert, hx_train_loader, optimizer, 5, 'cuda')
pickle.dump(new_bert, open("BERT/new_bert.pkl", 'wb'))

Epoch 1 Loss: 0.15766772627830505
Epoch 2 Loss: 0.2517176866531372
Epoch 3 Loss: 0.05289705470204353
Epoch 4 Loss: 0.0454862006008625
Epoch 5 Loss: 0.033316388726234436


In [45]:
# TODO: BertSelfOutput???
hx_preds, hx_labels = evaluate(new_bert, hx_test_loader, 'cuda')
hx_report = accuracy_score(hx_labels, hx_preds)
print(f'Baseline Accuracy: {hx_report}')

Baseline Accuracy: 0.9126487795037321


In [52]:
def evaluate_and_zero_head(model, data_loader, head:int, device=None): 
    model.eval()
    model.to(device)
    for input_ids, attention_mask, labels in data_loader:
        input_ids, attention_mask, labels = input_ids.to('cuda'), attention_mask.to('cuda'), labels.to('cuda')
        all_preds, all_labels = [], []    
        with torch.no_grad(): 
            new_bert.bert.encoder.layer[head].output.dense.weight[0][0] = 0.0
            outputs = new_bert(input_ids, attention_mask=attention_mask, labels=labels)
            _, preds = torch.max(outputs.logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    return all_preds, all_labels

In [53]:
accuracies = []
for i in range(12): 
    print(f"Zeroing out attention head {i}.")
    preds, labels = evaluate_and_zero_head(new_bert, hx_test_loader, i, 'cuda')

    accuracy = accuracy_score(labels, preds)
    accuracies.append(accuracy)
    print(f"  - Accuracy: {accuracy}")


Zeroing out attention head 0.
  - Accuracy: 0.9230769230769231
Zeroing out attention head 1.
  - Accuracy: 1.0
Zeroing out attention head 2.
  - Accuracy: 0.8461538461538461
Zeroing out attention head 3.
  - Accuracy: 1.0
Zeroing out attention head 4.
  - Accuracy: 0.8461538461538461
Zeroing out attention head 5.
  - Accuracy: 0.9230769230769231
Zeroing out attention head 6.
  - Accuracy: 0.7692307692307693
Zeroing out attention head 7.
  - Accuracy: 1.0
Zeroing out attention head 8.
  - Accuracy: 0.8461538461538461
Zeroing out attention head 9.
  - Accuracy: 1.0
Zeroing out attention head 10.
  - Accuracy: 1.0
Zeroing out attention head 11.
  - Accuracy: 1.0
