In [19]:
# Pickle
import pickle
# Pandas
import pandas as pd
# Hugging Face
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# PyTorch
import torch 
import torch.optim as optim
from torch.utils.data import DataLoader
# SkLearn
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [27]:
## Datasets
# Hate Xplain: read the CSV and keep a seeded 1,000-row sample.
hate_xplain = pd.read_csv(r'project\data\hate_xplain.csv').sample(n=1000, random_state=42)

# Implicit Hate: encode `implicit_class` as an integer `class_label` and drop
# the `extra_implicit_class` column.
label_map = {
    class_name: class_id
    for class_id, class_name in enumerate((
        'white_grievance', 'incitement', 'inferiority',
        'irony', 'stereotypical', 'threatening', 'other',
    ))
}
implicit_hate = (
    pd.read_csv(r'project\data\implicit-hate-corpus\implicit_hate_v1_stg2_posts.tsv', sep='\t')
    .assign(class_label=lambda frame: frame['implicit_class'].map(label_map))
    .drop(columns="extra_implicit_class")
)

# Toxic-Spans: join each annotation to its comment via `comment_id`.
annotations = pd.read_csv(r'project\data\toxic-spans\annotations.csv')
comments = pd.read_csv(r'project\data\toxic-spans\comments.csv')
toxic_spans = annotations.merge(comments, on='comment_id')

hate_xplain

Unnamed: 0.1,Unnamed: 0,count,hate_speech,offensive_language,neither,class,tweet
2281,2326,3,0,3,0,1,934 8616\r\ni got a missed call from yo bitch
15914,16283,3,0,3,0,1,RT @KINGTUNCHI_: Fucking with a bad bitch you ...
18943,19362,3,0,1,2,2,RT @eanahS__: @1inkkofrosess lol my credit ain...
16407,16780,3,0,3,0,1,RT @Maxin_Betha Wipe the cum out of them faggo...
13326,13654,3,1,2,0,1,Niggas cheat on they bitch and don't expect no...
...,...,...,...,...,...,...,...
2647,2710,3,0,3,0,1,@Bombfantasyyy @Angela_Mastr lmao angela you l...
13037,13357,3,0,3,0,1,"My favorite is #picslip that's a dumb bitch, w..."
5982,6153,3,1,2,0,1,"@freddurst shut up faggot, no one cares"
16621,17004,3,0,3,0,1,RT @Neonte: We was wearing shorts about a week...


In [32]:
# Load the pickled classifier. The file is opened in a `with` block so the
# handle is closed as soon as unpickling finishes.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(r'project\BERT\bert.pkl', 'rb') as model_file:
    bert_model = pickle.load(model_file)
bert_model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [40]:
# Save the model's textual repr to disk so the full architecture can be read
# outside the notebook.
model_repr = str(bert_model)
with open('model_description.txt', 'w') as description_file:
    description_file.write(model_repr)

In [29]:
def tokenize_data(texts, labels, tokenizer, max_length):
    """Tokenize texts and bundle them with their labels into a TensorDataset.

    Args:
        texts: Iterable of texts (a pandas Series is converted to a list);
            every element is passed through ``str`` before tokenization.
        labels: Integer class labels (a pandas Series is converted to a list).
        tokenizer: Callable invoked as ``tokenizer(texts, truncation=True,
            padding='max_length', max_length=..., return_tensors="pt")`` whose
            result is indexable by ``"input_ids"`` and ``"attention_mask"``.
        max_length: Length every sequence is truncated/padded to.

    Returns:
        ``torch.utils.data.TensorDataset`` yielding
        ``(input_ids, attention_mask, label)`` per example, with labels stored
        as ``torch.long``.
    """
    raw_texts = texts.tolist() if isinstance(texts, pd.Series) else texts
    text_list = list(map(str, raw_texts))

    raw_labels = labels.tolist() if isinstance(labels, pd.Series) else labels
    label_tensor = torch.tensor(raw_labels, dtype=torch.long)

    encoded = tokenizer(
        text_list,
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors="pt",
    )
    return torch.utils.data.TensorDataset(
        encoded["input_ids"], encoded["attention_mask"], label_tensor
    )


In [30]:
# Hold out 20% of the sampled tweets for evaluation. A fixed seed keeps the
# split identical across kernel restarts.
hx_train_text, hx_test_text, hx_train_labels, hx_test_labels = train_test_split(
    hate_xplain['tweet'], hate_xplain['class'], test_size=0.2, random_state=42
)

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', clean_up_tokenization_spaces=True)
# Every example is truncated/padded to 512 tokens.
hx_train = tokenize_data(hx_train_text, hx_train_labels, tokenizer, 512)
hx_test = tokenize_data(hx_test_text, hx_test_labels, tokenizer, 512)

hx_train_loader = DataLoader(hx_train, batch_size=16, shuffle=True)
# Evaluation does not depend on batch order, so the test loader is left
# unshuffled to keep predictions aligned with the dataset order.
hx_test_loader = DataLoader(hx_test, batch_size=16, shuffle=False)

# Number of batches in each loader.
len(hx_train_loader), len(hx_test_loader)

(50, 13)

In [31]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [None]:
def baseline_evaluation(model, data_loader, device=None):
    """Run the unmodified model over ``data_loader`` and collect predictions.

    Args:
        model: Model called as ``model(input_ids, attention_mask=...)`` whose
            result exposes a ``logits`` attribute.
        data_loader: Iterable of ``(input_ids, attention_mask, labels)``
            tensor batches.
        device: Device to run on. When ``None``, CUDA is used if available and
            the CPU otherwise (same default as ``evaluate_and_zero_out_heads``).

    Returns:
        ``(all_preds, all_labels)``: two flat lists with one entry per example,
        in iteration order. Predictions are the argmax over the logits.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    model.eval()
    model.to(device)

    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in data_loader:
            input_ids, attention_mask, labels = batch
            # Labels are not passed to the model: only the logits are needed,
            # so there is no reason to compute a loss that is never read.
            outputs = model(input_ids.to(device), attention_mask=attention_mask.to(device))
            preds = torch.argmax(outputs.logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    return all_preds, all_labels

hx_preds, hx_labels = baseline_evaluation(bert_model, hx_test_loader, device)

# classification_report matches `target_names` to the labels in sorted order.
# `unique()` returns values in order of appearance, so the class ids are sorted
# and passed explicitly as `labels`, with string versions as display names.
# Passing `labels` also keeps one row per class even if a class is missing
# from this test split.
hx_class_ids = sorted(hate_xplain['class'].unique())
classification_report(
    hx_labels,
    hx_preds,
    labels=hx_class_ids,
    target_names=[str(class_id) for class_id in hx_class_ids],
    output_dict=True,
)

In [None]:
def evaluate_and_zero_out_heads(model, data_loader, zeroed_heads=None, device=None):
    """Run inference over ``data_loader`` with selected attention heads silenced.

    Forward hooks are attached to ``model.bert.encoder.layer[i].attention.self``
    for each requested layer; each hook zeroes the slice of that module's
    output belonging to the requested heads. Hooks are always removed before
    returning, including when evaluation raises.

    Args:
        model: Model exposing ``model.bert.encoder.layer[i].attention.self`` and
            called as ``model(input_ids, attention_mask=...)``; its result must
            expose a ``logits`` attribute.
        data_loader: Iterable of ``(input_ids, attention_mask, labels)``
            tensor batches.
        zeroed_heads: Optional mapping ``{layer_index: [head_index, ...]}``.
            ``None`` or an empty mapping evaluates the unmodified model.
        device: Device to run on. When falsy, CUDA is used if available and
            the CPU otherwise.

    Returns:
        ``(all_preds, all_labels)``: two flat lists with one entry per example,
        in iteration order. Predictions are the argmax over the logits.

    Raises:
        IndexError: If a requested head index is outside ``[0, num_heads)``.
    """
    device = device or ('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()

    def make_zeroing_hook(head_indices):
        # Bind this layer's head list now. A hook that looked the list up via
        # the registration loop's variable would see only its final value.
        head_indices = tuple(head_indices)

        def zero_out_attention_head(module, input, output):
            returns_tuple = isinstance(output, tuple)
            # NOTE(review): assumes the first output is the concatenated
            # per-head context of shape (..., num_heads * head_size), as in
            # Hugging Face's BertSelfAttention — TODO confirm for the installed
            # transformers version.
            context = output[0] if returns_tuple else output

            head_size = getattr(module, 'attention_head_size', None)
            if head_size is None:
                head_size = context.shape[-1] // module.num_attention_heads
            num_heads = context.shape[-1] // head_size

            # Work on a copy so the module's own output tensor is not mutated.
            context = context.clone()
            for head_idx in head_indices:
                if not 0 <= head_idx < num_heads:
                    raise IndexError(
                        f"head index {head_idx} is out of range for a layer with {num_heads} heads"
                    )
                start = head_idx * head_size
                context[..., start:start + head_size] = 0

            if returns_tuple:
                return (context,) + tuple(output[1:])
            return context

        return zero_out_attention_head

    hooks = []
    all_labels = []
    all_preds = []
    try:
        for layer_idx, heads in (zeroed_heads or {}).items():
            if not heads:
                continue  # nothing to silence in this layer
            attention = model.bert.encoder.layer[layer_idx].attention.self
            hooks.append(attention.register_forward_hook(make_zeroing_hook(heads)))

        with torch.no_grad():
            for input_ids, attention_mask, labels in data_loader:
                input_ids = input_ids.to(device)
                attention_mask = attention_mask.to(device)

                outputs = model(input_ids, attention_mask=attention_mask)
                preds = torch.argmax(outputs.logits, dim=1)
                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(preds.cpu().numpy())
    finally:
        # Always detach the hooks so later calls see the unmodified model.
        for hook in hooks:
            hook.remove()

    return all_preds, all_labels

In [None]:
bert_model.eval()

# Layer-0 self-attention module; it does not change per batch, so it is
# looked up once instead of inside the loop.
attention_heads = bert_model.bert.encoder.layer[0].attention.self

with torch.no_grad():
    # Each batch holds three tensors (input_ids, attention_mask, labels), so
    # all three are unpacked. They are moved unconditionally: `.to(device)` is
    # a no-op when a tensor already lives on that device.
    for input_ids, attention_mask, labels in hx_test_loader:
        input_ids, attention_mask, labels = (
            t.to(device) for t in (input_ids, attention_mask, labels)
        )
        # TODO: per-batch inspection of `attention_heads` is not implemented yet.
        
    


In [39]:
bert_model.eval()

# Encode the first sampled tweet, padded/truncated to 512 tokens. The text is
# passed through `str` as `tokenize_data` does, and the tokenizer is created
# with the same options as the one used to build the data loaders.
first_tweet = str(hate_xplain.iloc[0]['tweet'])
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', clean_up_tokenization_spaces=True)
inputs = tokenizer(first_tweet, return_tensors='pt', max_length=512, padding='max_length', truncation=True)

# Self-attention module of the first encoder layer, shown as the cell output.
attention_heads = bert_model.bert.encoder.layer[0].attention.self
attention_heads

BertSdpaSelfAttention(
  (query): Linear(in_features=768, out_features=768, bias=True)
  (key): Linear(in_features=768, out_features=768, bias=True)
  (value): Linear(in_features=768, out_features=768, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [None]:
# Display the module currently bound to `attention_heads` (assigned by an
# earlier cell from `bert_model.bert.encoder.layer[0].attention.self`).
attention_heads

In [None]:
# Print the query/key/value projections and the dropout layer of the
# self-attention module, one per line.
for submodule_name in ('query', 'key', 'value', 'dropout'):
    print(getattr(attention_heads, submodule_name))