In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as nnf
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import numpy as np
from collections import defaultdict
from transformers import *

# (logits, hidden_states, attentions)
import seaborn as sns; sns.set()

In /home/fatma/.local/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The text.latex.preview rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /home/fatma/.local/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The mathtext.fallback_to_cm rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /home/fatma/.local/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: Support for setting the 'mathtext.fallback_to_cm' rcParam is deprecated since 3.3 and will be removed two minor releases later; use 'mathtext.fallback : 'cm' instead.
In /home/fatma/.local/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The validate_bool_maybe_none function was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /home/fatma/.local/lib/python3.6/site-packages/matplotlib/mpl-data/stylel

In [2]:
import torch

# Pick the torch device once: CUDA when PyTorch reports one, otherwise the CPU.
if not torch.cuda.is_available():
    # CPU fallback.
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    device = torch.device("cuda")

    # Report how many CUDA devices are visible and the name of device 0.
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

There are 1 GPU(s) available.
We will use the GPU: GeForce RTX 2080 with Max-Q Design


In [3]:
def get_model_attention_for_sentence(model, tokenizer, sentences, single_sentence=False, add_special_token=False):
    """Run ``model`` on each sentence and collect its tokens and ``outputs[2]``.

    Every sentence is tokenized with ``tokenizer.tokenize`` and wrapped in the
    tokenizer's ``cls_token`` / ``sep_token`` by hand, converted to ids with
    ``tokenizer.encode`` and fed to the model one sentence at a time.

    Args:
        model: callable taking a ``LongTensor`` of shape (1, n_tokens) and
            returning an indexable output whose element 2 is kept.
            NOTE(review): element 2 is presumably the attention tuple, which
            requires a config with both ``output_hidden_states`` and
            ``output_attentions`` enabled -- confirm against the loaded config.
        tokenizer: object exposing ``cls_token``, ``sep_token``, ``tokenize``
            and ``encode``.
        sentences: an iterable of strings, or one string when
            ``single_sentence`` is true.
        single_sentence: treat ``sentences`` as a single string.
        add_special_token: forwarded to ``tokenizer.encode`` as
            ``add_special_tokens``. CLS/SEP are already added manually above,
            so enabling this presumably duplicates them -- verify before use.

    Returns:
        ``(sentences_tokens, attention_weights_per_sentence)``: two parallel
        lists with one entry per sentence.
    """
    # Wrap a lone string so both call styles share a single code path.
    batch = [sentences] if single_sentence else sentences

    sentences_tokens = []
    attention_weights_per_sentence = []

    # Inference only: without no_grad() every stored attention tensor would keep
    # its autograd graph alive, which wastes memory across many sentences.
    with torch.no_grad():
        for sent in batch:
            token = [tokenizer.cls_token] + tokenizer.tokenize(sent) + [tokenizer.sep_token]
            inputids = torch.LongTensor([tokenizer.encode(token, add_special_tokens=add_special_token)])
            outputs = model(inputids)  # forward pass
            sentences_tokens.append(token)
            attention_weights_per_sentence.append(outputs[2])
    return sentences_tokens, attention_weights_per_sentence

In [4]:
def get_attention_map_per_word_as_df (sentences_tokenized, attention_list_per_sentence):
    """Flatten per-sentence attention tensors into one row per (token, layer, head).

    For every head matrix ``attention_list_per_sentence[s][layer][0][head]`` the
    values are summed along dim 0 and divided by that dimension's size; the
    entry at the token's own position is stored for that token.

    Args:
        sentences_tokenized: list of token lists, one per sentence.
        attention_list_per_sentence: per sentence, a sequence with one tensor
            per layer. Assumes each tensor is indexed as
            [batch, head, row, column] and that only batch item 0 matters --
            TODO confirm against the model output.

    Returns:
        DataFrame with columns ``"word"`` and ``"attention_map"``; each
        ``attention_map`` is ``{layer: {head: weight}}`` with 1-based layer and
        head numbers. Rows are ordered sentence -> token -> layer -> head.
    """
    words = []
    attention_maps = []

    # enumerate() rather than list.index(): index() returns the FIRST match, so
    # a token (or a sentence) that occurs more than once would be given the
    # position -- and therefore the weight -- of its first occurrence.
    for sentence_index, token_sentence in enumerate(sentences_tokenized):
        sentence_attention = attention_list_per_sentence[sentence_index]

        # The per-head reduction does not depend on the token, so compute it
        # once per sentence. Layer and head counts come from the data itself
        # instead of being fixed at 12.
        reduced = []  # reduced[layer][head] -> list of floats, one per position
        for layer_attention in sentence_attention:
            heads = layer_attention[0]
            per_head = []
            for head in range(heads.shape[0]):
                head_attention = heads[head]
                averaged = torch.sum(head_attention, dim=0) / head_attention.shape[0]
                per_head.append(averaged.tolist())
            reduced.append(per_head)

        for word_index, token in enumerate(token_sentence):
            for layer, per_head in enumerate(reduced):
                for head, weights in enumerate(per_head):
                    words.append(token)
                    attention_maps.append({layer + 1: {head + 1: weights[word_index]}})

    # Build the two-column frame directly; the former index-oriented frame plus
    # transpose created one column per row before flipping it.
    return pd.DataFrame(
        {"word": words, "attention_map": attention_maps},
        columns=["word", "attention_map"],
    )

In [5]:
def generate_attention_attention_layers_per_head_for_word(word,attention_map_df):
    """Average the stored attention weights of ``word`` into a layer x head table.

    Args:
        word: value to match exactly against the ``"word"`` column.
        attention_map_df: frame with a ``"word"`` column and an
            ``"attention_map"`` column holding ``{layer: {head: weight}}`` dicts.

    Returns:
        DataFrame indexed by layer with one column per head. Each cell is the
        mean weight over all rows recorded for that (layer, head); a
        combination that never occurs is filled with 0.

    Raises:
        ValueError: if no row matches ``word``.
    """
    word_maps = attention_map_df.loc[attention_map_df["word"] == word, "attention_map"]

    # One (layer, head, weight) record per stored entry.
    records = [
        (layer, head, weight)
        for attention_map in word_maps
        for layer, heads in attention_map.items()
        for head, weight in heads.items()
    ]
    if not records:
        raise ValueError("no attention maps found for word %r" % (word,))

    long_df = pd.DataFrame(records, columns=["layer", "head", "weight"])
    # Average per (layer, head) BEFORE filling gaps. Filling missing cells with
    # 0 first would count every other head's row as a zero observation and
    # shrink each mean by the number of heads.
    res = long_df.groupby(["layer", "head"])["weight"].mean().unstack("head")
    res = res.fillna(0)
    # Keep the axes unnamed, as a plain layer x head table.
    res.index.name = None
    res.columns.name = None
    return res
def generate_attention_heat_map_for_word(word,attention_map_df):
    """Draw a layer x head heat map of the averaged attention weights of ``word``.

    Args:
        word: value to match exactly against the ``"word"`` column.
        attention_map_df: frame with ``"word"`` and ``"attention_map"`` columns.

    The figure is drawn on the current matplotlib axes through seaborn;
    nothing is returned.
    """
    # Reuse the shared aggregation instead of a private copy of it, so the heat
    # map always shows exactly the table the helper returns.
    res = generate_attention_attention_layers_per_head_for_word(word, attention_map_df)
    ax = sns.heatmap(res, cbar=True)
    ax.set(xlabel='head', ylabel='layer')
    ax.set_title("word '"+str(word)+"'")
    # Flip the y axis so the lowest layer number is at the bottom.
    ax.invert_yaxis()


In [6]:
def generate_attention_layer_avg_line_plot_for_word(word,attention_map_df):
    """Plot, per layer, the attention weight of ``word`` averaged over all heads.

    Args:
        word: value to match exactly against the ``"word"`` column.
        attention_map_df: frame with a ``"word"`` column and an
            ``"attention_map"`` column holding ``{layer: {head: weight}}`` dicts.

    Raises:
        ValueError: if no row matches ``word``.

    The plot is shown with ``plt.show()``; nothing is returned.
    """
    word_maps = attention_map_df.loc[attention_map_df["word"] == word, "attention_map"].values
    frames = [pd.DataFrame.from_dict(attention_map, orient='index') for attention_map in word_maps]
    if not frames:
        raise ValueError("no attention maps found for word %r" % (word,))

    res = pd.concat(frames)
    # Mean per layer and head (missing cells are skipped), then across heads.
    res = res.groupby(res.index).mean()
    avg_head_res = res.mean(axis=1).values
    layers = res.index.values

    plt.title("word '"+str(word)+"'")
    # Tick exactly on the layers present; a fixed 0..11 range does not line up
    # with the 1-based layer numbers stored in the maps.
    plt.xticks(layers)
    plt.xlabel("layers")
    plt.ylabel("avg. attention weight")
    plt.plot(layers,avg_head_res)
    plt.grid(False)
    plt.show()

## Read Data

In [7]:
# Directory of the fine-tuned BERT checkpoint (config, vocabulary and weights
# are all loaded from it).
output_dir = "../../trained_models/BERT-Fine-Tuned/Pytorch/Fine_Tune_wtp_tox_clean_text128/"

# Request hidden states and attention maps in the model output.
config = BertConfig.from_pretrained(
    output_dir,
    output_attentions=True,
    output_hidden_states=True,
)
tokenizer = BertTokenizer.from_pretrained(output_dir, do_lower_case=True)

# NOTE(review): nothing visible in this notebook moves the model to `device`.
FT_model = BertForSequenceClassification.from_pretrained(output_dir, config=config)


In [8]:
# Directory of the BERT checkpoint that was not fine-tuned.
NFT_model_dir = "../../trained_models/bert-base-uncased/"

# Request hidden states and attention maps in the model output.
NFT_config = BertConfig.from_pretrained(
    NFT_model_dir,
    output_attentions=True,
    output_hidden_states=True,
)
NFT_tokenizer = BertTokenizer.from_pretrained(NFT_model_dir, do_lower_case=True)

# NOTE(review): nothing visible in this notebook moves the model to `device`.
NFT_model = BertForSequenceClassification.from_pretrained(NFT_model_dir, config=NFT_config)

In [9]:
# Load the Wikipedia toxicity train / test splits.
# Both path literals were missing their closing quote, which is a syntax error.
train_df = pd.read_csv("../../../Data/wikipedia_toxicity/wp_tox_data_train.csv")
test_df = pd.read_csv("../../../Data/wikipedia_toxicity/wp_tox_data_test.csv")


## important words for pos class

In [13]:
# Keywords inspected for the positive class.
important_pos_words = [
    "pussy",
    "sucks",
    "bastards",
    "sigh",
    "stalin",
]

In [14]:
# Collect, per keyword, the test rows whose cleaned text contains it, then stack
# the per-keyword frames. A row that contains several keywords appears once per
# keyword, as before.
# regex=False: the keywords are literal text, not patterns.
# na=False: a missing text never matches, instead of putting NaN into the
# boolean mask (such rows are removed by dropna() below anyway).
data_text_contain_pos_words = pd.concat(
    [
        test_df[test_df.Text_clean_for_BERT.str.contains(word, regex=False, na=False)]
        for word in important_pos_words
    ]
)
data_text_contain_pos_words = data_text_contain_pos_words.dropna()

In [15]:
# Number of selected rows.
data_text_contain_pos_words.shape[0]

93

In [17]:
# Cleaned text of the selected rows as an array.
sentences = data_text_contain_pos_words["Text_clean_for_BERT"].values

In [None]:
# Keep only the first 128 space-separated pieces of every text.
sentences = [
    " ".join(pieces[:128])
    for pieces in (text.split(" ") for text in sentences)
]

## get attention weights of most important words + fine-tuned BERT

In [18]:
# Fine-tuned model on the positive-word sentences: collect the tokens and the
# per-sentence attention output, then flatten them into one row per
# (token, layer, head).
(
    FT_BERT_sentences_tokenized,
    FT_BERT_attention_list_per_sentence,
) = get_model_attention_for_sentence(
    model=FT_model,
    tokenizer=tokenizer,
    sentences=sentences,
)
FT_BERT_attention_map_per_word_df = get_attention_map_per_word_as_df(
    sentences_tokenized=FT_BERT_sentences_tokenized,
    attention_list_per_sentence=FT_BERT_attention_list_per_sentence,
)

In [19]:
# Create the target folder first so a fresh checkout does not fail on a missing
# directory.
# NOTE(review): the file name says "Twitter_sexism" although the data loaded in
# this notebook comes from wikipedia_toxicity -- confirm the name is intended
# and does not overwrite results of another experiment.
os.makedirs("attention_maps/bert_with_fine_tuning", exist_ok=True)
FT_BERT_attention_map_per_word_df.to_csv("attention_maps/bert_with_fine_tuning/Twitter_sexism_BERT_Fine_Tuning_most_pos_words_attetnion_weights_IG")

## get attention weights of most important words + non-fine-tuned BERT

In [20]:
# Non-fine-tuned model on the positive-word sentences: collect the tokens and
# the per-sentence attention output, then flatten them into one row per
# (token, layer, head).
(
    NFT_BERT_sentences_tokenized,
    NFT_BERT_attention_list_per_sentence,
) = get_model_attention_for_sentence(
    model=NFT_model,
    tokenizer=NFT_tokenizer,
    sentences=sentences,
)
NFT_BERT_attention_map_per_word_df = get_attention_map_per_word_as_df(
    sentences_tokenized=NFT_BERT_sentences_tokenized,
    attention_list_per_sentence=NFT_BERT_attention_list_per_sentence,
)

In [21]:
# Create the target folder first so a fresh checkout does not fail on a missing
# directory.
# NOTE(review): the file name says "Twitter_sexism" although the data loaded in
# this notebook comes from wikipedia_toxicity -- confirm the name is intended
# and does not overwrite results of another experiment.
os.makedirs("attention_maps/bert_without_fine_tuning", exist_ok=True)
NFT_BERT_attention_map_per_word_df.to_csv("attention_maps/bert_without_fine_tuning/Twitter_sexism_without_Fine_tuning_most_pos_words_attetnion_weights_IG")

## important words for negative class

In [22]:
# Keywords inspected for the negative class.
important_neg_words = [
    "interactions",
    "bully",
    "divorce",
    "genuine",
    "atrocities",
]

In [23]:
# Collect, per keyword, the test rows whose cleaned text contains it, then stack
# the per-keyword frames. A row that contains several keywords appears once per
# keyword, as before.
# regex=False: the keywords are literal text, not patterns.
# na=False: a missing text never matches, instead of putting NaN into the
# boolean mask (such rows are removed by dropna() below anyway).
data_text_contain_neg_words = pd.concat(
    [
        test_df[test_df.Text_clean_for_BERT.str.contains(word, regex=False, na=False)]
        for word in important_neg_words
    ]
)
data_text_contain_neg_words = data_text_contain_neg_words.dropna()

In [24]:
# Number of selected rows.
data_text_contain_neg_words.shape[0]

13

In [25]:
# Cleaned text of the selected rows as an array.
sentences_neg = data_text_contain_neg_words["Text_clean_for_BERT"].values

In [None]:
# Keep only the first 128 space-separated pieces of every text.
sentences_neg = [
    " ".join(pieces[:128])
    for pieces in (text.split(" ") for text in sentences_neg)
]

## get attention weights of fine-tuned-bert for important neg words

In [26]:
# Fine-tuned model on the negative-word sentences.
# NOTE(review): this reuses the FT_BERT_* names assigned by the positive-word
# run earlier in the notebook, so those results are overwritten here.
(
    FT_BERT_sentences_tokenized,
    FT_BERT_attention_list_per_sentence,
) = get_model_attention_for_sentence(
    model=FT_model,
    tokenizer=tokenizer,
    sentences=sentences_neg,
)
FT_BERT_attention_map_per_word_df = get_attention_map_per_word_as_df(
    sentences_tokenized=FT_BERT_sentences_tokenized,
    attention_list_per_sentence=FT_BERT_attention_list_per_sentence,
)

In [27]:
# Create the target folder first so a fresh checkout does not fail on a missing
# directory.
# NOTE(review): the file name says "Twitter_sexism" although the data loaded in
# this notebook comes from wikipedia_toxicity -- confirm the name is intended
# and does not overwrite results of another experiment.
os.makedirs("attention_maps/bert_with_fine_tuning", exist_ok=True)
FT_BERT_attention_map_per_word_df.to_csv("attention_maps/bert_with_fine_tuning/Twitter_sexism_BERT_Fine_Tuning_most_neg_words_attetnion_weights_IG")

## get attention weights of BERT without fine-tuning for important neg words

In [28]:
# Non-fine-tuned model on the negative-word sentences.
# NOTE(review): this reuses the NFT_BERT_* names assigned by the positive-word
# run earlier in the notebook, so those results are overwritten here.
(
    NFT_BERT_sentences_tokenized,
    NFT_BERT_attention_list_per_sentence,
) = get_model_attention_for_sentence(
    model=NFT_model,
    tokenizer=NFT_tokenizer,
    sentences=sentences_neg,
)
NFT_BERT_attention_map_per_word_df = get_attention_map_per_word_as_df(
    sentences_tokenized=NFT_BERT_sentences_tokenized,
    attention_list_per_sentence=NFT_BERT_attention_list_per_sentence,
)

In [29]:
# Create the target folder first so a fresh checkout does not fail on a missing
# directory.
# NOTE(review): the file name says "Twitter_sexism" although the data loaded in
# this notebook comes from wikipedia_toxicity -- confirm the name is intended
# and does not overwrite results of another experiment.
os.makedirs("attention_maps/bert_without_fine_tuning", exist_ok=True)
NFT_BERT_attention_map_per_word_df.to_csv("attention_maps/bert_without_fine_tuning/Twitter_sexism_without_Fine_tuning_most_neg_words_attetnion_weights_IG")