# LioNets: HateSpeech Dataset with Neural Networks and Embeddings-> Classification Task

In this notebook, we present how LioNets can be applied in predictive models using embeddings as inputs.

In [None]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Testing a variety of NN architectures with Embeddings             #
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
import warnings
warnings.filterwarnings("ignore")
import keras
from keras.preprocessing.sequence import pad_sequences
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, SpatialDropout1D, CuDNNLSTM, Bidirectional, Dense, \
    LSTM, Conv1D, MaxPooling1D, Dropout, concatenate, Flatten, add, RepeatVector, ConvLSTM2D, TimeDistributed, Reshape
from keras import initializers, regularizers, constraints
from keras.models import model_from_json
from keras import objectives, backend as K
from keras.engine import Layer
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import Input, Model
from keras.optimizers import Adam
from keras.models import Sequential, clone_model
from keras.layers.embeddings import Embedding
from keras.preprocessing.text import Tokenizer
from sklearn.model_selection import StratifiedKFold, KFold, train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, precision_score, recall_score
import time
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import re
from collections import OrderedDict
import pandas as pd
pd.set_option('max_colwidth',400)

from lionets import LioNets
from utilities.custom_attention import Attention
from utilities.load_dataset import Load_Dataset
from utilities.evaluation import Evaluation

from altruist.altruist import Altruist

from lime.lime_text import LimeTextExplainer
import innvestigate
import innvestigate.utils as iutils
from innvestigate.utils.keras import checks

In [None]:
import nltk
nltk.download('wordnet')
nltk.download('stopwords')

In [None]:
# Load the hate-speech inputs (X) and their labels (y) through the project's Load_Dataset helper.
# NOTE(review): the meaning of the two positional flags (True, False) is defined in
# utilities.load_dataset and is not visible here - confirm before changing them.
X, y = Load_Dataset.load_hate_data(True,False)
#X_unsup,y_unsup = Preproccesor.load_unsupervised_data(True,False)

In [None]:
# Names of the two classes; later handed to LimeTextExplainer.
class_names = ['noHateSpeech', 'hateSpeech']
# Stratified 80/20 train/validation split with a fixed random seed.
X_train, X_valid, y_train, y_valid =  train_test_split(X,y,test_size=0.2, stratify = y, random_state=0)

In [None]:
"""
!wget 'https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip'
import zipfile
with zipfile.ZipFile("/content/crawl-300d-2M.vec.zip","r") as zip_ref:
    zip_ref.extractall()
    print(zip_ref.filelist)
del zip_ref
"""

In [None]:
def get_coefs(word, *arr):
    """Pair an embedding token with its coefficients.

    ``arr`` holds the remaining fields of one embedding entry; they are
    converted into a single float32 NumPy vector.

    Returns a ``(word, vector)`` tuple.
    """
    vector = np.asarray(arr, dtype='float32')
    return word, vector

In [None]:
def build_matrix(embedding_path, tk, max_features, embed_size=50):
    """Build the embedding matrix for the tokenizer's vocabulary.

    The embedding file is streamed line by line and closed afterwards; only
    vectors of words that are actually needed are parsed, so the full
    embedding table is never held in memory.

    Parameters
    ----------
    embedding_path : str
        Text file with one ``word v1 v2 ... vN`` entry per line, fields
        separated by single spaces.
    tk : object
        Anything exposing a ``word_index`` mapping of word -> integer index.
    max_features : int
        Words whose index is ``>= max_features`` are ignored.
    embed_size : int, optional
        Expected vector length. Defaults to 50, the value that used to be
        hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max_features + 1, embed_size)``. Rows of words that
        are missing from the embedding file stay all-zero.
    """
    word_index = tk.word_index
    embedding_matrix = np.zeros((max_features + 1, embed_size))
    with open(embedding_path, encoding="utf-8") as embedding_file:
        for line in embedding_file:
            word, *values = line.strip().split(" ")
            i = word_index.get(word)
            if i is None or i >= max_features:
                continue
            # Skip header lines (e.g. the "count dim" first line of .vec files)
            # and malformed rows instead of broadcasting or crashing on them.
            if len(values) != embed_size:
                continue
            embedding_matrix[i] = np.asarray(values, dtype='float32')
    return embedding_matrix

In [None]:
# Pre-trained embeddings actually used by the notebook: GloVe Twitter, 50 dimensions.
# The FastText alternative ("embeddings/crawl-300d-2M.vec") used to be assigned to the
# same variable one line earlier and was immediately overwritten, so that dead
# assignment is kept only as this comment.
embedding_path1 = 'embeddings/glove.twitter.27B.50d.txt' #GloveSmall
embed_size = 50  # vector length of the embeddings referenced above

In [None]:
max_features = 500  # passed as num_words to the Tokenizer and as the vocabulary bound to build_matrix
max_len = 50  # maxlen used by pad_sequences for every tokenized text

In [None]:
# Fit the tokenizer on the training texts only, then convert both splits into
# integer sequences padded/truncated to max_len.
# NOTE(review): oov_token is the boolean True rather than a string; later cells rely on
# True == 1 to recognise the out-of-vocabulary index and relabel it as 'UKN'.
tk = Tokenizer(lower = True, filters='', num_words=max_features, oov_token = True)
tk.fit_on_texts(X_train)
train_tokenized = tk.texts_to_sequences(X_train)
valid_tokenized = tk.texts_to_sequences(X_valid)
X_tr = pad_sequences(train_tokenized, maxlen=max_len)
X_va = pad_sequences(valid_tokenized, maxlen=max_len)

In [None]:
embedding_matrix = build_matrix(embedding_path1, tk, max_features)
embedding_matrix.shape

In [None]:
# Softened targets for training: labels <= 0.5 become 0.1, all others 0.9.
train_y = [0.1 if i <=0.5 else 0.9 for i in y_train]
valid_y = [0.1 if i <=0.5 else 0.9 for i in y_valid]

In [None]:
# Checkpoint callback that stores the weights with the best validation loss.
file_path = "Hate_Predictor.hdf5"
check_point = ModelCheckpoint(file_path, monitor="val_loss", verbose=2,save_best_only=True, mode="auto")

# Frozen, pre-trained embedding layer. The output size is read from the
# embedding matrix itself so it can never disagree with the loaded weights.
main_input = Input(shape=(max_len,), dtype='int32', name='main_input')
embedding_input = (Embedding(max_features + 1, embedding_matrix.shape[1], input_length=max_len,  weights=[embedding_matrix], trainable=False))(main_input)

embedding_input2 = SpatialDropout1D(0.5)(embedding_input)

# Recurrent branch: BiLSTM summarised by attention and global max pooling.
x = Bidirectional(LSTM(100, return_sequences=True))(embedding_input2)
encoder_x = concatenate([
    Attention(max_len)(x),
    GlobalMaxPooling1D()(x),
])

# Convolutional branch; it reads the embeddings before the spatial dropout.
# The tensor is deliberately not named `y`: that name holds the dataset labels
# loaded earlier and must not be overwritten by a Keras tensor.
conv_features = Conv1D(filters=100,kernel_size=3,activation='tanh')(embedding_input)
encoder_y = GlobalMaxPooling1D()(conv_features)

hidden = concatenate([encoder_x,encoder_y])

# Dense head. The output of the 500-unit layer (second-to-last layer of the
# model) is what the encoder cell later exposes as the encoded representation.
hidden = Dropout(0.5)(hidden)#0.5
hidden = Dense(750, activation='tanh')(hidden)
hidden = Dropout(0.7)(hidden)
hidden = Dense(500, activation='tanh')(hidden)
output_lay = Dense(1, activation='sigmoid')(hidden)
model = Model(inputs=[main_input], outputs=[output_lay])

In [None]:
#model.summary()

In [None]:
model.compile(loss="binary_crossentropy", optimizer=Adam())

In [None]:
#model.fit(X_tr, train_y, batch_size=128, epochs=200, validation_data=(X_va, valid_y), verbose=1, callbacks=[check_point], shuffle=True)

In [None]:
# Load the stored predictor weights (instead of running the commented-out fit) and recompile.
weights_file = 'weights/Hate_Predictor.hdf5' # choose the best checkpoint few features
model.load_weights(weights_file) # load it
model.compile(loss="binary_crossentropy", optimizer=Adam())

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, f1_score, balanced_accuracy_score, accuracy_score

# For each split: threshold the sigmoid output at 0.5 and print macro F1,
# weighted F1, balanced accuracy and accuracy (in that order).
temp_predo1 = model.predict(X_tr)
predictions = [0 if i[0] <=0.5 else 1 for i in temp_predo1]
print('Train:',f1_score(y_train,predictions, average='macro'),f1_score(y_train,predictions, average='weighted'),
      balanced_accuracy_score(y_train,predictions),accuracy_score(y_train,predictions))

temp_predo2 = model.predict(X_va)
predictions = [0 if i[0] <=0.5 else 1 for i in temp_predo2]
# These are the validation scores; they used to be printed under the label 'Train:'.
print('Valid:',f1_score(y_valid,predictions, average='macro'),f1_score(y_valid,predictions, average='weighted'),
      balanced_accuracy_score(y_valid,predictions), accuracy_score(y_valid,predictions))

In [None]:
# Encoder: same input as the predictor, output taken from its second-to-last layer.
# Uses the `inputs=`/`outputs=` keywords (as the predictor's Model(...) call does)
# instead of the deprecated Keras 1 spellings `input=`/`output=`.
encoder = Model(inputs=model.input, outputs=[model.layers[-2].output])
encoder.trainable = False
encoder.compile(loss="binary_crossentropy", optimizer=Adam(), metrics=["accuracy"])

In [None]:
# Stand-alone model made of the predictor's final layer only, so that it can be
# applied directly to encoded representations.
last_layer = model.layers[-1]
predictor_for_encoded = Sequential([last_layer])

In [None]:
encoded_x_train = encoder.predict(X_tr)
encoded_x_valid = encoder.predict(X_va)

In [None]:
# Copy of the tokenizer's index -> word map in which index 1 is shown as 'UKN'.
iw = {**tk.index_word, 1: 'UKN'}
# Rebuild each padded sequence as a space-separated string, dropping the 0 padding.
X_T = [' '.join(iw[token] for token in row if token != 0) for row in X_tr]
X_V = [' '.join(iw[token] for token in row if token != 0) for row in X_va]

In [None]:
X_train[5],X_T[5]

In [None]:
# One-hot encode the padded sequences: result[n, t, X[n, t]] = 1, giving arrays of
# shape (number of texts, max_len, max_features + 1). They are the decoder's targets.
max_features_2 = max_features + 1
temp = np.zeros((X_tr.shape[0], max_len, max_features_2))
# Advanced indexing: a column of sample indices, a grid of time-step indices and the
# token ids themselves broadcast together to select exactly one cell per (sample, step).
temp[np.expand_dims(np.arange(X_tr.shape[0]), axis=0).reshape(X_tr.shape[0], 1), 
     np.repeat(np.array([np.arange(max_len)]), X_tr.shape[0], axis=0), X_tr] = 1
X_train_one_hot = temp

# Same encoding for the validation sequences.
temp = np.zeros((X_va.shape[0], max_len, max_features_2))
temp[np.expand_dims(np.arange(X_va.shape[0]), axis=0).reshape(X_va.shape[0], 1), 
     np.repeat(np.array([np.arange(max_len)]), X_va.shape[0], axis=0), X_va] = 1
X_valid_one_hot = temp

In [None]:
# Decoder: maps one encoded vector back to a sequence of 50 softmax distributions
# over the max_features_2 token ids.
encoded_input = Input(shape=(encoded_x_train[0].shape))
hidden = RepeatVector(50)(encoded_input)  # repeat the encoding once per output time step
decoded = LSTM(350, return_sequences=True)(hidden)
decoded = LSTM(750, return_sequences=True, name='dec_lstm_2')(decoded)
decoded = TimeDistributed(Dense(max_features_2, activation='softmax'), name='decoded_mean')(decoded)

# NOTE(review): z_mean and z_log_var are created but never connected to `decoded`,
# so they are not part of the `decoder` model built below.
z_mean = Dense(500, name='z_mean', activation='linear')(encoded_input)
z_log_var = Dense(500, name='z_log_var', activation='linear')(encoded_input)

decoder = Model(encoded_input,decoded)
decoder.summary()
decoder.compile(optimizer="Adam",loss=['categorical_crossentropy'],metrics=['mae'])

# Checkpoint callback for the (commented-out) decoder training call.
checkpoint_name = 'Hate_Decoder.hdf5' 
checkpoint = ModelCheckpoint(checkpoint_name, monitor='val_loss', verbose = 2, save_best_only = True, mode ='auto')

In [None]:
#decoder.fit( np.concatenate((encoded_x_train,encoded_x_valid)), np.concatenate((X_train_one_hot,X_valid_one_hot)), 
#             epochs=1000, batch_size=128, shuffle=True, 
#             validation_data=(np.concatenate((encoded_x_train,encoded_x_valid)), np.concatenate((X_train_one_hot,X_valid_one_hot))), 
#             verbose=1, callbacks=[checkpoint])

In [None]:
# Load the stored decoder weights (instead of running the commented-out fit) and recompile.
wights_file = 'weights/Hate_Decoder.hdf5' # choose the best checkpoint few features
decoder.load_weights(wights_file) # load it
decoder.compile(optimizer="Adam",loss=['categorical_crossentropy'],metrics=['mae'])

In [None]:
decoder.evaluate(encoded_x_train,X_train_one_hot)

In [None]:
decoder.evaluate(encoded_x_valid,X_valid_one_hot)

In [None]:
instances_evaluation = decoder.predict(encoded_x_train[40:50]) 

In [None]:
def _one_hot_to_text(rows):
    """Render a sequence of one-hot / probability rows as words.

    Each row is reduced to its argmax index exactly once. Index 0 (padding) is
    skipped, index 1 (out-of-vocabulary) is rendered as 'UKN', and every other
    index is looked up in ``tk.index_word``. Every word is followed by a space.
    """
    words = []
    for row in rows:
        index = int(np.argmax(row))
        if index == 0:
            continue
        words.append('UKN' if index == 1 else tk.index_word[index])
    return ''.join(word + ' ' for word in words)


# Compare training instances 40..49 with their decoder reconstructions; the
# offset 40 matches the slice that was passed to decoder.predict.
for j, decoded_rows in enumerate(instances_evaluation):
    print(" Original", _one_hot_to_text(X_train_one_hot[j+40]))
    print("  Decoded:", _one_hot_to_text(decoded_rows))
    print('""""""""""""""""""')

In [None]:
instances_evaluation = decoder.predict(encoded_x_valid[10:20]) 

In [None]:
# Compare validation instances 10..19 with their decoder reconstructions; the
# offset 10 matches the slice that was passed to decoder.predict.
for j, decoded_rows in enumerate(instances_evaluation):
    for label, rows in ((" Original", X_valid_one_hot[j+10]), ("  Decoded:", decoded_rows)):
        # One argmax per time step; 0 is padding and 1 is the out-of-vocabulary index.
        indices = [int(np.argmax(row)) for row in rows]
        text = ''.join(
            ('UKN' if index == 1 else tk.index_word[index]) + ' '
            for index in indices
            if index != 0
        )
        print(label, text)
    print('""""""""""""""""""')

In [None]:
# Relabel index 1 in the tokenizer's index -> word map as 'UKN' (tts() reads tk.index_word).
tk.index_word[1]='UKN'

In [None]:
# Make 'UKN' resolvable through word_index as index 1 (the fi_lime / fi_lionets functions look words up there).
tk.word_index['UKN'] = 1

## LioNets Experiments
Having everything set up, we are now ready to try our methodology, Gradient x Input and LIME.

In [None]:
from sklearn.linear_model import Ridge, SGDRegressor, LinearRegression
# LioNets explainer built from the predictor, the decoder, the encoder and the padded training sequences.
# NOTE(review): the semantics of decoder_lower_threshold / double_detector are defined in the lionets module - confirm there.
lionet = LioNets(model, decoder, encoder, X_tr, decoder_lower_threshold=0, double_detector=True, embeddings=True, tk=tk)
# Ridge regressor handed to lionet.explain_instance as the transparent model.
transparent_model = Ridge(alpha=0.02,fit_intercept=True,random_state=0)

In [None]:
import random 
random.seed(2000)  # fixed seed so the same 200 training instances are drawn on every run
# 200 randomly sampled training sequences and the first 200 validation sequences.
train = np.array(random.sample(list(X_tr),200))
valid = np.array(X_va[:200]) #X_V is 200 already
train.shape, valid.shape

Let's calculate the fidelity of Lime and LioNets

In [None]:
# LIME tokenises the text by splitting on runs of non-word characters.
split_expression = lambda s: re.split(r'\W+', s)
explainer = LimeTextExplainer(class_names=class_names, split_expression=split_expression)
def lime_predict(text):
    """Classifier function handed to LIME.

    Tokenises and pads the given texts, runs the predictor and returns a
    two-column array: ``1 - p`` in the first column and the model output ``p``
    in the second.
    """
    sequences = pad_sequences(tk.texts_to_sequences(text), maxlen=max_len)
    positive = model.predict(sequences)
    negative = 1 - positive
    return np.column_stack((negative, positive))

In [None]:
def tts(text):
    """Turn a sequence of token indices into a space-separated sentence.

    Index 0 (padding) is skipped; every other index is looked up in
    ``tk.index_word``.
    """
    words = [tk.index_word[index] for index in text if index != 0]
    return ' '.join(words)

In [None]:
def fi_lime(text):
    """Return the local prediction of LIME's surrogate model for one token sequence."""
    sentence = tts(text)
    lime_explanation = explainer.explain_instance(text_instance=sentence, classifier_fn=lime_predict)
    # LIME explains the class with label 1, hence the first (only) local prediction.
    return lime_explanation.local_pred[0]
def fi_lionets(text):
    """Return the first element of the third value produced by LioNets for one token sequence."""
    _first, _second, local_result, _fourth, _fifth = lionet.explain_instance(text,2500,transparent_model)
    return local_result[0]

In [None]:
evaluator = Evaluation(model.predict,None,None,True)

In [None]:
# Fidelity of LIME and LioNets, first on the sampled training instances, then on the validation ones.
for split_label, split_data in (('Train:', train), ('Valid:', valid)):
    fidelity = evaluator.fidelity(split_data, [fi_lime, fi_lionets], class_n=0)
    print(split_label)
    print('  Lime fidelity:', fidelity[0][0])
    print('  LioNets fidelity:', fidelity[1][0])

Let's calculate the non zero weights

In [None]:
# Collect, for every model output that ends in a sigmoid, the tensor *before* that
# activation; `ret` becomes the output list of the model analysed with LRP.
Xs = iutils.to_list(model.outputs)
softmax_found = False  # NOTE(review): set below but never read afterwards
ret = []
for x in Xs:
    layer, node_index, tensor_index = x._keras_history
    if checks.contains_activation(layer, activation="sigmoid"):
        softmax_found = True
        if isinstance(layer, keras.layers.Activation):
            # Stand-alone activation layer: its input already is the pre-activation tensor.
            ret.append(layer.get_input_at(node_index))
        else:
            # Layer with a fused activation: re-apply a copy of it without the activation.
            layer_wo_act = innvestigate.utils.keras.graph.copy_layer_wo_activation(layer)
            ret.append(layer_wo_act(layer.get_input_at(node_index)))

In [None]:
# Copy of the predictor that outputs the pre-sigmoid tensors collected in `ret`.
# Uses the `inputs=`/`outputs=` keywords (as the predictor's Model(...) call does)
# instead of the deprecated Keras 1 spellings `input=`/`output=`.
model2 = Model(inputs=model.input, outputs=ret)
model2.trainable = False
model2.compile(loss="binary_crossentropy", optimizer=Adam(), metrics=["accuracy"])
# LRP-epsilon analyzer (epsilon = 1) applied to the most activated output neuron.
analyzer = innvestigate.create_analyzer('lrp.epsilon', model2, neuron_selection_mode='max_activation', epsilon=1)

In [None]:
def fi_LRP(X_t):
    """Return the LRP relevance of one token sequence with padding positions zeroed."""
    relevance = analyzer.analyze(np.array([X_t]))[0]
    # 1 for real tokens, 0 for the padding index; this masking is applied to LRP only.
    padding_mask = np.array([int(token != 0) for token in X_t])
    return relevance * padding_mask

In [None]:
def fi_lime(text):
    """Return LIME's feature weights aligned with the positions of ``text``.

    ``text`` is a sequence of token indices. The result is a NumPy array of
    the same length: padding positions (index 0) and tokens for which LIME
    returned no weight get 0, every other position gets the LIME weight of
    its token.
    """
    sent = tts(text)
    explanation = explainer.explain_instance(text_instance=sent, classifier_fn=lime_predict)
    lime_w = {}
    for word, weight in explanation.as_list():
        # LIME splits the sentence on non-word characters, so it can return
        # fragments that are not vocabulary entries; those are skipped instead
        # of raising a KeyError.
        index = tk.word_index.get(word)
        if index is not None:
            lime_w[index] = weight
    return np.array([0 if i == 0 else lime_w.get(i, 0) for i in text])

In [None]:
def fi_lionets(text):
    """Return LioNets' feature weights aligned with the positions of ``text``.

    ``text`` is a sequence of token indices. The result is a NumPy array of
    the same length: padding positions (index 0) get 0, every other position
    gets the weight LioNets assigned to its token.
    """
    weights, _, _, names, _ = lionet.explain_instance(text,2500,transparent_model)
    lionets_w = {}
    # Pair names with weights directly. Going through a dict keyed by the
    # weight value would silently drop every feature that shares its weight
    # with another one.
    for name, weight in zip(names, weights[0]):
        key = 'UKN' if name == 'ukn' else name
        lionets_w[tk.word_index[key]] = weight
    return np.array([0 if i == 0 else lionets_w[i] for i in text])

In [None]:
# Non-zero weights of LRP, LIME and LioNets on the sampled training instances, then on the validation ones.
for split_label, split_data in (('Train:', train), ('Valid:', valid)):
    non_zero = evaluator.non_zero_weights(split_data, [fi_LRP, fi_lime, fi_lionets])
    print(split_label)
    print('  LRP Non Zero:', non_zero[0][0])
    print('  Lime Non Zero:', non_zero[1][0])
    print('  LioNets Non Zero:', non_zero[2][0])

Let's calculate the robustness

In [None]:
# Robustness of LIME, LRP and LioNets on the sampled training instances, then on the validation ones.
for split_label, split_data in (('Train:', train), ('Valid:', valid)):
    robustness = evaluator.robustness_embeddings(split_data,[fi_lime, fi_LRP, fi_lionets])
    print(split_label)
    print('  Lime Robustness:', robustness[0])
    print('  LRP Robustness:', robustness[1])
    print('  LioNets Robustness:', robustness[2])

Altruist

In [None]:
# Positional feature names 'f0' ... 'f49', later passed to Altruist.
features = ['f' + str(position) for position in range(50)]

In [None]:
def fi_LRP(X_t,prediction,model):
    """Return the LRP relevance of one token sequence with padding positions zeroed.

    ``prediction`` and ``model`` are not used by the body; they are presumably
    part of the calling convention Altruist expects (confirm in altruist).
    """
    relevance = analyzer.analyze(np.array([X_t]))[0]
    # 1 for real tokens, 0 for the padding index; this masking is applied to LRP only.
    padding_mask = np.array([int(token != 0) for token in X_t])
    return relevance * padding_mask
def fi_lime(text,prediction,model):
    """Return LIME's feature weights aligned with the positions of ``text``.

    ``text`` is a sequence of token indices. The result is a NumPy array of
    the same length: padding positions (index 0) and tokens for which LIME
    returned no weight get 0, every other position gets the LIME weight of
    its token. ``prediction`` and ``model`` are not used by the body; they are
    presumably part of the calling convention Altruist expects (confirm in
    altruist).
    """
    sent = tts(text)
    explanation = explainer.explain_instance(text_instance=sent, classifier_fn=lime_predict)
    lime_w = {}
    for word, weight in explanation.as_list():
        # LIME splits the sentence on non-word characters, so it can return
        # fragments that are not vocabulary entries; those are skipped instead
        # of raising a KeyError.
        index = tk.word_index.get(word)
        if index is not None:
            lime_w[index] = weight
    return np.array([0 if i == 0 else lime_w.get(i, 0) for i in text])
def fi_lionets(text,prediction,model):
    """Return LioNets' feature weights aligned with the positions of ``text``.

    ``text`` is a sequence of token indices. The result is a NumPy array of
    the same length: padding positions (index 0) get 0, every other position
    gets the weight LioNets assigned to its token. ``prediction`` and
    ``model`` are not used by the body; they are presumably part of the
    calling convention Altruist expects (confirm in altruist).
    """
    weights, _, _, names, _ = lionet.explain_instance(text,2500,transparent_model)
    lionets_w = {}
    # Pair names with weights directly. Going through a dict keyed by the
    # weight value would silently drop every feature that shares its weight
    # with another one.
    for name, weight in zip(names, weights[0]):
        key = 'UKN' if name == 'ukn' else name
        lionets_w[tk.word_index[key]] = weight
    return np.array([0 if i == 0 else lionets_w[i] for i in text])

In [None]:
print("*Please let it run, it will take time probably*")
fi_names = {fi_LRP:'LRP',fi_lime:'Lime',fi_lionets:'LioNets'}
fis = [fi_LRP, fi_lime,fi_lionets]
# One list of scores per interpretation technique, in the order of `fis`.
fis_scores = [[] for _ in fis]

altruistino = Altruist(model, train, fis, features, None, True, None, True)
for count, instance in enumerate(train, start=1):
    if count % 25 == 0:
        # Progress is reported against the collection being iterated (train).
        print(count,"/",len(train),"..",end=", ")
    untruthful_features = altruistino.find_untruthful_features(instance)
    # Score of a technique for this instance = number of untruthful features it produced.
    for i in range(len(untruthful_features[0])):
        fis_scores[i].append(len(untruthful_features[0][i]))
print()
print('Train:')
for fi, fis_score in zip(fis, fis_scores):
    print(' ',fi_names[fi],np.array(fis_score).mean())
# Rows: techniques, columns: evaluated instances.
fi_matrix = np.array(fis_scores)
# Best (lowest) score per instance across all techniques.
fi_all = fi_matrix.min(axis=0)
print("Altogether:",np.array(fi_all).mean())

In [None]:
fi_matrix

In [None]:
train[:4]

In [None]:
print("*Please let it run, it will take time probably*")
fi_names = {fi_LRP:'LRP',fi_lime:'Lime',fi_lionets:'LioNets'}
fis = [fi_LRP, fi_lime,fi_lionets]
# One list of scores per interpretation technique, in the order of `fis`.
fis_scores = [[] for _ in fis]

# NOTE(review): only the first two validation instances are evaluated, as in
# the original cell; widen this slice to evaluate the whole validation sample.
valid_subset = valid[:2]

altruistino = Altruist(model, train, fis, features, None, True, None, True)
for count, instance in enumerate(valid_subset, start=1):
    if count % 25 == 0:
        print(count,"/",len(valid_subset),"..",end=", ")
    untruthful_features = altruistino.find_untruthful_features(instance)
    # Score of a technique for this instance = number of untruthful features it produced.
    for i in range(len(untruthful_features[0])):
        fis_scores[i].append(len(untruthful_features[0][i]))
print()
print('Valid:')
for fi, fis_score in zip(fis, fis_scores):
    print(' ',fi_names[fi],np.array(fis_score).mean())
# Rows: techniques, columns: evaluated instances.
fi_matrix = np.array(fis_scores)
# Best (lowest) score per evaluated instance. Reducing over the matrix's own
# columns avoids indexing it with the length of `train`, which raised an
# IndexError because only the instances in `valid_subset` were scored.
fi_all = fi_matrix.min(axis=0)
print("Altogether:",np.array(fi_all).mean())

## Qualitative:

Now we will test an instance and its explanations

In [None]:
X_T[22]

In [None]:
weights, a, b, names, c = lionet.explain_instance(X_tr[22],2500,transparent_model)

In [None]:
# Map token index -> LioNets weight for the explained instance (X_tr[22]).
# Names and weights are paired directly; a dict keyed by the weight value would
# silently drop every feature that shares its weight with another one.
lionets_w = {}
for name, weight in zip(names, weights[0]):
    key = 'UKN' if name == 'ukn' else name
    lionets_w[tk.word_index[key]] = weight
# [word, weight] pairs for the non-padding tokens of the instance, in sentence order.
interpretation = [[tk.index_word[i], lionets_w[i]] for i in X_tr[22] if i != 0]

In [None]:
interpretation, a, b

In [None]:
import seaborn as sns
# Horizontal bar plot of the weights of the words that appear in the explained instance.
plt.figure(num=None, figsize=(4, 3), dpi=200, facecolor='w', edgecolor='k')
# `interpretation` holds [word, weight] pairs; column 0 gives the labels, column 1 the weights.
i_weights = pd.DataFrame({"Features": [o[0] for o in np.array(interpretation)[:,:1]], 
                                  "Features' Weights": [float(o[0]) for o in np.array(interpretation)[:,1:]]})
i_weights = i_weights.sort_values(by="Features' Weights", ascending=False)
i_weights = i_weights.drop_duplicates()  # a repeated word would otherwise be drawn more than once
sns.barplot(x="Features' Weights", y="Features", data=i_weights)
plt.show()

In [None]:
lime_predict(['or maybe just do not follow UKN UKN from the UKN UKN'])[0][1]

In [None]:
# Horizontal bar plot of every feature name returned by LioNets with its weight.
plt.figure(num=None, figsize=(10, 8), dpi=250, facecolor='w', edgecolor='k')
i_weights = pd.DataFrame({"Features": names, 
                                  "Features' Weights":weights[0]})
i_weights = i_weights.sort_values(by="Features' Weights", ascending=False)
i_weights = i_weights.drop_duplicates()
sns.barplot(x="Features' Weights", y="Features", data=i_weights)
plt.show()

In [None]:
# Features with a non-zero weight that do NOT occur in the explained instance.
# Membership is tested against the instance's tokens; a substring test on the
# whole sentence would wrongly treat e.g. 'he' as present in 'the'.
instance_tokens = set(X_T[22].lower().split())
counter_weights = []
counter_features = []
for name, weight in zip(names, weights[0]):
    if weight != 0 and name not in instance_tokens:
        counter_weights.append(weight)
        counter_features.append(name)
co_weights = pd.DataFrame({"Counter Features": list(counter_features), 
                                  "Features' Weights": list(counter_weights)})
co_weights = co_weights.sort_values(by="Features' Weights", ascending=False)
# Keep the three most positive and the three most negative counter features.
co_weights = pd.concat([co_weights.head(3),co_weights.tail(3)])
plt.figure(num=None, figsize=(4, 3), dpi=200, facecolor='w', edgecolor='k')
sns.barplot(x="Features' Weights", y="Counter Features", data=co_weights)
#plt.xticks(rotation=90)
#plt.title(str('Features not appearing in the instance'))
plt.show()

In [None]:
lime_predict(['or maybe just do not follow me UKN religions from the UKN UKN'])[0][1]