In [None]:
!pip install Keras==2.4.3
!pip install tensorflow==2.3.0
!pip install lime==0.1.1.32
!pip install nltk==3.2.4
!pip install autokeras
!pip install keras-tuner==1.0.2rc3
!pip install autoPyTorch

In [None]:
import sys

# Silence every warning for the rest of the session, but only when the user did
# not request specific warning behaviour at start-up (sys.warnoptions is then empty).
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

In [None]:
import keras
from keras.layers import Dense, LSTM, Dropout, Reshape
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import Input, Model
from keras.optimizers import Adam
from keras.models import Sequential
from keras.preprocessing.text import Tokenizer
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.linear_model import Ridge, SGDRegressor, LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, f1_score, balanced_accuracy_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
pd.set_option('max_colwidth',400)
import matplotlib.pyplot as plt
import re
import numpy as np
from collections import OrderedDict
import nltk
import seaborn as sns
from load_dataset import Load_Dataset
from LioNets import LioNet
from evaluation import Evaluation
import autokeras as ak
import tensorflow as tf
%matplotlib inline
from autoPyTorch import AutoNetRegression

In [None]:
# Download the NLTK resources needed at runtime
# (presumably used by Load_Dataset's text preprocessing — TODO confirm).
nltk.download('wordnet')
nltk.download('stopwords')

In [None]:
# Load the SMS spam corpus, then hold out 20% of it for validation while
# keeping the class ratio identical in both splits (fixed seed for repeatability).
X, y, class_names = Load_Dataset.load_smsspam()
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=0,
)

In [None]:
# Word-level TF-IDF limited to 1000 features; the vocabulary is fitted on the
# training split only and then applied to both splits.
vec = TfidfVectorizer(analyzer='word', max_features=1000)
vec.fit(X_train)
# `.toarray()` is the explicit form of the `.A` shorthand, which is deprecated
# and no longer available on sparse matrices in recent SciPy releases.
x_train = vec.transform(X_train).toarray()
x_valid = vec.transform(X_valid).toarray()

In [None]:
# Number of TF-IDF features, i.e. the width of the predictor's input layer.
# Taken from the fitted vocabulary (term -> column index) so the cell does not
# depend on `get_feature_names()`, which newer scikit-learn versions removed.
input_dim = len(vec.vocabulary_)
input_dim

In [None]:
# Soft targets: labels at or below 0.5 are mapped to 0.1, all others to 0.9.
train_y, valid_y = (
    [0.1 if label <= 0.5 else 0.9 for label in labels]
    for labels in (y_train, y_valid)
)

In [None]:
# Checkpoint callback that would keep only the model with the best validation loss.
# NOTE(review): no fit() call in the visible notebook uses it.
check_point = ModelCheckpoint("SMS_Predictor2020.hdf5", monitor="val_loss", verbose=2,save_best_only=True, mode="auto")
# Predictor: TF-IDF vector -> LSTM -> Dropout -> Dense(500) -> single sigmoid output.
main_input = Input(shape=(input_dim,), dtype='float32', name='main_input')
# The LSTM needs a 3-D input, so each TF-IDF vector is presented as a sequence of length 1.
x = Reshape((1,input_dim))(main_input)
x = LSTM(1000,activation='tanh')(x)
x = Dropout(0.75)(x)
# This 500-unit layer is the penultimate layer; a later cell exposes it as the encoder output.
x = Dense(500,activation='tanh')(x)
output_lay = Dense(1, activation='sigmoid')(x)
model = Model(inputs=[main_input], outputs=[output_lay])
model.compile(optimizer="adam",loss=['binary_crossentropy'])

In [None]:
# Restore pre-trained predictor weights instead of training in this notebook.
weights_file = 'weights/SMS_Predictor.hdf5' # path of the stored predictor checkpoint
model.load_weights(weights_file) # weights must match the architecture built above
# Compile again with binary cross-entropy and a fresh Adam optimizer.
model.compile(loss="binary_crossentropy", optimizer=Adam())

In [None]:
# Raw sigmoid outputs of the predictor for every training instance.
outputs_predictor = model.predict(x_train)

In [None]:
# Threshold the sigmoid output at 0.5 and print, per split:
# macro F1, weighted F1, balanced accuracy and plain accuracy.
for split_label, split_inputs, split_targets in (
    ('Train:', x_train, y_train),
    ('Test:', x_valid, y_valid),
):
    temp_pred = model.predict(split_inputs)
    predictions = [0 if row[0] <= 0.5 else 1 for row in temp_pred]
    print(
        split_label,
        f1_score(split_targets, predictions, average='macro'),
        f1_score(split_targets, predictions, average='weighted'),
        balanced_accuracy_score(split_targets, predictions),
        accuracy_score(split_targets, predictions),
    )

In [None]:
# Encoder = the predictor truncated at its penultimate layer (model.layers[-2]),
# sharing the predictor's input and layers.
encoder = Model(inputs=model.input, outputs=[model.layers[-2].output])
# Frozen: the encoder is only used for inference in the cells below.
encoder.trainable = False
encoder.compile(loss="binary_crossentropy", optimizer=Adam(), metrics=["accuracy"])

In [None]:
# Last layer of the predictor (the single-unit sigmoid output).
output_layer = model.layers[-1]

In [None]:
# First weight array of the output layer (for a Keras Dense layer this is the kernel; index 1 would be the bias).
output_layer_weights = output_layer.get_weights()[0]

In [None]:
# Encode both splits: these penultimate-layer activations are the encoder's
# outputs and serve as the decoder's inputs.
encoded_x_train = encoder.predict(x_train) # encoder outputs / decoder inputs (training split)
encoded_x_valid = encoder.predict(x_valid) # same for the validation split
encoded_x_train

In [None]:
def rmse(y_true, y_pred):
    """Root-mean-squared error between targets and predictions (Keras backend ops).

    The mean is taken over all elements, so the result is a single scalar tensor.
    """
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_error))

In [None]:
# Decoder: maps an encoded vector back to a TF-IDF vector of width input_dim.
# Its input has the shape of a single encoded training instance.
encoded_input = Input(shape=(encoded_x_train[0].shape))

# As in the predictor, the vector is fed to the LSTM as a sequence of length 1.
x = Reshape((1,len(encoded_x_train[0])))(encoded_input)
x = LSTM(600, activation='tanh')(x)
x = Dropout(0.7)(x)
x = Dense(800, activation='tanh')(x)
# Sigmoid keeps every reconstructed feature value in (0, 1).
decoded = Dense(input_dim, activation='sigmoid')(x)

decoder = Model(encoded_input,decoded)
# Binary cross-entropy is the loss; rmse and mae are reported as extra metrics.
decoder.compile(optimizer="Adam",loss=['binary_crossentropy'],metrics=[rmse,'mae'])

# Checkpoint callback for the decoder (best validation loss only).
# NOTE(review): no fit() call in the visible notebook uses it.
checkpoint_name = 'SMS_Decoder.hdf5' # alternative file name: 'SMS_TFIDF_Decoder.hdf5'
checkpoint = ModelCheckpoint(checkpoint_name, monitor='val_loss', verbose = 2, save_best_only = True, mode ='auto')

In [None]:
# Restore pre-trained decoder weights instead of training in this notebook.
weights_file = 'weights/SMS_Decoder.hdf5' # path of the stored decoder checkpoint
decoder.load_weights(weights_file) # weights must match the decoder architecture built above
# Compile again with the same loss and metrics as when the decoder was built.
decoder.compile(optimizer="Adam",loss=['binary_crossentropy'],metrics=[rmse,'mae'])

In [None]:
# Reconstruct the training TF-IDF vectors from their encodings.
outputs_decoder = decoder.predict(encoded_x_train)
outputs_decoder

In [None]:
# Reconstruction loss and metrics (rmse, mae) on the training split.
decoder.evaluate(encoded_x_train,x_train)

In [None]:
# Reconstruction loss and metrics (rmse, mae) on the validation split.
decoder.evaluate(encoded_x_valid,x_valid)

In [None]:
# Decode the first five training instances for a side-by-side inspection.
instances_evaluation = decoder.predict(encoded_x_train[:5]) 

In [None]:
# Zero every reconstructed value that is not above 0.045, then map both the
# thresholded reconstructions and the original vectors back to their terms
# and print them next to each other.
instances_evaluation_threshold = [
    [value if value > 0.045 else 0 for value in reconstruction]
    for reconstruction in instances_evaluation
]
inversed_decoded = vec.inverse_transform(instances_evaluation_threshold)
inversed_original = vec.inverse_transform(x_train[:5])
for i, original_terms in enumerate(inversed_original):
    print('Original:', ' '.join(sorted(original_terms)))
    print(' Decoded:', ' '.join(sorted(inversed_decoded[i])))
    print('""""""""""""""""""')

In [None]:
# Decode the first five validation instances for a side-by-side inspection.
instances_evaluation = decoder.predict(encoded_x_valid[:5]) 

In [None]:
# Zero every reconstructed value that is not above 0.045, then map both the
# thresholded reconstructions and the original validation vectors back to
# their terms and print them next to each other.
instances_evaluation_threshold = [
    [value if value > 0.045 else 0 for value in reconstruction]
    for reconstruction in instances_evaluation
]
inversed_decoded = vec.inverse_transform(instances_evaluation_threshold)
inversed_original = vec.inverse_transform(x_valid[:5])
for i, original_terms in enumerate(inversed_original):
    print('Original:', ' '.join(sorted(original_terms)))
    print(' Decoded:', ' '.join(sorted(inversed_decoded[i])))
    print('""""""""""""""""""')

# AutoLioNets

In [None]:
# Build LioNets with an automatically searched decoder (decoder="auto").
# `time` selects the search budget; accepted values per the original note: "low", "medium", "high", None.
lionet = LioNet(model, encoder, x_train, decoder="auto", time="low", decoder_lower_threshold=0.045, double_detector=True)
# Baseline: LioNets using the hand-built decoder defined earlier in this notebook.
# The lower threshold matches the 0.045 cut-off used when inspecting reconstructions.
lionet_original = LioNet(model, encoder, x_train, decoder, decoder_lower_threshold=0.045, double_detector=True)

In [None]:
# Print example reconstructions from the automatically built decoders, using the
# same 0.045 threshold (output format is defined by LioNet.print_examples).
lionet.print_examples(X_train=X_train, threshold=0.045)

In [None]:
# Pick the decoder judged best in the evaluation above (index 0 here) and build
# a LioNet instance around it for the qualitative section.
best_decoder = lionet.load_best_decoder(0)
lionet_best = LioNet(model, encoder, x_train, best_decoder, decoder_lower_threshold=0.045, double_detector=True)

In [None]:
# One LioNet instance per decoder strategy (indices 0-3), so fidelity can be
# computed separately for each strategy below.
decoder1 = lionet.load_best_decoder(0)
decoder2 = lionet.load_best_decoder(1)
decoder3 = lionet.load_best_decoder(2)
decoder4 = lionet.load_best_decoder(3)
# All four share the predictor, encoder, training data and threshold; only the decoder differs.
lionet_temp1 = LioNet(model, encoder, x_train, decoder1, decoder_lower_threshold=0.045, double_detector=True)
lionet_temp2 = LioNet(model, encoder, x_train, decoder2, decoder_lower_threshold=0.045, double_detector=True)
lionet_temp3 = LioNet(model, encoder, x_train, decoder3, decoder_lower_threshold=0.045, double_detector=True)
lionet_temp4 = LioNet(model, encoder, x_train, decoder4, decoder_lower_threshold=0.045, double_detector=True)

# Fidelity

In [None]:
import random 
# Fixed seed so the same 100 texts are drawn from each split on every run
# (an earlier run used seed 2000).
random.seed(7777)
# random.sample needs a sequence, so X_train / X_valid are used as plain lists here.
train = np.array(random.sample(X_train,100)) # a sample size of 200 was also tried
valid = np.array(random.sample(X_valid,100))
train.shape, valid.shape

In [None]:
def _autolionets_local_prediction(lionet_instance, text, budget=2000):
    """Vectorise one raw text and return the local prediction of its explanation.

    Parameters
    ----------
    lionet_instance : object exposing ``explain_instance(vector, budget)`` that
        returns a ``(weights, real_prediction, local_prediction)`` triple.
    text : a single raw message; transformed with the notebook-level ``vec``.
    budget : forwarded unchanged as the second positional argument of
        ``explain_instance`` (presumably the neighbourhood size — confirm in LioNets).

    Returns
    -------
    The third element of the triple returned by ``explain_instance``.
    """
    # `.toarray()[0]` yields the dense 1-D TF-IDF vector; it replaces the
    # `.A[0]` shorthand that recent SciPy releases no longer provide.
    t_text = vec.transform(np.array([text]))[0].toarray()[0]
    _weights, _real, loc_res = lionet_instance.explain_instance(t_text, budget)
    return loc_res


def fi_autolionets_s1(text):
    """Local prediction for ``text`` from the strategy-1 LioNet (``lionet_temp1``)."""
    return _autolionets_local_prediction(lionet_temp1, text)


def fi_autolionets_s2(text):
    """Local prediction for ``text`` from the strategy-2 LioNet (``lionet_temp2``)."""
    return _autolionets_local_prediction(lionet_temp2, text)


def fi_autolionets_s3(text):
    """Local prediction for ``text`` from the strategy-3 LioNet (``lionet_temp3``)."""
    return _autolionets_local_prediction(lionet_temp3, text)


def fi_autolionets_s4(text):
    """Local prediction for ``text`` from the strategy-4 LioNet (``lionet_temp4``)."""
    return _autolionets_local_prediction(lionet_temp4, text)

In [None]:
# Evaluation helper given the predictor's predict function and the TF-IDF transform.
# NOTE(review): the meaning of the `None` and `True` positional arguments is defined in evaluation.py — confirm there.
evaluator = Evaluation(model.predict,None,vec.transform,True)

In [None]:
# Fidelity of the four AutoLioNets strategies: the first printed group is for
# the training sample, the second for the validation sample.
for sample_texts in (train[:100], valid[:100]):
    fidelity = evaluator.fidelity(
        sample_texts,
        [fi_autolionets_s1, fi_autolionets_s2, fi_autolionets_s3, fi_autolionets_s4],
        class_n=0,
    )
    for position, strategy_tag in enumerate(('S1', 'S2', 'S3', 'S4')):
        print('AutoLioNets_' + strategy_tag + ' fidelity:', fidelity[position][0])


# Qualitative Original Decoder

In [None]:
# Explain training instance 9 with the hand-built decoder.
temp_instance = x_train[9].copy()  # copy so the stored feature row is not touched
# Near-unregularised ridge regression is the interpretable surrogate model.
transparent_model = Ridge(alpha=0.0001,fit_intercept=True,random_state=0)
# 2000 is the second positional argument of explain_instance (presumably the neighbourhood size — confirm in LioNets).
weights, real_prediction, local_prediction = lionet_original.explain_instance(temp_instance[0:],2000, transparent_model)

In [None]:
# Display the raw message together with its soft training label.
f'Sentence: "{X_train[9]}"   Class: {train_y[9]}'

In [None]:
# Recompute the explanation for instance 9 and compare the predictor's output
# with the surrogate's output for the same instance.
weights, real_prediction, local_prediction = lionet_original.explain_instance(x_train[9][0:], 2000, transparent_model)
print("Real prediction:",real_prediction,", Local prediction:",local_prediction)

In [None]:
# Contribution of each feature = surrogate weight x TF-IDF value of instance 9.
# Features with a zero contribution are dropped; the rest are plotted from
# largest to smallest.
model_weights = (
    pd.DataFrame({
        "Features": list(vec.get_feature_names()),
        "Features' Weights": list(weights * x_train[9][0:]),
    })
    .sort_values(by="Features' Weights", ascending=False)
    .loc[lambda frame: frame["Features' Weights"] != 0]
)
plt.figure(num=None, figsize=(4, 3), dpi=200, facecolor='w', edgecolor='k')
sns.barplot(x="Features' Weights", y="Features", data=model_weights)
plt.show()

In [None]:
# Index, value and feature name of the largest and the smallest surrogate weight
# (tuple order: argmax, argmin, max, min, name at argmax, name at argmin).
np.argmax(weights),np.argmin(weights),np.max(weights),np.min(weights),vec.get_feature_names()[np.argmax(weights)],vec.get_feature_names()[np.argmin(weights)]

In [None]:
# Counter features: features with a non-zero surrogate weight that do not occur
# in the explained message (X_train[9]). The five largest and five smallest
# weights are plotted.
# NOTE(review): `name not in X_train[9]` is a substring test on the raw text, so
# a short feature can match inside a longer word — confirm this is intended.
feature_names = vec.get_feature_names()  # built once; the loop used to rebuild it on every iteration
counter_weights = []
counter_features = []
for i in range(len(weights)):
    if weights[i] != 0 and feature_names[i] not in X_train[9]:
        counter_weights.append(weights[i])
        counter_features.append(feature_names[i])
co_weights = pd.DataFrame({"Counter Features": counter_features,
                           "Features' Weights": counter_weights})
co_weights = co_weights.sort_values(by="Features' Weights", ascending=False)
# After the descending sort, head = most positive weights, tail = most negative.
co_weights = pd.concat([co_weights.head(5),co_weights.tail(5)])
plt.figure(num=None, figsize=(4, 3), dpi=200, facecolor='w', edgecolor='k')
sns.barplot(x="Features' Weights", y="Counter Features", data=co_weights)
plt.show()

In [None]:
# Print the position of every training message that contains the substring 'teach'.
for i, message in enumerate(X_train):
    if 'teach' in message:
        print(i)

In [None]:
# Raw text of training message 119 (presumably picked from the indices printed by the 'teach' search — confirm).
X_train[119]

In [None]:
# Explain training instance 119 with the hand-built decoder and the same ridge surrogate.
weights, real_prediction, local_prediction = lionet_original.explain_instance(x_train[119][0:], 2000, transparent_model)

In [None]:
# Contribution of each feature = surrogate weight x TF-IDF value of instance 119.
# Features with a zero contribution are dropped; the rest are plotted from
# largest to smallest.
model_weights = (
    pd.DataFrame({
        "Features": list(vec.get_feature_names()),
        "Features' Weights": list(weights * x_train[119][0:]),
    })
    .sort_values(by="Features' Weights", ascending=False)
    .loc[lambda frame: frame["Features' Weights"] != 0]
)
plt.figure(num=None, figsize=(4, 3), dpi=200, facecolor='w', edgecolor='k')
sns.barplot(x="Features' Weights", y="Features", data=model_weights)
plt.show()

# Qualitative AutoLioNets

In [None]:
# Explain training instance 9 with the best automatically built decoder.
temp_instance = x_train[9].copy()  # copy so the stored feature row is not touched
# Near-unregularised ridge regression is the interpretable surrogate model.
transparent_model = Ridge(alpha=0.0001,fit_intercept=True,random_state=0)
# 2000 is the second positional argument of explain_instance (presumably the neighbourhood size — confirm in LioNets).
weights, real_prediction, local_prediction = lionet_best.explain_instance(temp_instance[0:],2000, transparent_model)

In [None]:
# Display the raw message together with its soft training label.
f'Sentence: "{X_train[9]}"   Class: {train_y[9]}'

In [None]:
# Recompute the explanation for instance 9 with the best AutoLioNets decoder and
# compare the predictor's output with the surrogate's output.
weights, real_prediction, local_prediction = lionet_best.explain_instance(x_train[9][0:], 2000, transparent_model)
print("Real prediction:",real_prediction,", Local prediction:",local_prediction)

In [None]:
# Contribution of each feature = surrogate weight x TF-IDF value of instance 9.
# Features with a zero contribution are dropped; the rest are plotted from
# largest to smallest.
model_weights = (
    pd.DataFrame({
        "Features": list(vec.get_feature_names()),
        "Features' Weights": list(weights * x_train[9][0:]),
    })
    .sort_values(by="Features' Weights", ascending=False)
    .loc[lambda frame: frame["Features' Weights"] != 0]
)
plt.figure(num=None, figsize=(4, 3), dpi=200, facecolor='w', edgecolor='k')
sns.barplot(x="Features' Weights", y="Features", data=model_weights)
plt.show()

In [None]:
# Index, value and feature name of the largest and the smallest surrogate weight
# (tuple order: argmax, argmin, max, min, name at argmax, name at argmin).
np.argmax(weights),np.argmin(weights),np.max(weights),np.min(weights),vec.get_feature_names()[np.argmax(weights)],vec.get_feature_names()[np.argmin(weights)]

In [None]:
# Counter features: features with a non-zero surrogate weight that do not occur
# in the explained message (X_train[9]). The five largest and five smallest
# weights are plotted.
# NOTE(review): `name not in X_train[9]` is a substring test on the raw text, so
# a short feature can match inside a longer word — confirm this is intended.
feature_names = vec.get_feature_names()  # built once; the loop used to rebuild it on every iteration
counter_weights = []
counter_features = []
for i in range(len(weights)):
    if weights[i] != 0 and feature_names[i] not in X_train[9]:
        counter_weights.append(weights[i])
        counter_features.append(feature_names[i])
co_weights = pd.DataFrame({"Counter Features": counter_features,
                           "Features' Weights": counter_weights})
co_weights = co_weights.sort_values(by="Features' Weights", ascending=False)
# After the descending sort, head = most positive weights, tail = most negative.
co_weights = pd.concat([co_weights.head(5),co_weights.tail(5)])
plt.figure(num=None, figsize=(4, 3), dpi=200, facecolor='w', edgecolor='k')
sns.barplot(x="Features' Weights", y="Counter Features", data=co_weights)
plt.show()

In [None]:
# Print the position of every training message that contains the substring 'teach'.
for i, message in enumerate(X_train):
    if 'teach' in message:
        print(i)

In [None]:
# Raw text of training message 119 (presumably picked from the indices printed by the 'teach' search — confirm).
X_train[119]

In [None]:
# Explain training instance 119 with the best AutoLioNets decoder and the same ridge surrogate.
weights, real_prediction, local_prediction = lionet_best.explain_instance(x_train[119][0:], 2000, transparent_model)

In [None]:
# Contribution of each feature = surrogate weight x TF-IDF value of instance 119.
# Features with a zero contribution are dropped; the rest are plotted from
# largest to smallest.
model_weights = (
    pd.DataFrame({
        "Features": list(vec.get_feature_names()),
        "Features' Weights": list(weights * x_train[119][0:]),
    })
    .sort_values(by="Features' Weights", ascending=False)
    .loc[lambda frame: frame["Features' Weights"] != 0]
)
plt.figure(num=None, figsize=(4, 3), dpi=200, facecolor='w', edgecolor='k')
sns.barplot(x="Features' Weights", y="Features", data=model_weights)
plt.show()