In [1]:
# Setup
%matplotlib inline
%load_ext autoreload
%autoreload 2
import matplotlib.pyplot as plt
import warnings
import spacy
from orig_anchor import anchor_text
import pickle
from myUtils import *
from transformer.utils import *
from dataset.dataset_loader import *
import datetime
import os

# Seed PyTorch's RNG so torch-side randomness is repeatable between runs.
# NOTE(review): `torch` is never imported explicitly in this notebook; presumably
# it arrives through one of the `import *` lines above - confirm.
SEED = 84
torch.manual_seed(SEED)
# Suppress every warning for the rest of the session.
warnings.simplefilter("ignore")

In [2]:
# Larger default font for every matplotlib figure in this notebook.
plt.rcParams['font.size'] = 20
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
# can be sentiment/spam/offensive
# `dataset_name` is also the directory prefix used below for the pickles and score files.
dataset_name = 'sentiment'
# NOTE(review): the loader call is specific to the sentiment dataset; changing
# `dataset_name` alone presumably does not switch the data - confirm.
review_parser, label_parser, ds_train, ds_val, _ = create_sentiment_dataset()

Number of tokens in training samples: 3307
Number of tokens in training labels: 2


In [4]:
# Load the GRU checkpoint stored under this dataset's folder.
# NOTE(review): `load_model` is defined outside this notebook; whether it puts the
# model in eval mode / on `device` cannot be seen from here - confirm.
model = load_model('gru' , f'transformer/{dataset_name}/gru.pt', review_parser)

{'embedding_dim': 100, 'batch_size': 32, 'hidden_dim': 256, 'num_layers': 2, 'dropout': 0.3, 'lr': 5e-05, 'early_stopping': 5, 'output_classes': 2}
VanillaGRU(
  (embedding_layer): Embedding(3307, 100)
  (GRU_layer): GRU(100, 256, num_layers=2, dropout=0.3)
  (dropout_layer): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=256, out_features=2, bias=True)
  (log_softmax): LogSoftmax(dim=1)
)


In [5]:
# Special vocabulary ids: 1 = pad, 2 = sos, 3 = eos
def tokenize(text, max_len):
    """Turn `text` into a list of vocabulary ids framed by sos/eos and padded.

    Args:
        text: the input to encode; it is converted with ``str()`` before
            being tokenized by ``review_parser``.
        max_len: padding target. ``max_len - <number of tokens>`` pad ids are
            appended, so the result has ``max_len + 2`` entries whenever
            ``max_len`` is at least the token count. A smaller ``max_len``
            yields no padding at all (negative list repetition is empty).

    Returns:
        list of int ids: ``[2, <token ids...>, 3, 1, 1, ...]``.
    """
    words = review_parser.tokenize(str(text))

    ids = [2]
    ids.extend(review_parser.vocab.stoi[word] for word in words)
    ids.append(3)
    ids.extend([1] * (max_len - len(words)))

    return ids

In [6]:
def predict_sentences(sentences):
    """Predict a class index for every entry of `sentences`.

    Inputs whose half-length exceeds 100 are split in two and predicted
    recursively, so no single forward pass sees more than ~200 rows.

    Args:
        sentences: a sized, sliceable collection (list / array) of texts.

    Returns:
        1-D numpy array holding, per input, the argmax over dim 1 of the
        model output. An empty input yields an empty int64 array.
    """
    # max() below would raise on an empty batch; answer it directly instead.
    if len(sentences) == 0:
        return np.array([], dtype=np.int64)

    half_length = len(sentences) // 2
    if half_length > 100:
        return np.concatenate([
            predict_sentences(sentences[:half_length]),
            predict_sentences(sentences[half_length:]),
        ])

    # Padding target is the largest len() among the raw batch entries (for
    # strings that is a character count, not a token count).
    # NOTE(review): left unchanged on purpose - a different padding length may
    # change what the model outputs; confirm before tightening.
    max_len = max(len(sentence) for sentence in sentences)
    batch = torch.tensor([tokenize(sentence, max_len) for sentence in sentences]).to(device)
    # The model is fed the transposed tensor, i.e. token positions first, batch second.
    input_tokens = torch.transpose(batch, 0, 1)

    # Pure inference: skip autograd bookkeeping so no graph is kept alive.
    with torch.no_grad():
        output = model(input_tokens)

    return torch.argmax(output, dim=1).cpu().numpy()

# Anchor Part

In [7]:
# spaCy's small English pipeline; it is handed to the Anchor explainer below.
nlp = spacy.load('en_core_web_sm')

In [8]:
# Text explainer built on the spaCy pipeline. The class-name list is indexed by the
# model's predicted label elsewhere in this notebook: 0 -> 'positive', 1 -> 'negative'.
explainer = anchor_text.AnchorText(nlp, ['positive', 'negative'], use_unk_distribution=False)

# Loading Results

In [9]:
# NOTE: pickle.load can execute arbitrary code; only load artefacts you produced yourself.
def _load_pickle(path):
    """Unpickle the object stored at `path`, closing the file afterwards."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Test texts and their labels, as arrays.
test = np.array(_load_pickle(f"{dataset_name}/test.pickle"))
test_labels = np.array(_load_pickle(f"{dataset_name}/test_labels.pickle"))

# Previously computed explanations and the examples they were computed for.
explanations = _load_pickle(f"{dataset_name}/exps_list.pickle")
anchor_examples = _load_pickle(f"{dataset_name}/anchor_examples.pickle")

In [10]:
# Number of anchor examples loaded from anchor_examples.pickle.
len(anchor_examples)

2272

In [11]:
# Number of raw explanations loaded from exps_list.pickle.
len(explanations)

27464

In [12]:
# Build the extended explanations once and cache them on disk; later runs skip
# this cell's body because the cache file already exists.
extended_exps_path = f"{dataset_name}/extended_exps.pickle"
if not os.path.exists(extended_exps_path):
    # Each test text is predicted in its own single-element batch.
    test_predictions = np.array([predict_sentences([text])[0] for text in test])
    # Explanations with no fit examples are dropped before being wrapped.
    explanations = [
        ExtendedExplanation(exp, anchor_examples, test, test_labels, test_predictions, predict_sentences, explainer)
        for exp in explanations
        if len(exp.fit_examples) > 0
    ]
    # Context manager guarantees the cache file is flushed and closed.
    with open(extended_exps_path, "wb") as cache_file:
        pickle.dump(explanations, cache_file)

In [13]:
# Replace `explanations` with the cached extended explanations.
# NOTE: pickle.load can execute arbitrary code; only load a cache you produced yourself.
with open(f"{dataset_name}/extended_exps.pickle", "rb") as cache_file:
    explanations = pickle.load(cache_file)

In [14]:
def get_occurences(sentences, labels):
    """Count token occurrences separately for positive and non-positive sentences.

    Args:
        sentences: texts to tokenize with ``review_parser``.
        labels: per-sentence indices into ``explainer.class_names``.

    Returns:
        ``(pos_c, neg_c)``: two Counters of tokens. A sentence feeds ``pos_c``
        when its class name is ``'positive'`` and ``neg_c`` otherwise.
    """
    pos_c, neg_c = Counter(), Counter()

    for sentence, label in zip(sentences, labels):
        is_positive = explainer.class_names[label] == 'positive'
        bucket = pos_c if is_positive else neg_c
        bucket.update(review_parser.tokenize(sentence))

    return pos_c, neg_c

In [128]:
# get best anchor of each anchor example, it is the chosen anchor
def get_best(explanations, min_precision=0.95):
    """Pick, for every anchor example, its highest-precision explanation.

    Args:
        explanations: iterable of objects exposing ``.precision`` and ``.index``
            (the index identifies the anchor example the explanation belongs to).
        min_precision: explanations with ``precision`` strictly below this value
            are ignored. Defaults to 0.95, the threshold previously hard-coded.

    Returns:
        A dict values view with one explanation per index, in order of first
        appearance of each index. On equal precision the earliest one is kept.

    Side effects:
        Prints how many indices ended up with a chosen explanation.
    """
    best_exps = dict()
    for exp in explanations:
        if exp.precision < min_precision:
            continue
        current = best_exps.get(exp.index)
        # Strict comparison keeps the first explanation on ties.
        if current is None or exp.precision > current.precision:
            best_exps[exp.index] = exp
    print(len(best_exps))
    return best_exps.values()

In [16]:
from collections import Counter
import numpy as np
def calculate_teta0(anchor_examples):
    """Relative frequency of every token across the anchor examples.

    Args:
        anchor_examples: texts to tokenize with ``review_parser``.

    Returns:
        Counter mapping token -> (occurrences / total number of tokens counted).
        Being a Counter, unseen tokens read as 0. An empty input yields an
        empty Counter.
    """
    c = Counter()
    for example in anchor_examples:
        c.update(review_parser.tokenize(example))

    # Normalise by the number of tokens actually counted. The previous
    # denominator summed len(example) over the raw examples, which is not the
    # token count the numerators are based on.
    num_words = sum(c.values())
    for word in c.keys():
        c[word] = c[word] / num_words
    return c

def calculate_p_anchor(explanations):
    """Count how many explanations start with each anchor word.

    Only the first entry of ``exp.names`` is considered for every explanation.

    Returns:
        Counter mapping first anchor name -> number of explanations.
    """
    return Counter(exp.names[0] for exp in explanations)

In [17]:
def calculate_formula(exps, teta0, alpha = 0.95):
    """Score every anchor word as ``(p - (1 - alpha) * teta0) / alpha``.

    ``p`` is the share of `exps` whose first anchor name is that word.

    Args:
        exps: sized collection of explanations (needs ``len()``).
        teta0: mapping word -> background frequency.
        alpha: mixing weight used in the formula.

    Returns:
        ``(teta1, p_anchor)``: the score dict and the raw anchor counts.
    """
    p_anchor = calculate_p_anchor(exps)
    total = len(exps)
    teta1 = {
        anchor: (count / total - (1 - alpha) * teta0[anchor]) / alpha
        for anchor, count in p_anchor.items()
    }
    return teta1, p_anchor

In [18]:
def calculate_scores():
    """Write one CSV of anchor scores per alpha value.

    Uses the notebook-level `explanations` and `anchor_examples`. For each
    alpha a table (name, score, num anchors) sorted by descending score is
    saved to ``{dataset_name}/formalized_scores_{alpha}.csv``.
    """
    best = get_best(explanations)
    background = calculate_teta0(anchor_examples)

    for alpha in (0.15, 0.35, 0.55, 0.75, 0.95):
        scores, counter = calculate_formula(best, background, alpha)

        rows = [[anchor, score, counter[anchor]] for anchor, score in scores.items()]
        # Stable sort on the negated score: highest score first, ties keep order.
        rows = sorted(rows, key=lambda row: -row[1])

        table = pd.DataFrame(data=rows, columns=['name', 'score', 'num anchors'])
        table = table.set_index('name')
        table.to_csv(f'{dataset_name}/formalized_scores_{alpha}.csv')

In [19]:
def calculate_scores_double():
    """Score anchors separately per predicted class and export them to Excel.

    Every anchor example is labelled with the model's prediction (one example
    per forward pass). The best explanations are split by that label (0 is
    treated as positive, 1 as negative) and scored with `calculate_formula`
    for each alpha. One table per alpha is written side by side on 'Sheet1' of
    ``{dataset_name}/formalized_scores_double.xlsx``, with the alpha value in
    the row above each table.
    """
    alphas = [0.15, 0.35, 0.55, 0.75, 0.95]
    columns = ['name', 'score', 'label', 'anchor occurences', 'total anchor occurences',
               '+%', '-%', 'both', 'normal +', 'normal -']

    labels = [predict_sentences([str(anchor_example)])[0] for anchor_example in anchor_examples]
    pos_occurences, neg_occurences = get_occurences(anchor_examples, labels)
    exps = get_best(explanations)
    teta0 = calculate_teta0(anchor_examples)

    pos_exps = [exp for exp in exps if labels[exp.index] == 0]
    neg_exps = [exp for exp in exps if labels[exp.index] == 1]

    dfs = []
    for alpha in alphas:
        pos_scores, pos_counter = calculate_formula(pos_exps, teta0, alpha)
        neg_scores, neg_counter = calculate_formula(neg_exps, teta0, alpha)

        df_list = []
        # Same row layout for both classes; only the sign and the class's own
        # counter differ.
        for sign, scores, own_counter in (('+', pos_scores, pos_counter),
                                          ('-', neg_scores, neg_counter)):
            for anchor, score in scores.items():
                total = pos_counter[anchor] + neg_counter[anchor]
                pos_percent = round(pos_counter[anchor] / total, 2)
                # Rounded so the sheet shows 0.3 rather than 0.30000000000000004.
                neg_percent = round(1 - pos_percent, 2)
                both = pos_counter[anchor] > 0 and neg_counter[anchor] > 0
                df_list.append([
                    anchor, score, sign, own_counter[anchor], total,
                    pos_percent, neg_percent, both,
                    # Token occurrences not accounted for by anchors.
                    pos_occurences[anchor] - pos_counter[anchor],
                    neg_occurences[anchor] - neg_counter[anchor],
                ])

        df_list.sort(key=lambda row: -row[1])
        dfs.append(pd.DataFrame(data=df_list, columns=columns).set_index('name'))

    # The context manager saves and closes the workbook even if writing fails;
    # ExcelWriter.save() no longer exists in pandas 2.x.
    with pd.ExcelWriter(f'{dataset_name}/formalized_scores_double.xlsx', engine='xlsxwriter') as writer:
        worksheet = writer.book.add_worksheet('Sheet1')
        # Register the sheet so to_excel reuses it (required by older pandas).
        writer.sheets['Sheet1'] = worksheet

        cur_col = 0
        for df, alpha in zip(dfs, alphas):
            worksheet.write(0, cur_col, alpha)
            df.to_excel(writer, sheet_name='Sheet1', startrow=1, startcol=cur_col)
            # Table width (index + data columns) plus one empty spacer column.
            cur_col += len(columns) + 1


In [101]:
def calc_min_alpha(p_anchor, num_anchors, teta0, normalize_factor):
    """Return the largest ``1 - ratio`` over all anchors.

    For every anchor, ``ratio = (p_anchor / num_anchors) / (teta0 / normalize_factor)``.

    Args:
        p_anchor: mapping anchor -> count.
        num_anchors: divisor turning counts into shares.
        teta0: mapping anchor -> background frequency.
        normalize_factor: divisor applied to the background frequencies.

    Returns:
        ``max(1 - ratio)`` across the anchors.

    Side effects:
        Prints the anchor with the smallest ratio (name, count, normalised
        background frequency, ratio) and then ``min(1 - ratio)``.
    """
    anchors = list(p_anchor.keys())
    ratios = [
        (p_anchor[anchor]/num_anchors)/(teta0[anchor]/normalize_factor)
        for anchor in anchors
    ]

    # First anchor attaining the smallest ratio.
    name = anchors[ratios.index(min(ratios))]
    print(f' {name}: {p_anchor[name]} :{teta0[name]/normalize_factor}: {(p_anchor[name]/num_anchors)/(teta0[name]/normalize_factor)}')

    complements = [1 - ratio for ratio in ratios]
    print(min(complements))
    return max(complements)

In [102]:
def calculate_modified_formula(exps, teta0, alpha = 0.95):
    """Score anchors with the background frequencies renormalised over the anchors.

    Same formula as ``(p - (1 - alpha) * teta0) / alpha``, except that each
    ``teta0`` value is divided by the sum of ``teta0`` over the anchor words
    present in `exps`.

    Args:
        exps: sized collection of explanations (needs ``len()``).
        teta0: mapping word -> background frequency.
        alpha: mixing weight used in the formula.

    Returns:
        ``(teta1, p_anchor)``: the score dict and the raw anchor counts.

    Side effects:
        Prints the diagnostics of `calc_min_alpha` and its result.
    """
    p_anchor = calculate_p_anchor(exps)
    n_exps = len(exps)
    normalize_factor = sum(teta0[anchor] for anchor in p_anchor)

    min_alpha = calc_min_alpha(p_anchor, n_exps, teta0, normalize_factor)
    print(f'min alpha:{min_alpha}')

    teta1 = {
        anchor: (count / n_exps - (1 - alpha) * (teta0[anchor]) / normalize_factor) / alpha
        for anchor, count in p_anchor.items()
    }
    return teta1, p_anchor

## Note: `get_best` was changed to keep only anchors with precision of at least 0.95

In [130]:
def calculate_modified_scores():
    """Score anchors per predicted class with the modified formula and export to Excel.

    Every anchor example is labelled with the model's prediction (one example
    per forward pass). The best explanations are split by that label (0 is
    treated as positive, 1 as negative) and scored with
    `calculate_modified_formula` for five alphas evenly spaced in [0.98, 1].
    One table per alpha is written side by side on 'Sheet1' of
    ``{dataset_name}/formalized_modified_scores.xlsx``, with the alpha value in
    the row above each table.
    """
    alphas = np.linspace(0.98, 1, num=5)
    columns = ['name', 'score', 'label', 'anchor occurences', 'total anchor occurences',
               '+%', '-%', 'both', 'normal +', 'normal -']

    labels = [predict_sentences([str(anchor_example)])[0] for anchor_example in anchor_examples]
    pos_occurences, neg_occurences = get_occurences(anchor_examples, labels)
    exps = get_best(explanations)
    teta0 = calculate_teta0(anchor_examples)

    pos_exps = [exp for exp in exps if labels[exp.index] == 0]
    neg_exps = [exp for exp in exps if labels[exp.index] == 1]

    dfs = []
    for alpha in alphas:
        pos_scores, pos_counter = calculate_modified_formula(pos_exps, teta0, alpha)
        neg_scores, neg_counter = calculate_modified_formula(neg_exps, teta0, alpha)

        df_list = []
        # Same row layout for both classes; only the sign and the class's own
        # counter differ.
        for sign, scores, own_counter in (('+', pos_scores, pos_counter),
                                          ('-', neg_scores, neg_counter)):
            for anchor, score in scores.items():
                total = pos_counter[anchor] + neg_counter[anchor]
                pos_percent = round(pos_counter[anchor] / total, 2)
                # Rounded so the sheet shows 0.3 rather than 0.30000000000000004.
                neg_percent = round(1 - pos_percent, 2)
                both = pos_counter[anchor] > 0 and neg_counter[anchor] > 0
                df_list.append([
                    anchor, score, sign, own_counter[anchor], total,
                    pos_percent, neg_percent, both,
                    # Token occurrences not accounted for by anchors.
                    pos_occurences[anchor] - pos_counter[anchor],
                    neg_occurences[anchor] - neg_counter[anchor],
                ])

        df_list.sort(key=lambda row: -row[1])
        dfs.append(pd.DataFrame(data=df_list, columns=columns).set_index('name'))

    # The context manager saves and closes the workbook even if writing fails;
    # ExcelWriter.save() no longer exists in pandas 2.x.
    with pd.ExcelWriter(f'{dataset_name}/formalized_modified_scores.xlsx', engine='xlsxwriter') as writer:
        worksheet = writer.book.add_worksheet('Sheet1')
        # Register the sheet so to_excel reuses it (required by older pandas).
        writer.sheets['Sheet1'] = worksheet

        cur_col = 0
        for df, alpha in zip(dfs, alphas):
            worksheet.write(0, cur_col, alpha)
            df.to_excel(writer, sheet_name='Sheet1', startrow=1, startcol=cur_col)
            # Table width (index + data columns) plus one empty spacer column.
            cur_col += len(columns) + 1


In [131]:
# Writes {dataset_name}/formalized_modified_scores.xlsx. The printed output is the
# count from get_best followed by the calc_min_alpha diagnostics and the
# 'min alpha' line, repeated for the positive and negative sets at every alpha.
calculate_modified_scores()

1087
 best: 1 :0.07386363636363637: 0.0830580462482303
-2.2392638036809824
min alpha:0.9169419537517697
 and: 1 :0.05154716415673969: 0.020995356387798106
-13.444805194805099
min alpha:0.9790046436122019
 best: 1 :0.07386363636363637: 0.0830580462482303
-2.2392638036809824
min alpha:0.9169419537517697
 and: 1 :0.05154716415673969: 0.020995356387798106
-13.444805194805099
min alpha:0.9790046436122019
 best: 1 :0.07386363636363637: 0.0830580462482303
-2.2392638036809824
min alpha:0.9169419537517697
 and: 1 :0.05154716415673969: 0.020995356387798106
-13.444805194805099
min alpha:0.9790046436122019
 best: 1 :0.07386363636363637: 0.0830580462482303
-2.2392638036809824
min alpha:0.9169419537517697
 and: 1 :0.05154716415673969: 0.020995356387798106
-13.444805194805099
min alpha:0.9790046436122019
 best: 1 :0.07386363636363637: 0.0830580462482303
-2.2392638036809824
min alpha:0.9169419537517697
 and: 1 :0.05154716415673969: 0.020995356387798106
-13.444805194805099
min alpha:0.9790046436122019


In [None]:
# Mean of the teta1 scores.
# NOTE(review): `teta1` is only ever a local variable inside calculate_formula /
# calculate_modified_formula in this notebook, so on a fresh kernel this cell
# raises NameError unless it is defined somewhere not shown here.
x=sum(teta1.values())/len(teta1.keys())

In [None]:
# Mean of the teta0 frequencies, then the ratio of the two means.
# NOTE(review): `teta0` is only ever a local variable / parameter inside the
# functions above, and `x` comes from the previous cell; on a fresh kernel this
# cell raises NameError unless both exist at notebook scope.
y=sum(teta0.values())/len(teta0.keys())
x/y

In [None]:
# Number of explanations currently held in `explanations`.
len(explanations)

In [None]:
# Keep only explanations with more than 10 fit examples, then order them by
# ascending test precision (best ones end up last).
# NOTE: this rebinds and sorts the notebook-level `explanations` in place, so
# every later cell sees the filtered, sorted list.
explanations = [exp for exp in explanations if len(exp.fit_examples) > 10] 
explanations.sort(key=lambda exp: exp.test_precision)

In [None]:
# Report the last ten explanations (in reverse order) whose joined anchor text is
# at least 4 characters long. This is the top ten by test precision only if the
# previous cell has sorted `explanations` by ascending test_precision.
filtered = [exp for exp in explanations if len(' '.join(exp.names))>=4]
best = filtered[-10:]
best.reverse()
for exp in best:
    print("------------------------")
    # Model prediction for the example this anchor was computed on.
    exp_label =  predict_sentences([str(anchor_examples[exp.index])])[0]
    print('Prediction:', explainer.class_names[exp_label])
    print('Anchor: %s' % (' AND '.join(exp.names)))
    print('Precision: %.2f' % exp.precision)
    print('Coverage: %.2f' % exp.coverage)
    print('Anchor test precision: %.2f' % exp.test_precision)
    print('Anchor test coverage: %.2f' % (exp.test_cov))
    # The former `covered_labels = counter_test_labels[exp.fit_examples]` line was
    # dropped: its result was never used and `counter_test_labels` is not defined
    # anywhere in this notebook.
    print('Anchor test REAL precision: %.2f' % exp.real_precision)
   

In [None]:
# Take the second half of `explanations` (in whatever order earlier cells left the
# list), then keep only entries with more than 10 fit examples.
exps = explanations[len(explanations)//2:]
exps = [exp for exp in exps if len(exp.fit_examples)>10]
#trained model has the opposite label
# Paired lists used as the y / x coordinates of the scatter plot below.
real_precisions = [exp.real_precision for exp in exps]
test_precisions = [exp.test_precision for exp in exps]

In [None]:
# Scatter of test precision (x) against real precision (y); marker size grows with
# the position of the explanation in `exps` (s = 0, 1, 2, ...).
plt.scatter(test_precisions, real_precisions, s = range(len(exps)), alpha = 0.5)
plt.xlabel('predicted precision')
plt.ylabel('label precision')
# NOTE(review): the title says 'LSTM' but the model loaded above is 'gru', and the
# file name says 'spam' while dataset_name is 'sentiment' - both look stale; confirm.
plt.title('LSTM')
plt.savefig("results/spam.png")

In [None]:
# Display a previously saved figure from disk, without axes, on a 10x10 canvas.
# NOTE(review): this file is not written by any cell in this notebook.
img = plt.imread("results/gru_on_counter.png")
plt.figure(figsize = (10,10))
plt.axis('off')
# Assign to `_` so the AxesImage repr is not echoed as cell output.
_ = plt.imshow(img)