# Import

In [None]:
import os
import copy
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from tensorflow.python.layers import core as core_layers
import tensorflow as tf
import numpy as np
from tensorflow.python.ops import array_ops
import time
import jieba

from Util import mybleu
from Util import myResidualCell
import random
import pickle
import matplotlib.pyplot as plt

def idx2str(s):
    """Turn a sequence of token ids into a space-separated sentence.

    Each id is looked up in the module-level ``id2w`` mapping.
    """
    words = (id2w[token_id] for token_id in s)
    return ' '.join(words)

def str2idx(idx):
    """Turn a whitespace-tokenised sentence into a list of token ids.

    Despite its name, ``idx`` is the input string; every token is looked up
    in the module-level ``w2id`` mapping.
    """
    tokens = idx.strip().split()
    ids = []
    for token in tokens:
        ids.append(w2id[token])
    return ids

def pad(x, pid, move_go=False):
    """Right-pad a batch of id lists to the length of the longest one.

    Args:
        x: list of id lists (one list per sentence).
        pid: id used for padding.
        move_go: when True, the first element of every sequence is dropped
            before padding and is not counted in the reported length.

    Returns:
        ``(pad_x, length_list)`` where ``pad_x`` holds the padded rows and
        ``length_list`` the unpadded length of each row. An empty batch
        yields ``([], [])`` instead of raising.
    """
    # Slicing (rather than len(k) - 1) keeps the length of an empty sequence
    # at 0 instead of -1 when move_go is set.
    sequences = [k[1:] if move_go else k for k in x]
    length_list = [len(seq) for seq in sequences]
    # default=0 makes an empty batch a no-op instead of a ValueError.
    max_length = max(length_list, default=0)
    pad_x = [seq + [pid] * (max_length - len(seq)) for seq in sequences]
    return pad_x, length_list

def pad_maxlength(x, pid, move_go=False, max_length=16):
    """Pad or truncate every id list in a batch to exactly ``max_length``.

    Args:
        x: list of id lists (one list per sentence).
        pid: id used for padding.
        move_go: when True, the first element of every sequence is dropped
            before padding/truncating.
        max_length: width of every returned row (defaults to 16, the value
            that used to be hard-coded).

    Returns:
        ``(pad_x, length_list)``. Every row of ``pad_x`` has ``max_length``
        entries; ``length_list`` holds the number of real (non-padding)
        entries kept in each row, so it never exceeds ``max_length``.
    """
    pad_x = []
    length_list = []
    for k in x:
        seq = k[1:] if move_go else k
        # Truncate in both modes; previously only the move_go=False branch
        # truncated, so long inputs produced ragged rows with move_go=True.
        kept = seq[:max_length]
        pad_x.append(kept + [pid] * (max_length - len(kept)))
        length_list.append(len(kept))
    return pad_x, length_list

import nltk
def word_overlap_edit(s1, s2):
    """Compare two whitespace-tokenised sentences.

    Args:
        s1, s2: sentences as strings; tokens are obtained with ``str.split()``.

    Returns:
        ``(word_overlap, edit_distance)`` where ``word_overlap`` is the
        Jaccard overlap of the two token sets and ``edit_distance`` is
        ``1 - token-level edit distance / length of the longer sentence``
        (so higher means more similar). Two empty sentences are treated as
        identical and yield ``(1.0, 1.0)``.
    """
    tokens1 = s1.split()
    tokens2 = s2.split()
    if not tokens1 and not tokens2:
        # Both denominators below would be zero; identical (empty) inputs.
        return 1.0, 1.0
    t1 = set(tokens1)
    t2 = set(tokens2)
    word_overlap = float(len(t1 & t2)) / len(t1 | t2)
    longest = max(len(tokens1), len(tokens2))
    edit_distance = 1 - float(nltk.edit_distance(tokens1, tokens2)) / longest
    return word_overlap, edit_distance
# Emit TensorFlow log messages at INFO level and above.
tf.logging.set_verbosity(tf.logging.INFO)
# Session configuration reused when the classifier session is created below;
# allow_growth=True lets TensorFlow grow its GPU memory allocation on demand.
sess_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))

# Data

In [None]:
# Load the vocabulary mappings and the evaluation sentences.
# NOTE: pickle.load can execute arbitrary code; only point these paths at trusted files.
with open('/workspace/Data/yelp/w2id_id2w.pkl', 'rb') as fin:
    w2id, id2w = pickle.load(fin)
with open('/workspace/Data/yelp/original_reference_and_noun.pkl', 'rb') as fin:
    original, reference, original_noun, reference_noun = pickle.load(fin)
# One-hot labels: the first 500 entries are [1, 0], the last 500 are [0, 1].
C_original = [[1, 0] for _ in range(500)] + [[0, 1] for _ in range(500)]
# The evaluation cells below index `original` with positions 0..999.
assert len(original) == 1000

In [None]:
# Cap applied to per-sentence perplexities via min(ppl_upper, lm.perplexity(s)) in the
# perplexity cells below (some later sections use a hard-coded cap of 3000 instead).
ppl_upper = 10000000

# Evaluation

## Init

In [None]:
# Build the BiLSTM classifier used by the evaluation cells and restore a trained checkpoint.
from textBiLSTM import *

BATCH_SIZE = 256
NUM_EPOCH = 30
# NOTE(review): train_dir is not referenced again in the visible part of this notebook, and
# the checkpoint restored at the end of this cell comes from a different directory.
train_dir ='Model/YELP/TextBiLSTM-all/'
#MAX_LENGTH = 16
import pickle
# Vocabulary mappings, id sequences and labels; w2id_all is the vocabulary later used to
# encode generated sentences before they are fed to the classifier.
# NOTE(review): the file handle opened here is never closed explicitly.
w2id_all, id2w_all, X_indices_all, C_labels_all = pickle.load(open('/workspace/Data/yelp/w2id_id2w_indices_labels_all.pkl','rb'))



# Data-provider object required by the BiLSTM constructor (defined in textBiLSTM).
bilstm_dp = BiLSTM_DP(X_indices_all, C_labels_all, w2id_all,  BATCH_SIZE, n_epoch=NUM_EPOCH, test_data=None)

# The classifier lives in its own graph and session.
g_bilstm = tf.Graph()
sess_bilstm = tf.Session(graph=g_bilstm, config=sess_conf)
with sess_bilstm.as_default():
    with sess_bilstm.graph.as_default():
        B = BiLSTM(
            dp = bilstm_dp,
            rnn_size = 512,
            n_layers = 1,
            encoder_embedding_dim = 256,
            cell_type = 'lstm',
            num_classes = 2,
            sess=sess_bilstm
        )
#B.restore('Model/YELP/TextBiLSTM-all/model-11')
# Load trained weights; B.predictions is evaluated with this model in the cells below.
B.restore('Model/YELP/TextBiLSTM-appendix/model-7')

In [None]:
import kenlm
# Language model whose perplexity() scores the generated sentences in the cells below.
lm = kenlm.Model('/workspace/Moses/YELP_lm/yelp.blm')

## Length

In [None]:
import os
# Collect the result files located directly in Results/Finegrained/ (no recursion)
# whose name starts with 'length_' and contains '.pkl'.
file_name_list = []
walk_root, walk_dirs, top_level_files = next(os.walk('Results/Finegrained/'), (None, [], []))
for candidate in top_level_files:
    if candidate.startswith('length_') and '.pkl' in candidate:
        file_name_list.append(candidate)
        print(candidate)

In [None]:

# Score every generated sentence of each 'length_' result file with the KenLM model.
name2ppl = dict()
str2ppl = dict()
for name in file_name_list:
    # Context manager so the result file is closed right after loading.
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    assert len(res) == 2000

    # The generated sentence is the first field of every result entry.
    str_list = [t[0] for t in res]

    ppl_list = []
    for s in str_list:
        # Clip each perplexity at ppl_upper before it enters the means.
        str2ppl[s] = min(ppl_upper, lm.perplexity(s))
        ppl_list.append(str2ppl[s])

    # Stored as (mean over entries 1000+, mean over entries 0-999, per-sentence list);
    # the evaluation cells unpack this as (ppl_neg, ppl_pos, ppl_list).
    name2ppl[name] = (np.mean(ppl_list[1000:]), np.mean(ppl_list[:1000]), ppl_list)
    print(name, np.mean(ppl_list))
# NOTE: every section of this notebook writes to this same path, overwriting earlier output.
with open('Results/Finegrained/metrics/name2ppl.pkl', 'wb') as fout:
    pickle.dump(name2ppl, fout)

In [None]:
# Run the BiLSTM classifier on every generated sentence of each 'length_' result file.
name2acc = dict()
for name in file_name_list:
    # Context manager so the result file is closed right after loading.
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    assert len(res) == 2000

    # The generated sentence is the first field of every result entry.
    str_list = [t[0] for t in res]

    # Encode with the classifier vocabulary, append <EOS>, and pad to the longest sentence.
    idx_list = [[w2id_all[idx] for idx in s.split()] + [w2id_all['<EOS>']] for s in str_list]
    pad_x, length_list = pad(idx_list, w2id_all['<PAD>'], move_go=False)
    # Keep probabilities of 1.0 disable dropout at inference time.
    res_class = B.sess.run(B.predictions, {B.input_x: pad_x,
                                           B.X_seq_len: length_list,
                                           B.output_keep_prob: 1.0,
                                           B.input_keep_prob: 1.0})
    # 1.0 when the predicted class differs from argmax(C_original[...]), else 0.0.
    # Entries 1000-1999 reuse the labels of entries 0-999 (hence i % 1000).
    acc_list = [1. if res_class[i] != np.argmax(C_original[i % 1000]) else 0.
                for i in range(2000)]
    name2acc[name] = acc_list
    print(name, np.mean(acc_list))
# NOTE: every section of this notebook writes to this same path, overwriting earlier output.
with open('Results/Finegrained/metrics/name2acc.pkl', 'wb') as fout:
    pickle.dump(name2acc, fout)
        

### Increase

In [None]:
# Aggregate per-file metrics for the 'length_' result files and print one LaTeX table row per file.
# Relies on globals left behind by other cells: file_name_list, name2acc (not reloaded here),
# str_list, original, original_noun and replace_words (only loaded in the "Original" cell
# further down the notebook).
name2ppl = pickle.load(open('Results/Finegrained/metrics/name2ppl.pkl','rb'))
name2res = dict()
name2content = dict()
for name in file_name_list:
    res = pickle.load(open('Results/Finegrained/' + name, 'rb'))

    name2res[name] = dict()
    acc_list = name2acc[name]
    cnt_key = 0
    # Single pass: only the is_choose_first=True branch below ever runs.
    for is_choose_first in [True]:
        length_list = []
        max_it_list = []
        content_list = []
        content_pc_list = []
        succ_list = []
        word_overlap_list = []
        edit_distance_list = []
        if is_choose_first:
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]
            #acc, acc_list = name2acc[name]
        else:
            # Unreachable with the [True] loop above.
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]['last']
            #acc, acc_list = name2acc[name]['last']
        # NOTE(review): res is not sliced in this cell. `original` is asserted to hold 1000
        # entries and the perplexity cell asserts 2000 entries per file, so original[i] looks
        # like it raises IndexError once i reaches 1000 — confirm which half is intended.
        for i,o in enumerate(res):
            # Each result entry unpacks to (sentence, length, cnt, maxit).
            s, length, cnt, maxit = o
            # Generated length relative to the number of ids in the original sentence.
            length_list.append(float(length) / len(original[i]))
            # str_list is not created in this cell; this appends to the list left by an earlier cell.
            str_list.append(s)
            max_it_list.append(maxit)
            word_overlap, edit_distance = word_overlap_edit(s, idx2str(original[i]))
            word_overlap_list.append(word_overlap)
            edit_distance_list.append(edit_distance)
            # First replacement keyword for this sentence (empty list when original_noun[i] is empty).
            keywords = [replace_words[t[0]] for t in original_noun[i]][:1]
            if len(keywords) == 0:
                # NOTE(review): skipping here also skips the content_list / content_pc_list
                # updates, which makes the len(original_noun[i]) == 0 branch below
                # unreachable — confirm this is intended.
                continue
            # Substring test against the raw sentence string.
            if keywords[0] in s:
                cnt_key += 1
                #print(s, keywords[0])

            # content: 1.0 if cnt is positive, else 0.0.
            if cnt > 0:
                content = 1.
                content_list.append(content)
            else:
                content = 0.
                content_list.append(content)
            # content_pc: cnt divided by the number of entries in original_noun[i].
            if len(original_noun[i]) == 0:
                content_pc = 1.
                content = 1.
            else:
                content_pc = float(cnt) / len(original_noun[i])
            content_pc_list.append(content_pc)





        #if '3stage' in name and is_choose_first and '0.9' in name and 'ItFalse' in name and '0.1' in name:
        #if is_choose_first and '0.1' in name:
        if True:
        #if 'BOW-04' in name and is_choose_first:
            name2content[name] = np.mean(content_list) * 100
            # Columns: name, mean acc_list[1000:], mean ppl_list[1000:], word overlap (%),
            # content_pc (%), length ratio (%), cnt_key / 10 (a percentage only for 1000 sentences).
            # NOTE(review): unlike later sections, the acc column is not multiplied by 100 here.
            print('%s & %.1f & %.1f & %.1f & %.1f & %.1f & %.1f\\\\' % (name.replace('_','-'), np.mean(acc_list[1000:]), np.mean(ppl_list[1000:]), np.mean(word_overlap_list) * 100, np.mean(content_pc_list) * 100, np.mean(length_list) * 100, cnt_key / 10.))
    
    
    

### Decrease

In [None]:
# Aggregate per-file metrics for the 'length_' result files and print one LaTeX table row per file.
# Relies on globals left behind by other cells: file_name_list, name2acc (not reloaded here),
# str_list, original, original_noun and replace_words (only loaded in the "Original" cell
# further down the notebook).
name2ppl = pickle.load(open('Results/Finegrained/metrics/name2ppl.pkl','rb'))
name2res = dict()
name2content = dict()
for name in file_name_list:
    res = pickle.load(open('Results/Finegrained/' + name, 'rb'))
    # Skip incomplete files; otherwise keep only the first 1000 entries.
    # NOTE(review): the sentences come from res[:1000] but the printed acc/ppl columns use
    # the [1000:] slices — confirm the halves are meant to differ.
    if len(res) < 1000:
        continue
    elif len(res) > 1000:
        res = res[:1000]
    name2res[name] = dict()

    acc_list = name2acc[name]
    cnt_key = 0
    # Single pass: only the is_choose_first=True branch below ever runs.
    for is_choose_first in [True]:
        length_list = []
        max_it_list = []
        content_list = []
        content_pc_list = []
        succ_list = []
        word_overlap_list = []
        edit_distance_list = []
        if is_choose_first:
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]
            #acc, acc_list = name2acc[name]
        else:
            # Unreachable with the [True] loop above.
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]['last']
            #acc, acc_list = name2acc[name]['last']
        for i,o in enumerate(res):
            # Each result entry unpacks to (sentence, length, cnt, maxit).
            s, length, cnt, maxit = o
            # Generated length relative to the number of ids in the original sentence.
            length_list.append(float(length) / len(original[i]))
            # str_list is not created in this cell; this appends to the list left by an earlier cell.
            str_list.append(s)
            max_it_list.append(maxit)
            word_overlap, edit_distance = word_overlap_edit(s, idx2str(original[i]))
            word_overlap_list.append(word_overlap)
            edit_distance_list.append(edit_distance)
            # First replacement keyword for this sentence (empty list when original_noun[i] is empty).
            keywords = [replace_words[t[0]] for t in original_noun[i]][:1]
            if len(keywords) == 0:
                # NOTE(review): skipping here also skips the content_list / content_pc_list
                # updates, which makes the len(original_noun[i]) == 0 branch below
                # unreachable — confirm this is intended.
                continue
            # Substring test against the raw sentence string.
            if keywords[0] in s:
                cnt_key += 1
            # content: 1.0 if cnt is positive, else 0.0.
            if cnt > 0:
                content = 1.
                content_list.append(content)
            else:
                content = 0.
                content_list.append(content)
            # content_pc: cnt divided by the number of entries in original_noun[i].
            if len(original_noun[i]) == 0:
                content_pc = 1.
                content = 1.
            else:
                content_pc = float(cnt) / len(original_noun[i])
            content_pc_list.append(content_pc)

        if True:
            name2content[name] = np.mean(content_list) * 100
            # Columns: name, mean acc_list[1000:], mean ppl_list[1000:], word overlap (%),
            # content_pc (%), length ratio (%), cnt_key / 10 (a percentage only for 1000 sentences).
            print('%s & %.1f & %.1f & %.1f & %.1f & %.1f & %.1f\\\\' % (name.replace('_','-'), np.mean(acc_list[1000:]), np.mean(ppl_list[1000:]), np.mean(word_overlap_list) * 100, np.mean(content_pc_list) * 100, np.mean(length_list) * 100, cnt_key / 10.))
    
    
    

## Keywords

In [None]:
import os
# Collect the result files located directly in Results/Finegrained/ (no recursion)
# whose name starts with 'keywords_' and contains '.pkl'.
file_name_list = []
walk_root, walk_dirs, top_level_files = next(os.walk('Results/Finegrained/'), (None, [], []))
for candidate in top_level_files:
    if candidate.startswith('keywords_') and '.pkl' in candidate:
        file_name_list.append(candidate)
        print(candidate)

In [None]:

# Score every generated sentence of each 'keywords_' result file with the KenLM model.
name2ppl = dict()
str2ppl = dict()
for name in file_name_list:
    # Context manager so the result file is closed right after loading.
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    assert len(res) == 1000

    # The generated sentence is the first field of every result entry.
    str_list = [t[0] for t in res]

    ppl_list = []
    for s in str_list:
        # Clip each perplexity at ppl_upper before it enters the means.
        str2ppl[s] = min(ppl_upper, lm.perplexity(s))
        ppl_list.append(str2ppl[s])

    # Stored as (mean over entries 500+, mean over entries 0-499, per-sentence list);
    # the evaluation cells unpack this as (ppl_neg, ppl_pos, ppl_list).
    name2ppl[name] = (np.mean(ppl_list[500:]), np.mean(ppl_list[:500]), ppl_list)
    print(name, np.mean(ppl_list))
# NOTE: every section of this notebook writes to this same path, overwriting earlier output.
with open('Results/Finegrained/metrics/name2ppl.pkl', 'wb') as fout:
    pickle.dump(name2ppl, fout)

### Original

In [None]:
# Baseline: how often the replacement keyword already occurs in the original sentence.
# NOTE: pickle.load can execute arbitrary code; only point this path at a trusted file.
with open('replace_words.pkl', 'rb') as fin:
    replace_words = pickle.load(fin)
cnt = 0

# Iterate over exactly the sentences that the percentage below is normalised by.
for i in range(len(original)):
    # First replacement keyword for this sentence (empty list when it has no nouns).
    keywords = [replace_words[t[0]] for t in original_noun[i]][:1]
    if not keywords:
        continue
    original_str = idx2str(original[i])
    # Substring test against the raw sentence string.
    if keywords[0] in original_str:
        print(keywords[0], original_str)
        cnt += 1
print(cnt / float(len(original)) * 100)

In [None]:
# Run the BiLSTM classifier on every generated sentence of each 'keywords_' result file.
name2acc = dict()
for name in file_name_list:
    # Context manager so the result file is closed right after loading.
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    assert len(res) == 1000

    # The generated sentence is the first field of every result entry.
    str_list = [t[0] for t in res]

    # Encode with the classifier vocabulary, append <EOS>, and pad to the longest sentence.
    idx_list = [[w2id_all[idx] for idx in s.split()] + [w2id_all['<EOS>']] for s in str_list]
    pad_x, length_list = pad(idx_list, w2id_all['<PAD>'], move_go=False)
    # Keep probabilities of 1.0 disable dropout at inference time.
    res_class = B.sess.run(B.predictions, {B.input_x: pad_x,
                                           B.X_seq_len: length_list,
                                           B.output_keep_prob: 1.0,
                                           B.input_keep_prob: 1.0})
    # 1.0 when the predicted class differs from argmax(C_original[i]), else 0.0.
    acc_list = [1. if res_class[i] != np.argmax(C_original[i]) else 0.
                for i in range(1000)]
    name2acc[name] = acc_list
    print(name, np.mean(acc_list))
# NOTE: every section of this notebook writes to this same path, overwriting earlier output.
with open('Results/Finegrained/metrics/name2acc.pkl', 'wb') as fout:
    pickle.dump(name2acc, fout)
        

In [None]:
# Aggregate per-file metrics for the 'keywords_' result files and print one LaTeX table row per file.
# Relies on globals left behind by other cells: file_name_list, str_list, original,
# original_noun and replace_words.
name2ppl = pickle.load(open('Results/Finegrained/metrics/name2ppl.pkl','rb'))
name2acc = pickle.load(open('Results/Finegrained/metrics/name2acc.pkl','rb'))
name2res = dict()
name2content = dict()
for name in file_name_list:
    acc_list = name2acc[name]
    res = pickle.load(open('Results/Finegrained/' + name, 'rb'))
    cnt_key = 0
    # Keep at most the first 1000 entries so indices line up with original[i].
    res = res[:1000]
    name2res[name] = dict()
    # Single pass: only the is_choose_first=True branch below ever runs.
    for is_choose_first in [True]:
        length_list = []
        max_it_list = []
        content_list = []
        content_pc_list = []
        succ_list = []
        word_overlap_list = []
        edit_distance_list = []
        if is_choose_first:
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]
            #acc, acc_list = name2acc[name]
        else:
            # Unreachable with the [True] loop above.
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]['last']
            #acc, acc_list = name2acc[name]['last']
        for i,o in enumerate(res):
            # Each result entry unpacks to (sentence, length, cnt, maxit).
            s, length, cnt, maxit = o
            # Token count of the generated sentence relative to the number of ids in the
            # original (the stored `length` field is not used here).
            length_list.append(float(len(s.split())) / len(original[i]))
            # str_list is not created in this cell; this appends to the list left by an earlier cell.
            str_list.append(s)
            max_it_list.append(maxit)
            word_overlap, edit_distance = word_overlap_edit(s, idx2str(original[i]))
            word_overlap_list.append(word_overlap)
            edit_distance_list.append(edit_distance)
            # First replacement keyword for this sentence (empty list when original_noun[i] is empty).
            keywords = [replace_words[t[0]] for t in original_noun[i]][:1]
            if len(keywords) == 0:
                # NOTE(review): skipping here also skips the content_list / content_pc_list
                # updates, which makes the len(original_noun[i]) == 0 branch below
                # unreachable — confirm this is intended.
                continue
            # Substring test against the raw sentence string.
            if keywords[0] in s:
                cnt_key += 1
            # content: 1.0 if cnt is positive, else 0.0.
            if cnt > 0:
                content = 1.
                content_list.append(content)
            else:
                content = 0.
                content_list.append(content)
            # content_pc: cnt divided by the number of entries in original_noun[i].
            if len(original_noun[i]) == 0:
                content_pc = 1.
                content = 1.
            else:
                content_pc = float(cnt) / len(original_noun[i])
            content_pc_list.append(content_pc)


        if True:
            name2content[name] = np.mean(content_list) * 100
            # Columns: name, mean acc_list[:1000] (%), mean ppl_list[:1000], word overlap (%),
            # content_pc (%), length ratio (%), cnt_key / 10 (a percentage only for 1000 sentences).
            print('%s & %.1f & %.1f & %.1f & %.1f & %.1f & %.1f\\\\' % (name.replace('_','-'), 100.* np.mean(acc_list[:1000]), np.mean(ppl_list[:1000]), np.mean(word_overlap_list) * 100, np.mean(content_pc_list) * 100, np.mean(length_list) * 100, cnt_key/10.))
    
    
    

## Sentiment Length

In [None]:
import os
# Collect the result files located directly in Results/Finegrained/ (no recursion)
# whose name starts with 'sentiment_length_' and contains '.pkl'.
file_name_list = []
walk_root, walk_dirs, top_level_files = next(os.walk('Results/Finegrained/'), (None, [], []))
for candidate in top_level_files:
    if candidate.startswith('sentiment_length_') and '.pkl' in candidate:
        file_name_list.append(candidate)
        print(candidate)

In [None]:

# Score every generated sentence of each 'sentiment_length_' result file with the KenLM model.
name2ppl = dict()
str2ppl = dict()
for name in file_name_list:
    # Context manager so the result file is closed right after loading.
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    assert len(res) == 2000

    # The generated sentence is the first field of every result entry.
    str_list = [t[0] for t in res]

    ppl_list = []
    for s in str_list:
        # Clip each perplexity at ppl_upper before it enters the means.
        str2ppl[s] = min(ppl_upper, lm.perplexity(s))
        ppl_list.append(str2ppl[s])

    # Stored as (mean over entries 1000+, mean over entries 0-999, per-sentence list);
    # the evaluation cells unpack this as (ppl_neg, ppl_pos, ppl_list).
    name2ppl[name] = (np.mean(ppl_list[1000:]), np.mean(ppl_list[:1000]), ppl_list)
    print(name, np.mean(ppl_list))
# NOTE: every section of this notebook writes to this same path, overwriting earlier output.
with open('Results/Finegrained/metrics/name2ppl.pkl', 'wb') as fout:
    pickle.dump(name2ppl, fout)

In [None]:
# Run the BiLSTM classifier on every generated sentence of each 'sentiment_length_' result file.
name2acc = dict()
for name in file_name_list:
    # Context manager so the result file is closed right after loading.
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    assert len(res) == 2000

    # The generated sentence is the first field of every result entry.
    str_list = [t[0] for t in res]

    # Encode with the classifier vocabulary, append <EOS>, and pad to the longest sentence.
    idx_list = [[w2id_all[idx] for idx in s.split()] + [w2id_all['<EOS>']] for s in str_list]
    pad_x, length_list = pad(idx_list, w2id_all['<PAD>'], move_go=False)
    # Keep probabilities of 1.0 disable dropout at inference time.
    res_class = B.sess.run(B.predictions, {B.input_x: pad_x,
                                           B.X_seq_len: length_list,
                                           B.output_keep_prob: 1.0,
                                           B.input_keep_prob: 1.0})
    # 1.0 when the predicted class differs from argmax(C_original[...]), else 0.0.
    # Entries 1000-1999 reuse the labels of entries 0-999 (hence i % 1000).
    acc_list = [1. if res_class[i] != np.argmax(C_original[i % 1000]) else 0.
                for i in range(2000)]
    name2acc[name] = acc_list
    print(name, np.mean(acc_list))
# NOTE: every section of this notebook writes to this same path, overwriting earlier output.
with open('Results/Finegrained/metrics/name2acc.pkl', 'wb') as fout:
    pickle.dump(name2acc, fout)
        

### IncreaseSentiment

In [None]:
# Aggregate metrics over the first 1000 entries of each 'sentiment_length_' result file and
# print one LaTeX table row per file.
# Relies on globals left behind by other cells: file_name_list, str_list, original,
# original_noun and replace_words.
name2ppl = pickle.load(open('Results/Finegrained/metrics/name2ppl.pkl','rb'))
name2acc = pickle.load(open('Results/Finegrained/metrics/name2acc.pkl','rb'))
name2res = dict()
name2content = dict()
for name in file_name_list:
    acc_list = name2acc[name]
    res = pickle.load(open('Results/Finegrained/' + name, 'rb'))
    cnt_key = 0
    # First half of the results; matches the [:1000] slices used in the print below.
    res = res[:1000]
    name2res[name] = dict()
    # Single pass: only the is_choose_first=True branch below ever runs.
    for is_choose_first in [True]:
        length_list = []
        max_it_list = []
        content_list = []
        content_pc_list = []
        succ_list = []
        word_overlap_list = []
        edit_distance_list = []
        if is_choose_first:
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]
            #acc, acc_list = name2acc[name]
        else:
            # Unreachable with the [True] loop above.
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]['last']
            #acc, acc_list = name2acc[name]['last']
        for i,o in enumerate(res):
            # Each result entry unpacks to (sentence, auc, length, cnt, maxit).
            s, auc, length, cnt, maxit = o
            # Generated length relative to the number of ids in the original sentence.
            length_list.append(float(length) / len(original[i]))
            # str_list is not created in this cell; this appends to the list left by an earlier cell.
            str_list.append(s)
            max_it_list.append(maxit)
            word_overlap, edit_distance = word_overlap_edit(s, idx2str(original[i]))
            word_overlap_list.append(word_overlap)
            edit_distance_list.append(edit_distance)
            # First replacement keyword for this sentence (empty list when original_noun[i] is empty).
            keywords = [replace_words[t[0]] for t in original_noun[i]][:1]
            if len(keywords) == 0:
                # NOTE(review): skipping here also skips the content_list / content_pc_list
                # updates, which makes the len(original_noun[i]) == 0 branch below
                # unreachable — confirm this is intended.
                continue
            # Substring test against the raw sentence string.
            if keywords[0] in s:
                cnt_key += 1
            # content: 1.0 if cnt is positive, else 0.0.
            if cnt > 0:
                content = 1.
                content_list.append(content)
            else:
                content = 0.
                content_list.append(content)
            # content_pc: cnt divided by the number of entries in original_noun[i].
            if len(original_noun[i]) == 0:
                content_pc = 1.
                content = 1.
            else:
                content_pc = float(cnt) / len(original_noun[i])
            content_pc_list.append(content_pc)


        if True:
            name2content[name] = np.mean(content_list) * 100
            # Columns: name, mean acc_list[:1000] (%), mean ppl_list[:1000], word overlap (%),
            # content_pc (%), length ratio (%), cnt_key / 10 (a percentage only for 1000 sentences).
            print('%s & %.1f & %.1f & %.1f & %.1f & %.1f & %.1f\\\\' % (name.replace('_','-'), 100.* np.mean(acc_list[:1000]), np.mean(ppl_list[:1000]), np.mean(word_overlap_list) * 100, np.mean(content_pc_list) * 100, np.mean(length_list) * 100, cnt_key/10.))
    
    
    
        

### DecreaseSentiment

In [None]:
# Aggregate metrics over entries 1000+ of each 'sentiment_length_' result file and print one
# LaTeX table row per file.
# Relies on globals left behind by other cells: file_name_list, str_list, original,
# original_noun and replace_words.
name2ppl = pickle.load(open('Results/Finegrained/metrics/name2ppl.pkl','rb'))
name2acc = pickle.load(open('Results/Finegrained/metrics/name2acc.pkl','rb'))
name2res = dict()
name2content = dict()
for name in file_name_list:
    acc_list = name2acc[name]
    res = pickle.load(open('Results/Finegrained/' + name, 'rb'))

    # Second half of the results; after slicing, entry i is compared with original[i].
    res = res[1000:]
    name2res[name] = dict()
    cnt_key = 0.
    # Single pass: only the is_choose_first=True branch below ever runs.
    for is_choose_first in [True]:
        length_list = []
        max_it_list = []
        content_list = []
        content_pc_list = []
        succ_list = []
        word_overlap_list = []
        edit_distance_list = []
        if is_choose_first:
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]
            #acc, acc_list = name2acc[name]
        else:
            # Unreachable with the [True] loop above.
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]['last']
            #acc, acc_list = name2acc[name]['last']
        for i,o in enumerate(res):
            # Each result entry unpacks to (sentence, auc, length, cnt, maxit).
            s, auc, length, cnt, maxit = o
            # Generated length relative to the number of ids in the original sentence.
            length_list.append(float(length) / len(original[i]))
            # str_list is not created in this cell; this appends to the list left by an earlier cell.
            str_list.append(s)
            max_it_list.append(maxit)
            word_overlap, edit_distance = word_overlap_edit(s, idx2str(original[i]))
            word_overlap_list.append(word_overlap)
            edit_distance_list.append(edit_distance)
            # First replacement keyword for this sentence (empty list when original_noun[i] is empty).
            keywords = [replace_words[t[0]] for t in original_noun[i]][:1]
            if len(keywords) == 0:
                # NOTE(review): skipping here also skips the content_list / content_pc_list
                # updates, which makes the len(original_noun[i]) == 0 branch below
                # unreachable — confirm this is intended.
                continue
            # Substring test against the raw sentence string.
            if keywords[0] in s:
                cnt_key += 1
            # content: 1.0 if cnt is positive, else 0.0.
            if cnt > 0:
                content = 1.
                content_list.append(content)
            else:
                content = 0.
                content_list.append(content)
            # content_pc: cnt divided by the number of entries in original_noun[i].
            if len(original_noun[i]) == 0:
                content_pc = 1.
                content = 1.
            else:
                content_pc = float(cnt) / len(original_noun[i])
            content_pc_list.append(content_pc)


        if True:
            name2content[name] = np.mean(content_list) * 100
            # Columns: name, mean acc_list[1000:] (%), mean ppl_list[1000:], word overlap (%),
            # content_pc (%), length ratio (%), cnt_key / 10 (a percentage only for 1000 sentences).
            print('%s & %.1f & %.1f & %.1f & %.1f & %.1f & %.1f\\\\' % (name.replace('_','-'), 100.* np.mean(acc_list[1000:]), np.mean(ppl_list[1000:]), np.mean(word_overlap_list) * 100, np.mean(content_pc_list) * 100, np.mean(length_list) * 100, cnt_key/ 10.))
    
    
    

## Sentiment Keywords

In [None]:
import os
# Collect the result files located directly in Results/Finegrained/ (no recursion)
# whose name starts with 'keywordssentiment_' and contains '.pkl'.
file_name_list = []
walk_root, walk_dirs, top_level_files = next(os.walk('Results/Finegrained/'), (None, [], []))
for candidate in top_level_files:
    if candidate.startswith('keywordssentiment_') and '.pkl' in candidate:
        file_name_list.append(candidate)
        print(candidate)

In [None]:

# Score every generated sentence of each 'keywordssentiment_' result file with the KenLM model.
name2ppl = dict()
str2ppl = dict()
for name in file_name_list:
    # Context manager so the result file is closed right after loading.
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    assert len(res) == 1000

    # The generated sentence is the first field of every result entry.
    str_list = [t[0] for t in res]

    ppl_list = []
    for s in str_list:
        # Clip each perplexity at 3000 (this section does not use ppl_upper).
        str2ppl[s] = np.min([3000, lm.perplexity(s)])
        ppl_list.append(str2ppl[s])

    # Stored as (mean over entries 500+, mean over entries 0-499, per-sentence list);
    # the evaluation cells unpack this as (ppl_neg, ppl_pos, ppl_list).
    name2ppl[name] = (np.mean(ppl_list[500:]), np.mean(ppl_list[:500]), ppl_list)
    print(name, np.mean(ppl_list))
# NOTE: every section of this notebook writes to this same path, overwriting earlier output.
with open('Results/Finegrained/metrics/name2ppl.pkl', 'wb') as fout:
    pickle.dump(name2ppl, fout)

In [None]:
# Run the BiLSTM classifier on every generated sentence of each 'keywordssentiment_' result file.
name2acc = dict()
for name in file_name_list:
    # Context manager so the result file is closed right after loading.
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    assert len(res) == 1000

    # The generated sentence is the first field of every result entry.
    str_list = [t[0] for t in res]

    # Encode with the classifier vocabulary, append <EOS>, and pad to the longest sentence.
    idx_list = [[w2id_all[idx] for idx in s.split()] + [w2id_all['<EOS>']] for s in str_list]
    pad_x, length_list = pad(idx_list, w2id_all['<PAD>'], move_go=False)
    # Keep probabilities of 1.0 disable dropout at inference time.
    res_class = B.sess.run(B.predictions, {B.input_x: pad_x,
                                           B.X_seq_len: length_list,
                                           B.output_keep_prob: 1.0,
                                           B.input_keep_prob: 1.0})
    # 1.0 when the predicted class differs from argmax(C_original[i]), else 0.0.
    acc_list = [1. if res_class[i] != np.argmax(C_original[i]) else 0.
                for i in range(1000)]
    name2acc[name] = acc_list
    print(name, np.mean(acc_list))
# NOTE: every section of this notebook writes to this same path, overwriting earlier output.
with open('Results/Finegrained/metrics/name2acc.pkl', 'wb') as fout:
    pickle.dump(name2acc, fout)
        

In [None]:
# Aggregate per-file metrics for the 'keywordssentiment_' result files and print one LaTeX
# table row per file.
# Relies on globals left behind by other cells: file_name_list, str_list, original,
# original_noun and replace_words.
name2ppl = pickle.load(open('Results/Finegrained/metrics/name2ppl.pkl','rb'))
name2acc = pickle.load(open('Results/Finegrained/metrics/name2acc.pkl','rb'))
name2res = dict()
name2content = dict()
for name in file_name_list:
    acc_list = name2acc[name]
    res = pickle.load(open('Results/Finegrained/' + name, 'rb'))
    cnt_key = 0
    # Keep at most the first 1000 entries so indices line up with original[i].
    res = res[:1000]
    name2res[name] = dict()
    # Single pass: only the is_choose_first=True branch below ever runs.
    for is_choose_first in [True]:
        length_list = []
        max_it_list = []
        content_list = []
        content_pc_list = []
        succ_list = []
        word_overlap_list = []
        edit_distance_list = []
        if is_choose_first:
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]
            #acc, acc_list = name2acc[name]
        else:
            # Unreachable with the [True] loop above.
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]['last']
            #acc, acc_list = name2acc[name]['last']
        for i,o in enumerate(res):
            # Each result entry unpacks to (sentence, auc, length, cnt, maxit).
            s, auc, length, cnt, maxit = o
            # Token count of the generated sentence relative to the number of ids in the
            # original (the stored `length` field is not used here).
            length_list.append(float(len(s.split())) / len(original[i]))
            # str_list is not created in this cell; this appends to the list left by an earlier cell.
            str_list.append(s)
            max_it_list.append(maxit)
            word_overlap, edit_distance = word_overlap_edit(s, idx2str(original[i]))
            word_overlap_list.append(word_overlap)
            edit_distance_list.append(edit_distance)
            # First replacement keyword for this sentence (empty list when original_noun[i] is empty).
            keywords = [replace_words[t[0]] for t in original_noun[i]][:1]
            if len(keywords) == 0:
                # NOTE(review): skipping here also skips the content_list / content_pc_list
                # updates, which makes the len(original_noun[i]) == 0 branch below
                # unreachable — confirm this is intended.
                continue
            # Substring test against the raw sentence string.
            if keywords[0] in s:
                cnt_key += 1
            # content: 1.0 if cnt is positive, else 0.0.
            if cnt > 0:
                content = 1.
                content_list.append(content)
            else:
                content = 0.
                content_list.append(content)
            # content_pc: cnt divided by the number of entries in original_noun[i].
            if len(original_noun[i]) == 0:
                content_pc = 1.
                content = 1.
            else:
                content_pc = float(cnt) / len(original_noun[i])
            content_pc_list.append(content_pc)


        if True:
            name2content[name] = np.mean(content_list) * 100
            # Columns: name, mean acc_list[:1000] (%), mean ppl_list[:1000], word overlap (%),
            # content_pc (%), length ratio (%), cnt_key / 10 (a percentage only for 1000 sentences).
            print('%s & %.1f & %.1f & %.1f & %.1f & %.1f & %.1f\\\\' % (name.replace('_','-'), 100.* np.mean(acc_list[:1000]), np.mean(ppl_list[:1000]), np.mean(word_overlap_list) * 100, np.mean(content_pc_list) * 100, np.mean(length_list) * 100, cnt_key/10.))
    
    


## Keywords Length

In [None]:
import os
# Collect the result files located directly in Results/Finegrained/ (no recursion)
# whose name starts with 'keywordslength_' and contains '.pkl'.
file_name_list = []
walk_root, walk_dirs, top_level_files = next(os.walk('Results/Finegrained/'), (None, [], []))
for candidate in top_level_files:
    if candidate.startswith('keywordslength_') and '.pkl' in candidate:
        file_name_list.append(candidate)
        print(candidate)

In [None]:

# Score every generated sentence of each 'keywordslength_' result file with the KenLM model.
name2ppl = dict()
str2ppl = dict()
for name in file_name_list:
    # Context manager so the result file is closed right after loading.
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    assert len(res) == 2000

    # The generated sentence is the first field of every result entry.
    str_list = [t[0] for t in res]

    ppl_list = []
    for s in str_list:
        # Clip each perplexity at 3000 (this section does not use ppl_upper).
        str2ppl[s] = np.min([3000, lm.perplexity(s)])
        ppl_list.append(str2ppl[s])

    # Stored as (mean over entries 1000+, mean over entries 0-999, per-sentence list);
    # the evaluation cells unpack this as (ppl_neg, ppl_pos, ppl_list).
    name2ppl[name] = (np.mean(ppl_list[1000:]), np.mean(ppl_list[:1000]), ppl_list)
    print(name, np.mean(ppl_list))
# NOTE: every section of this notebook writes to this same path, overwriting earlier output.
with open('Results/Finegrained/metrics/name2ppl.pkl', 'wb') as fout:
    pickle.dump(name2ppl, fout)

In [None]:
# Run the BiLSTM classifier on every generated sentence of each 'keywordslength_' result file.
name2acc = dict()
for name in file_name_list:
    # Context manager so the result file is closed right after loading.
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    assert len(res) == 2000

    # The generated sentence is the first field of every result entry.
    str_list = [t[0] for t in res]

    # Encode with the classifier vocabulary, append <EOS>, and pad to the longest sentence.
    idx_list = [[w2id_all[idx] for idx in s.split()] + [w2id_all['<EOS>']] for s in str_list]
    pad_x, length_list = pad(idx_list, w2id_all['<PAD>'], move_go=False)
    # Keep probabilities of 1.0 disable dropout at inference time.
    res_class = B.sess.run(B.predictions, {B.input_x: pad_x,
                                           B.X_seq_len: length_list,
                                           B.output_keep_prob: 1.0,
                                           B.input_keep_prob: 1.0})
    # 1.0 when the predicted class differs from argmax(C_original[...]), else 0.0.
    # Entries 1000-1999 reuse the labels of entries 0-999 (hence i % 1000).
    acc_list = [1. if res_class[i] != np.argmax(C_original[i % 1000]) else 0.
                for i in range(2000)]
    name2acc[name] = acc_list
    print(name, np.mean(acc_list))
# NOTE: every section of this notebook writes to this same path, overwriting earlier output.
with open('Results/Finegrained/metrics/name2acc.pkl', 'wb') as fout:
    pickle.dump(name2acc, fout)
        

### IncreaseSentiment

In [None]:
# Aggregate metrics over the first 1000 entries of each 'keywordslength_' result file and
# print one LaTeX table row per file.
# Relies on globals left behind by other cells: file_name_list, str_list, original,
# original_noun and replace_words.
name2ppl = pickle.load(open('Results/Finegrained/metrics/name2ppl.pkl','rb'))
name2acc = pickle.load(open('Results/Finegrained/metrics/name2acc.pkl','rb'))
name2res = dict()
name2content = dict()
for name in file_name_list:
    acc_list = name2acc[name]
    res = pickle.load(open('Results/Finegrained/' + name, 'rb'))
    cnt_key = 0
    # First half of the results; matches the [:1000] slices used in the print below.
    res = res[:1000]
    name2res[name] = dict()
    # Single pass: only the is_choose_first=True branch below ever runs.
    for is_choose_first in [True]:
        length_list = []
        max_it_list = []
        content_list = []
        content_pc_list = []
        succ_list = []
        word_overlap_list = []
        edit_distance_list = []
        if is_choose_first:
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]
            #acc, acc_list = name2acc[name]
        else:
            # Unreachable with the [True] loop above.
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]['last']
            #acc, acc_list = name2acc[name]['last']
        for i,o in enumerate(res):
            # Each result entry unpacks to (sentence, auc, length, cnt, maxit).
            s, auc, length, cnt, maxit = o
            # Token count of the generated sentence relative to the number of ids in the
            # original (the stored `length` field is not used here).
            length_list.append(float(len(s.split())) / len(original[i]))
            # str_list is not created in this cell; this appends to the list left by an earlier cell.
            str_list.append(s)
            max_it_list.append(maxit)
            word_overlap, edit_distance = word_overlap_edit(s, idx2str(original[i]))
            word_overlap_list.append(word_overlap)
            edit_distance_list.append(edit_distance)
            # First replacement keyword for this sentence (empty list when original_noun[i] is empty).
            keywords = [replace_words[t[0]] for t in original_noun[i]][:1]
            if len(keywords) == 0:
                # NOTE(review): skipping here also skips the content_list / content_pc_list
                # updates, which makes the len(original_noun[i]) == 0 branch below
                # unreachable — confirm this is intended.
                continue
            # Substring test against the raw sentence string.
            if keywords[0] in s:
                cnt_key += 1
            # content: 1.0 if cnt is positive, else 0.0.
            if cnt > 0:
                content = 1.
                content_list.append(content)
            else:
                content = 0.
                content_list.append(content)
            # content_pc: cnt divided by the number of entries in original_noun[i].
            if len(original_noun[i]) == 0:
                content_pc = 1.
                content = 1.
            else:
                content_pc = float(cnt) / len(original_noun[i])
            content_pc_list.append(content_pc)


        if True:
            name2content[name] = np.mean(content_list) * 100
            # Columns: name, mean acc_list[:1000] (%), mean ppl_list[:1000], word overlap (%),
            # content_pc (%), length ratio (%), cnt_key / 10 (a percentage only for 1000 sentences).
            print('%s & %.1f & %.1f & %.1f & %.1f & %.1f & %.1f\\\\' % (name.replace('_','-'), 100.* np.mean(acc_list[:1000]), np.mean(ppl_list[:1000]), np.mean(word_overlap_list) * 100, np.mean(content_pc_list) * 100, np.mean(length_list) * 100, cnt_key/10.))
    
    
    

### DecreaseSentiment

In [None]:
# Aggregate metrics over entries 1000+ of each 'keywordslength_' result file and print one
# LaTeX table row per file.
# Relies on globals left behind by other cells: file_name_list, str_list, original,
# original_noun and replace_words.
name2ppl = pickle.load(open('Results/Finegrained/metrics/name2ppl.pkl','rb'))
name2acc = pickle.load(open('Results/Finegrained/metrics/name2acc.pkl','rb'))
name2res = dict()
name2content = dict()
for name in file_name_list:
    acc_list = name2acc[name]
    res = pickle.load(open('Results/Finegrained/' + name, 'rb'))

    # Second half of the results; after slicing, entry i is compared with original[i].
    res = res[1000:]
    name2res[name] = dict()
    cnt_key = 0.
    # Single pass: only the is_choose_first=True branch below ever runs.
    for is_choose_first in [True]:
        length_list = []
        max_it_list = []
        content_list = []
        content_pc_list = []
        succ_list = []
        word_overlap_list = []
        edit_distance_list = []
        if is_choose_first:
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]
            #acc, acc_list = name2acc[name]
        else:
            # Unreachable with the [True] loop above.
            ppl_neg, ppl_pos, ppl_list = name2ppl[name]['last']
            #acc, acc_list = name2acc[name]['last']
        for i,o in enumerate(res):
            # Each result entry unpacks to (sentence, auc, length, cnt, maxit).
            s, auc, length, cnt, maxit = o
            # Token count of the generated sentence relative to the number of ids in the
            # original (the stored `length` field is not used here).
            length_list.append(float(len(s.split())) / len(original[i]))
            # str_list is not created in this cell; this appends to the list left by an earlier cell.
            str_list.append(s)
            max_it_list.append(maxit)
            word_overlap, edit_distance = word_overlap_edit(s, idx2str(original[i]))
            word_overlap_list.append(word_overlap)
            edit_distance_list.append(edit_distance)
            # First replacement keyword for this sentence (empty list when original_noun[i] is empty).
            keywords = [replace_words[t[0]] for t in original_noun[i]][:1]
            if len(keywords) == 0:
                # NOTE(review): skipping here also skips the content_list / content_pc_list
                # updates, which makes the len(original_noun[i]) == 0 branch below
                # unreachable — confirm this is intended.
                continue
            # Substring test against the raw sentence string.
            if keywords[0] in s:
                cnt_key += 1
            # content: 1.0 if cnt is positive, else 0.0.
            if cnt > 0:
                content = 1.
                content_list.append(content)
            else:
                content = 0.
                content_list.append(content)
            # content_pc: cnt divided by the number of entries in original_noun[i].
            if len(original_noun[i]) == 0:
                content_pc = 1.
                content = 1.
            else:
                content_pc = float(cnt) / len(original_noun[i])
            content_pc_list.append(content_pc)


        if True:
            name2content[name] = np.mean(content_list) * 100
            # Columns: name, mean acc_list[1000:] (%), mean ppl_list[1000:], word overlap (%),
            # content_pc (%), length ratio (%), cnt_key / 10 (a percentage only for 1000 sentences).
            print('%s & %.1f & %.1f & %.1f & %.1f & %.1f & %.1f\\\\' % (name.replace('_','-'), 100.* np.mean(acc_list[1000:]), np.mean(ppl_list[1000:]), np.mean(word_overlap_list) * 100, np.mean(content_pc_list) * 100, np.mean(length_list) * 100, cnt_key/ 10.))
    
    
    

## Multi

In [None]:
import os

# Collect the result pickles of the "multi_" runs. Only files directly inside
# Results/Finegrained/ are inspected (the walk is stopped after the top-level
# directory), and only files holding exactly 2000 records are kept.
file_name_list = []
for r, t, f in os.walk('Results/Finegrained/'):
    for ff in f:
        if ff.startswith('multi_') and '.pkl' in ff:
            # Context manager so the handle is closed deterministically
            # instead of being left to the garbage collector.
            with open('Results/Finegrained/' + ff, 'rb') as fin:
                res = pickle.load(fin)
            if len(res) == 2000:
                file_name_list.append(ff)
                print(ff)
    break

In [None]:

# Language-model perplexity of every generated sentence, per result file.
#   name2ppl[name] = (mean ppl of records 1000..1999,
#                     mean ppl of records 0..999,
#                     list of per-record ppl)
#   str2ppl[sentence] = capped perplexity of that sentence
name2ppl = dict()
str2ppl = dict()
for name in file_name_list:
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    assert len(res) == 2000

    # The first field of every record is the generated sentence.
    str_list = [t[0] for t in res]

    ppl_list = []
    for s in str_list:
        # Perplexity is capped at 3000 (presumably so that a few degenerate
        # outputs do not dominate the means -- confirm).
        str2ppl[s] = np.min([3000, lm.perplexity(s)])
        ppl_list.append(str2ppl[s])

    name2ppl[name] = (np.mean(ppl_list[1000:]), np.mean(ppl_list[:1000]), ppl_list)
    print(name, np.mean(ppl_list))

# Close the file explicitly: later cells read this pickle back, so the data
# must be flushed to disk before they run.
with open('Results/Finegrained/metrics/name2ppl.pkl', 'wb') as fout:
    pickle.dump(name2ppl, fout)

In [None]:
# Classifier-based score of every generated sentence, per result file.
#   name2acc[name] = list of 2000 floats, 1. where the classifier prediction
#   differs from argmax(C_original[...]) and 0. otherwise.
name2acc = dict()
for name in file_name_list:
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    assert len(res) == 2000

    # The first field of every record is the generated sentence.
    str_list = [t[0] for t in res]

    # Map words to ids, append <EOS>, and pad the batch to a common length.
    idx_list = [[w2id_all[idx] for idx in s.split()] + [w2id_all['<EOS>']] for s in str_list]
    pad_x, length_list = pad(idx_list, w2id_all['<PAD>'], move_go=False)
    res_class = B.sess.run(B.predictions, {B.input_x: pad_x,
                                           B.X_seq_len: length_list,
                                           B.output_keep_prob: 1.0,
                                           B.input_keep_prob: 1.0})

    # Records 0..999 and 1000..1999 are both compared against the same 1000
    # entries of C_original, hence the modulo. A record scores 1. when the
    # predicted class differs from argmax(C_original[...]) (presumably the
    # class of the source sentence, i.e. the attribute was changed -- confirm).
    acc_list = []
    for i in range(2000):
        if res_class[i] != np.argmax(C_original[i % 1000]):
            acc_list.append(1.)
        else:
            acc_list.append(0.)

    name2acc[name] = acc_list
    print(name, np.mean(acc_list))

# Close the file explicitly: later cells read this pickle back.
with open('Results/Finegrained/metrics/name2acc.pkl', 'wb') as fout:
    pickle.dump(name2acc, fout)
        

### IncreaseMulti

In [None]:
# Print one LaTeX table row per result file for the FIRST 1000 records of each
# run: name, classifier score, perplexity, word overlap, content preservation,
# length ratio and keyword hit rate (the last four as percentages).
with open('Results/Finegrained/metrics/name2ppl.pkl', 'rb') as fin:
    name2ppl = pickle.load(fin)
with open('Results/Finegrained/metrics/name2acc.pkl', 'rb') as fin:
    name2acc = pickle.load(fin)
name2res = dict()
name2content = dict()
for name in file_name_list:
    acc_list = name2acc[name]
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)
    res = res[:1000]
    name2res[name] = dict()
    cnt_key = 0

    # Fresh per-file accumulators. str_list is created here so the cell no
    # longer depends on a list left behind by a previously executed cell.
    str_list = []
    length_list = []
    max_it_list = []
    content_list = []
    content_pc_list = []
    succ_list = []
    word_overlap_list = []
    edit_distance_list = []

    ppl_neg, ppl_pos, ppl_list = name2ppl[name]

    for i, o in enumerate(res):
        s, auc, length, flag, cnt, maxit = o
        # Output length relative to the length of the i-th original sentence.
        length_list.append(float(len(s.split())) / len(original[i]))
        str_list.append(s)
        max_it_list.append(maxit)
        word_overlap, edit_distance = word_overlap_edit(s, idx2str(original[i]))
        word_overlap_list.append(word_overlap)
        edit_distance_list.append(edit_distance)

        keywords = [replace_words[t[0]] for t in original_noun[i]][:1]
        # NOTE(review): records whose original has no nouns are excluded from
        # the content metrics below (but not from length / overlap). The
        # original code also had a "no nouns -> content_pc = 1" branch after
        # this point, which could never run because of this `continue`;
        # confirm which behaviour is intended.
        if not keywords:
            continue
        # Substring test on the raw output string, as in the original.
        if keywords[0] in s:
            cnt_key += 1
        content_list.append(1. if cnt > 0 else 0.)
        content_pc_list.append(float(cnt) / len(original_noun[i]))

    name2content[name] = np.mean(content_list) * 100
    # Keyword hit rate is a percentage of all evaluated records (1000 here,
    # so this equals the former hard-coded cnt_key / 10.).
    print('%s & %.1f & %.1f & %.1f & %.1f & %.1f & %.1f\\\\' % (
        name.replace('_', '-'),
        100. * np.mean(acc_list[:1000]),
        np.mean(ppl_list[:1000]),
        np.mean(word_overlap_list) * 100,
        np.mean(content_pc_list) * 100,
        np.mean(length_list) * 100,
        100. * cnt_key / len(res)))
    
    
   


### DecreaseMulti

In [None]:
# Print one LaTeX table row per result file for the LAST 1000 records of each
# run: name, classifier score, perplexity, word overlap, content preservation,
# length ratio and keyword hit rate (the last four as percentages).
with open('Results/Finegrained/metrics/name2ppl.pkl', 'rb') as fin:
    name2ppl = pickle.load(fin)
with open('Results/Finegrained/metrics/name2acc.pkl', 'rb') as fin:
    name2acc = pickle.load(fin)
name2res = dict()
name2content = dict()
for name in file_name_list:
    acc_list = name2acc[name]
    with open('Results/Finegrained/' + name, 'rb') as fin:
        res = pickle.load(fin)

    # After slicing, position i in `res` is record 1000 + i of the run and is
    # compared against original[i] / original_noun[i].
    res = res[1000:]
    name2res[name] = dict()
    cnt_key = 0.

    # Fresh per-file accumulators. str_list is created here so the cell no
    # longer depends on a list left behind by a previously executed cell.
    str_list = []
    length_list = []
    max_it_list = []
    content_list = []
    content_pc_list = []
    succ_list = []
    word_overlap_list = []
    edit_distance_list = []

    ppl_neg, ppl_pos, ppl_list = name2ppl[name]

    for i, o in enumerate(res):
        s, auc, length, flag, cnt, maxit = o
        # Output length relative to the length of the i-th original sentence.
        length_list.append(float(len(s.split())) / len(original[i]))
        str_list.append(s)
        max_it_list.append(maxit)
        word_overlap, edit_distance = word_overlap_edit(s, idx2str(original[i]))
        word_overlap_list.append(word_overlap)
        edit_distance_list.append(edit_distance)

        keywords = [replace_words[t[0]] for t in original_noun[i]][:1]
        # NOTE(review): records whose original has no nouns are excluded from
        # the content metrics below (but not from length / overlap). The
        # original code also had a "no nouns -> content_pc = 1" branch after
        # this point, which could never run because of this `continue`;
        # confirm which behaviour is intended.
        if not keywords:
            continue
        # Substring test on the raw output string, as in the original.
        if keywords[0] in s:
            cnt_key += 1
        content_list.append(1. if cnt > 0 else 0.)
        content_pc_list.append(float(cnt) / len(original_noun[i]))

    name2content[name] = np.mean(content_list) * 100
    # Keyword hit rate is a percentage of all evaluated records (1000 here,
    # so this equals the former hard-coded cnt_key / 10.).
    print('%s & %.1f & %.1f & %.1f & %.1f & %.1f & %.1f\\\\' % (
        name.replace('_', '-'),
        100. * np.mean(acc_list[1000:]),
        np.mean(ppl_list[1000:]),
        np.mean(word_overlap_list) * 100,
        np.mean(content_pc_list) * 100,
        np.mean(length_list) * 100,
        100. * cnt_key / len(res)))
    
    
    
      