In [1]:
import torch 
import config
from torch.utils.data import Dataset, DataLoader, ConcatDataset
import pandas as pd

from data_utils import *
from model import *


In [2]:
def match(outputs, labels):
    results = []
    for predictions,targets in zip(outputs,labels):
        review_match = True
        for i in range( len( predictions ) ):
            if targets[i] == config.PAD: # special padding value, ignore this
                continue

            elif targets[i] == config.bio_dict['B']: # B tag seen
                matched = True
                begin = i
                while   i < len( predictions ) and targets[i] != config.bio_dict['O'] and targets[i] != config.PAD and \
                        not ( i > begin and targets[i] == config.bio_dict['B'] ): # B tag not seen again
                    
                    if targets[i] == predictions[i]:
                        i += 1
                    elif targets[i] != predictions[i]:
                        matched= False
                        break
                
                review_match = review_match and matched
        results.append( int(review_match) )

    return pd.DataFrame({'predictions': results})

def get_correct_indices(model,loader):
    with torch.no_grad():
        model.eval()
        print('evaluating')
        for _,batch in enumerate( loader ):
            batch = { k : v.to( config.device ) for k,v in batch.items()  }

            targets = batch[ 'targets' ]
            targets = pack_padded_sequence( targets, batch['original_review_length'], batch_first= True, enforce_sorted= False )
            targets,_ = pad_packed_sequence( targets, batch_first= True, padding_value= config.PAD )
            
            mask = ( targets < config.PAD )

            batch['targets'] = targets * mask.long()

            mask = mask.unsqueeze(2).float()
            if config.use_crf:
                _, outputs = model( batch, mask= mask, get_predictions= True ) 
            else:
                outputs = model( batch, mask= mask)
                outputs = torch.argmax( outputs, dim= 1 )
        
        results = match(outputs, targets)
        return results


In [3]:
vocab = Vocab.from_files( [config.dataset_path, config.test_dataset_path], store= config.mapping_file )
train_dataset = ReviewDataset(config.dataset_path, preprocessed= False, vocab= vocab)
test_dataset = ReviewDataset(config.test_dataset_path, preprocessed= False, vocab= vocab)
dataset = ConcatDataset([train_dataset,test_dataset])
loader = DataLoader( dataset, batch_size= len( dataset ), shuffle= False, num_workers= config.num_dataset_workers)


./datasets/Restaurants_Train.xml
Number of no-aspect sentences =  1021
Number of total reviews =  3044
./datasets/Restaurants_Test.xml
Number of no-aspect sentences =  194
Number of total reviews =  800
pre-existing mapping file
Number of no-aspect sentences =  1021
Number of total reviews =  3044
vocab gen
vocab gen complete
review processing
review processing complete
Number of no-aspect sentences =  194
Number of total reviews =  800
vocab gen
vocab gen complete
review processing
review processing complete


In [4]:
f_model = FusionAttentionAspectExtractionV2( vocab, embedding_path= config.word_embedding_path, use_crf= config.use_crf )
f_model.load_state_dict(torch.load('./model_weights/'+config.dataset+'_fusionv2_'+config.embedding+'.pt')) 
f_model = f_model.to(config.device)
print(f_model)

a_model = AttentionAspectExtraction( vocab, embedding_path= config.word_embedding_path, use_crf= config.use_crf )
a_model.load_state_dict(torch.load('./model_weights/'+config.dataset+'_attention_lstm_'+config.embedding+'.pt')) 
a_model = a_model.to(config.device)
print(a_model)

g_model = GlobalAttentionAspectExtraction( vocab, embedding_path= config.word_embedding_path, use_crf= config.use_crf )
g_model.load_state_dict(torch.load('./model_weights/'+config.dataset+'_global_attention_lstm_'+config.embedding+'.pt')) 
g_model = a_model.to(config.device)
print(a_model)

b_model = BaseLineLSTM( vocab, embedding_path= config.word_embedding_path, use_crf= config.use_crf )
b_model.load_state_dict(torch.load('./model_weights/'+config.dataset+'_lstm_'+config.embedding+'.pt')) 
b_model = b_model.to(config.device)
print(b_model)

pos_dim  -1
number of tokens:  5120  embedding dimensions:  200
embedding matrix shape:  (5120, 200)
retrieved embedding weights from existing file
FusionAttentionAspectExtractionV2(
  (embedding): Embedding(5120, 200)
  (encoder): LSTM(200, 50, num_layers=2, dropout=0.3, bidirectional=True)
  (w_a): Linear(in_features=100, out_features=100, bias=True)
  (w_f): Linear(in_features=200, out_features=100, bias=False)
  (w_r): Linear(in_features=100, out_features=3, bias=False)
  (crf): CRF(num_tags=3)
)
pos_dim  -1
number of tokens:  5120  embedding dimensions:  200
embedding matrix shape:  (5120, 200)
retrieved embedding weights from existing file
AttentionAspectExtraction(
  (embedding): Embedding(5120, 200)
  (encoder): LSTM(200, 50, num_layers=2, dropout=0.3, bidirectional=True)
  (w_r): Linear(in_features=100, out_features=3, bias=True)
  (crf): CRF(num_tags=3)
)
pos_dim  -1
number of tokens:  5120  embedding dimensions:  200
embedding matrix shape:  (5120, 200)
retrieved embedding w

In [5]:
# f_results = get_correct_indices(f_model, loader)
# a_results = get_correct_indices(a_model, loader)
# b_results = get_correct_indices(b_model, loader)
g_results = get_correct_indices(g_model, loader)

# a_correct = a_results & ~b_results 
# f_correct = f_results & ~a_results & ~b_results

# f_results.to_csv('./results/'+ config.dataset +'_fusionv2_predictions.csv', index= False)
# a_results.to_csv('./results/'+ config.dataset +'_attention_lstm_predictions.csv', index= False)
# b_results.to_csv('./results/'+ config.dataset +'_bilstm_predictions.csv', index= False)
g_results.to_csv('./results/'+ config.dataset +'_global_attention_lstm_predictions.csv', index= False)

# f_correct.to_csv('./results/'+ config.dataset +'_fusionv2_correct_predictions.csv', index= False)
# a_correct.to_csv('./results/'+ config.dataset +'_attention_lstm_correct_predictions.csv', index= False)

# print(f_results)
# print(a_results)
# print(b_results)
# print(a_correct)
# print(f_correct)

evaluating


In [6]:
########## load pre generated results ############
f_results = pd.read_csv('./results/'+ config.dataset +'_fusionv2_predictions.csv')
a_results = pd.read_csv('./results/'+ config.dataset +'_attention_lstm_predictions.csv')
b_results = pd.read_csv('./results/'+ config.dataset +'_bilstm_predictions.csv')
g_results = pd.read_csv('./results/'+ config.dataset +'_global_attention_lstm_predictions.csv')


In [7]:

f_fail = np.squeeze(1 - f_results.values)
g_fail = np.squeeze(1 - g_results.values)

f_fail = np.squeeze(np.argwhere(f_fail == 1)).tolist()
g_fail = np.squeeze(np.argwhere(g_fail == 1)).tolist()

# print(f_fail)
# print(g_fail)

dataset = train_dataset.get_review_list() + test_dataset.get_review_list()
print(len(dataset))
f_reviews = [ dataset[i] for i in list(f_fail) ]
g_reviews = [ dataset[i] for i in list(g_fail) ]



with open('./results/'+ config.dataset +'_fusion_fail_sentences.txt','w') as f:
    
    with torch.no_grad():
        f_model.eval()
        print('generating scores')
        for _,batch in enumerate( loader ):
            batch = { k : v.to( config.device ) for k,v in batch.items()  }

            targets = batch[ 'targets' ]
            targets = pack_padded_sequence( targets, batch['original_review_length'], batch_first= True, enforce_sorted= False )
            targets,_ = pad_packed_sequence( targets, batch_first= True, padding_value= config.PAD )
            
            mask = ( targets < config.PAD )

            batch['targets'] = targets * mask.long()

            mask = mask.unsqueeze(2).float()
            if config.use_crf:
                _, outputs, global_context, beta = f_model( batch, mask= mask, get_predictions= True, yield_attention_weights= True ) 
            else:
                outputs, global_context, beta = f_model( batch, mask= mask, yield_attention_weights= True)
                outputs = torch.argmax( outputs, dim= 1 )
        
        betas = torch.stack([ beta[ i , :, : ] for i in f_fail ])
        global_scores = torch.stack([ global_context[ i ] for i in f_fail ])
        print(global_scores.shape)
#         for i,g in enumerate(global_scores):
#             print(i,g)
#         input()
        torch.save(betas,'./model_weights/'+config.dataset+'_fusion_fail_beta_scores.pt')
        torch.save(global_scores,'./model_weights/'+config.dataset+'_fusion_fail_global_context_scores.pt')

        for review, global_score, beta in zip(f_reviews, global_scores, betas):
            if len(review.aspect_terms) > 1 and max([len(aspect) for aspect in review.aspect_terms ]) > 1:
                f.write(review.text +' ####### '+ str(review.aspect_terms) + '\n')

with open('./results/'+ config.dataset +'_global_attention_fail_sentences.txt','w') as f:
    
    with torch.no_grad():
        g_model.eval()
        print('evaluating')
        for _,batch in enumerate( loader ):
            batch = { k : v.to( config.device ) for k,v in batch.items()  }

            targets = batch[ 'targets' ]
            targets = pack_padded_sequence( targets, batch['original_review_length'], batch_first= True, enforce_sorted= False )
            targets,_ = pad_packed_sequence( targets, batch_first= True, padding_value= config.PAD )
            
            mask = ( targets < config.PAD )

            batch['targets'] = targets * mask.long()

            mask = mask.unsqueeze(2).float()
            if config.use_crf:
                _, outputs, beta = g_model( batch, mask= mask, get_predictions= True, yield_attention_weights= True ) 
            else:
                outputs, beta = g_model( batch, mask= mask, yield_attention_weights= True)
                outputs = torch.argmax( outputs, dim= 1 )
        
        betas = torch.stack([ beta[ i , :, : ] for i in g_fail ])
        torch.save(betas,'./model_weights/'+config.dataset+'_global_attention_lstm_fail_beta_scores.pt')
        print(betas.shape)


    for review, beta in zip(g_reviews, betas):
        if len(review.aspect_terms) > 1 and max([len(aspect) for aspect in review.aspect_terms ]) > 1:
            f.write(review.text + '########' + str(review.aspect_terms) + '\n')   




3844
generating scores
torch.Size([364, 1, 79])
evaluating
torch.Size([218, 79, 79])
