In [3]:
import os
import numpy as np
from matplotlib import pyplot as plt

In [4]:
# Maps each metric name to the position used to index a result vector.
metrics = {name: column for column, name in enumerate(('mse', 'auc', 'll', 'acc'))}

In [5]:
# Datasets to load; each is read from its own sub-directory of `out`.
datasets = [
    'amazon', 'peerread', 'yelp', 'amazon_binary',
    'samesex', 'immigration', 'deathpenalty', 'guncontrol',
]
out = '../out/'
num_folds = 5

# Pre-allocate one (num_folds, len(metrics)) table of zeros per dataset.
exp_results = {}
for data in datasets:
    exp_results[data] = np.zeros((num_folds, len(metrics)))

# Fill row `e_idx` of every table from the matching per-split .npy file.
for data, fold_table in exp_results.items():
    for e_idx in range(num_folds):
        resfile = f"{out}{data}/bert.result.split{e_idx}"
        fold_table[e_idx] = np.load(f"{resfile}.npy")

In [6]:
# (label, metric key) pairs in the order they are printed on the summary line.
summary_columns = (
    ("MSE:", 'mse'),
    ("AUC:", 'auc'),
    ('Log Loss:', 'll'),
    ('Accuracy:', 'acc'),
)

# For every dataset, average the stored results over axis 0 (one row per fold)
# and print the four metrics on a single line.
for data in datasets:
    results = exp_results[data].mean(axis=0)
    print('*' * 60)
    print("Data:", data)
    print(*[part
            for label, key in summary_columns
            for part in (label, results[metrics[key]])])

************************************************************
Data: amazon
MSE: 0.840019966449062 AUC: 0.0 Log Loss: 0.0 Accuracy: 0.0
************************************************************
Data: peerread
MSE: 0.0 AUC: 0.7834071504543612 Log Loss: 0.8645180614692254 Accuracy: 0.7711830371556749
************************************************************
Data: yelp
MSE: 0.0 AUC: 0.9395041096915315 Log Loss: 0.6185047147259519 Accuracy: 0.8608589105173365
************************************************************
Data: amazon_binary
MSE: 0.0 AUC: 0.8992026231883161 Log Loss: 0.35729242095703717 Accuracy: 0.9202113374157019
************************************************************
Data: samesex
MSE: 0.0 AUC: 0.7746252196082237 Log Loss: 0.8034131664206512 Accuracy: 0.7637142092733733
************************************************************
Data: immigration
MSE: 0.0 AUC: 0.7325451811459354 Log Loss: 0.9730091642836497 Accuracy: 0.6848482333950123
***************************

In [23]:
# Print a per-model summary for the 'semantic_scholar' dataset.
# NOTE(review): `models`, `get_best_results_regression`, and the 'npmi' / 'perp' /
# 'shuffle' entries of `metrics` are not defined in the visible part of this
# notebook; this cell presumably relies on state set up elsewhere -- confirm it
# still runs after Restart & Run All.
data = 'semantic_scholar'

# (label, metric key) pairs in printing order.
regression_columns = (
    ("MSE:", 'mse'),
    ("NPMI:", 'npmi'),
    ('Perplexity:', 'perp'),
    ('Shuffle loss:', 'shuffle'),
)

for model in models + ['stm']:
    # Only the second returned value (c_mse) is used to select the results to average.
    c_nmpi, c_mse, c_perp, c_shuf = get_best_results_regression(exp_results, data, model)
    results = exp_results[data][model][c_mse].mean(axis=0)
    print("*" * 20)
    print("Model:", model)
    print(*[part
            for label, key in regression_columns
            for part in (label, results[metrics[key]])])

********************
Model: regression
MSE: 0.842719143940325 NPMI: 0.08802336257920561 Perplexity: 1.069391942024231 Shuffle loss: 0.8718215882277832
********************
Model: adjusted_hstm
MSE: 0.8441661794251383 NPMI: 0.2194682755291649 Perplexity: 13034.4458984375 Shuffle loss: 0.8565664652441323
********************
Model: hstm
MSE: 0.856097225456289 NPMI: 0.22373644734323245 Perplexity: 26427.9658203125 Shuffle loss: 0.878407582125076
********************
Model: stm
MSE: 0.9075255576335481 NPMI: 0.22209302594656663 Perplexity: 16627.2216796875 Shuffle loss: 0.0


In [21]:
# Print a per-model summary for the 'peerread' dataset.
# NOTE(review): `models`, `get_best_results_classification`, and the 'npmi' /
# 'perp' / 'shuffle' entries of `metrics` are not defined in the visible part of
# this notebook; this cell presumably relies on state set up elsewhere -- confirm.
data = 'peerread'

# (label, metric key) pairs in printing order.
classification_columns = (
    ("Accuracy:", 'acc'),
    ("AUC:", 'auc'),
    ("Log loss:", 'll'),
    ("NPMI:", 'npmi'),
    ('Perplexity:', 'perp'),
    ('Shuffle loss:', 'shuffle'),
)

for model in models + ['stm']:
    # Only the second returned value (c_mse) is used to select the results to average.
    c_nmpi, c_mse, c_perp, c_shuf = get_best_results_classification(exp_results, data, model)
    results = exp_results[data][model][c_mse].mean(axis=0)
    print("*" * 20)
    print("Model:", model)
    print(*[part
            for label, key in classification_columns
            for part in (label, results[metrics[key]])])

********************
Model: regression
Accuracy: 0.806287170773152 AUC: 0.8374643129753979 Log loss: 0.4105547804418855 NPMI: 0.08040208822064276 Perplexity: 1.064802598953247 Shuffle loss: 0.44216430214943714
********************
Model: adjusted_hstm
Accuracy: 0.8115548003398472 AUC: 0.8508820657229574 Log loss: 0.3988711813168827 NPMI: 0.18236127725837784 Perplexity: 1668.764306640625 Shuffle loss: 0.4369654746416235
********************
Model: hstm
Accuracy: 0.8083262531860662 AUC: 0.8502379923197507 Log loss: 0.4060067901010376 NPMI: 0.18430088389050353 Perplexity: 1540.1703369140625 Shuffle loss: 0.47077346908180207
********************
Model: stm
Accuracy: 0.772302463891249 AUC: 0.7946228688378122 Log loss: 0.4887632010322611 NPMI: 0.17903039171601923 Perplexity: 1876.7062744140626 Shuffle loss: 0.0


In [22]:
# Print a per-model summary for the 'yelp' dataset.
# NOTE(review): `models`, `get_best_results_classification`, and the 'npmi' /
# 'perp' / 'shuffle' entries of `metrics` are not defined in the visible part of
# this notebook; this cell presumably relies on state set up elsewhere -- confirm.
data = 'yelp'

# (label, metric key) pairs in printing order.
classification_columns = (
    ("Accuracy:", 'acc'),
    ("AUC:", 'auc'),
    ("Log loss:", 'll'),
    ("NPMI:", 'npmi'),
    ('Perplexity:', 'perp'),
    ('Shuffle loss:', 'shuffle'),
)

for model in models + ['stm']:
    # Only the second returned value (c_mse) is used to select the results to average.
    c_nmpi, c_mse, c_perp, c_shuf = get_best_results_classification(exp_results, data, model)
    results = exp_results[data][model][c_mse].mean(axis=0)
    print("*" * 20)
    print("Model:", model)
    print(*[part
            for label, key in classification_columns
            for part in (label, results[metrics[key]])])

********************
Model: regression
Accuracy: 0.8885327991987981 AUC: 0.9541655494520274 Log loss: 0.2782380657428568 NPMI: 0.05703744862791323 Perplexity: 1.0594762325286866 Shuffle loss: 0.3738556071740504
********************
Model: adjusted_hstm
Accuracy: 0.9089634451677517 AUC: 0.966676404317566 Log loss: nan NPMI: 0.16815321536481825 Perplexity: 200.6930908203125 Shuffle loss: 0.3828965427787798
********************
Model: hstm
Accuracy: 0.9080620931397096 AUC: 0.9647144466001647 Log loss: 0.26879522844143244 NPMI: 0.1952095178689265 Perplexity: 182.2464141845703 Shuffle loss: 0.4694512868754023
********************
Model: stm
Accuracy: 0.8126189283925889 AUC: 0.8857453251100165 Log loss: 0.5033352986976138 NPMI: 0.17158606143018001 Perplexity: 175.9210174560547 Shuffle loss: 0.0
