In [1]:
import sys
sys.path.append('..')
import pandas as pd
from adat.models.classification_model import LogisticRegressionOnTfIdf
from sklearn.metrics import roc_auc_score, f1_score

In [2]:
# Read the train and test splits of the Kaggle transactions dataset.
train_path = '../data/kaggle_transactions_data/train.csv'
test_path = '../data/kaggle_transactions_data/test.csv'

train_data = pd.read_csv(train_path)
test_data = pd.read_csv(test_path)

In [3]:
# Pull the transaction sequences and their labels out of each split as arrays.
train_x, train_y = train_data['transactions'].values, train_data['label'].values
test_x, test_y = test_data['transactions'].values, test_data['label'].values

In [4]:
# Build the classifier with its default constructor arguments.
model = LogisticRegressionOnTfIdf()

In [5]:
# Fit the classifier on the training sequences and labels.
model.fit(train_x, train_y)



<adat.models.classification_model.LogisticRegressionOnTfIdf at 0x7f34b45fe278>

In [6]:
# Score the test sequences. The result is used below as a 2-D array:
# column 1 is fed to ROC AUC and argmax over axis 1 gives the predicted label.
probs = model.predict(test_x)

In [7]:
# ROC AUC on the test split, ranking examples by the second column of `probs`.
positive_scores = probs[:, 1]
auc = roc_auc_score(y_true=test_y, y_score=positive_scores)
print(f'ROC AUC = {auc}')

ROC AUC = 0.7425382836750724


In [8]:
# Accuracy on the test split: the predicted label is the highest-scoring column.
predicted_labels = probs.argmax(axis=1)
acc = (test_y == predicted_labels).mean()
print(f'Accuracy = {acc}')

Accuracy = 0.6852959808545084


## Naive baseline (always predict class 0)

In [9]:
import numpy as np

In [10]:
# Baseline ROC AUC is hard-coded rather than computed: 0.5 is the chance-level
# value of a scorer that cannot rank positives above negatives.
naive_auc = 0.5
print(f'Naive ROC AUC = {naive_auc}')

Naive ROC AUC = 0.5


In [11]:
# Baseline accuracy: the share of test labels matched by predicting 0 everywhere.
all_zero_prediction = np.zeros(test_y.shape[0])
naive_acc = (test_y == all_zero_prediction).mean()
print(f'Naive Accuracy = {naive_acc}')

Naive Accuracy = 0.554656155416344


# Adversarial MCMC sampler vs Random sampler

In [77]:
# Results written by the MCMC sampler: generated sequences and their originals.
output = pd.read_csv('../results_2/results.csv')
generated, original = output['generated_sequence'].values, output['original'].values

# Same columns from the random sampler's results file.
output_rand = pd.read_csv('../results_random/results.csv')
generated_rand, original_rand = (
    output_rand['generated_sequence'].values,
    output_rand['original'].values,
)

In [78]:
# Show the (rows, columns) shape of both results frames side by side.
output.shape, output_rand.shape

((10148, 11), (14207, 11))

In [82]:
# Peek at the first rows of the random-sampler results.
output_rand.head()

Unnamed: 0,generated_sequence,prob,bleu,prob_diff,prob_drop,bleu_diff,bleu_drop,acceptance_probability,seq_len,original,wer
0,id_142 id_42 id_276 id_52 id_148 id_116 id_30 ...,0.649739,0.820777,-0.165471,1.254673,0.0,1.0,0.0,14,id_142 id_42 id_276 id_52 id_148 id_116 id_30 ...,2
1,id_23 id_23 id_18 id_26 id_22 id_22 id_35 id_19,0.395504,1.0,0.0,1.0,0.0,1.0,0.0,8,id_23 id_23 id_18 id_26 id_22 id_22 id_35 id_19,0
2,id_20 id_1 id_1 id_119 id_43 id_382 id_141 id_...,0.599958,0.866025,-0.095937,1.159907,0.0,1.0,0.0,11,id_20 id_1 id_1 id_119 id_43 id_1337 id_141 id...,1
3,id_1 id_12 id_1 id_2 id_1 id_93 id_3 id_46 id_...,0.454211,1.0,0.0,1.0,0.0,1.0,0.0,13,id_1 id_12 id_1 id_2 id_1 id_93 id_3 id_46 id_...,0
4,id_1 id_8 id_56 id_56 id_30 id_14 id_349 id_8 ...,0.512733,1.0,0.0,1.0,0.0,1.0,0.0,14,id_1 id_8 id_56 id_56 id_30 id_14 id_349 id_8 ...,0


In [79]:
def _print_classifier_metrics(tag, labels, probs):
    """Print ROC AUC and accuracy of `probs` against `labels`, prefixed by `tag`."""
    auc = roc_auc_score(y_true=labels, y_score=probs[:, 1])
    print(f'{tag} ROC AUC = {auc}')
    acc = (labels == probs.argmax(axis=1)).mean()
    print(f'{tag} Accuracy = {acc}')


def get_results(output, test_y, generated, original):
    """Print classifier metrics on generated sequences next to their originals.

    Relies on names defined in earlier notebook cells: `model` (the fitted
    classifier), `naive_auc` / `naive_acc` (baseline numbers) and
    `roc_auc_score`.

    Args:
        output: results DataFrame; its `wer` and `bleu` columns are averaged.
        test_y: ground-truth labels. Only the first `generated.shape[0]`
            entries are used.
            NOTE(review): this assumes row i of the results file corresponds
            to row i of the test set -- confirm against the code that wrote
            the results file.
        generated: array of generated sequences to score with `model`.
        original: array of the sequences the generated ones were derived from.

    Returns:
        None. All results are reported with `print`.

    Raises:
        ValueError: if `test_y` holds fewer labels than there are generated
            sequences (previously this surfaced as a length-mismatch error
            from inside `roc_auc_score`).
    """
    num_generated_so_far = generated.shape[0]
    if len(test_y) < num_generated_so_far:
        raise ValueError(
            f'test_y has {len(test_y)} labels but {num_generated_so_far} '
            'generated sequences were given'
        )
    # Labels for the rows that have been generated so far.
    labels = test_y[:num_generated_so_far]

    adversarial_probs = model.predict(generated)

    print(f'Average WER = {output.wer.mean()}\nAverage BLEU = {output.bleu.mean()}\n')
    print(f'Naive ROC AUC = {naive_auc}\nNaive Accuracy = {naive_acc}\n')

    non_adversarial_probs = model.predict(original)
    _print_classifier_metrics('Non-Adversarial', labels, non_adversarial_probs)
    print()  # blank line between the two metric groups, as before

    _print_classifier_metrics('Adversarial', labels, adversarial_probs)

In [80]:
# Report metrics for the sequences loaded from the MCMC results file.
print('>>> MCMC\n')
get_results(output, test_y, generated, original)

>>> MCMC

Average WER = 1.7852778872684272
Average BLEU = 0.822481190107604

Naive ROC AUC = 0.5
Naive Accuracy = 0.554656155416344

Non-Adversarial ROC AUC = 0.7453129418126201
Non-Adversarial Accuracy = 0.6877217185652346

Adversarial ROC AUC = 0.616949528408533
Adversarial Accuracy = 0.5910524241229799


In [81]:
# Report the same metrics for the sequences from the random-sampler results file.
print('>>> Random\n')
get_results(output_rand, test_y, generated_rand, original_rand)

>>> Random

Average WER = 1.4975012317871472
Average BLEU = 0.8514050227449653

Naive ROC AUC = 0.5
Naive Accuracy = 0.554656155416344

Non-Adversarial ROC AUC = 0.7425382836750724
Non-Adversarial Accuracy = 0.6852959808545084

Adversarial ROC AUC = 0.6678953766750988
Adversarial Accuracy = 0.6265221369747308
