In [1]:
import sys
sys.path.append('..')
import pandas as pd
from adat.models.classification_model import LogisticRegressionOnTfIdf
from sklearn.metrics import roc_auc_score, f1_score

In [2]:
# Read the train and test splits from ../data/kaggle_transactions_data.
train_data, test_data = map(
    pd.read_csv,
    (
        '../data/kaggle_transactions_data/train.csv',
        '../data/kaggle_transactions_data/test.csv',
    ),
)

In [3]:
# Pull the model inputs (`transactions` column) and targets (`label` column)
# out of each frame as plain arrays.
train_x, train_y = train_data['transactions'].values, train_data['label'].values
test_x, test_y = test_data['transactions'].values, test_data['label'].values

In [4]:
# Instantiate the classifier with its default constructor arguments.
model = LogisticRegressionOnTfIdf()

In [5]:
# Train on the training split. The call is the cell's last expression, so the
# value it returns is echoed as the cell output.
model.fit(train_x, train_y)



<adat.models.classification_model.LogisticRegressionOnTfIdf at 0x7f34b45fe278>

In [6]:
# Score the held-out test split. Later cells read `probs[:, 1]` and
# `probs.argmax(axis=1)`, so `predict` is expected to return a 2-D array with
# one column per class (presumably class probabilities -- confirm in adat).
probs = model.predict(test_x)

In [7]:
# ROC AUC on the test split, using column 1 of `probs` as the ranking score.
auc = roc_auc_score(test_y, probs[:, 1])
print(f'ROC AUC = {auc}')

ROC AUC = 0.7425382836750724


In [8]:
# Accuracy: share of test rows whose highest-scoring column index equals the label.
acc = (probs.argmax(axis=1) == test_y).mean()
print(f'Accuracy = {acc}')

Accuracy = 0.6852959808545084


## Naive baseline

In [9]:
import numpy as np

In [10]:
# Reference ROC AUC for an uninformative scorer. It is hard-coded rather than
# computed: a constant score for every row yields an AUC of 0.5.
naive_auc = 0.5
print(f'Naive ROC AUC = {naive_auc}')

Naive ROC AUC = 0.5


In [11]:
# Accuracy of the trivial classifier that predicts label 0 for every test row.
naive_acc = np.mean(test_y == 0)
print(f'Naive Accuracy = {naive_acc}')

Naive Accuracy = 0.554656155416344


# Adversarial MCMC

In [41]:
# Results written by the MCMC run: generated sequences alongside their originals.
output = pd.read_csv('../results_2/results.csv')
generated, original = output['generated_sequence'].values, output['original'].values

# Same layout, read from ../results_random (used below as the "Random" comparison).
output_rand = pd.read_csv('../results_random/results.csv')
generated_rand, original_rand = (
    output_rand['generated_sequence'].values,
    output_rand['original'].values,
)

In [42]:
# output.head()

In [43]:
def get_results(output, test_y, generated, original):
    """Print sequence-quality and classifier metrics for one results file.

    Depends on names defined in earlier notebook cells: ``model`` (the fitted
    classifier), ``naive_auc`` / ``naive_acc`` (baseline numbers) and
    ``roc_auc_score``.

    Parameters
    ----------
    output
        Frame with ``wer`` and ``bleu`` columns; the mean of each is printed.
    test_y
        Ground-truth labels. Only the first ``generated.shape[0]`` entries are
        used, i.e. row ``i`` of the results is assumed to belong to
        ``test_y[i]`` -- NOTE(review): confirm the results file keeps the test
        set's row order.
    generated
        Array of generated sequences, scored with ``model.predict``.
    original
        Array of the corresponding unmodified sequences, scored the same way.

    Returns
    -------
    None
        Everything is reported via ``print``.

    Raises
    ------
    ValueError
        If ``original`` and ``generated`` differ in length, or if ``test_y``
        has fewer entries than ``generated``.
    """
    num_rows = generated.shape[0]

    # Fail early with a readable message instead of an opaque length error
    # from deep inside the metric computation.
    if len(original) != num_rows:
        raise ValueError(
            f'generated has {num_rows} rows but original has {len(original)}'
        )
    if len(test_y) < num_rows:
        raise ValueError(
            f'need at least {num_rows} labels but test_y has only {len(test_y)}'
        )

    # Slice the labels once; both evaluations share the same targets.
    labels = test_y[:num_rows]

    adversarial_probs = model.predict(generated)

    print(f'Average WER = {output.wer.mean()}\nAverage BLEU = {output.bleu.mean()}\n')
    print(f'Naive ROC AUC = {naive_auc}\nNaive Accuracy = {naive_acc}\n')

    non_adversarial_probs = model.predict(original)
    _print_classifier_metrics('Non-Adversarial', labels, non_adversarial_probs, blank_line_after=True)
    _print_classifier_metrics('Adversarial', labels, adversarial_probs)


def _print_classifier_metrics(tag, labels, probs, blank_line_after=False):
    """Print ROC AUC (scored on column 1 of ``probs``) and argmax accuracy, prefixed by ``tag``."""
    auc = roc_auc_score(y_true=labels, y_score=probs[:, 1])
    print(f'{tag} ROC AUC = {auc}')
    acc = (labels == probs.argmax(axis=1)).mean()
    # The extra newline reproduces the blank separator line between sections.
    suffix = '\n' if blank_line_after else ''
    print(f'{tag} Accuracy = {acc}{suffix}')

In [44]:
# Report metrics for the sequences loaded from ../results_2 (the MCMC output).
print('>>> MCMC\n')
get_results(output, test_y, generated, original)

>>> MCMC

Average WER = 1.7441586491262517
Average BLEU = 0.8246803415438343

Naive ROC AUC = 0.5
Naive Accuracy = 0.554656155416344

Non-Adversarial ROC AUC = 0.7458822371923975
Non-Adversarial Accuracy = 0.6923227959945023

Adversarial ROC AUC = 0.6180508500027218
Adversarial Accuracy = 0.5941488317298252


In [45]:
# Report the same metrics for the sequences loaded from ../results_random.
print('>>> Random\n')
get_results(output_rand, test_y, generated_rand, original_rand)

>>> Random

Average WER = 1.519806763285024
Average BLEU = 0.8501234858180245

Naive ROC AUC = 0.5
Naive Accuracy = 0.554656155416344

Non-Adversarial ROC AUC = 0.7460974892957952
Non-Adversarial Accuracy = 0.6908212560386473

Adversarial ROC AUC = 0.6669048003787461
Adversarial Accuracy = 0.6260869565217392
