In [None]:
from typing import Union, List, Dict, Any, Sequence
from itertools import chain
import json
import matplotlib.pyplot as plt
import typer
import pandas as pd
import numpy as np
from typing import List, Optional, Dict, Union
from dataclasses_json import dataclass_json
import torch
from allennlp.data import TextFieldTensors

from dataclasses import dataclass
from dataclasses_json import dataclass_json


# Special token strings.
# NOTE(review): judging by the names these mark sequence start / sequence end / a masked
# position; none of them is referenced in the visible part of this notebook -- confirm usage.
START_TOKEN = "<START>"
END_TOKEN = "<END>"
MASK_TOKEN = "@@MASK@@"


@dataclass_json
@dataclass
class TransactionsData:
    """One example: a transaction sequence, its amounts, a label and an optional client id.

    The ``@dataclass_json`` decorator supplies the dict/JSON helpers; ``data_to_tensors``
    in this notebook calls the generated ``to_dict()`` on instances of this class.
    """
    # NOTE(review): presumably `transactions` and `amounts` are aligned element-wise -- confirm.
    transactions: List[int]
    amounts: List[float]
    label: int
    # Optional; defaults to None when not provided.
    client_id: Optional[int] = None


# Type alias for model inputs: field name -> AllenNLP TextFieldTensors or a plain torch tensor.
ModelsInput = Dict[str, Union[TextFieldTensors, torch.Tensor]]
import torch
import pickle
import jsonlines
import numpy as np
from allennlp.data import Batch
from allennlp.nn.util import move_to_device
from allennlp.data.dataset_readers import DatasetReader
from allennlp.data.vocabulary import Vocabulary
from sklearn.preprocessing import KBinsDiscretizer



def data_to_tensors(
    data: TransactionsData, reader: DatasetReader, vocab: Vocabulary, device: Union[torch.device, int] = -1
) -> ModelsInput:
    """Convert a single example into indexed tensors placed on ``device``.

    Args:
        data: Example to convert; its ``to_dict()`` output is passed as keyword
            arguments to ``reader.text_to_instance``.
        reader: Dataset reader used to build the instance.
        vocab: Vocabulary used to index the instance.
        device: Target device handed to ``move_to_device`` (default ``-1``).

    Returns:
        The tensor dict of a one-instance batch, moved to ``device``.
    """
    instance = reader.text_to_instance(**data.to_dict())
    batch = Batch([instance])
    batch.index_instances(vocab)
    tensors = batch.as_tensor_dict()
    return move_to_device(tensors, device)


def decode_indexes(
    indexes: torch.Tensor, vocab: Vocabulary, namespace="transactions", drop_start_end: bool = True,
) -> List[str]:
    """Map a sequence of vocabulary indexes back to their tokens.

    Args:
        indexes: Iterable of scalar tensors; ``.item()`` is called on each element.
        vocab: Vocabulary used for the index -> token lookup.
        namespace: Vocabulary namespace to look the indexes up in.
        drop_start_end: When true, the first and the last decoded token are
            removed from the result.

    Returns:
        The decoded tokens, optionally without the first and last one.
    """
    tokens = []
    for index in indexes:
        tokens.append(vocab.get_token_from_index(index.item(), namespace=namespace))
    return tokens[1:-1] if drop_start_end else tokens


def load_jsonlines(path: str) -> List[Dict[str, Any]]:
    """Read every record of the JSON Lines file at ``path`` into a list, in file order."""
    with jsonlines.open(path, "r") as reader:
        records = [record for record in reader]
    return records


def write_jsonlines(data: Sequence[Dict[str, Any]], path: str) -> None:
    """Write each item of ``data`` as one line of the JSON Lines file at ``path``.

    The file is opened in ``"w"`` mode, so existing content is overwritten.
    """
    with jsonlines.open(path, "w") as writer:
        # write_all emits the items one by one, in iteration order.
        writer.write_all(data)


def generate_transaction_amounts(total_amount: float, num_transactions: int, alpha: float = 1.0) -> List[float]:
    """Randomly split ``total_amount`` into ``num_transactions`` parts.

    The shares are one draw from a symmetric Dirichlet distribution with
    concentration ``alpha`` (via the global NumPy random state), scaled by
    ``total_amount``.

    Args:
        total_amount: Amount to split; must be positive (checked with ``assert``).
        num_transactions: Number of parts to produce.
        alpha: Concentration used for every component.

    Returns:
        A list of ``num_transactions`` floats that add up to ``total_amount``
        (up to floating-point error).
    """
    assert total_amount > 0
    concentration = np.ones(num_transactions) * alpha
    shares = np.random.dirichlet(concentration, size=1)
    # `size=1` yields a single row; scale it and return that row as a plain list.
    scaled_rows = (shares * total_amount).tolist()
    return scaled_rows[0]


def load_discretizer(discretizer_path: str) -> KBinsDiscretizer:
    """Load a pickled discretizer from ``discretizer_path``.

    Asserts that the loaded object's ``encode`` attribute equals ``"ordinal"``.

    Returns:
        The unpickled discretizer.
    """
    # SECURITY: pickle.load executes arbitrary code stored in the file -- only open trusted files.
    with open(discretizer_path, "rb") as handle:
        loaded: KBinsDiscretizer = pickle.load(handle)
    assert loaded.encode == "ordinal"
    return loaded


def transform_amounts(amounts: List[float], discretizer: KBinsDiscretizer) -> List[str]:
    """Discretize ``amounts`` and return the resulting bin numbers as strings.

    Every amount is wrapped into its own single-element row before being passed
    to ``discretizer.transform``; the result is flattened row by row and each
    value is converted float -> int -> str.
    """
    single_column = [[value] for value in amounts]
    binned_rows = discretizer.transform(single_column)
    return [str(int(bin_value)) for row in binned_rows for bin_value in row]


In [None]:
!pip3 install dataclasses-json

In [1]:
from matplotlib.ticker import MultipleLocator
def _unique_ratio(values):
    """Return the number of distinct items in ``values`` divided by ``len(values)``."""
    return len(set(values)) / len(values)


def _added_tokens_panel(ax, values, bins, title, title_size, label_size, tick_size, tick_step=None):
    """Draw one histogram panel of appended values on ``ax`` with the given font sizes."""
    ax.hist(sorted(values), bins)
    if tick_step is not None:
        # Fixed spacing of the major x ticks (only used for the token panels).
        ax.xaxis.set_major_locator(MultipleLocator(tick_step))
    ax.set_title(title, fontsize=title_size)
    ax.set_xlabel('added tokens', fontsize=label_size)
    ax.tick_params(axis='both', which='major', labelsize=tick_size)


def plot_statistics_concat(path, name: str):
    """Plot histograms of the last two tokens/amounts of each adversarial example.

    Reads the JSON Lines file at ``path`` and, for every record, takes the last
    and the second-to-last element of ``record["adversarial_data"]["transactions"]``
    (converted to ``int``) and of ``record["adversarial_data"]["amounts"]``.
    Two figures are saved to the current working directory:

    * ``Distribution_of_added_tokens_by_number_<name>.png`` -- four panels, one
      per (field, position) combination;
    * ``Distribution_of_added_tokens_<name>.png`` -- two panels with both
      positions pooled.

    Args:
        path: Path of the JSON Lines file with the attack output.
        name: Label appended to the figure titles and to the PNG file names.

    Returns:
        ``[mean per-position unique ratio of tokens, pooled unique ratio of tokens,
        mean per-position unique ratio of amounts, pooled unique ratio of amounts]``.

    Raises:
        ValueError: If the file contains no records.
        IndexError: If an adversarial sequence has fewer than two elements.
    """
    records = load_jsonlines(path)
    if not records:
        raise ValueError(f"no records found in {path!r}")

    adv_tokens = [record["adversarial_data"]["transactions"] for record in records]
    adv_amounts = [record["adversarial_data"]["amounts"] for record in records]
    y_add_1 = [int(tokens[-1]) for tokens in adv_tokens]
    y_add_2 = [int(tokens[-2]) for tokens in adv_tokens]
    a_add_1 = [amounts[-1] for amounts in adv_amounts]
    a_add_2 = [amounts[-2] for amounts in adv_amounts]

    # Figure 1: one panel per (field, position).
    fig, axes = plt.subplots(1, 4, figsize=(400, 100))
    fig.suptitle('Distribution of added tokens by number: ' + name, fontsize=320)
    _added_tokens_panel(axes[0], y_add_1, 20, 'transaction_add1', 200, 160, 120, tick_step=20)
    _added_tokens_panel(axes[1], y_add_2, 5, 'transaction_add2', 200, 160, 120, tick_step=20)
    _added_tokens_panel(axes[2], a_add_1, 20, 'amounts_add1', 200, 160, 120)
    _added_tokens_panel(axes[3], a_add_2, 20, 'amounts_add2', 200, 160, 120)
    fig.savefig('Distribution_of_added_tokens_by_number_' + name + '.png')

    # Figure 2: both positions pooled.
    fig, axes = plt.subplots(1, 2, figsize=(400, 100))
    fig.suptitle('Distribution of added tokens: ' + name, fontsize=320)
    _added_tokens_panel(axes[0], y_add_1 + y_add_2, 180, 'transaction_ins', 400, 320, 240, tick_step=5)
    _added_tokens_panel(axes[1], a_add_1 + a_add_2, 20, 'amounts_ins', 400, 320, 240)
    fig.savefig('Distribution_of_added_tokens_' + name + '.png')

    diversity = [
        (_unique_ratio(y_add_1) + _unique_ratio(y_add_2)) / 2,
        _unique_ratio(y_add_1 + y_add_2),
        (_unique_ratio(a_add_1) + _unique_ratio(a_add_2)) / 2,
        _unique_ratio(a_add_1 + a_add_2),
    ]
    return diversity

In [None]:
# Run plot_statistics_concat on this output file; the second argument is the label used in the
# figure titles and saved PNG names. The returned diversity list is kept in `diversity`.
diversity = plot_statistics_concat('gender_results_gru_con_sf/output.json', 'Concate_sf_gru_gender')

In [None]:
def plot_statistics_nonconcat1(path, name: str):
    """Collect the tokens that differ between original and adversarial sequences and plot them.

    For every record of the JSON Lines file at ``path`` the sequences
    ``record["data"]["transactions"]`` and
    ``record["adversarial_data_target"]["transactions"]`` are compared position
    by position (as ``int``). A 180-bin histogram of the changed-to tokens is
    saved as ``Distribution_of_inserted_tokens_<name>.png`` in the current
    working directory.

    Args:
        path: Path of the JSON Lines file with the attack output.
        name: Label appended to the figure title and to the PNG file name.

    Returns:
        A tuple ``(y_ins, y_old, y_num_ins)`` of equally long lists: the new
        token, the original token it replaced, and the position of the change.

    Raises:
        IndexError: If an adversarial sequence is longer than its original.
    """
    records = load_jsonlines(path)

    y_ins = []
    y_old = []
    y_num_ins = []
    for record in records:
        original = record["data"]["transactions"]
        adversarial = record["adversarial_data_target"]["transactions"]
        for position, adv_token in enumerate(adversarial):
            new_token = int(adv_token)
            old_token = int(original[position])
            if new_token != old_token:
                y_ins.append(new_token)
                y_old.append(old_token)
                y_num_ins.append(position)

    fig, ax = plt.subplots(figsize=(200, 100))
    fig.suptitle('Distribution_of_inserted_tokens: ' + name, fontsize=400)
    ax.hist(y_ins, 180)
    ax.set_xlabel('tokens', fontsize=320)
    ax.set_ylabel('quantity of insertions', fontsize=320)
    ax.tick_params(axis='both', which='major', labelsize=240)
    ax.xaxis.set_major_locator(MultipleLocator(10))
    fig.savefig('Distribution_of_inserted_tokens_' + name + '.png')
    return y_ins, y_old, y_num_ins
#plt.subplot(1, 2, 1)
def plot_statistics_nonconcat2(path, name, y_ins, y_old,  y_num_ins):
    """Scatter-plot each inserted token against the original token it replaced.

    The figure is saved as ``Inserted_token(original_token)_<name>.png`` in the
    current working directory.

    Args:
        path: Unused; kept so all ``plot_statistics_nonconcat*`` helpers share a signature.
        name: Label appended to the figure title and to the PNG file name.
        y_ins: Inserted (new) tokens, plotted on the y axis.
        y_old: Original tokens, plotted on the x axis.
        y_num_ins: Unused here.

    Returns:
        The string ``'Plot Inserted_token(original_token)'``.
    """
    plt.figure(figsize=(200, 100))
    plt.suptitle('Inserted token(original token): ' + name, fontsize=400)
    # x = original token, y = inserted token, matching the axis labels below.
    plt.plot(y_old, y_ins, 'ro', markersize=100)
    plt.ylabel('inserted tokens', fontsize=320)
    plt.xlabel('old tokens', fontsize=320)
    # Tick styling has to be applied before saving, otherwise the PNG does not contain it.
    plt.tick_params(axis='both', which='major', labelsize=240)
    plt.savefig('Inserted_token(original_token)_' + name + '.png')
    return 'Plot Inserted_token(original_token)'
def plot_statistics_nonconcat3(path, name, y_ins, y_old,  y_num_ins):
    """Scatter-plot inserted tokens against the position of the change and compute diversity.

    The figure is saved as ``Inserted_token(num_of_changed_token)_<name>.png``
    in the current working directory.

    Args:
        path: Unused; kept so all ``plot_statistics_nonconcat*`` helpers share a signature.
        name: Label appended to the figure title and to the PNG file name.
        y_ins: Inserted (new) tokens, plotted on the y axis.
        y_old: Unused here.
        y_num_ins: Positions of the changes, plotted on the x axis.

    Returns:
        ``[mean unique ratio of the even- and odd-indexed halves of y_ins,
        unique ratio of all of y_ins]``. An empty (half-)list contributes 0.0
        instead of raising ``ZeroDivisionError``.
    """
    plt.figure(figsize=(200, 100))
    plt.suptitle('Inserted token (num of changed token): ' + name, fontsize=400)
    plt.plot(y_num_ins, y_ins, 'ro', markersize=30)
    plt.ylabel('inserted tokens', fontsize=320)
    plt.xlabel('number', fontsize=320)
    # Tick styling has to be applied before saving, otherwise the PNG does not contain it.
    plt.tick_params(axis='both', which='major', labelsize=240)
    plt.savefig('Inserted_token(num_of_changed_token)_' + name + '.png')

    def unique_ratio(tokens):
        # Share of distinct tokens; 0.0 for an empty list.
        return len(set(tokens)) / len(tokens) if len(tokens) else 0.0

    # NOTE(review): the even/odd split presumably assumes two changed tokens per example -- confirm.
    even_slot, odd_slot = y_ins[::2], y_ins[1::2]
    diversity = [(unique_ratio(even_slot) + unique_ratio(odd_slot)) / 2, unique_ratio(y_ins)]
    return diversity
def plot_statistics_nonconcat4(path, name, y_ins, y_old,y_num_ins):
    """Save a 20-bin histogram of the positions at which tokens were changed.

    The figure is written to
    ``Quantity_Inserted_token(num_of_changed_token)_<name>.png`` in the current
    working directory. ``path``, ``y_ins`` and ``y_old`` are accepted but not used.

    Returns:
        The string ``'Plot Quantity_Inserted_token(num_of_changed_token)'``.
    """
    figure = plt.figure(figsize=(200, 100))
    figure.suptitle('Quantity of inserted_token (num of changed token): ' + name, fontsize=400)

    axes = figure.add_subplot(1, 1, 1)
    axes.tick_params(axis='both', which='major', labelsize=240)
    axes.xaxis.set_major_locator(MultipleLocator(10))
    axes.hist(y_num_ins, 20)
    axes.set_ylabel('quantity of inserted tokens', fontsize=320)
    axes.set_xlabel('number', fontsize=320)

    figure.savefig('Quantity_Inserted_token(num_of_changed_token)_' + name + '.png')
    return 'Plot Quantity_Inserted_token(num_of_changed_token)'

In [None]:
def plot_statistics_nonconcat(path, name):
    """Produce all four non-concat figures for ``path`` and return the diversity list.

    The changed tokens are collected by ``plot_statistics_nonconcat1``; the
    remaining helpers are then called in the order 3, 2, 4. Only the result of
    ``plot_statistics_nonconcat3`` is returned.
    """
    inserted, replaced, positions = plot_statistics_nonconcat1(path, name)
    shared_args = (path, name, inserted, replaced, positions)

    diversity = plot_statistics_nonconcat3(*shared_args)
    # The status strings returned by the next two helpers are not needed.
    plot_statistics_nonconcat2(*shared_args)
    plot_statistics_nonconcat4(*shared_args)
    return diversity

In [None]:
# Run the non-concat plots for 'gru_age.json' ('779' is the label used in titles and PNG names)
# and print the returned diversity list.
print(plot_statistics_nonconcat('gru_age.json','779'))

In [None]:
# Non-concat plots for the GRU age-model run; keeps the returned diversity list.
diversity_age_results_gru_fgsm = plot_statistics_nonconcat('age_results_gru_fgsm/output_substitute2.json', 'FGSM_age_results_gru')

In [None]:
# Non-concat plots for the LSTM age-model run; keeps the returned diversity list.
diversity_age_results_lstm_fgsm=plot_statistics_nonconcat('age_results_lstm_fgsm/output_substitute2.json', 'FGSM_age_results_lstm')

In [None]:
# Show the diversity list computed for the GRU run (the name must match the assignment above:
# `diversity_age_results_gru_fgsm`, with single underscores).
print(diversity_age_results_gru_fgsm)

In [None]:
# Non-concat plots for the RNN age-model run; keeps the returned diversity list.
diversity_age_results_rnn_fgsm=plot_statistics_nonconcat('age_results_rnn_fgsm/output_substitute2.json', 'FGSM_age_results_rnn')