In [1]:
#load the saved model and check predictions
from keras.models import Model, load_model
from keras.layers import Input
import numpy as np
import csv

Using TensorFlow backend.


In [2]:
# Build input_texts: one space-joined string per data row of the CSV.
# newline='' is what the csv module asks for, so that the reader itself
# handles line endings (including newlines inside quoted fields).
with open('with_date(copy).csv', newline='') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    # Skip the header through the reader (so a quoted, multi-line header is
    # consumed as one record) and tolerate a completely empty file.
    next(readCSV, None)
    input_texts = [" ".join(row) for row in readCSV]

In [3]:
# Build target_texts: wrap every line of description.txt in the
# 'START_' / '_END' sentinel tokens used by the decoder.
with open('description.txt', mode='r', encoding='utf-8') as f:
    lines = f.read().split('\n')
# Note: split('\n') yields a trailing '' when the file ends with a newline,
# so such a file produces one extra, empty description at the end.
target_texts = ['START_ ' + line + ' _END' for line in lines]

In [4]:
# Collect the vocabularies over the aligned (input, target) pairs; zip()
# stops at the shorter list, so unpaired texts contribute no words.
input_words = set()
target_words = set()
for source_text, description in zip(input_texts, target_texts):
    input_words.update(source_text.split(' '))
    target_words.update(description.split(' '))

# Sorting fixes the word order, and therefore the indices derived from it.
input_words = sorted(input_words)
target_words = sorted(target_words)
num_encoder_tokens = len(input_words)
num_decoder_tokens = len(target_words)

# Sequence lengths in space-separated tokens. These are computed over every
# text, not only over the zipped pairs.
input_text_sizes = [len(text.split(' ')) for text in input_texts]
target_text_sizes = [len(text.split(' ')) for text in target_texts]
max_encoder_seq_length = max(input_text_sizes)
max_decoder_seq_length = max(target_text_sizes)

In [5]:
# Map every vocabulary word to its position in the sorted word list.
input_token_index = {
    word: position for position, word in enumerate(input_words)}
target_token_index = {
    word: position for position, word in enumerate(target_words)}

In [6]:
# Encode each input text as a row of vocabulary ids; positions past the end
# of a text keep their initial value of zero.
encoder_input_data = np.zeros(
    (len(input_texts), max_encoder_seq_length), dtype='float32')
# Iterating over zip() means rows beyond the end of target_texts (if any)
# are never filled in and stay all-zero.
for row, (source_text, _target_text) in enumerate(zip(input_texts, target_texts)):
    for column, token in enumerate(source_text.split(' ')):
        encoder_input_data[row, column] = input_token_index[token]

In [15]:
# Restore the saved model from disk and print its layer table.
MODEL_PATH = 's2s.h5'
model = load_model(MODEL_PATH)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 50)     773550      input_1[0][0]                    
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 50)     774350      input_2[0][0]                    
__________________________________________________________________________________________________
lstm_1 (LS

In [16]:
# Inference-time encoder: maps an input sequence to the two state tensors
# that will seed the decoder.
encoder_inputs = model.input[0]
# layers[4] is listed as lstm_1 in the model summary; its output is unpacked
# as (sequence output, hidden state, cell state).
encoder_lstm_outputs = model.layers[4].output
encoder_outputs, state_h_enc, state_c_enc = encoder_lstm_outputs
encoder_states = [state_h_enc, state_c_enc]
encoder_model = Model(encoder_inputs, encoder_states)

In [17]:
# Rebuild a stand-alone decoder from the loaded model's layers so that it can
# be stepped one token at a time, with its LSTM state passed in and out
# explicitly.
decoder_inputs = model.input[1]
dex = model.layers[3]            # embedding_2 in the model summary
decoder_lstm = model.layers[5]   # presumably the decoder LSTM -- verify against the full summary
decoder_dense = model.layers[6]  # presumably the output projection -- verify against the full summary

# Size the state inputs from the LSTM itself instead of repeating the
# hard-coded 50, so this cell keeps working if the model is retrained with a
# different number of units.
latent_dim = decoder_lstm.units
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

final_dex2 = dex(decoder_inputs)

decoder_outputs2, state_h2, state_c2 = decoder_lstm(
    final_dex2, initial_state=decoder_states_inputs)
decoder_states2 = [state_h2, state_c2]
decoder_outputs2 = decoder_dense(decoder_outputs2)
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs2] + decoder_states2)

# Reverse lookups (index -> word) used to turn predicted token ids back into
# readable text. The tokens are whole words; the "char" in the variable names
# is historical and is kept because other cells refer to these names.
reverse_input_char_index = {
    index: word for word, index in input_token_index.items()}
reverse_target_char_index = {
    index: word for word, index in target_token_index.items()}

In [18]:
def decode_sequence(input_seq, max_chars=200):
    """Greedily decode one encoded input sequence into a sentence.

    Relies on the notebook-level ``encoder_model``, ``decoder_model``,
    ``target_token_index`` and ``reverse_target_char_index``.

    Args:
        input_seq: Encoder input for a single sample (batch of size 1), as
            passed to ``encoder_model.predict``.
        max_chars: Decoding stops once the decoded text (each word preceded
            by one space) is longer than this many characters. Defaults to
            200, the limit that used to be hard-coded.

    Returns:
        The decoded words, each preceded by a single space. The terminating
        ``'_END'`` token is included when the decoder produced it.

    Raises:
        KeyError: If ``'START_'`` is missing from ``target_token_index`` or a
            predicted index is missing from ``reverse_target_char_index``.
    """
    # Encode the input; the result seeds the decoder's [h, c] state.
    states_value = encoder_model.predict(input_seq)

    # The decoder is fed exactly one token id per step, starting with the
    # 'START_' sentinel.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = target_token_index['START_']

    decoded_words = []
    decoded_chars = 0  # length of the text built so far, separators included
    while True:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value)

        # Greedy choice: take the highest-scoring word at the last time step.
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_word = reverse_target_char_index[sampled_token_index]
        decoded_words.append(sampled_word)
        decoded_chars += 1 + len(sampled_word)

        # Stop on the end sentinel or once the text exceeds the length cap.
        if sampled_word == '_END' or decoded_chars > max_chars:
            break

        # Feed the chosen word and the updated state into the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]

    return ''.join(' ' + word for word in decoded_words)

In [20]:
# Decode a fixed window of samples and print each input next to the
# sentence the model generates for it.
SAMPLE_START, SAMPLE_STOP = 1000, 1050
for seq_index in range(SAMPLE_START, SAMPLE_STOP):
    window = slice(seq_index, seq_index + 1)  # keeps the batch axis (size 1)
    input_seq = encoder_input_data[window]
    decoded_sentence = decode_sequence(input_seq)
    print('-')
    print('Input sentence:', input_texts[window])
    print('Decoded sentence:', decoded_sentence)

-
Input sentence: ['1000 1001 5121 Credit 811 0 India US USD 0.12 2009-05-24']
Decoded sentence:  Transaction no 1001 is a credit transaction,performed on 2009-05-24, from India to US worth 811 dollars, which is suspicious with 0.12 probability. _END
-
Input sentence: ['1001 1002 5140 Credit 6564 1 Pakistan Germany Euro 0.7 2011-01-13']
Decoded sentence:  Transaction no 1002 is a credit transaction,performed on 2011-01-13, from Pakistan to Germany worth 6564 euros, which is not suspicious with 0.7 probability. _END
-
Input sentence: ['1002 1003 5470 Credit 606 0 India India INR 0.95 2021-07-17']
Decoded sentence:  Transaction no 1003 is a credit transaction,performed on 2021-07-17, within India worth 606 ruppees, which is suspicious with 0.95 probability. _END
-
Input sentence: ['1003 1004 5058 Debit 671 0 Pakistan Japan Euro 0.38 2017-07-06']
Decoded sentence:  Transaction no 1004 is a debit transaction,performed on 2017-07-06, from Pakistan to Japan worth 671 euros, which is suspicio

-
Input sentence: ['1032 1033 5387 Debit 1121 1 India India INR 0.24 2020-06-14']
Decoded sentence:  Transaction no 1033 is a debit transaction,performed on 2020-06-14, within India worth 1121 ruppees, which is not suspicious with 0.24 probability. _END
-
Input sentence: ['1033 1034 5383 Debit 2728 0 Pakistan Japan Euro 0.82 2022-02-03']
Decoded sentence:  Transaction no 1034 is a debit transaction,performed on 2022-02-03, from Pakistan to Japan worth 2728 euros, which is suspicious with 0.82 probability. _END
-
Input sentence: ['1034 1035 5264 Credit 4746 0 US India INR 0.2 2019-05-26']
Decoded sentence:  Transaction no 1035 is a credit transaction,performed on 2019-05-26, from US to India worth 4746 ruppees, which is suspicious with 0.2 probability. _END
-
Input sentence: ['1035 1036 5141 Debit 9013 1 India India INR 0.5 2023-09-28']
Decoded sentence:  Transaction no 1036 is a debit transaction,performed on 2023-09-28, within India worth 9013 ruppees, which is not suspicious with 0.5