In [49]:
from tensorflow.keras.models import Model
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

In [54]:
class AnomalyDetector(Model):
  def __init__(self):
    super(AnomalyDetector, self).__init__()
    self.encoder = Sequential([
      Dense(32, activation="relu"),
      Dense(16, activation="relu"),
      Dense(8, activation="relu")])

    self.decoder = Sequential([
      Dense(16, activation="relu"),
      Dense(32, activation="relu"),
     Dense(4, activation="relu")])

  def call(self, x):
    encoded = self.encoder(x)
    decoded = self.decoder(encoded)
    return decoded

autoencoder = AnomalyDetector()

In [28]:
df = pd.read_csv('../datasets/auto_encoder/bank_transactions_data_2.csv')

In [55]:
autoencoder.compile(optimizer='adam', loss='mae',run_eagerly=True)

In [16]:
df.columns

Index(['TransactionID', 'AccountID', 'TransactionAmount', 'TransactionDate',
       'TransactionType', 'Location', 'DeviceID', 'IP Address', 'MerchantID',
       'Channel', 'CustomerAge', 'CustomerOccupation', 'TransactionDuration',
       'LoginAttempts', 'AccountBalance', 'PreviousTransactionDate'],
      dtype='object')

In [17]:
df.head()

Unnamed: 0,TransactionID,AccountID,TransactionAmount,TransactionDate,TransactionType,Location,DeviceID,IP Address,MerchantID,Channel,CustomerAge,CustomerOccupation,TransactionDuration,LoginAttempts,AccountBalance,PreviousTransactionDate
0,TX000001,AC00128,14.09,2023-04-11 16:29:14,Debit,San Diego,D000380,162.198.218.92,M015,ATM,70,Doctor,81,1,5112.21,2024-11-04 08:08:08
1,TX000002,AC00455,376.24,2023-06-27 16:44:19,Debit,Houston,D000051,13.149.61.4,M052,ATM,68,Doctor,141,1,13758.91,2024-11-04 08:09:35
2,TX000003,AC00019,126.29,2023-07-10 18:16:08,Debit,Mesa,D000235,215.97.143.157,M009,Online,19,Student,56,1,1122.35,2024-11-04 08:07:04
3,TX000004,AC00070,184.5,2023-05-05 16:32:11,Debit,Raleigh,D000187,200.13.225.150,M002,Online,26,Student,25,1,8569.06,2024-11-04 08:09:06
4,TX000005,AC00411,13.45,2023-10-16 17:51:24,Credit,Atlanta,D000308,65.164.3.100,M091,Online,26,Student,198,1,7429.4,2024-11-04 08:06:39


In [56]:
df = df[['TransactionAmount','TransactionDuration',
'LoginAttempts','AccountBalance']]

In [29]:
df = df[['TransactionAmount', 'TransactionDate',
'IP Address','TransactionDuration',
'LoginAttempts','AccountBalance',
'PreviousTransactionDate']]

In [None]:
# df['IP Address'] = df['IP Address'].apply(lambda x: int(x.replace('.','')))
# df['TransactionDate'] = df['TransactionDate'].apply(lambda x: int(x.replace('-','').replace(' ','').replace(':','')))
# df['PreviousTransactionDate'] = df['PreviousTransactionDate'].apply(lambda x: int(x.replace('-','').replace(' ','').replace(':','')))

In [31]:
df.head()

Unnamed: 0,TransactionAmount,TransactionDate,IP Address,TransactionDuration,LoginAttempts,AccountBalance,PreviousTransactionDate
0,14.09,20230411162914,16219821892,81,1,5112.21,20241104080808
1,376.24,20230627164419,13149614,141,1,13758.91,20241104080935
2,126.29,20230710181608,21597143157,56,1,1122.35,20241104080704
3,184.5,20230505163211,20013225150,25,1,8569.06,20241104080906
4,13.45,20231016175124,651643100,198,1,7429.4,20241104080639


In [57]:
df_train,df_test = train_test_split(df,test_size=0.2,random_state=0)

In [58]:
X = df_train.values

In [59]:
X_test = df_test.values

In [60]:
history = autoencoder.fit(X, X,
          epochs=20, 
          batch_size=10,
          validation_data=(X_test,X_test),
          shuffle=True)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [61]:
reconstruction = autoencoder.predict(X)
reconstruction_error = np.mean(np.abs(X - reconstruction), axis=1)
# Calcular o limiar de erro
limiar_erro = np.mean(reconstruction_error) + 2 * np.std(reconstruction_error)



In [67]:
df_train['label'] = reconstruction_error > limiar_erro

In [70]:
len(df)

2512

In [68]:
df_train.loc[df_train['label'] == True]

Unnamed: 0,TransactionAmount,TransactionDuration,LoginAttempts,AccountBalance,label
868,83.39,158,1,13182.11,True
1726,85.73,266,1,14928.35,True
971,194.37,161,1,14395.88,True
1894,447.83,88,1,13273.08,True
1886,72.50,30,1,12723.62,True
...,...,...,...,...,...
2146,97.63,127,1,12467.31,True
1167,838.57,273,1,14576.47,True
1641,50.52,293,1,12815.24,True
2076,325.03,106,1,13521.76,True


In [73]:
reconstruction

array([[ 483.40167 ,   57.212288,    0.      , 5862.2803  ],
       [ 467.25104 ,  195.80338 ,    0.      , 7070.905   ],
       [   0.      ,   72.59162 ,    0.      , 6990.5776  ],
       ...,
       [ 467.04727 ,   59.577713,    0.      , 2154.0254  ],
       [   0.      ,  192.85632 ,    0.      , 7600.753   ],
       [  74.35798 ,  121.34133 ,    0.      , 1594.8394  ]],
      dtype=float32)