In [None]:
import pandas as pd
from pyod.models.ecod import ECOD
import numpy as np
from sklearn.preprocessing import MinMaxScaler
# Turn off pandas' chained-assignment (SettingWithCopy) check for the whole notebook.
pd.set_option('mode.chained_assignment', None)

In [None]:
# Read the customer metadata and discard every row that has a missing value.
customer_metadata_path = 'data/real_customer_metadata_openslava.ftr'
df_customer = pd.read_feather(customer_metadata_path).dropna()
df_customer

In [None]:
# Replace the two text-like columns with the integer codes of their pandas categories.
for categorical_column in ('CITY_ADDRESS', 'GENDER'):
    df_customer[categorical_column] = df_customer[categorical_column].astype('category').cat.codes
df_customer

In [None]:
# Read the transaction table and discard every row that has a missing value.
transactions_path = 'data/real_transactions_openslava.ftr'
df_transaction = pd.read_feather(transactions_path).dropna()
df_transaction

In [None]:
# The CURRENCY column is not used by anything below; remove it.
df_transaction = df_transaction.drop(columns=['CURRENCY'])

In [None]:
# Split DT_TXN into integer day and month columns by slicing its string form.
# The slices rely on the string rendering being exactly 'YYYY-MM-DD'
# (assumption inherited from the original slicing — TODO confirm against the data).
txn_date_text = df_transaction['DT_TXN'].astype(str)

# Everything after the 'YYYY-MM-' prefix is the day.
df_transaction['DT_TXN_DAY'] = txn_date_text.str[len('YYYY-MM-'):].astype(int)

# Between the 'YYYY-' prefix and the '-DD' suffix is the month.
df_transaction['DT_TXN_MONTH'] = txn_date_text.str[len('YYYY-'):-len('-DD')].astype(int)

# The raw date column is no longer needed once the two parts are extracted.
df_transaction = df_transaction.drop(columns=['DT_TXN'])
df_transaction

In [None]:
# Join customer attributes onto their transactions via the shared ID column
# (default inner join: only IDs present in both frames survive).
df_merged = df_customer.merge(df_transaction, on='ID')
df_merged

In [None]:
# Scale every merged column except ID into the range [0, 10], then fit the
# ECOD detector from pyod on the scaled matrix.
merged_without_id = df_merged.drop(columns=['ID'])

# copy=False lets the scaler work in place; np.array() hands it its own array.
scaler = MinMaxScaler(copy=False, feature_range=(0, 10))
global_scaled = scaler.fit_transform(np.array(merged_without_id))

ecod = ECOD(n_jobs=-1, contamination=0.0001)
ecod.fit(global_scaled)

In [None]:
def create_ratio_column(df, column_name):
    """Add a ``<column_name>_ratio`` column holding ``VL_TXN / <column_name>``.

    The frame is modified in place and nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``VL_TXN`` column and the column named by
        ``column_name``.
    column_name : str
        Name of the column used as the denominator; the new column is named
        ``f'{column_name}_ratio'``.

    Raises
    ------
    KeyError
        If ``VL_TXN`` or ``column_name`` is not a column of ``df``.
    """
    # Column-wise division instead of a row-by-row ``apply``: the values are
    # the same for numeric columns, without a Python call per row.
    df[f'{column_name}_ratio'] = df['VL_TXN'] / df[column_name]

In [None]:
# One frame per ID; groupby orders the groups by ID, so df_list[0] is the
# group with the smallest ID.
df_list = [customer_frame for _customer_id, customer_frame in df_merged.groupby('ID')]

# Columns removed from the training frame before the ratio features are built.
columns_to_drop = [
    'ID', 'GENDER', 'AGE_YEARS', 'CITY_ADDRESS', 'CNT_CARDS',
    'CNT_TXN', 'CNT_TXN_3M', 'CNT_TXN_6M', 'CNT_TXN_12M',
]
train_df = df_list[0].drop(columns=columns_to_drop)

# Each of these columns is replaced by VL_TXN divided by it.
columns_to_ratio = [
    'VL_CURR_BALANCE', 'VL_INCOME', 'VL_TXN_ALL',
    'VL_CURR_BALANCE_3M', 'VL_TXN_ALL_3M', 'VL_INCOME_3M',
    'VL_CURR_BALANCE_6M', 'VL_TXN_ALL_6M', 'VL_INCOME_6M',
    'VL_CURR_BALANCE_12M', 'VL_TXN_ALL_12M', 'VL_INCOME_12M',
]

# Append all ratio columns first, then remove the source columns in one go;
# the resulting column order matches dropping them one at a time.
for column_name in columns_to_ratio:
    create_ratio_column(train_df, column_name)
train_df = train_df.drop(columns=columns_to_ratio)
train_df

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
def _dense_branch(branch_input, hidden_units, n_outputs):
    """Stack relu Dense layers of the given widths on branch_input, then a sigmoid Dense layer of n_outputs units."""
    tensor = branch_input
    for units in hidden_units:
        tensor = layers.Dense(units, activation="relu")(tensor)
    return layers.Dense(n_outputs, activation='sigmoid')(tensor)


# Global branch: 16-8-16 bottleneck whose output is as wide as the scaled
# merged feature matrix.
n_global_features = global_scaled.shape[1]
global_input = keras.Input(shape=(n_global_features,))
global_out = _dense_branch(global_input, (16, 8, 16), n_global_features)

# Local branch: 8-4-8 bottleneck over the scaled ratio features of train_df.
# NOTE(review): this re-fits the same `scaler` object that produced
# `global_scaled`, so after this cell `scaler` holds the min/max of train_df
# only — confirm that no later step expects the global fit.
# NOTE(review): both branches end in a sigmoid (outputs in (0, 1)) while the
# scaler maps inputs to [0, 10] — confirm this is intended.
local_scaled = scaler.fit_transform(np.array(train_df))
n_local_features = local_scaled.shape[1]
local_input = keras.Input(shape=(n_local_features,))
local_out = _dense_branch(local_input, (8, 4, 8), n_local_features)

# Concatenate both branch outputs and reduce them to a single linear unit.
x = layers.concatenate([global_out, local_out])
ensamble = layers.Dense(1)(x)

model = keras.Model(inputs=[global_input, local_input], outputs=ensamble)

model.summary()