## Библиотеки

In [91]:
import csv
import numpy as np
import pandas as pd
from sklearn import (model_selection,
                     preprocessing)
import keras

In [92]:
fname = "data/creditcard.csv"

In [93]:
df = pd.read_csv(fname)
display(df.head())
print(df.shape)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


(284807, 31)


## Keras

In [94]:
all_features = []
all_targets = []
with open(fname) as f:
    for i, line in enumerate(f):
        if i == 0:
            print("HEADER:", line.strip())
            continue 
        fields = line.strip().split(",")
        all_features.append([float(v.replace('"', "")) for v in fields[:-1]])
        all_targets.append([int(fields[-1].replace('"', ""))])
        if i == 1:
            print("EXAMPLE FEATURES:", all_features[-1])

features = np.array(all_features, dtype="float32")
targets = np.array(all_targets, dtype="uint8")
print("features.shape:", features.shape)
print("targets.shape:", targets.shape)

HEADER: "Time","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11","V12","V13","V14","V15","V16","V17","V18","V19","V20","V21","V22","V23","V24","V25","V26","V27","V28","Amount","Class"
EXAMPLE FEATURES: [0.0, -1.3598071336738, -0.0727811733098497, 2.53634673796914, 1.37815522427443, -0.338320769942518, 0.462387777762292, 0.239598554061257, 0.0986979012610507, 0.363786969611213, 0.0907941719789316, -0.551599533260813, -0.617800855762348, -0.991389847235408, -0.311169353699879, 1.46817697209427, -0.470400525259478, 0.207971241929242, 0.0257905801985591, 0.403992960255733, 0.251412098239705, -0.018306777944153, 0.277837575558899, -0.110473910188767, 0.0669280749146731, 0.128539358273528, -0.189114843888824, 0.133558376740387, -0.0210530534538215, 149.62]
features.shape: (284807, 30)
targets.shape: (284807, 1)


In [95]:
num_val_samples = int(len(features) * 0.2)
train_features = features[:-num_val_samples]
train_targets = targets[:-num_val_samples]
val_features = features[-num_val_samples:]
val_targets = targets[-num_val_samples:]

print("Number of training samples:", len(train_features))
print("Number of validation samples:", len(val_features))

Number of training samples: 227846
Number of validation samples: 56961


In [96]:
counts = np.bincount(train_targets[:, 0])
print(
    "Number of positive samples in training data: {} ({:.2f}% of total)".format(
        counts[1], 100 * float(counts[1]) / len(train_targets)
    )
)

weight_for_0 = 1.0 / counts[0]
weight_for_1 = 1.0 / counts[1]
print(np.round(weight_for_0, 5), np.round(weight_for_1, 5))

Number of positive samples in training data: 417 (0.18% of total)
0.0 0.0024


In [97]:
mean = np.mean(train_features, axis=0)
train_features -= mean
val_features -= mean
std = np.std(train_features, axis=0)
train_features /= std
val_features /= std

In [98]:
model = keras.Sequential(
    [
        keras.Input(shape=train_features.shape[1:]),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation="sigmoid"),
    ]
)
model.summary()

In [99]:
metrics = [
    keras.metrics.FalseNegatives(name="fn"),
    keras.metrics.FalsePositives(name="fp"),
    keras.metrics.TrueNegatives(name="tn"),
    keras.metrics.TruePositives(name="tp"),
    keras.metrics.Precision(name="precision"),
    keras.metrics.Recall(name="recall"),
]

model.compile(
    optimizer=keras.optimizers.Adam(1e-2), loss="binary_crossentropy", metrics=metrics
)

callbacks = [keras.callbacks.ModelCheckpoint("fraud_model_at_epoch_{epoch}.keras")]
class_weight = {0: weight_for_0, 1: weight_for_1}

model.fit(
    train_features,
    train_targets,
    batch_size=2048,
    epochs=30,
    verbose=2,
    # callbacks=callbacks,
    validation_data=(val_features, val_targets),
    class_weight=class_weight,
)

Epoch 1/30
112/112 - 3s - 26ms/step - fn: 50.0000 - fp: 22040.0000 - loss: 2.3456e-06 - precision: 0.0164 - recall: 0.8801 - tn: 205389.0000 - tp: 367.0000 - val_fn: 5.0000 - val_fp: 3961.0000 - val_loss: 0.2446 - val_precision: 0.0174 - val_recall: 0.9333 - val_tn: 52925.0000 - val_tp: 70.0000
Epoch 2/30
112/112 - 1s - 13ms/step - fn: 32.0000 - fp: 7623.0000 - loss: 1.4618e-06 - precision: 0.0481 - recall: 0.9233 - tn: 219806.0000 - tp: 385.0000 - val_fn: 10.0000 - val_fp: 473.0000 - val_loss: 0.0605 - val_precision: 0.1208 - val_recall: 0.8667 - val_tn: 56413.0000 - val_tp: 65.0000
Epoch 3/30
112/112 - 1s - 12ms/step - fn: 23.0000 - fp: 6427.0000 - loss: 9.8293e-07 - precision: 0.0578 - recall: 0.9448 - tn: 221002.0000 - tp: 394.0000 - val_fn: 10.0000 - val_fp: 779.0000 - val_loss: 0.0569 - val_precision: 0.0770 - val_recall: 0.8667 - val_tn: 56107.0000 - val_tp: 65.0000
Epoch 4/30
112/112 - 1s - 13ms/step - fn: 23.0000 - fp: 5757.0000 - loss: 1.0192e-06 - precision: 0.0641 - recall:

<keras.src.callbacks.history.History at 0x27648306060>

## Sklearn

In [117]:
X = np.array(df.drop('Class', axis=1), dtype='float32')
y = np.array(df[['Class']], dtype='uint8')
print(X.shape, y.shape)

(284807, 30) (284807, 1)


In [None]:
# X = df.drop('Class', axis=1)
# y = df[['Class']]

In [118]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=.2, shuffle=False, random_state=13)
print(X_train.shape, X_test.shape)

(227845, 30) (56962, 30)


In [121]:
pd.DataFrame(y_train).value_counts(normalize=True)*100

0
0    99.816981
1     0.183019
Name: proportion, dtype: float64

In [122]:
pd.DataFrame(y_train)[0].value_counts()[0]

227428

In [124]:
weight_0 = 1 / pd.DataFrame(y_train)[0].value_counts()[0]
weight_1 = 1 / pd.DataFrame(y_train)[0].value_counts()[1]
print(np.round(weight_0, 5), np.round(weight_1, 5))

0.0 0.0024


In [125]:
scaler = preprocessing.StandardScaler()
X_train_scaler = scaler.fit_transform(X_train)
X_test_scaler = scaler.transform(X_test)

In [126]:
X_train_scaler.shape[1:]

(30,)

In [127]:
model = keras.Sequential(
    [
        keras.Input(shape=X_train_scaler.shape[1:]),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation="sigmoid"),
    ]
)
model.summary()

In [128]:
metrics = [
    # keras.metrics.FalseNegatives(name="fn"),
    # keras.metrics.FalsePositives(name="fp"),
    # keras.metrics.TrueNegatives(name="tn"),
    # keras.metrics.TruePositives(name="tp"),
    keras.metrics.Precision(name="precision"),
    keras.metrics.Recall(name="recall"),
]

model.compile(
    optimizer=keras.optimizers.Adam(1e-2), loss="binary_crossentropy", metrics=metrics
)

callbacks = [keras.callbacks.ModelCheckpoint("fraud_model_at_epoch_{epoch}.keras")]
class_weight = {0: weight_0, 1: weight_1}

model.fit(
    X_train_scaler,
    y_train,
    batch_size=2048,
    epochs=30,
    verbose=2,
    # callbacks=callbacks,
    validation_data=(X_test_scaler, y_test),
    class_weight=class_weight,
)

Epoch 1/30
112/112 - 3s - 23ms/step - loss: 2.2646e-06 - precision: 0.0126 - recall: 0.8897 - val_loss: 0.1339 - val_precision: 0.0423 - val_recall: 0.8933
Epoch 2/30
112/112 - 1s - 13ms/step - loss: 1.3572e-06 - precision: 0.0532 - recall: 0.9161 - val_loss: 0.0373 - val_precision: 0.1680 - val_recall: 0.8667
Epoch 3/30
112/112 - 1s - 13ms/step - loss: 1.2315e-06 - precision: 0.0479 - recall: 0.9376 - val_loss: 0.0344 - val_precision: 0.1944 - val_recall: 0.8400
Epoch 4/30
112/112 - 1s - 13ms/step - loss: 1.1356e-06 - precision: 0.0516 - recall: 0.9257 - val_loss: 0.0501 - val_precision: 0.1262 - val_recall: 0.8533
Epoch 5/30
112/112 - 1s - 13ms/step - loss: 1.1230e-06 - precision: 0.0395 - recall: 0.9448 - val_loss: 0.0296 - val_precision: 0.1521 - val_recall: 0.8800
Epoch 6/30
112/112 - 1s - 13ms/step - loss: 8.0631e-07 - precision: 0.0565 - recall: 0.9568 - val_loss: 0.0365 - val_precision: 0.1231 - val_recall: 0.8667
Epoch 7/30
112/112 - 1s - 13ms/step - loss: 6.7351e-07 - precisi

<keras.src.callbacks.history.History at 0x2761eaad430>