In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score
from keras import layers, losses, models, datasets
import matplotlib.pyplot as plt

# Load the images; the class labels shipped with both datasets are discarded
(train_fashion, _), (test_fashion, _) = datasets.fashion_mnist.load_data()
(train_mnist, _), (test_mnist, _) = datasets.mnist.load_data()

# Normalise the pixel values (divide by 255) and convert to float32
train_fashion = train_fashion.astype("float32") / 255.0
test_fashion = test_fashion.astype("float32") / 255.0
train_mnist = train_mnist.astype("float32") / 255.0
test_mnist = test_mnist.astype("float32") / 255.0

# Stack the Fashion-MNIST and MNIST test images into a single evaluation set
test_data = np.concatenate((test_fashion, test_mnist), axis=0)

# Ground-truth labels: True for Fashion-MNIST samples, False for MNIST samples
fashion_labels = np.ones(test_fashion.shape[0], dtype=bool)
mnist_labels = np.zeros(test_mnist.shape[0], dtype=bool)
test_labels = np.concatenate((fashion_labels, mnist_labels), axis=0)

# Shuffle images and labels with one shared permutation. The generator is
# seeded so the ordering is identical on every "Restart & Run All".
SHUFFLE_SEED = 42
indices = np.random.default_rng(SHUFFLE_SEED).permutation(test_data.shape[0])

test_data = test_data[indices]
test_labels = test_labels[indices]


In [36]:
class AnomalyDetector(models.Model):
    """Convolutional autoencoder intended for reconstruction-based anomaly detection.

    The encoder maps a ``(28, 28, 1)`` image to a 64-dimensional code; the
    decoder maps that code back to a ``(28, 28, 1)`` image whose values are
    produced by a sigmoid activation.
    """

    def __init__(self):
        super().__init__()

        # Encoder: two strided-conv + batch-norm + max-pool stages, then a
        # dense bottleneck of 64 units.
        self.encoder = models.Sequential([
            layers.Input(shape=(28, 28, 1)),
            layers.Conv2D(16, (3, 3), strides=2, padding='same', activation='relu'),
            layers.BatchNormalization(),
            layers.MaxPool2D(),
            layers.Conv2D(32, (3, 3), strides=2, padding='same', activation='relu'),
            layers.BatchNormalization(),
            layers.MaxPool2D(),
            layers.Flatten(),
            layers.Dropout(0.15),  # Dropout to reduce overfitting
            layers.Dense(64, activation='relu')
        ])

        # Decoder: project the code to a 7x7x32 feature map, then upsample
        # twice by a factor of 2 (7 -> 14 -> 28) down to a single channel.
        self.decoder = models.Sequential([
            layers.Dense(7 * 7 * 32, activation='relu'),
            layers.Reshape((7, 7, 32)),
            layers.Conv2DTranspose(32, (3, 3), activation='relu', padding='same'),
            layers.BatchNormalization(),
            layers.UpSampling2D((2, 2)),
            layers.Conv2DTranspose(16, (3, 3), activation='relu', padding='same'),
            layers.BatchNormalization(),
            layers.UpSampling2D((2, 2)),
            layers.Conv2DTranspose(1, (3, 3), activation='sigmoid', padding='same')
        ])

    def call(self, x, training=None):
        """Encode ``x`` and return its reconstruction.

        Args:
            x: Batch of input images.
            training: Training-mode flag. It is forwarded to the encoder and
                decoder so that their Dropout and BatchNormalization layers
                can switch between training and inference behaviour. ``None``
                leaves the decision to Keras.

        Returns:
            The decoder output for the encoded batch.
        """
        encoded = self.encoder(x, training=training)
        decoded = self.decoder(encoded, training=training)
        return decoded

# Build the autoencoder and train it to reproduce its own input
# (the Fashion-MNIST training images serve as both input and target).
autoencoder = AnomalyDetector()
autoencoder.compile(loss='mse', optimizer='adam')

fit_kwargs = dict(epochs=25, batch_size=512, shuffle=True)
autoencoder.fit(x=train_fashion, y=train_fashion, **fit_kwargs)

Epoch 1/25
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 85ms/step - loss: 0.0948
Epoch 2/25
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 90ms/step - loss: 0.0263
Epoch 3/25
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 79ms/step - loss: 0.0213
Epoch 4/25
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 81ms/step - loss: 0.0192
Epoch 5/25
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 86ms/step - loss: 0.0178
Epoch 6/25
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 88ms/step - loss: 0.0167
Epoch 7/25
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 93ms/step - loss: 0.0160
Epoch 8/25
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 79ms/step - loss: 0.0155
Epoch 9/25
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 93ms/step - loss: 0.0150
Epoch 10/25
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s

<keras.src.callbacks.history.History at 0x223bcc700d0>

In [None]:
def get_loss(model, data):
    """Return the reconstruction error of ``model`` on ``data``.

    ``data`` and ``model.predict(data)`` are each reshaped to 2-D (leading
    axis kept, all remaining axes collapsed) and handed to
    ``losses.mean_squared_error`` as ``(y_true, y_pred)``.
    """
    restored = model.predict(data)

    def _as_rows(batch):
        # Keep the leading (sample) axis and flatten everything else
        return batch.reshape((batch.shape[0], -1))

    return losses.mean_squared_error(_as_rows(data), _as_rows(restored))

# Decision threshold: mean reconstruction error on the training images
# plus one standard deviation.
train_loss = get_loss(autoencoder, train_fashion)
threshold = np.mean(train_loss) + np.std(train_loss)
print(f"threshold = {threshold}")

# A test sample is predicted True (Fashion-MNIST) when its reconstruction
# error is below the threshold.
loss = get_loss(autoencoder, test_data)
predict = np.less(loss, threshold)

# Report each metric in turn against the ground-truth labels
for metric_name, metric_fn in (
    ("accuracy", accuracy_score),
    ("precision", precision_score),
    ("recall", recall_score),
):
    print(f"{metric_name} = {metric_fn(test_labels, predict)}")


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step
threshold = 0.018819449469447136
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step
accuracy = 0.89555
precision = 0.9151012698079547
recall = 0.872
