In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score
import torch
import torch.nn as nn
import torch.optim as optim

csv_file_path = 'C:/Users/Вася/Downloads/extracted_files/creditcard.csv'  
df = pd.read_csv(csv_file_path)
X = df.drop(columns=['Class']) 
y = df['Class']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

iso_forest = IsolationForest(contamination=0.001, random_state=42)
iso_forest.fit(X_train)
y_pred_iso = iso_forest.predict(X_test)

y_pred_iso = [1 if i == -1 else 0 for i in y_pred_iso]


print("Isolation Forest:")
print(classification_report(y_test, y_pred_iso))



Isolation Forest:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85307
           1       0.39      0.23      0.29       136

    accuracy                           1.00     85443
   macro avg       0.69      0.61      0.64     85443
weighted avg       1.00      1.00      1.00     85443



In [6]:
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score

iso_forest = IsolationForest(contamination=0.001, random_state=42)
iso_forest.fit(X_train)
y_pred_iso = iso_forest.predict(X_test)

y_pred_iso = [1 if i == -1 else 0 for i in y_pred_iso]


print("Isolation Forest:")
print(classification_report(y_test, y_pred_iso))


Isolation Forest:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85307
           1       0.39      0.23      0.29       136

    accuracy                           1.00     85443
   macro avg       0.69      0.61      0.64     85443
weighted avg       1.00      1.00      1.00     85443



In [7]:
from sklearn.neighbors import LocalOutlierFactor

lof = LocalOutlierFactor(n_neighbors=20, contamination=0.001)
y_pred_lof = lof.fit_predict(X_test)
y_pred_lof = [1 if i == -1 else 0 for i in y_pred_lof]

print("Local Outlier Factor:")
print(classification_report(y_test, y_pred_lof))



found 0 physical cores < 1
  File "C:\Users\Вася\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")


Local Outlier Factor:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85307
           1       0.00      0.00      0.00       136

    accuracy                           1.00     85443
   macro avg       0.50      0.50      0.50     85443
weighted avg       1.00      1.00      1.00     85443



In [13]:
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(X_train.shape[1], 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, X_train.shape[1]),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


model = Autoencoder()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 50
for epoch in range(num_epochs):
    output = model(torch.tensor(X_train, dtype=torch.float32))
    loss = criterion(output, torch.tensor(X_train, dtype=torch.float32))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [10/50], Loss: 1.2405
Epoch [20/50], Loss: 1.2198
Epoch [30/50], Loss: 1.1799
Epoch [40/50], Loss: 1.0907
Epoch [50/50], Loss: 1.0106


In [14]:
with torch.no_grad():
    output = model(torch.tensor(X_test, dtype=torch.float32))
    loss_per_sample = ((output - torch.tensor(X_test, dtype=torch.float32)) ** 2).mean(axis=1)

threshold = loss_per_sample.mean() + 3 * loss_per_sample.std()
y_pred_autoencoder = [1 if x > threshold else 0 for x in loss_per_sample]

print("Autoencoder:")
print(classification_report(y_test, y_pred_autoencoder))


Autoencoder:
              precision    recall  f1-score   support

           0       1.00      0.99      1.00     85307
           1       0.13      0.49      0.21       136

    accuracy                           0.99     85443
   macro avg       0.56      0.74      0.60     85443
weighted avg       1.00      0.99      1.00     85443

