In [2]:
import pandas as pd
import numpy as np

from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.svm import OneClassSVM

import matplotlib.pyplot as plt
import seaborn as sns

import sys
import os

# Go one directory up (into src/) and put it on sys.path so that the
# project-local `utils` module can be imported from this notebook.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)
from utils import find_best_threshold

In [3]:
# Read the preprocessed dataset (already scaled, judging by the file name).
df = pd.read_csv('../../data/processed/df_scaled.csv')

# `Class` is the label column; every remaining column is used as a feature.
y = df['Class']
X = df.drop(columns=['Class'])

In [4]:
# Stratified 80/20 split, so the test set keeps the class proportions of `y`.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Keep only the Class == 0 rows of the training portion for fitting;
# the test set is left mixed for evaluation.
is_normal = y_train_full == 0
X_train = X_train_full.loc[is_normal].copy()
y_train = y_train_full.loc[is_normal].copy()

print(f"Train size (Class=0 only): {X_train.shape}")
print(f"Test size (mixed): {X_test.shape}")

Train size (Class=0 only): (227451, 16)
Test size (mixed): (56962, 16)


In [5]:
# One-Class SVM with an RBF kernel, fitted on X_train (the Class == 0 rows only).
# NOTE(review): nu=0.017 and gamma='auto' are hard-coded here; how they were
# chosen is not shown in this notebook.
ocsvm = OneClassSVM(kernel='rbf', gamma='auto', nu=0.017).fit(X_train)

# Continuous scores for the test set. Per the scikit-learn OneClassSVM docs,
# positive values indicate inliers and negative values indicate outliers.
ocsvm_scores = ocsvm.decision_function(X_test)

In [6]:
# Evaluate the model at its built-in decision boundary.
# predict() yields -1 for outliers; recode so that 1 = flagged as anomaly and
# 0 = not flagged, which lines up with the 0/1 values of `Class`.
y_pred_test = ocsvm.predict(X_test)
y_pred_labels = np.where(y_pred_test == -1, 1, 0)

print(confusion_matrix(y_test, y_pred_labels))
print(classification_report(y_test, y_pred_labels, digits=4))

[[55833  1031]
 [   13    85]]
              precision    recall  f1-score   support

           0     0.9998    0.9819    0.9907     56864
           1     0.0762    0.8673    0.1400        98

    accuracy                         0.9817     56962
   macro avg     0.5380    0.9246    0.5654     56962
weighted avg     0.9982    0.9817    0.9893     56962



In [7]:
# Search for a decision threshold on the One-Class SVM scores using the
# project-local helper `find_best_threshold` (imported from `utils`), called
# with target_recall=0.75. The result is indexed below by the keys
# 'threshold', 'precision', 'recall' and 'f1'.
# NOTE(review): the helper's exact selection rule is not visible in this
# notebook — confirm it in utils.
# NOTE(review): the threshold is selected using y_test, the same labels the
# metrics are reported on, so these figures are likely optimistic; consider
# tuning on a separate validation split instead.
ocsvm_result = find_best_threshold(y_test, ocsvm_scores, target_recall=0.75)
print(f"Threshold: {ocsvm_result['threshold']:.5f}")
print(f"Precision: {ocsvm_result['precision']:.4f}, Recall: {ocsvm_result['recall']:.4f}, F1 Score: {ocsvm_result['f1']:.4f}")

Threshold: -39.14464
Precision: 0.5725, Recall: 0.7653, F1 Score: 0.6550


In [8]:
# Apply the tuned threshold: a score strictly below it is flagged as an
# anomaly (1); everything else is treated as normal (0).
decision_threshold = ocsvm_result['threshold']
ocsvm_preds = (ocsvm_scores < decision_threshold).astype(int)

print(confusion_matrix(y_test, ocsvm_preds))
print(classification_report(y_test, ocsvm_preds, digits=4))

[[56808    56]
 [   23    75]]
              precision    recall  f1-score   support

           0     0.9996    0.9990    0.9993     56864
           1     0.5725    0.7653    0.6550        98

    accuracy                         0.9986     56962
   macro avg     0.7861    0.8822    0.8272     56962
weighted avg     0.9989    0.9986    0.9987     56962

