In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from sklearn.cluster import DBSCAN, KMeans
from sklearn.metrics import adjusted_rand_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

from sklearn.svm import OneClassSVM
from sklearn.ensemble import IsolationForest
from sklearn.covariance import EllipticEnvelope
from sklearn.neighbors import LocalOutlierFactor

from scipy.stats import kurtosis
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV

def evaluate_model(y_true, y_pred):
    """Print confusion-matrix counts and summary metrics for binary labels.

    The larger of the two labels is treated as the positive class (1 when the
    labels are 0/1).

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, same length as ``y_true``.

    Returns
    -------
    None
        The counts and metrics are printed, not returned.

    Raises
    ------
    ValueError
        If more than two distinct labels are present (the confusion matrix
        can no longer be unpacked into four cells).
    """
    # sklearn sizes the matrix from the labels it actually sees, so a run in
    # which only one class occurs (e.g. all-positive truth with all-positive
    # predictions) yields a 1x1 matrix and the four-way unpack below fails.
    # Fall back to the 0/1 label set in that case; with two observed labels
    # the call is equivalent to the default behaviour.
    observed = np.union1d(y_true, y_pred)
    labels = observed if observed.size >= 2 else [0, 1]
    tn, fp, fn, tp = (
        int(count)
        for count in confusion_matrix(y_true, y_pred, labels=labels).ravel()
    )
    print('True Positive:', tp)
    print('True Negatives:', tn)
    print('False Positive:', fp)
    print('False Negatives:', fn)

    def ratio(numerator, denominator):
        # An undefined metric (0/0) is reported as 0.0 instead of nan.
        return numerator / denominator if denominator else 0.0

    precision = ratio(tp, tp + fp)
    recall = ratio(tp, tp + fn)
    f1 = ratio(2 * precision * recall, precision + recall)
    accuracy = ratio(tp + tn, tp + tn + fp + fn)
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1:', f1)
    print('Accuracy:', accuracy)

In [2]:
def load_data(file_name, data_dir='dataset/LBNL_FDD_Dataset_SDAHU_all_3/LBNL_FDD_Dataset_SDAHU'):
    """Read one CSV file from the dataset directory into a DataFrame.

    Parameters
    ----------
    file_name : str
        Name of the CSV file inside ``data_dir``.
    data_dir : str or path-like, optional
        Directory that holds the CSV files. Defaults to the dataset location
        this notebook has always used, relative to the working directory.

    Returns
    -------
    pandas.DataFrame
        The parsed file, exactly as ``pd.read_csv`` returns it.
    """
    # Imported locally so this cell does not depend on another cell's imports.
    from pathlib import Path

    # Path joining avoids the missing/duplicated separator problems of '+'.
    return pd.read_csv(Path(data_dir) / file_name)

In [7]:
# Load correct_data.csv; this frame is used below to fit the scaler and the one-class SVM.
df_correct = load_data('correct_data.csv')

In [8]:
# Remove the 'Unnamed: 0' column (presumably the row index written out when the
# CSV was saved - confirm) so it is not used as a model feature.
# Rebinding instead of inplace=True, together with errors='ignore', makes this
# cell safe to re-run: the in-place version raised KeyError on a second run
# because the column was already gone.
df_correct = df_correct.drop(columns=['Unnamed: 0'], errors='ignore')

In [9]:
# Show the frame as the cell output to check which columns remain.
df_correct

Unnamed: 0,MA_TEMP,RA_TEMP,RF_WAT
0,66.374680,68.34175,-2.617164e-13
1,66.374680,68.34786,-2.616267e-13
2,66.374680,68.35378,-2.615462e-13
3,66.374626,68.35948,-2.614546e-13
4,66.374626,68.36498,-2.613691e-13
...,...,...,...
525535,69.257660,64.08044,-8.024693e-18
525536,69.257660,64.07638,-8.061065e-18
525537,69.257660,64.07231,-8.029253e-18
525538,69.257660,64.06822,-8.062031e-18


In [10]:
# Learn the per-column scaling statistics from df_correct and scale it in one
# step; the fitted scaler is reused later to transform the evaluation data.
scaler_correct = StandardScaler()
X_train = scaler_correct.fit_transform(df_correct.values)

In [13]:
# Adjust the nu parameter based on your data. Per the scikit-learn docs it is
# an upper bound on the fraction of training errors and a lower bound on the
# fraction of support vectors.
NU = 0.05
one_class_svm_model = OneClassSVM(nu=NU)
# Train on the scaled df_correct data only; the fitted estimator is the cell output.
one_class_svm_model.fit(X_train)

In [14]:
# Load damper_stuck_010_annual.csv, the recording the fitted model is scored on below.
df_damper_stuck_010 = load_data('damper_stuck_010_annual.csv')

In [15]:
# Column names of the training frame, used below to select the same columns
# from the evaluation frame.
features = df_correct.columns.tolist()

In [16]:
# Show the column names that will be selected from the evaluation frame.
features

['MA_TEMP', 'RA_TEMP', 'RF_WAT']

In [17]:
# Scale the evaluation rows with the statistics learned from df_correct, then
# let the fitted one-class SVM label each row.
X_damper_stuck_010 = scaler_correct.transform(df_damper_stuck_010[features].values)
predictions = one_class_svm_model.predict(X_damper_stuck_010)

In [18]:
# Show the predict() output before it is recoded to 0/1 in the following cell.
predictions

array([1, 1, 1, ..., 1, 1, 1], dtype=int64)

In [21]:
# Recode the labels in place: 1 becomes 0 and -1 becomes 1 (OneClassSVM.predict
# returns +1 for inliers and -1 for outliers per the scikit-learn docs), so that
# 1 matches the all-ones ground truth passed to evaluate_model.
# Order matters: the 1 -> 0 step must run first, otherwise the freshly written
# 1s would be zeroed as well. For the same reason this cell is NOT idempotent:
# running it a second time turns every label into 0.
predictions[predictions == 1] = 0
predictions[predictions == -1] = 1

In [19]:
# Ground truth for this file: one label per row, every row labelled 1.
n_rows = len(df_damper_stuck_010)
y_true = [1] * n_rows

In [20]:
def evaluate_model(y_true, y_pred, name=None):
    """Print confusion-matrix counts and summary metrics for binary labels.

    The larger of the two labels is treated as the positive class (1 when the
    labels are 0/1).

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, same length as ``y_true``.
    name : str, optional
        Label for the model being evaluated. When given it is printed as a
        heading before the metrics; when ``None`` nothing extra is printed.

    Returns
    -------
    None
        The counts and metrics are printed, not returned.

    Raises
    ------
    ValueError
        If more than two distinct labels are present (the confusion matrix
        can no longer be unpacked into four cells).
    """
    if name is not None:
        print('Model:', name)

    # sklearn builds the matrix from the labels it observes. When a single
    # class is all that occurs (e.g. every row is truly positive and every row
    # is predicted positive) the matrix is 1x1 and cannot be unpacked into
    # four counts, so pin the label set to 0/1 in that case. With two observed
    # labels the call is equivalent to the default behaviour.
    seen_labels = np.union1d(y_true, y_pred)
    label_set = seen_labels if seen_labels.size >= 2 else [0, 1]
    matrix = confusion_matrix(y_true, y_pred, labels=label_set)
    tn, fp, fn, tp = map(int, matrix.ravel())
    print('True Positive:', tp)
    print('True Negatives:', tn)
    print('False Positive:', fp)
    print('False Negatives:', fn)

    # Each metric falls back to 0.0 when its denominator is zero, instead of
    # silently producing nan (warnings are suppressed in this notebook).
    total = tp + tn + fp + fn
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0.0
    accuracy = (tp + tn) / total if total else 0.0
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1:', f1)
    print('Accuracy:', accuracy)
    

In [22]:
# Score the recoded predictions against the all-ones ground truth; the metrics are printed.
evaluate_model(y_true, predictions)

True Positive: 24464
True Negatives: 0
False Positive: 0
False Negatives: 501076
Precision: 1.0
Recall: 0.04655021501693496
F1: 0.0889593530228871
Accuracy: 0.04655021501693496
