In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import LocalOutlierFactor
from sklearn.metrics import classification_report, confusion_matrix


# Feature columns f6 .. f16; these are the inputs to the scaler and to LOF.
features = [f'f{i}' for i in range(6, 17)]


def _with_product_label(frame):
    """Return a copy of `frame` with an integer `label` column.

    label is 1 where f7 * f8 > 0.1 and 0 otherwise (a NaN product compares
    False and therefore yields 0).
    """
    return frame.assign(label=frame['f7'].mul(frame['f8']).gt(0.1).astype(int))


# Stack the three training tubes, attach the label, then drop every row that
# is missing any feature value.
train_files = ['tube1.csv', 'tube2.csv', 'tube3.csv']
train_df = _with_product_label(
    pd.concat([pd.read_csv(csv_path) for csv_path in train_files], ignore_index=True)
).dropna(subset=features)
X_train = train_df[features].to_numpy()

# The scaler statistics come from the training tubes only.
scaler = StandardScaler().fit(X_train)

# NOTE(review): LOF below is built with novelty=False and fit_predict() is
# called on each test file's own rows, so the training tubes influence only
# the scaler, and train_df['label'] is not read anywhere in this cell.
# If the intent was "fit on tubes 1-3, score tubes 4-5", that needs
# novelty=True with fit() on training rows and predict() on test rows --
# TODO confirm intent before changing, since it alters the reported metrics.
test_files = ['tube4.csv', 'tube5.csv']
for file in test_files:
    print(f"\n🔍 Testing {file}")

    # Same labelling and NaN filtering as for the training frame.
    df = _with_product_label(pd.read_csv(file)).dropna(subset=features)
    y_true = df['label'].to_numpy()
    X_test = df[features].to_numpy()
    X_test_scaled = scaler.transform(X_test)

    # A fresh LOF model is fitted per test file on that file's scaled rows.
    lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05, novelty=False)
    y_pred_lof = lof.fit_predict(X_test_scaled)

    # Rows LOF marks as -1 become predicted class 1; all others become 0.
    y_pred = np.where(y_pred_lof == -1, 1, 0)

    print(classification_report(y_true, y_pred, digits=4))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))



🔍 Testing tube4.csv
              precision    recall  f1-score   support

           0     0.5684    0.9518    0.7118     33428
           1     0.4528    0.0523    0.0938     25492

    accuracy                         0.5626     58920
   macro avg     0.5106    0.5021    0.4028     58920
weighted avg     0.5184    0.5626    0.4444     58920

Confusion Matrix:
 [[31816  1612]
 [24158  1334]]

🔍 Testing tube5.csv
              precision    recall  f1-score   support

           0     0.5499    0.9540    0.6977    183309
           1     0.4964    0.0549    0.0988    151433

    accuracy                         0.5472    334742
   macro avg     0.5231    0.5044    0.3982    334742
weighted avg     0.5257    0.5472    0.4268    334742

Confusion Matrix:
 [[174879   8430]
 [143125   8308]]


In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import LocalOutlierFactor
from sklearn.metrics import classification_report, confusion_matrix

# Feature columns f6 .. f16; these are the inputs to the scaler and to LOF.
features = [f'f{idx}' for idx in range(6, 17)]

# f7 values (after rounding to 6 decimals) that map to label 0; any other
# f7 value, including NaN, maps to label 1.
f7_label0_values = [0.857143, 0.714286]

# Stack the three training tubes, attach the label, then drop every row that
# is missing any feature value.
train_files = ['tube1.csv', 'tube2.csv', 'tube3.csv']
train_df = pd.concat([pd.read_csv(csv_path) for csv_path in train_files], ignore_index=True)
f7_rounded = train_df['f7'].round(6)
train_df = train_df.assign(
    label=np.where(f7_rounded.isin(f7_label0_values), 0, 1)
).dropna(subset=features)
X_train = train_df[features].to_numpy()

# The scaler statistics come from the training tubes only.
scaler = StandardScaler().fit(X_train)

# NOTE(review): LOF below is built with novelty=False and fit_predict() is
# called on each test file's own rows, so the training tubes influence only
# the scaler, and train_df['label'] is not read anywhere in this cell.
# If the intent was "fit on tubes 1-3, score tubes 4-5", that needs
# novelty=True with fit() on training rows and predict() on test rows --
# TODO confirm intent before changing, since it alters the reported metrics.
test_files = ['tube4.csv', 'tube5.csv']
for file in test_files:
    print(f"\n🔍 Testing {file}")

    # Same labelling and NaN filtering as for the training frame.
    df = pd.read_csv(file)
    f7_rounded = df['f7'].round(6)
    df = df.assign(
        label=np.where(f7_rounded.isin(f7_label0_values), 0, 1)
    ).dropna(subset=features)

    y_true = df['label'].to_numpy()
    X_test = df[features].to_numpy()
    X_test_scaled = scaler.transform(X_test)

    # A fresh LOF model is fitted per test file on that file's scaled rows.
    lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05, novelty=False)
    y_pred_lof = lof.fit_predict(X_test_scaled)

    # Rows LOF marks as -1 become predicted class 1; all others become 0.
    y_pred = np.where(y_pred_lof == -1, 1, 0)

    print(classification_report(y_true, y_pred, digits=4))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))



🔍 Testing tube4.csv
              precision    recall  f1-score   support

           0     0.9268    0.9534    0.9399     54415
           1     0.1385    0.0906    0.1095      4505

    accuracy                         0.8874     58920
   macro avg     0.5326    0.5220    0.5247     58920
weighted avg     0.8665    0.8874    0.8764     58920

Confusion Matrix:
 [[51877  2538]
 [ 4097   408]]

🔍 Testing tube5.csv
              precision    recall  f1-score   support

           0     0.9160    0.9545    0.9348    305169
           1     0.1702    0.0963    0.1230     29573

    accuracy                         0.8787    334742
   macro avg     0.5431    0.5254    0.5289    334742
weighted avg     0.8501    0.8787    0.8631    334742

Confusion Matrix:
 [[291279  13890]
 [ 26725   2848]]
