In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix

# Names of the input columns used by every detector: 'f6' through 'f16' (11 columns).
features = [f'f{idx}' for idx in range(6, 17)]

# ========================
# Autoencoder
# ========================
class Autoencoder(nn.Module):
    """Fully connected autoencoder: input_dim -> 8 -> 4 -> 8 -> input_dim.

    A ReLU follows the first linear layer of each half; the 4-unit code and
    the final reconstruction are left linear.
    """

    def __init__(self, input_dim):
        """Build the encoder and decoder stacks for ``input_dim`` input features."""
        super().__init__()
        hidden_width, code_width = 8, 4
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, code_width),
        )
        self.decoder = nn.Sequential(
            nn.Linear(code_width, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, input_dim),
        )

    def forward(self, x):
        """Return the reconstruction of ``x`` after passing through the 4-unit code."""
        code = self.encoder(x)
        return self.decoder(code)

# Training tubes. Only rows labelled normal (label == 0) are kept, so
# X_train_raw contains normal rows exclusively.
train_files = ['tube1.csv', 'tube2.csv', 'tube3.csv']
train_dfs = []

for file in train_files:
    tube = pd.read_csv(file)
    # A row is labelled anomalous (1) when f7 * f8 exceeds 0.1.
    is_anomaly = tube['f7'].mul(tube['f8']).gt(0.1)
    tube = tube.assign(label=is_anomaly.astype(int))
    # Rows with a missing feature value are discarded before filtering.
    tube = tube.dropna(subset=features)
    train_dfs.append(tube.loc[tube['label'].eq(0)])

train_df = pd.concat(train_dfs, ignore_index=True)
X_train_raw = train_df[features].to_numpy()

# Standardise the features with statistics estimated from the training rows,
# then convert to a float32 tensor for the autoencoder.
scaler = StandardScaler()
scaler.fit(X_train_raw)
X_train_scaled = scaler.transform(X_train_raw)
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)

# ========================
# Autoencoder training
# ========================
# Seed the RNG so weight initialisation -- and therefore the loss curve and the
# anomaly threshold derived from the trained model -- is identical on every
# Restart & Run All. 42 matches the random_state already passed to
# IsolationForest in this cell.
torch.manual_seed(42)

model = Autoencoder(input_dim=X_train_tensor.shape[1])
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Full-batch training: each epoch is a single optimizer step over all
# training rows, minimising the reconstruction MSE.
for epoch in range(1000):
    output = model(X_train_tensor)
    loss = criterion(output, X_train_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Report the loss every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item():.6f}")

# Per-row reconstruction error on the training data; its 90th percentile is
# used as the autoencoder's anomaly cut-off.
with torch.no_grad():
    train_reconstructed = model(X_train_tensor)
    squared_error = (X_train_tensor - train_reconstructed).pow(2)
    train_mse = squared_error.mean(dim=1).numpy()
threshold_ae = np.percentile(train_mse, 90)

# Evaluate the three-detector ensemble on each held-out tube.
test_files = ['tube4.csv', 'tube5.csv']
for file in test_files:
    print(f"\n🔍 Testing: {file}")
    df = pd.read_csv(file)
    # Ground truth: a row is anomalous (1) when f7 * f8 exceeds 0.1.
    df['label'] = (df['f7'] * df['f8'] > 0.1).astype(int)
    df = df.dropna(subset=features)
    y_true = df['label'].values

    # Scale with the scaler fitted on the training rows (no refit here).
    X_test_raw = df[features].values
    X_test_scaled = scaler.transform(X_test_raw)
    X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

    # 1. Autoencoder: flag rows whose reconstruction MSE exceeds the
    #    threshold derived from the training data.
    with torch.no_grad():
        reconstructed = model(X_test_tensor)
        test_mse = torch.mean((X_test_tensor - reconstructed)**2, dim=1).numpy()
    ae_pred = (test_mse > threshold_ae).astype(int)

    # NOTE(review): LOF and IsolationForest below are fitted on this test file
    # itself rather than on the training rows, and `contamination` fixes the
    # share of rows each one flags -- confirm this setup is intended.

    # 2. LOF
    lof = LocalOutlierFactor(n_neighbors=20, contamination=0.1)
    lof_pred = lof.fit_predict(X_test_scaled)  # -1 = outlier
    lof_pred = (lof_pred == -1).astype(int)

    # 3. Isolation Forest
    isof = IsolationForest(contamination=0.1, random_state=42)
    isof_pred = isof.fit_predict(X_test_scaled)
    isof_pred = (isof_pred == -1).astype(int)

    # Majority vote: a row is anomalous when at least two detectors flag it.
    votes = ae_pred + lof_pred + isof_pred
    fused_pred = (votes >= 2).astype(int)

    # Pin the label set so every tube yields a two-class report and a 2x2
    # matrix even when one class is absent from that file; zero_division=0
    # reports undefined precision/recall as 0 instead of emitting warnings.
    print("Classification Report (Fused Prediction):")
    print(classification_report(y_true, fused_pred, labels=[0, 1], digits=4, zero_division=0))
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, fused_pred, labels=[0, 1]))


Epoch 10, Loss: 1.005968
Epoch 20, Loss: 0.987744
Epoch 30, Loss: 0.968080
Epoch 40, Loss: 0.945852
Epoch 50, Loss: 0.920684
Epoch 60, Loss: 0.892083
Epoch 70, Loss: 0.859491
Epoch 80, Loss: 0.822325
Epoch 90, Loss: 0.781988
Epoch 100, Loss: 0.745837
Epoch 110, Loss: 0.714583
Epoch 120, Loss: 0.686820
Epoch 130, Loss: 0.662312
Epoch 140, Loss: 0.640435
Epoch 150, Loss: 0.620839
Epoch 160, Loss: 0.603335
Epoch 170, Loss: 0.587862
Epoch 180, Loss: 0.574329
Epoch 190, Loss: 0.562552
Epoch 200, Loss: 0.552280
Epoch 210, Loss: 0.543199
Epoch 220, Loss: 0.535068
Epoch 230, Loss: 0.527708
Epoch 240, Loss: 0.520930
Epoch 250, Loss: 0.514509
Epoch 260, Loss: 0.508184
Epoch 270, Loss: 0.501645
Epoch 280, Loss: 0.494664
Epoch 290, Loss: 0.486989
Epoch 300, Loss: 0.478528
Epoch 310, Loss: 0.469343
Epoch 320, Loss: 0.459785
Epoch 330, Loss: 0.450409
Epoch 340, Loss: 0.441796
Epoch 350, Loss: 0.434070
Epoch 360, Loss: 0.426952
Epoch 370, Loss: 0.420062
Epoch 380, Loss: 0.413168
Epoch 390, Loss: 0.40

In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix

# Names of the input columns used by every detector: 'f6' through 'f16' (11 columns).
features = [f'f{idx}' for idx in range(6, 17)]

# ========================
# Autoencoder
# ========================
class Autoencoder(nn.Module):
    """Fully connected autoencoder: input_dim -> 8 -> 4 -> 8 -> input_dim.

    A ReLU follows the first linear layer of each half; the 4-unit code and
    the final reconstruction are left linear.
    """

    def __init__(self, input_dim):
        """Build the encoder and decoder stacks for ``input_dim`` input features."""
        super().__init__()
        hidden_width, code_width = 8, 4
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, code_width),
        )
        self.decoder = nn.Sequential(
            nn.Linear(code_width, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, input_dim),
        )

    def forward(self, x):
        """Return the reconstruction of ``x`` after passing through the 4-unit code."""
        code = self.encoder(x)
        return self.decoder(code)

# Training tubes. Only rows labelled normal (label == 0) are kept, so
# X_train_raw contains normal rows exclusively.
train_files = ['tube1.csv', 'tube2.csv', 'tube3.csv']
train_dfs = []

for file in train_files:
    tube = pd.read_csv(file)
    # A row is normal when f7, rounded to 6 decimals, equals 0.857143 or
    # 0.714286 (presumably 6/7 and 5/7 -- confirm); every other row is
    # labelled anomalous (1).
    f7_rounded = tube['f7'].round(6)
    is_normal = f7_rounded.isin([0.857143, 0.714286])
    tube = tube.assign(label=(~is_normal).astype(int))
    # Rows with a missing feature value are discarded before filtering.
    tube = tube.dropna(subset=features)
    train_dfs.append(tube.loc[tube['label'].eq(0)])

train_df = pd.concat(train_dfs, ignore_index=True)
X_train_raw = train_df[features].to_numpy()

# Standardise the features with statistics estimated from the training rows,
# then convert to a float32 tensor for the autoencoder.
scaler = StandardScaler()
scaler.fit(X_train_raw)
X_train_scaled = scaler.transform(X_train_raw)
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)

# ========================
# Autoencoder training
# ========================
# Seed the RNG so weight initialisation -- and therefore the loss curve and the
# anomaly threshold derived from the trained model -- is identical on every
# Restart & Run All. 42 matches the random_state already passed to
# IsolationForest in this cell.
torch.manual_seed(42)

model = Autoencoder(input_dim=X_train_tensor.shape[1])
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Full-batch training: each epoch is a single optimizer step over all
# training rows, minimising the reconstruction MSE.
for epoch in range(800):
    output = model(X_train_tensor)
    loss = criterion(output, X_train_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Report the loss every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item():.6f}")

# Per-row reconstruction error on the training data; its 90th percentile is
# used as the autoencoder's anomaly cut-off.
with torch.no_grad():
    train_reconstructed = model(X_train_tensor)
    squared_error = (X_train_tensor - train_reconstructed).pow(2)
    train_mse = squared_error.mean(dim=1).numpy()
threshold_ae = np.percentile(train_mse, 90)


# Evaluate the three-detector ensemble on each held-out tube.
test_files = ['tube4.csv', 'tube5.csv']
for file in test_files:
    print(f"\n🔍 Testing: {file}")
    df = pd.read_csv(file)
    # Ground truth: a row is normal (0) when f7, rounded to 6 decimals, equals
    # 0.857143 or 0.714286; every other row is anomalous (1).
    f7_rounded = df['f7'].round(6)
    df['label'] = (~f7_rounded.isin([0.857143, 0.714286])).astype(int)
    df = df.dropna(subset=features)
    y_true = df['label'].values

    # Scale with the scaler fitted on the training rows (no refit here).
    X_test_raw = df[features].values
    X_test_scaled = scaler.transform(X_test_raw)
    X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

    # 1. Autoencoder: flag rows whose reconstruction MSE exceeds the
    #    threshold derived from the training data.
    with torch.no_grad():
        reconstructed = model(X_test_tensor)
        test_mse = torch.mean((X_test_tensor - reconstructed)**2, dim=1).numpy()
    ae_pred = (test_mse > threshold_ae).astype(int)

    # NOTE(review): LOF and IsolationForest below are fitted on this test file
    # itself rather than on the training rows, and `contamination` fixes the
    # share of rows each one flags -- confirm this setup is intended.

    # 2. LOF
    lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
    lof_pred = lof.fit_predict(X_test_scaled)  # -1 = outlier
    lof_pred = (lof_pred == -1).astype(int)

    # 3. Isolation Forest
    isof = IsolationForest(contamination=0.05, random_state=42)
    isof_pred = isof.fit_predict(X_test_scaled)
    isof_pred = (isof_pred == -1).astype(int)

    # Majority vote: a row is anomalous when at least two detectors flag it.
    votes = ae_pred + lof_pred + isof_pred
    fused_pred = (votes >= 2).astype(int)

    # --- Evaluation ---
    # Pin the label set so every tube yields a two-class report and a 2x2
    # matrix even when one class is absent from that file; zero_division=0
    # reports undefined precision/recall as 0 instead of emitting warnings.
    print("Classification Report (Fused Prediction):")
    print(classification_report(y_true, fused_pred, labels=[0, 1], digits=4, zero_division=0))
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, fused_pred, labels=[0, 1]))


Epoch 10, Loss: 1.032690
Epoch 20, Loss: 1.004716
Epoch 30, Loss: 0.976374
Epoch 40, Loss: 0.946350
Epoch 50, Loss: 0.914083
Epoch 60, Loss: 0.879768
Epoch 70, Loss: 0.844330
Epoch 80, Loss: 0.809374
Epoch 90, Loss: 0.776697
Epoch 100, Loss: 0.747058
Epoch 110, Loss: 0.719825
Epoch 120, Loss: 0.694025
Epoch 130, Loss: 0.669016
Epoch 140, Loss: 0.644494
Epoch 150, Loss: 0.620905
Epoch 160, Loss: 0.599353
Epoch 170, Loss: 0.580739
Epoch 180, Loss: 0.564962
Epoch 190, Loss: 0.551395
Epoch 200, Loss: 0.539516
Epoch 210, Loss: 0.529003
Epoch 220, Loss: 0.519612
Epoch 230, Loss: 0.511127
Epoch 240, Loss: 0.503389
Epoch 250, Loss: 0.496312
Epoch 260, Loss: 0.489854
Epoch 270, Loss: 0.483967
Epoch 280, Loss: 0.478586
Epoch 290, Loss: 0.473646
Epoch 300, Loss: 0.469082
Epoch 310, Loss: 0.464820
Epoch 320, Loss: 0.460789
Epoch 330, Loss: 0.456905
Epoch 340, Loss: 0.453078
Epoch 350, Loss: 0.449224
Epoch 360, Loss: 0.445226
Epoch 370, Loss: 0.440937
Epoch 380, Loss: 0.436180
Epoch 390, Loss: 0.43