# In [None]:
# ============================================================
# 1. Import required libraries
# ============================================================
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt


# ============================================================
# 2. Load dataset WITHOUT column names
# ============================================================
# The CSV is read with header=None, so its first row is treated as data;
# every cell must be numeric for the float conversion below to succeed.
df = pd.read_csv("dataset/ECGdataset(Ass4)/ecg_autoencoder_dataset.csv", header=None)

# Convert to a float numpy array (one row per sample, one column per feature)
X = df.values.astype(float)

# Train-test split FIRST, on the raw values, so that no statistic of the
# held-out rows can influence the preprocessing (avoids data leakage).
X_train_raw, X_test_raw = train_test_split(X, test_size=0.2, random_state=42)

# Standardize features: learn mean/std from the training rows only, then
# apply that same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole dataset under the train-fitted scaling; kept so that any code that
# still refers to X_scaled continues to work.
X_scaled = scaler.transform(X)


# ============================================================
# 3. Build Autoencoder (Encoder + Decoder)
# ============================================================
# Layer sizes: the network is symmetric around an 8-unit bottleneck
# (input_dim -> 32 -> 16 -> 8 -> 16 -> 32 -> input_dim).
input_dim = X_train.shape[1]   # one input unit per column
latent_dim = 8                 # width of the bottleneck layer

# ----- Encoder: progressively narrower ReLU layers -----
inputs = Input(shape=(input_dim,))
encoder = inputs
for units in (32, 16):
    encoder = Dense(units, activation='relu')(encoder)
latent = Dense(latent_dim, activation='relu')(encoder)

# ----- Decoder: mirror of the encoder -----
decoder = latent
for units in (16, 32):
    decoder = Dense(units, activation='relu')(decoder)
# Linear output so reconstructions can take any real value, matching the
# standardized (zero-centred) inputs.
outputs = Dense(input_dim, activation='linear')(decoder)

# End-to-end model mapping an input row to its reconstruction
autoencoder = Model(inputs, outputs)


# ============================================================
# 4. Compile the model
# ============================================================
# Adam optimizer; the loss is mean squared reconstruction error, and mean
# absolute error is tracked as an additional metric.
optimizer = Adam(learning_rate=0.001)
autoencoder.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# Print the layer-by-layer architecture and parameter counts
autoencoder.summary()


# ============================================================
# 5. Train the Autoencoder
# ============================================================
# Training options, gathered in one place for easy tweaking.
fit_options = {
    "epochs": 50,
    "batch_size": 32,
    "shuffle": True,
    "validation_split": 0.2,   # fraction of X_train held out for validation
    "verbose": 1,
}

# The target equals the input: the network learns to reproduce each row.
history = autoencoder.fit(x=X_train, y=X_train, **fit_options)


# ============================================================
# 6. Compute reconstruction error for anomaly detection
# ============================================================
# Reconstruct the held-out rows and score each one by its mean squared
# reconstruction error across features.
reconstructions = autoencoder.predict(X_test)
residuals = X_test - reconstructions
mse_errors = np.power(residuals, 2).mean(axis=1)

# Threshold = 95th percentile of the test-set errors.
# NOTE: because the cut-off is taken from the same errors it is applied to,
# roughly the top 5% of test rows are flagged by construction.
threshold = np.percentile(mse_errors, 95)

# Label rows strictly above the threshold: 1 = anomaly, 0 = normal
exceeds_threshold = np.greater(mse_errors, threshold)
anomaly_labels = exceeds_threshold.astype(int)

n_flagged = anomaly_labels.sum()
print("Anomaly threshold:", threshold)
print("Anomalies detected:", n_flagged)


# ============================================================
# OPTIONAL: Plot reconstruction error
# ============================================================
# Histogram of per-row reconstruction errors, with the anomaly threshold
# drawn as a dashed vertical line.
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(mse_errors, bins=50)
ax.axvline(threshold, linestyle='--')
ax.set_title("Reconstruction Error Distribution")
ax.set_xlabel("MSE Error")
ax.set_ylabel("Count")
plt.show()
