In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

# a. Load and preprocess ECG dataset
data = pd.read_csv('/home/ayush/Desktop/lp4/dataset/ecg_autoencoder_dataset.csv', header=None)

# Last column (140) is the class label, rest are features
X = data.iloc[:, :-1].values  # All columns except last
y = data.iloc[:, -1].values   # Last column is the label (0=anomaly, 1=normal)

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Separate normal data (Class = 1) for training the autoencoder
x_train = X_scaled[y == 1]  # Train only on normal heartbeats

# Test data (normal + anomalies)
x_test = X_scaled
y_test = y

# b. Build Autoencoder model
input_dim = x_train.shape[1]  # 140 features
inp = Input((input_dim,))
enc = Dense(64, activation='relu')(inp)
lat = Dense(32, activation='relu')(enc)
dec = Dense(64, activation='relu')(lat)
out = Dense(input_dim, activation='linear')(dec)
model = Model(inp, out)

# c. Compile & Train
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
history = model.fit(x_train, x_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

# d. Reconstruction on test data
reconstructions = model.predict(x_test)

# e. Calculate reconstruction error
mse = np.mean(np.power(x_test - reconstructions, 2), axis=1)

# f. Set threshold for anomaly detection (using 95th percentile of training errors)
train_reconstructions = model.predict(x_train)
train_mse = np.mean(np.power(x_train - train_reconstructions, 2), axis=1)
threshold = np.percentile(train_mse, 95)
print("Threshold: ", threshold)

# g. Predict anomalies (mse > threshold means anomaly, so predict 0)
y_pred = (mse > threshold).astype(int)  # 1 if mse > threshold (anomaly)
y_pred = 1 - y_pred  # Flip: 0=anomaly, 1=normal to match original labels

# h. Evaluation
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['Anomaly', 'Normal']))

print("Accuracy: ", accuracy_score(y_test, y_pred))

2025-11-10 01:40:41.998459: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-11-10 01:40:42.043370: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-10 01:40:43.198246: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


Epoch 1/10


2025-11-10 01:40:43.799776: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.4786 - mae: 0.4833 - val_loss: 0.2396 - val_mae: 0.3309
Epoch 2/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1851 - mae: 0.2940 - val_loss: 0.1543 - val_mae: 0.2645
Epoch 3/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1313 - mae: 0.2478 - val_loss: 0.1216 - val_mae: 0.2327
Epoch 4/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1030 - mae: 0.2181 - val_loss: 0.1032 - val_mae: 0.2112
Epoch 5/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0892 - mae: 0.2022 - val_loss: 0.0952 - val_mae: 0.2011
Epoch 6/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0826 - mae: 0.1947 - val_loss: 0.0917 - val_mae: 0.1976
Epoch 7/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0786 - mae: 0.190

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

# a. Load and preprocess dataset
data = pd.read_csv('/home/ayush/Desktop/lp4/dataset/creditcard.csv')
data['Amount'] = StandardScaler().fit_transform(data[['Amount']])
data = data.drop(['Time'], axis=1)

# Separate normal transactions (Class = 0) for training
x_train = data[data['Class'] == 0].drop(['Class'], axis=1).values

# Test data (normal + fraud)
x_test = data.drop(['Class'], axis=1).values
y_test = data['Class'].values

# b. Build Autoencoder model
inp = Input((29,))
enc = Dense(16, activation='relu')(inp)
lat = Dense(8, activation='relu')(enc)
dec = Dense(16, activation='relu')(lat)
out = Dense(29, activation='linear')(dec)
model = Model(inp, out)

# c. Compile & Train
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.fit(x_train, x_train, epochs=5, batch_size=32, validation_split=0.2, verbose=1)

# d. Reconstruction on test data
reconstructions = model.predict(x_test)

# e. Calculate reconstruction error
mse = np.mean(np.power(x_test - reconstructions, 2), axis=1)

# f. Set threshold for anomaly detection
threshold = np.percentile(mse, 95)  # top 5% errors = fraud
print("Threshold: ", threshold)

# g. Predict anomalies
y_pred = (mse > threshold).astype(int)

# h. Evaluation
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['Normal', 'Fraud']))

print("Accuracy: ", accuracy_score(y_test, y_pred))

Epoch 1/5
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 908us/step - loss: 0.4921 - mae: 0.4484 - val_loss: 0.3762 - val_mae: 0.4006
Epoch 2/5
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 862us/step - loss: 0.3598 - mae: 0.3847 - val_loss: 0.3531 - val_mae: 0.3865
Epoch 3/5
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 871us/step - loss: 0.3361 - mae: 0.3683 - val_loss: 0.3308 - val_mae: 0.3694
Epoch 4/5
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 874us/step - loss: 0.3286 - mae: 0.3619 - val_loss: 0.3280 - val_mae: 0.3728
Epoch 5/5
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 873us/step - loss: 0.3241 - mae: 0.3587 - val_loss: 0.3247 - val_mae: 0.3681
[1m8901/8901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 400us/step
Threshold:  0.7450179863458147

Confusion Matrix:
[[270510  13805]
 [    56    436]]

Classification Report:
              precision    recall  f1