In [14]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [15]:
import numpy as np

# Path to your CSV file
path_train = 'data/train_data.npy'
path_test = 'data/test_data.npy'
path_test_label = 'data/test_labels.npy'

# Read the CSV file into a DataFrame
train = np.load(path_train)
test = np.load(path_test)
test_labels = np.load(path_test_label)

In [16]:
data = test
labels = test_labels

In [17]:
# Ideas: Look at papers about anomaly detection (using autoencoders)
# Do hyperparam search on lr and percentile
# do not do data scaling

In [23]:
# Scale the data
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)

# Split into training (normal data) and testing sets
X_train = train
X_test = test
y_test = test_labels

shuffle = True
    
# Shuffle the data and labels in unison 
def shuffle_data(X, y=None): 
    indices = np.arange(X.shape[0]) 
    np.random.shuffle(indices) 
    if y is not None: 
        return X[indices], y[indices] 
    else: 
        return X[indices] 

if shuffle:
    # Shuffle the data 
    X_train = shuffle_data(X_train) 
    X_test, y_test = shuffle_data(X_test, y_test)
    

#X_train, X_test, y_train, y_test = train_test_split(data_scaled, labels, test_size=0.2, random_state=42)
#X_train = X_train[y_train == 0]  # Train only on normal data

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

In [24]:
# Define the autoencoder model
class Autoencoder(nn.Module):
    lr = 0.0001
    percentile = 90
    def __init__(self, input_dim, lr, percentile):
        super(Autoencoder, self).__init__()
        self.lr = lr
        self.percentile = percentile
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 14),
            nn.ReLU(),
            nn.Linear(14, 7),
            nn.ReLU(),
            nn.Linear(7, 3),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(3, 7),
            nn.ReLU(),
            nn.Linear(7, 14),
            nn.ReLU(),
            nn.Linear(14, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
    
class NNAutoencoderSmall(nn.Module):
    lr = 0.0001
    percentile = 90
    def __init__(self, input_dim, lr, percentile):
        super(NNAutoencoderSmall, self).__init__()
        self.lr = lr
        self.percentile = percentile
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 14),
            nn.ReLU(),
            nn.Linear(14, 7),
            nn.ReLU(),
            nn.Linear(7, 3),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(3, 7),
            nn.ReLU(),
            nn.Linear(7, 14),
            nn.ReLU(),
            nn.Linear(14, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [28]:
# Instantiate the model, define the loss function and optimizer
lr = 0.001
percentile = 95

input_dim = X_train.shape[1]
model = NNAutoencoderSmall(input_dim, lr=lr, percentile=percentile)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=model.lr)

# Train the autoencoder
num_epochs = 5
batch_size = 32

last_loss = "undefined"

for epoch in range(num_epochs):
    permutation = torch.randperm(X_train.size()[0])
    for i in range(0, X_train.size()[0], batch_size):
        indices = permutation[i:i + batch_size]
        batch_x = X_train[indices]

        # Forward pass
        outputs = model(batch_x)
        loss = criterion(outputs, batch_x)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    last_loss = f"{loss.item():.4f}"
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/5], Loss: 0.0120
Epoch [2/5], Loss: 0.0073
Epoch [3/5], Loss: 0.0056
Epoch [4/5], Loss: 0.0083
Epoch [5/5], Loss: 0.0178


In [29]:
# Use the autoencoder to reconstruct the test data
X_test_pred = model(X_test).detach().numpy()

# Calculate reconstruction error
mse = np.mean((X_test.numpy() - X_test_pred) ** 2, axis=1)

# Set a threshold for anomaly detection (e.g., 95th percentile)
threshold = np.percentile(mse, model.percentile)

# Identify anomalies
anomalies = mse > threshold

# Evaluate the model
accuracy = accuracy_score(y_test.numpy(), anomalies)
precision = precision_score(y_test.numpy(), anomalies)
recall = recall_score(y_test.numpy(), anomalies)
f1 = f1_score(y_test.numpy(), anomalies)

print(f"Shuffle {shuffle}")
print(f"{num_epochs} epochs with last loss {last_loss}")
print(f"Parameters: lr: {model.lr}, percentile: {model.percentile}, batch_size{batch_size}")
print(f"Number of total samples: {len(X_test)}")
print(f"Number of predicted anomalies: {np.sum(anomalies)}")
print(f"Number of actual anomalies: {np.sum(y_test.numpy())}")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")

Shuffle True
5 epcohs with last loss 0.0178
Parameters: lr: 0.001, percentile: 95, batch_size32
Number of total samples: 449919
Number of predicted anomalies: 22496
Number of actual anomalies: 54584.0
Accuracy: 0.84
Precision: 0.11
Recall: 0.05
F1-score: 0.07


In [6]:
'''
percentile: 95

Epoch [50/50], Loss: 0.4138
Number of total samples: 89984
Number of predicted anomalies: 4500
Number of actual anomalies: 11032.0
Accuracy: 0.91
Precision: 0.86
Recall: 0.35
F1-score: 0.50
'''

'''
Epoch [10/10], Loss: 0.0019
Parameters: lr: 0.0001, percentile: 90
Number of total samples: 449919
Number of predicted anomalies: 44992
Number of actual anomalies: 54584.0
Accuracy: 0.94
Precision: 0.79
Recall: 0.65
F1-score: 0.71
'''

'''
Epoch [10/10], Loss: 0.0110
Parameters: lr: 0.0001, percentile: 90
Number of total samples: 449919
Number of predicted anomalies: 44992
Number of actual anomalies: 54584.0
Accuracy: 0.82
Precision: 0.21
Recall: 0.17
F1-score: 0.19
'''

'''10 epcohs with last loss 0.0073
Parameters: lr: 0.0001, percentile: 95, batch_size32
Number of total samples: 449919
Number of predicted anomalies: 22496
Number of actual anomalies: 54584.0
Accuracy: 0.89
Precision: 0.62
Recall: 0.26
F1-score: 0.36

=> try shuffling the data
=> is the loss already too good? -> increase percentile and lr
'''

'''
Shuffle True
5 epcohs with last loss 0.0370
Parameters: lr: 0.001, percentile: 95, batch_size32
Number of total samples: 449919
Number of predicted anomalies: 22496
Number of actual anomalies: 54584.0
Accuracy: 0.93
Precision: 1.00
Recall: 0.41
F1-score: 0.58

=> further decrease loss
'''


'''
Shuffle True
5 epcohs with last loss 0.0178
Parameters: lr: 0.001, percentile: 95, batch_size32
Number of total samples: 449919
Number of predicted anomalies: 22496
Number of actual anomalies: 54584.0
Accuracy: 0.84
Precision: 0.11
Recall: 0.05
F1-score: 0.07
'''

'\npercentile: 95\n\nEpoch [50/50], Loss: 0.4138\nNumber of total samples: 89984\nNumber of predicted anomalies: 4500\nNumber of actual anomalies: 11032.0\nAccuracy: 0.91\nPrecision: 0.86\nRecall: 0.35\nF1-score: 0.50\n'

## USE GRIDSEARCH

In [30]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.base import BaseEstimator, RegressorMixin

# Define the custom autoencoder model with sklearn interface
class Autoencoder(nn.Module, BaseEstimator, RegressorMixin):
    def __init__(self, input_dim, lr=0.001, percentile=95):
        super(Autoencoder, self).__init__()
        self.lr = lr
        self.percentile = percentile
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 14),
            nn.ReLU(),
            nn.Linear(14, 7),
            nn.ReLU(),
            nn.Linear(7, 3),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(3, 7),
            nn.ReLU(),
            nn.Linear(7, 14),
            nn.ReLU(),
            nn.Linear(14, input_dim),
            nn.Sigmoid()
        )
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
        self.criterion = nn.MSELoss()

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

    def fit(self, X, y=None):
        self.train()
        X_tensor = torch.tensor(X, dtype=torch.float32)
        dataset = torch.utils.data.TensorDataset(X_tensor, X_tensor)
        loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True)

        for epoch in range(self.num_epochs):
            for batch in loader:
                batch_x, _ = batch
                self.optimizer.zero_grad()
                outputs = self.forward(batch_x)
                loss = self.criterion(outputs, batch_x)
                loss.backward()
                self.optimizer.step()
            print(f'Epoch [{epoch + 1}/{self.num_epochs}], Loss: {loss.item():.4f}')
        
        self.loss_ = loss.item()
        return self

    def predict(self, X):
        self.eval()
        with torch.no_grad():
            X_tensor = torch.tensor(X, dtype=torch.float32)
            reconstructed = self.forward(X_tensor)
            return reconstructed.numpy()

    def score(self, X, y=None):
        reconstructed = self.predict(X)
        mse = np.mean(np.power(X - reconstructed, 2), axis=1)
        threshold = np.percentile(mse, self.percentile)
        anomalies = mse > threshold
        return -f1_score(y, anomalies)

    def set_params(self, **params):
        for key, value in params.items():
            setattr(self, key, value)
        return self

    def get_params(self, deep=True):
        return {'input_dim': self.encoder[0].in_features, 'lr': self.lr, 'percentile': self.percentile}

In [37]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.base import BaseEstimator, RegressorMixin

# Define the custom autoencoder model with sklearn interface
class AutoencoderSmall(nn.Module, BaseEstimator, RegressorMixin):
    def __init__(self, input_dim, lr=0.001, percentile=95):
        super(AutoencoderSmall, self).__init__()
        self.lr = lr
        self.percentile = percentile
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 7),
            nn.ReLU(),
            nn.Linear(7, 3),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(3, 7),
            nn.ReLU(),
            nn.Linear(7, input_dim),
            nn.Sigmoid()
        )
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
        self.criterion = nn.MSELoss()

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

    def fit(self, X, y=None):
        self.train()
        X_tensor = torch.tensor(X, dtype=torch.float32)
        dataset = torch.utils.data.TensorDataset(X_tensor, X_tensor)
        loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True)

        for epoch in range(self.num_epochs):
            for batch in loader:
                batch_x, _ = batch
                self.optimizer.zero_grad()
                outputs = self.forward(batch_x)
                loss = self.criterion(outputs, batch_x)
                loss.backward()
                self.optimizer.step()
            print(f'Epoch [{epoch + 1}/{self.num_epochs}], Loss: {loss.item():.4f}')
        
        self.loss_ = loss.item()
        return self

    def predict(self, X):
        self.eval()
        with torch.no_grad():
            X_tensor = torch.tensor(X, dtype=torch.float32)
            reconstructed = self.forward(X_tensor)
            return reconstructed.numpy()

    def score(self, X, y=None):
        reconstructed = self.predict(X)
        mse = np.mean(np.power(X - reconstructed, 2), axis=1)
        threshold = np.percentile(mse, self.percentile)
        anomalies = mse > threshold
        return -f1_score(y, anomalies)

    def set_params(self, **params):
        for key, value in params.items():
            setattr(self, key, value)
        return self

    def get_params(self, deep=True):
        return {'input_dim': self.encoder[0].in_features, 'lr': self.lr, 'percentile': self.percentile}

In [None]:
# Define parameter grid for Grid Search
param_grid = {
    'lr': [0.1, 0.01, 0.001, 0.0001, 0.00001],
    'percentile': [85, 90, 95, 97],
    'batch_size': [64],
    'num_epochs': [5]
}

# Initialize the model and GridSearchCV
input_dim = X_train.shape[1]
autoencoder = AutoencoderSmall(input_dim)
grid_search = GridSearchCV(estimator=autoencoder, param_grid=param_grid, scoring='f1', cv=3)

# Perform GridSearchCV
grid_search.fit(X_train, X_train)

# Get the best parameters and model
best_params = grid_search.best_params_
print(f"Best Parameters: {best_params}")

  X_tensor = torch.tensor(X, dtype=torch.float32)


Epoch [1/5], Loss: 0.0024
Epoch [2/5], Loss: 0.0134
Epoch [3/5], Loss: 0.0022
Epoch [4/5], Loss: 0.0095
Epoch [5/5], Loss: 0.0066


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0090
Epoch [2/5], Loss: 0.0153
Epoch [3/5], Loss: 0.0106
Epoch [4/5], Loss: 0.0024
Epoch [5/5], Loss: 0.0039


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0545
Epoch [2/5], Loss: 0.0332
Epoch [3/5], Loss: 0.0487
Epoch [4/5], Loss: 0.0853
Epoch [5/5], Loss: 0.0438


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0227
Epoch [2/5], Loss: 0.0012
Epoch [3/5], Loss: 0.0022
Epoch [4/5], Loss: 0.0026
Epoch [5/5], Loss: 0.0017


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0441
Epoch [2/5], Loss: 0.0545
Epoch [3/5], Loss: 0.0421
Epoch [4/5], Loss: 0.0375
Epoch [5/5], Loss: 0.0505


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0172
Epoch [2/5], Loss: 0.0086
Epoch [3/5], Loss: 0.0081
Epoch [4/5], Loss: 0.0185
Epoch [5/5], Loss: 0.0095


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0116
Epoch [2/5], Loss: 0.0069
Epoch [3/5], Loss: 0.0034
Epoch [4/5], Loss: 0.0019
Epoch [5/5], Loss: 0.0060


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0054
Epoch [2/5], Loss: 0.0059
Epoch [3/5], Loss: 0.0068
Epoch [4/5], Loss: 0.0020
Epoch [5/5], Loss: 0.0246


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0092
Epoch [2/5], Loss: 0.0066
Epoch [3/5], Loss: 0.0046
Epoch [4/5], Loss: 0.0120
Epoch [5/5], Loss: 0.0131


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0095
Epoch [2/5], Loss: 0.0025
Epoch [3/5], Loss: 0.0045
Epoch [4/5], Loss: 0.0092
Epoch [5/5], Loss: 0.0068


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0126
Epoch [2/5], Loss: 0.0020
Epoch [3/5], Loss: 0.0080
Epoch [4/5], Loss: 0.0051
Epoch [5/5], Loss: 0.0040


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0065
Epoch [2/5], Loss: 0.0024
Epoch [3/5], Loss: 0.0047
Epoch [4/5], Loss: 0.0053
Epoch [5/5], Loss: 0.0050


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0171
Epoch [2/5], Loss: 0.0108
Epoch [3/5], Loss: 0.0051
Epoch [4/5], Loss: 0.0134
Epoch [5/5], Loss: 0.0084


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0042
Epoch [2/5], Loss: 0.0031
Epoch [3/5], Loss: 0.0034
Epoch [4/5], Loss: 0.0051
Epoch [5/5], Loss: 0.0041


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0100
Epoch [2/5], Loss: 0.0106
Epoch [3/5], Loss: 0.0075
Epoch [4/5], Loss: 0.0059
Epoch [5/5], Loss: 0.0058


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0130
Epoch [2/5], Loss: 0.0110
Epoch [3/5], Loss: 0.0123
Epoch [4/5], Loss: 0.0088
Epoch [5/5], Loss: 0.0148


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0049
Epoch [2/5], Loss: 0.0023
Epoch [3/5], Loss: 0.0031
Epoch [4/5], Loss: 0.0024
Epoch [5/5], Loss: 0.0027


  X_tensor = torch.tensor(X, dtype=torch.float32)
Traceback (most recent call last):
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Johnn\anaconda3\envs\best-hackathon-iba\lib\site-packages\sklearn\metrics\_classification.py", line 1238, in f1_score
    return fbeta_scor

Epoch [1/5], Loss: 0.0149


In [None]:
# Rebuild and train the best model
best_model = Autoencoder(input_dim, lr=best_params['lr'], percentile=best_params['percentile'])
best_model.batch_size = best_params['batch_size']
best_model.num_epochs = best_params['num_epochs']
best_model.fit(X_train, X_train)

# Use the autoencoder to reconstruct the test data
X_test_pred = best_model.predict(X_test)

# Calculate reconstruction error
mse = np.mean((X_test - X_test_pred) ** 2, axis=1)

# Set a threshold for anomaly detection
threshold = np.percentile(mse, best_params['percentile'])

# Identify anomalies
anomalies = mse > threshold

# Evaluate the model
accuracy = accuracy_score(y_test, anomalies)
precision = precision_score(y_test, anomalies)
recall = recall_score(y_test, anomalies)
f1 = f1_score(y_test, anomalies)

print(f"Number of total samples: {len(X_test)}")
print(f"Number of predicted anomalies: {np.sum(anomalies)}")
print(f"Number of actual anomalies: {np.sum(y_test)}")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")