In [2]:
import torch
import numpy as np
import pandas as pd 
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn import metrics
import seaborn as sns
#https://www.comet.com/site/blog/credit-card-fraud-detection-with-autoencoders/

In [3]:
%run neuralnetwork.ipynb

In [10]:
class AutoEncoder(nn.Module):
    """Fully connected autoencoder scored by per-sample reconstruction error.

    Both halves are ``NeuralN`` networks. ``NeuralN`` is not defined in this
    notebook; it is expected to be in the namespace already (presumably
    provided by ``%run neuralnetwork.ipynb`` -- confirm). The encoder maps
    ``input_dimension -> latent_dim`` and the decoder maps
    ``latent_dim -> input_dimension`` using the encoder's hidden widths in
    reverse order.
    """

    def __init__(self, input_dimension = None, output_dimension = None, latent_dim = None, hidden_layers=None, num_hidden_layers = None, hidden_dim = 64,
                 activation_default = "relu",
                 activations = None, loss_method = "BCE", opt_method = "SGD", lr = 0.01, alpha=None, gamma = None, threshold_ = None, epochs=None):
        """Store the configuration and build the encoder/decoder pair.

        Args:
            input_dimension: Width of the input; also the decoder's output width.
            output_dimension: Stored on the instance only; the decoder output
                width is always ``input_dimension``.
            latent_dim: Width of the bottleneck (encoder output / decoder input).
            hidden_layers: Explicit encoder hidden widths. When ``None`` the
                widths are ``hidden_dim // 2**i`` for
                ``i in range(num_hidden_layers)``.
            num_hidden_layers: Number of generated hidden layers; required
                when ``hidden_layers`` is ``None``. Also forwarded to ``NeuralN``.
            hidden_dim: Width of the first generated hidden layer.
            activation_default, activations, loss_method, alpha, gamma:
                Forwarded unchanged to both ``NeuralN`` instances (their exact
                meaning is defined by ``NeuralN`` -- not visible here).
            opt_method: ``"SGD"``, ``"Adam"`` or ``"RMSprop"``; see ``get_optimizer``.
            lr: Learning rate used by ``get_optimizer``.
            threshold_: Reconstruction-error cut-off used by ``analysis``.
            epochs: Number of passes over the data in ``train_model_ae``.

        Raises:
            ValueError: If both ``hidden_layers`` and ``num_hidden_layers``
                are ``None`` (the layer widths cannot be derived).
        """

        super().__init__()

        self.input_dimension = input_dimension
        self.output_dimension = output_dimension
        self.hidden_layers = hidden_layers
        self.num_hidden_layers = num_hidden_layers
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.activation_default = activation_default
        self.activations = activations
        self.opt_method = opt_method
        self.lr = lr
        self.alpha = alpha
        self.gamma = gamma
        self.threshold_ = threshold_
        self.epochs = epochs
        self.loss_method = loss_method

        if self.hidden_layers is not None:
            hidden_layers_encoder = self.hidden_layers
        else:
            if self.num_hidden_layers is None:
                # Previously surfaced as an opaque TypeError from range(None).
                raise ValueError("Either hidden_layers or num_hidden_layers must be provided.")
            # Halve the width at every successive layer: hidden_dim, hidden_dim/2, ...
            hidden_layers_encoder = [self.hidden_dim//(2**i) for i in range(self.num_hidden_layers)]

        # The decoder mirrors the encoder.
        hidden_layers_decoder = hidden_layers_encoder[::-1]

        self.encoder = NeuralN(input_dimension=self.input_dimension, output_dimension=self.latent_dim, hidden_layers = hidden_layers_encoder,
                               num_hidden_layers=self.num_hidden_layers, hidden_dim=self.hidden_dim, activation_default=self.activation_default,
                               activations = self.activations, loss_method = self.loss_method, opt_method = self.opt_method, lr = self.lr, alpha = self.alpha, gamma = self.gamma, epochs=self.epochs)

        self.decoder = NeuralN(input_dimension=self.latent_dim, output_dimension=self.input_dimension, hidden_layers = hidden_layers_decoder,
                               num_hidden_layers=self.num_hidden_layers, hidden_dim=self.hidden_dim, activation_default=self.activation_default,
                               activations=self.activations, loss_method=self.loss_method, opt_method=self.opt_method, lr=self.lr, alpha=self.alpha, gamma=self.gamma, epochs=self.epochs)


    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)

        return decoded

    def get_optimizer(self):
        """Build the optimizer named by ``self.opt_method`` over all parameters.

        Returns:
            A ``torch.optim`` optimizer (SGD, Adam or RMSprop) using ``self.lr``.

        Raises:
            ValueError: If ``self.opt_method`` is not one of the supported names.
        """

        if self.opt_method == "SGD":
            return torch.optim.SGD(params = self.parameters(), lr = self.lr)

        elif self.opt_method == "Adam":
            return torch.optim.Adam(params = self.parameters(), lr = self.lr)

        elif self.opt_method == "RMSprop":
            return torch.optim.RMSprop(params = self.parameters(), lr = self.lr)

        else:
            raise ValueError(f"{self.opt_method} is not valid!")

    def train_model_ae(self, train_loader):#https://www.geeksforgeeks.org/how-to-implement-neural-networks-in-pytorch/
        #https://www.kaggle.com/code/shivamkc3/linear-autoencoder-using-pytorch-part-01
        """Train the autoencoder to reconstruct its own input.

        Args:
            train_loader: Iterable of ``(X, y)`` batches exposing ``.dataset``;
                ``y`` is ignored because the target is ``X`` itself.

        Returns:
            List with one entry per epoch: the loss averaged over all samples
            in ``train_loader.dataset``.

        Raises:
            ValueError: If ``self.epochs`` is ``None``.
        """
        if self.epochs is None:
            # Previously surfaced as an opaque TypeError from range(None).
            raise ValueError("epochs must be set before calling train_model_ae.")

        print(self.encoder)
        print(self.decoder)
        print("Training starts ! ")

        # The loss function is the one configured on the encoder's NeuralN.
        loss_fn = self.encoder.get_loss()
        optimizer = self.get_optimizer()
        n_samples = len(train_loader.dataset)
        t_loss=[]
        for _ in range(self.epochs):
            self.train()
            train_loss = 0
            for X, _y in train_loader:

                output = self(X)
                loss = loss_fn(output, X)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Weight by batch size so the epoch value is a per-sample mean.
                train_loss += loss.item() * X.size(0)

            t_loss.append(train_loss/n_samples)

        return t_loss
        #https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html


    def test_model_ae(self, test_loader):
        """Evaluate the reconstruction loss on ``test_loader``.

        Args:
            test_loader: Iterable of ``(X, y)`` batches exposing ``.dataset``;
                ``y`` is ignored.

        Returns:
            The loss averaged over all samples in ``test_loader.dataset``
            (same normalisation as ``train_model_ae``).
        """
        print("Testing starts ! ")
        loss_fn = self.encoder.get_loss()
        self.eval()

        with torch.inference_mode():

            test_loss = 0
            for X, _y in test_loader:
                output=self(X)
                test_loss += loss_fn(output, X).item()*X.size(0)

        # The accumulator is weighted by batch size, so normalise by the
        # number of samples -- not by the number of batches (len(test_loader)).
        test_loss = test_loss/len(test_loader.dataset)

        print(f"Test error: {test_loss} ")
        return test_loss

    def reconstruction_error(self, test_loader):
        """Compute the mean squared reconstruction error of every sample.

        The squared error is averaged over dim 1, so 2-D ``(batch, features)``
        batches yield one error value per sample.

        Args:
            test_loader: Iterable of ``(X, y)`` tensor batches.

        Returns:
            Tuple ``(loss_per_sample, labels)`` of NumPy arrays built by
            stacking the per-batch errors and the per-batch ``y`` tensors.
        """

        self.eval()

        with torch.inference_mode(): #https://medium.com/@dhruvi.kothari.in/unveiling-outliers-in-data-using-autoencoders-a-deep-dive-213f46854031
            loss_per_sample = []
            labels = []
            #https://www.kaggle.com/code/deepaksurana/fraud-detection-using-autoencoders-in-keras

            for X, y in test_loader:
                output = self(X)
                loss_samp = torch.mean((output - X)**2, dim = 1)

                # .cpu() is a no-op on CPU tensors and makes .numpy() valid
                # when the batches live on an accelerator.
                loss_per_sample.append(loss_samp.cpu().numpy())
                labels.append(y.cpu().numpy())
            loss_per_sample = np.hstack(loss_per_sample)
            labels = np.hstack(labels)#https://stackoverflow.com/questions/61688882/how-is-numpy-stack-different-from-numpy-v-stack-and-h-stack

        return loss_per_sample, labels


    def analysis(self,test_loader):
        """Plot the ROC curve and the confusion matrix at ``self.threshold_``.

        The reconstruction error is used as the anomaly score; samples whose
        error is strictly greater than ``self.threshold_`` are predicted as
        class 1.

        NOTE(review): ``plt`` is not imported in this notebook's import cell;
        it is expected to already be in the namespace (presumably
        ``matplotlib.pyplot`` provided by ``neuralnetwork.ipynb`` -- confirm).

        Args:
            test_loader: Iterable of ``(X, y)`` batches; ``y`` holds the true labels.

        Raises:
            ValueError: If ``self.threshold_`` is ``None`` (raised after the
                ROC curve has been shown, since the ROC needs no threshold).
        """

        error, true_label = self.reconstruction_error(test_loader)

        fpr, tpr, thresholds = metrics.roc_curve(true_label, error)
        auc_ = metrics.auc(fpr, tpr)

        plt.figure(figsize=(10, 8))
        # Surface the AUC in the legend instead of silently discarding it.
        plt.plot(fpr, tpr, label=f"AUC = {auc_:.3f}")
        plt.plot([0,1], [0,1], label="Chance")
        plt.title("ROC")
        plt.ylabel("TPR")
        plt.xlabel("FPR")
        plt.legend()
        plt.show()

        if self.threshold_ is None:
            # Previously surfaced as an opaque TypeError from `e > None`.
            raise ValueError("threshold_ must be set to compute the confusion matrix.")

        predictions = (np.asarray(error) > self.threshold_).astype(int)
        # Use the already-imported sklearn `metrics` module; the bare name
        # `confusion_matrix` is never imported in this notebook.
        cm = metrics.confusion_matrix(true_label, predictions)

        plt.figure(figsize=(10,8))
        sns.heatmap(cm, annot=True, fmt="d")
        plt.title("Confusion Matrix")
        plt.ylabel("Actual Class")
        plt.xlabel("Predicted Class")

        plt.show()
                      