In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)

## Reading Data

In [7]:
data = pd.read_csv('data.csv')

data.drop(columns=['Unnamed: 32'], inplace=True)
data.head()


Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


## Data Preparation

#### Handling Missing Values

In [8]:
print(data.isnull().sum())

id                         0
diagnosis                  0
radius_mean                0
texture_mean               0
perimeter_mean             0
area_mean                  0
smoothness_mean            0
compactness_mean           0
concavity_mean             0
concave points_mean        0
symmetry_mean              0
fractal_dimension_mean     0
radius_se                  0
texture_se                 0
perimeter_se               0
area_se                    0
smoothness_se              0
compactness_se             0
concavity_se               0
concave points_se          0
symmetry_se                0
fractal_dimension_se       0
radius_worst               0
texture_worst              0
perimeter_worst            0
area_worst                 0
smoothness_worst           0
compactness_worst          0
concavity_worst            0
concave points_worst       0
symmetry_worst             0
fractal_dimension_worst    0
dtype: int64


There is no null values in the dataset.

#### Encoding Categorical Variables

In [9]:
data = pd.get_dummies(data, drop_first=True)
data.head()

Unnamed: 0,id,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,diagnosis_M
0,842302,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,True
1,842517,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,True
2,84300903,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,True
3,84348301,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,True
4,84358402,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,True


#### Standardization

In [10]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)
data_scaled = pd.DataFrame(data_scaled, columns=data.columns)

data_scaled.head()

Unnamed: 0,id,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,diagnosis_M
0,-0.236405,1.097064,-2.073335,1.269934,0.984375,1.568466,3.283515,2.652874,2.532475,2.217515,...,-1.359293,2.303601,2.001237,1.307686,2.616665,2.109526,2.296076,2.750622,1.937015,1.297676
1,-0.236403,1.829821,-0.353632,1.685955,1.908708,-0.826962,-0.487072,-0.023846,0.548144,0.001392,...,-0.369203,1.535126,1.890489,-0.375612,-0.430444,-0.146749,1.087084,-0.24389,0.28119,1.297676
2,0.431741,1.579888,0.456187,1.566503,1.558884,0.94221,1.052926,1.363478,2.037231,0.939685,...,-0.023974,1.347475,1.456285,0.527407,1.082932,0.854974,1.955,1.152255,0.201391,1.297676
3,0.432121,-0.768909,0.253732,-0.592687,-0.764464,3.283553,3.402909,1.915897,1.451707,2.867383,...,0.133984,-0.249939,-0.550021,3.394275,3.893397,1.989588,2.175786,6.046041,4.93501,1.297676
4,0.432201,1.750297,-1.151816,1.776573,1.826229,0.280372,0.53934,1.371011,1.428493,-0.00956,...,-1.46677,1.338539,1.220724,0.220556,-0.313395,0.613179,0.729259,-0.868353,-0.3971,1.297676


## Activation functions

In [11]:
def relu(x):
    return np.maximum(0, x)
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def tanh(x):
    return np.tanh(x)

## Derivatives of activation functions

In [12]:
def relu_der(x):
    return (x > 0).astype(float)

def sigmoid_der(x):
    s = sigmoid(x)
    return s * (1 - s)

def tanh_der(x):
    return 1 - np.tanh(x) ** 2

## Dense Layer Implementation

In [17]:
class DenseLayer:
    def __init__(self, input_dim, output_dim, activation):
        self.W = np.random.randn(input_dim, output_dim) 
        self.b = np.random.randn(1, output_dim)
        if activation == 'tanh':
            self.activation = tanh
            self.activation_der = tanh_der
        elif activation == 'sigmoid':
            self.activation = sigmoid
            self.activation_der = sigmoid_der
        elif activation == 'relu':
            self.activation = relu
            self.activation_der = relu_der

    def forward(self, X):
        self.X = X
        self.Z = X @ self.W + self.b

        self.A = self.activation(self.Z)

        return self.A

    def backward(self, dA, l2_lambda):
        dz = self.activation_der(self.Z)
        dZ = dA * dz

        dW = self.X.T @ dZ + l2_lambda * self.W
        db = np.sum(dZ, axis=0, keepdims=True)
        dX = dZ @ self.W.T

        return dX, dW, db


## AutoEncoder Implementation

In [None]:
class Autoencoder:
    def __init__(self, layers):
        self.layers = layers

    def forward(self, X):
        for layer in self.layers:
            X = layer.forward(X)
        return X

    def backward(self, dLoss, l2_lambda):
        # reverse to loop through layers from the last 
        for layer in reversed(self.layers):
            dLoss, dW, db = layer.backward(dLoss, l2_lambda)
            layer.dW = dW
            layer.db = db
            
    def mse_loss(self, y_true, y_pred):
        return np.mean((y_true - y_pred) ** 2) / 2
    
    def create_batches(self, X, batch_size):
        batches = []
        for i in range(0, len(X), batch_size):
            batches.append(X[i:i + batch_size])
        return batches

    def train(self, X, epochs, batch_size, learning_rate, l2_lambda):
        losses = []

        for epoch in range(epochs):
            epoch_loss = 0
            
            np.random.shuffle(X)
            
            for batch in self.create_batches(X, batch_size):
                y_pred = self.forward(batch)

                loss = self.mse_loss(batch, y_pred)
                epoch_loss += loss

                dLoss = (y_pred - batch) / batch.shape[0]

                self.backward(dLoss, l2_lambda)

                # Update weights
                for layer in self.layers:
                    layer.W -= learning_rate * layer.dW
                    layer.b -= learning_rate * layer.db

            epoch_loss /= (len(X) / batch_size)
            losses.append(epoch_loss)

            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Loss: {epoch_loss}")

        return losses

## Building the AutoEncoder Model

In [19]:
bottle_neck = 10  
input_dim = data_scaled.shape[1]

layers = [
    DenseLayer(input_dim, 128, 'relu'),
    DenseLayer(128, 64, 'relu'),           
    DenseLayer(64, 32, 'tanh'),       
    
    DenseLayer(32, bottle_neck, 'tanh'),    

    
    DenseLayer(bottle_neck, 32, 'tanh'),    
    DenseLayer(32, 64, 'relu'),            
    DenseLayer(64, 128, 'relu'),           
    DenseLayer(128, input_dim, 'sigmoid')  
]

autoencoder = Autoencoder(layers)

In [22]:
autoencoder.train(data_scaled.values, epochs=1000, batch_size=32, learning_rate=0.01, l2_lambda=0.001)

Epoch 0, Loss: 0.609436369986347
Epoch 10, Loss: 0.5755023335161414
Epoch 20, Loss: 0.570588389392122
Epoch 30, Loss: 0.5856295127111032
Epoch 40, Loss: 0.5664637111198484
Epoch 50, Loss: 0.5682145581040752
Epoch 60, Loss: 0.5617759147886232
Epoch 70, Loss: 0.5530840214431634
Epoch 80, Loss: 0.5470078044115676
Epoch 90, Loss: 0.5461183050113443
Epoch 100, Loss: 0.5484927640999923
Epoch 110, Loss: 0.5457210907700322
Epoch 120, Loss: 0.5440983605054244
Epoch 130, Loss: 0.5427960975172479
Epoch 140, Loss: 0.5420152677383658
Epoch 150, Loss: 0.5358350358259708
Epoch 160, Loss: 0.5436005118227849
Epoch 170, Loss: 0.530664080771339
Epoch 180, Loss: 0.5452187985246838
Epoch 190, Loss: 0.5432861032219392
Epoch 200, Loss: 0.5400587975636041
Epoch 210, Loss: 0.5417865340669986
Epoch 220, Loss: 0.5428008912083653
Epoch 230, Loss: 0.5369229282974685
Epoch 240, Loss: 0.5459696820089968
Epoch 250, Loss: 0.5318157754335894
Epoch 260, Loss: 0.5300355436730287
Epoch 270, Loss: 0.5305648891564018
Epoch 

[0.609436369986347,
 0.6135734697274255,
 0.6152388300677888,
 0.612980221429871,
 0.6158660396461678,
 0.6096307087794202,
 0.6076307684600221,
 0.5972443407429773,
 0.5941548701011714,
 0.5834811971090182,
 0.5755023335161414,
 0.573826808045881,
 0.5733229976514806,
 0.5728357957754326,
 0.5726862008606931,
 0.5717034015243703,
 0.5694029077058188,
 0.5714949965908285,
 0.573126601489124,
 0.5724908645975587,
 0.570588389392122,
 0.5699437031274379,
 0.5746104177765416,
 0.5681889991997168,
 0.5683737997281256,
 0.5663488373207753,
 0.5660293324488701,
 0.5679541658399275,
 0.5727593613045268,
 0.5732128876878382,
 0.5856295127111032,
 0.5953820600332589,
 0.5844761370068918,
 0.5754720041412243,
 0.5750559330847964,
 0.5769296504161417,
 0.5761852516248321,
 0.5757993484341175,
 0.5684907229560714,
 0.5672212419143998,
 0.5664637111198484,
 0.5651691404542563,
 0.564929715575119,
 0.5677032120381807,
 0.5674657286192641,
 0.5674935529291252,
 0.5682322512784308,
 0.5683666320097152