# Autoencoders

### Import the libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

### Data Acquisition

In [2]:
# Location of the ECG dataset CSV. Fetched over HTTPS so the download cannot be
# tampered with in transit (the original used plain HTTP for the same host/path).
data = 'https://storage.googleapis.com/download.tensorflow.org/data/ecg.csv'

In [3]:
# header=None tells pandas the file has no header row: the first record is kept
# as data and the columns are labelled with integers.
df = pd.read_csv(data, header=None)

In [4]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,131,132,133,134,135,136,137,138,139,140
0,-0.112522,-2.827204,-3.773897,-4.349751,-4.376041,-3.474986,-2.181408,-1.818287,-1.250522,-0.477492,...,0.792168,0.933541,0.796958,0.578621,0.25774,0.228077,0.123431,0.925286,0.193137,1.0
1,-1.100878,-3.99684,-4.285843,-4.506579,-4.022377,-3.234368,-1.566126,-0.992258,-0.75468,0.042321,...,0.538356,0.656881,0.78749,0.724046,0.555784,0.476333,0.77382,1.119621,-1.43625,1.0
2,-0.567088,-2.59345,-3.87423,-4.584095,-4.187449,-3.151462,-1.74294,-1.490658,-1.18358,-0.394229,...,0.886073,0.531452,0.311377,-0.021919,-0.713683,-0.532197,0.321097,0.904227,-0.421797,1.0
3,0.490473,-1.914407,-3.616364,-4.318823,-4.268016,-3.88111,-2.99328,-1.671131,-1.333884,-0.965629,...,0.350816,0.499111,0.600345,0.842069,0.952074,0.990133,1.086798,1.403011,-0.383564,1.0
4,0.800232,-0.874252,-2.384761,-3.973292,-4.338224,-3.802422,-2.53451,-1.783423,-1.59445,-0.753199,...,1.148884,0.958434,1.059025,1.371682,1.277392,0.960304,0.97102,1.614392,1.421456,1.0


In [5]:
df.shape

(4998, 141)

## Data Preprocessing

### Separate the last column of the dataframe, which is the target, from the features

In [6]:
# Feature matrix: every column of df except the label column (140).
X = df.drop(columns=140)

In [7]:
# Column 140 is the last column of df and holds the target labels.
y = df[140]

In [8]:
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,130,131,132,133,134,135,136,137,138,139
0,-0.112522,-2.827204,-3.773897,-4.349751,-4.376041,-3.474986,-2.181408,-1.818287,-1.250522,-0.477492,...,0.160348,0.792168,0.933541,0.796958,0.578621,0.25774,0.228077,0.123431,0.925286,0.193137
1,-1.100878,-3.99684,-4.285843,-4.506579,-4.022377,-3.234368,-1.566126,-0.992258,-0.75468,0.042321,...,0.560327,0.538356,0.656881,0.78749,0.724046,0.555784,0.476333,0.77382,1.119621,-1.43625
2,-0.567088,-2.59345,-3.87423,-4.584095,-4.187449,-3.151462,-1.74294,-1.490658,-1.18358,-0.394229,...,1.284825,0.886073,0.531452,0.311377,-0.021919,-0.713683,-0.532197,0.321097,0.904227,-0.421797
3,0.490473,-1.914407,-3.616364,-4.318823,-4.268016,-3.88111,-2.99328,-1.671131,-1.333884,-0.965629,...,0.491173,0.350816,0.499111,0.600345,0.842069,0.952074,0.990133,1.086798,1.403011,-0.383564
4,0.800232,-0.874252,-2.384761,-3.973292,-4.338224,-3.802422,-2.53451,-1.783423,-1.59445,-0.753199,...,0.966606,1.148884,0.958434,1.059025,1.371682,1.277392,0.960304,0.97102,1.614392,1.421456


In [9]:
y.head()

0    1.0
1    1.0
2    1.0
3    1.0
4    1.0
Name: 140, dtype: float64

### Split data into testing and training sets

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
# Hold out 20% of the rows for testing. The seed is fixed so the split (and
# everything trained on it) is reproducible across Restart & Run All; without
# random_state every run draws a different partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [12]:
# Index labels of the training rows whose target equals 1.
train_index = y_train.loc[y_train.eq(1)].index

In [13]:
# Restrict the training features to the rows whose label equals 1; only this
# subset is used to fit the scaler and the autoencoder.
# NOTE(review): presumably label 1 marks the "normal" class - confirm against
# the dataset documentation.
train_data = X_train.loc[train_index]

In [14]:
train_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,130,131,132,133,134,135,136,137,138,139
1812,-0.544707,-2.718413,-3.694501,-4.469415,-4.099968,-3.553914,-2.364821,-1.540491,-1.360389,-0.915743,...,0.995894,1.039285,0.968414,0.934525,0.915821,0.97425,1.330332,1.392033,0.812976,-1.23899
1169,-1.690098,-3.327908,-3.780237,-4.723238,-4.490969,-3.441501,-1.852904,-1.253405,-0.680921,0.154727,...,0.679656,1.80393,2.299171,2.613399,2.815431,2.100823,0.606403,-1.034268,-1.472465,-1.92149
727,0.514812,-0.943819,-2.726276,-3.697543,-3.594244,-2.955086,-1.333834,-0.278983,-0.309078,0.411023,...,0.143624,0.654506,0.345206,-0.131598,-0.787399,-1.775,-2.61167,-3.299898,-2.566351,-3.058445
189,-1.752227,-3.048471,-3.298518,-3.957783,-4.001944,-3.662148,-2.718075,-1.817975,-1.334923,-0.954247,...,1.763348,1.547471,1.246141,0.805574,0.304868,-0.406971,-0.771673,-1.116807,-0.711234,-0.722253
1839,-0.289117,-2.902968,-3.86349,-4.354358,-3.805222,-2.597311,-1.965614,-1.611443,-0.825002,-0.295307,...,1.174893,1.290417,1.119727,0.85116,0.522584,0.277179,0.516915,1.124296,0.83568,-1.758655


### Scale the values to the range 0 to 1

In [15]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scaler targeting the range [0, 1]; the per-feature min/max are learned
# from whatever data it is fit on.
scaler = MinMaxScaler(feature_range=(0, 1))

In [16]:
# Fit the scaler on the label-1 training subset and scale that subset in one step.
X_train_scaled = scaler.fit_transform(train_data.copy())

In [17]:
# Apply the min/max already learned from the training subset. Calling
# fit_transform here would re-fit the scaler on the test set, which puts train
# and test on different scales, leaks test statistics into preprocessing, and
# overwrites the fitted state of `scaler`. Test values may therefore fall
# slightly outside [0, 1].
X_test_scaled = scaler.transform(X_test.copy())

## Create autoencoder model

### Import functions

In [18]:
from tensorflow.keras import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.losses import MeanSquaredLogarithmicError

### Create an autoencoder model

In [19]:
class Autoencoder(Model):
    """Fully connected autoencoder with a symmetric encoder/decoder pair.

    The encoder narrows the input through Dense layers of 64, 32 and 16 units
    down to ``code_size`` units; the decoder widens it back through 16, 32 and
    64 units to ``output_units``. Every hidden Dense layer uses ReLU and is
    followed by Dropout(0.1); the final decoder layer uses a sigmoid.

    Args:
        output_units: Number of units in the decoder's last layer.
        code_size: Number of units in the encoder's last (bottleneck) layer.
    """

    def __init__(self, output_units, code_size=8):
        super().__init__()

        # Encoder: 64 -> 32 -> 16 (each followed by dropout), then the code layer.
        encoder_layers = []
        for width in (64, 32, 16):
            encoder_layers.append(Dense(width, activation='relu'))
            encoder_layers.append(Dropout(0.1))
        encoder_layers.append(Dense(code_size, activation='relu'))
        self.encoder = Sequential(encoder_layers)

        # Decoder: mirror image of the encoder, ending in a sigmoid output layer.
        decoder_layers = []
        for width in (16, 32, 64):
            decoder_layers.append(Dense(width, activation='relu'))
            decoder_layers.append(Dropout(0.1))
        decoder_layers.append(Dense(output_units, activation='sigmoid'))
        self.decoder = Sequential(decoder_layers)

    def call(self, inputs):
        """Encode ``inputs`` and return the decoder's reconstruction."""
        return self.decoder(self.encoder(inputs))

In [20]:
X_train_scaled.shape

(2338, 140)

### Initialise Model

In [21]:
# The reconstruction must have as many units as the scaled input has columns.
model = Autoencoder(output_units=X_train_scaled.shape[1])

### Compile Model

In [26]:
# Reconstruction is a regression onto continuous values, so the classification
# metric 'accuracy' is not meaningful here and only produces a misleading
# number; track MSE alongside the MSLE loss instead.
model.compile(optimizer='adam', loss='msle', metrics=['mse'])

### Fit the model to the data

In [27]:
# Train the autoencoder to reproduce its own input: the scaled training data is
# passed as both features and targets, and the scaled test set is used the same
# way for validation.
model.fit(
    x=X_train_scaled,
    y=X_train_scaled,
    epochs=20,
    batch_size=512,
    validation_data=(X_test_scaled, X_test_scaled),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f2ce034dca0>