In [28]:
import random
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing

In [2]:
def makeDNN(numOfLayers, numOfNeurons, activationFunc):
    """Build an uncompiled Keras ``Sequential`` network with a sigmoid output.

    Layer order: ``Flatten``, then ``numOfLayers`` repetitions of
    ``Dense(numOfNeurons, activation=activationFunc)`` followed by
    ``BatchNormalization``, and finally a single-unit ``Dense`` layer with a
    sigmoid activation.

    Args:
        numOfLayers: Number of hidden Dense + BatchNormalization pairs.
        numOfNeurons: Units in each hidden Dense layer.
        activationFunc: Activation passed to each hidden Dense layer.

    Returns:
        The assembled ``tf.keras.models.Sequential`` model (not compiled).
    """
    keras_layers = tf.keras.layers

    # Layers are instantiated in the same order they appear in the network.
    stack = [keras_layers.Flatten()]
    for _ in range(numOfLayers):
        stack.append(keras_layers.Dense(numOfNeurons, activation=activationFunc))
        stack.append(keras_layers.BatchNormalization())
    stack.append(keras_layers.Dense(1, activation='sigmoid'))

    return tf.keras.models.Sequential(stack)

In [35]:
def embedAttribute(attribute, seed=None):
    """Project a one-hot encoded attribute through a random weight matrix.

    For an input with ``n`` columns, an ``n x (n - 1)`` matrix of weights drawn
    uniformly from ``[0, 10)`` is generated and the input is right-multiplied
    by it, so every row of the result has ``n - 1`` values.

    Args:
        attribute: 2-D array-like of shape ``(rows, n)``; in this notebook the
            output of ``tf.keras.utils.to_categorical``.
        seed: Optional seed. When given, the weights come from a private
            ``random.Random(seed)`` so the result is reproducible and the
            global ``random`` state is left untouched. When ``None`` (default)
            the module-level ``random`` generator is used, as before.

    Returns:
        ``attribute @ weights``, with ``n - 1`` columns.
    """
    # Read the width from the shape rather than from the first row, so inputs
    # with zero rows (and DataFrames, where [0] selects a column) work too.
    numOfCategories = np.shape(attribute)[1]
    embeddedWidth = max(numOfCategories - 1, 0)

    rng = random if seed is None else random.Random(seed)
    weights = np.array(
        [[10 * rng.random() for _ in range(embeddedWidth)] for _ in range(numOfCategories)]
    ).reshape(numOfCategories, embeddedWidth)  # keeps the matrix 2-D even when it is empty

    return attribute @ weights

In [4]:
from sklearn.datasets import load_breast_cancer

In [5]:
# Load the breast-cancer dataset as a (features, labels) pair of arrays.
data, target = load_breast_cancer(return_X_y=True)

In [6]:
target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [7]:
data

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [8]:
# Fit a StandardScaler on the features and replace `data` with its scaled version;
# `std_scale` keeps the fitted scaler.
std_scale = preprocessing.StandardScaler()
data = std_scale.fit_transform(data)

In [9]:
data

array([[ 1.09706398, -2.07333501,  1.26993369, ...,  2.29607613,
         2.75062224,  1.93701461],
       [ 1.82982061, -0.35363241,  1.68595471, ...,  1.0870843 ,
        -0.24388967,  0.28118999],
       [ 1.57988811,  0.45618695,  1.56650313, ...,  1.95500035,
         1.152255  ,  0.20139121],
       ...,
       [ 0.70228425,  2.0455738 ,  0.67267578, ...,  0.41406869,
        -1.10454895, -0.31840916],
       [ 1.83834103,  2.33645719,  1.98252415, ...,  2.28998549,
         1.91908301,  2.21963528],
       [-1.80840125,  1.22179204, -1.81438851, ..., -1.74506282,
        -0.04813821, -0.75120669]])

In [36]:
# Toy input: five rows with three categorical columns and one continuous column.
# Mixing str and int in a single np.array makes NumPy store every entry as a string.
categoricalAttr = np.array([
    ['dog', 2, 'man'],
    ['cat', 1, 'car'],
    ['hen', 0, 'man'],
    ['cat', 3, 'pole'],
    ['hen', 2, 'pole'],
])
# One value per row, shaped as a (5, 1) column.
continuousAttr = np.array([0.2, 0.53, 0.19, 0.98, 1.0]).reshape(-1, 1)

In [37]:
# Wrap both arrays in DataFrames (default integer column labels), keeping the same names.
categoricalAttr, continuousAttr = map(pd.DataFrame, (categoricalAttr, continuousAttr))

In [38]:
len(categoricalAttr.columns)

3

In [39]:
# Start from an empty frame for the encoded categorical features.
encodedInput = pd.DataFrame()

In [40]:
# Encode each categorical column: label-encode -> one-hot -> random projection
# (embedAttribute), then place the per-column blocks side by side.
# The blocks are gathered in a list and concatenated once, and encodedInput is
# rebuilt from scratch here, so re-running this cell never appends duplicate columns.
embeddedBlocks = []
# Column labels continue after the continuous columns so they stay unique in inputData.
nextColumnLabel = len(continuousAttr.columns)
for i in range(len(categoricalAttr.columns)):
    labelEncoder = preprocessing.LabelEncoder()
    labelTemp = labelEncoder.fit_transform(categoricalAttr.iloc[:, i])
    print(labelTemp)
    labelTemp = labelTemp.reshape(len(labelTemp), 1)
    # Embedding the One Hot Encoded feature
    sparseTemp = embedAttribute(tf.keras.utils.to_categorical(labelTemp))
    blockWidth = sparseTemp.shape[1]
    sparseTemp = pd.DataFrame(
        sparseTemp,
        columns=list(range(nextColumnLabel, nextColumnLabel + blockWidth)),
    )
    print(sparseTemp)
    embeddedBlocks.append(sparseTemp)
    nextColumnLabel += blockWidth
# pd.concat raises on an empty list, so fall back to an empty frame.
encodedInput = pd.concat(embeddedBlocks, axis=1) if embeddedBlocks else pd.DataFrame()

[1 0 2 0 2]
          1         2
0  3.512079  9.246862
1  1.791436  6.634177
2  0.467457  1.650629
3  1.791436  6.634177
4  0.467457  1.650629
[2 1 0 3 2]
          3         4         5
0  3.222484  7.812146  6.921197
1  3.602083  2.474481  2.494482
2  4.191789  4.575484  8.025688
3  5.294766  0.609639  9.422742
4  3.222484  7.812146  6.921197
[1 0 1 2 2]
          6         7
0  9.171637  9.836128
1  3.152830  5.081098
2  9.171637  9.836128
3  0.360579  3.815429
4  0.360579  3.815429


In [41]:
encodedInput

Unnamed: 0,1,2,3,4,5,6,7
0,3.512079,9.246862,3.222484,7.812146,6.921197,9.171637,9.836128
1,1.791436,6.634177,3.602083,2.474481,2.494482,3.15283,5.081098
2,0.467457,1.650629,4.191789,4.575484,8.025688,9.171637,9.836128
3,1.791436,6.634177,5.294766,0.609639,9.422742,0.360579,3.815429
4,0.467457,1.650629,3.222484,7.812146,6.921197,0.360579,3.815429


In [42]:
# Final feature table: continuous columns first, then the embedded categorical columns.
inputData = pd.concat(objs=[continuousAttr, encodedInput], axis=1)

In [43]:
inputData

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.2,3.512079,9.246862,3.222484,7.812146,6.921197,9.171637,9.836128
1,0.53,1.791436,6.634177,3.602083,2.474481,2.494482,3.15283,5.081098
2,0.19,0.467457,1.650629,4.191789,4.575484,8.025688,9.171637,9.836128
3,0.98,1.791436,6.634177,5.294766,0.609639,9.422742,0.360579,3.815429
4,1.0,0.467457,1.650629,3.222484,7.812146,6.921197,0.360579,3.815429


In [18]:
# Three hidden layers of 16 ReLU units each, followed by the sigmoid output.
optimalDNN = makeDNN(numOfLayers=3, numOfNeurons=16, activationFunc='relu')

In [19]:
# Binary classification setup: Adam optimizer, cross-entropy loss, accuracy metric.
optimalDNN.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [20]:
# Train on the standardized breast-cancer features for six epochs.
optimalDNN.fit(x=data, y=target, epochs=6)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7f12097b6898>