## Data Preprocessing for Keras

In [1]:
import numpy as np
from random import randint 
from sklearn.preprocessing import MinMaxScaler

In [2]:
train_labels = []
train_samples = []

- An experiment drug was tested on individuals from ages 13 to 100
- the trial had 2100 participants 
- 95% of participants older than 65 experienced side effects
- 95% of participants younger than 65 experienced no side effects

### Generate Random Data

In [3]:
for i in range(1050):
    random_younger = randint(13,64)
    train_samples.append(random_younger)
    train_labels.append(0)
    
    random_older = randint(65,100)
    train_samples.append(random_older)
    train_labels.append(1)

In [4]:
len(train_samples)

2100

In [5]:
len(train_labels)

2100

In [6]:
print(type(train_labels))
print(type(train_samples))

<class 'list'>
<class 'list'>


#### Keras expects numpy arrays rather than python lists so the lists are transformed

In [7]:
train_samples = np.array(train_samples)
train_labels = np.array(train_labels)

In [8]:
print(type(train_labels))
print(type(train_samples))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


#### The ages from 13 to 100 are scalled down using MinMaxScaler to be between 0 and 1

In [9]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_train_samples = scaler.fit_transform((train_samples).reshape(-1,1))
# Keras doesnt accept a 1D array thats why the data is reshaped to (-1,1)

In [10]:
for i in scaled_train_samples:
    print(i)

[0.37931034]
[0.6091954]
[0.10344828]
[0.97701149]
[0.33333333]
[0.82758621]
[0.04597701]
[0.64367816]
[0.05747126]
[0.83908046]
[0.22988506]
[0.68965517]
[0.31034483]
[0.71264368]
[0.36781609]
[0.97701149]
[0.]
[0.89655172]
[0.51724138]
[0.97701149]
[0.04597701]
[0.74712644]
[0.14942529]
[0.59770115]
[0.]
[0.91954023]
[0.57471264]
[0.95402299]
[0.47126437]
[0.82758621]
[0.57471264]
[0.96551724]
[0.06896552]
[0.74712644]
[0.3908046]
[0.79310345]
[0.47126437]
[0.96551724]
[0.56321839]
[0.85057471]
[0.]
[0.96551724]
[0.27586207]
[0.88505747]
[0.33333333]
[0.98850575]
[0.3908046]
[0.86206897]
[0.34482759]
[0.88505747]
[0.13793103]
[0.96551724]
[0.36781609]
[0.68965517]
[0.1954023]
[0.59770115]
[0.47126437]
[0.96551724]
[0.28735632]
[0.7816092]
[0.51724138]
[0.70114943]
[0.50574713]
[0.94252874]
[0.22988506]
[0.66666667]
[0.25287356]
[0.65517241]
[0.18390805]
[0.72413793]
[0.03448276]
[0.63218391]
[0.50574713]
[0.90804598]
[0.29885057]
[0.75862069]
[0.01149425]
[0.81609195]
[0.05747126]
[0

[0.28735632]
[1.]
[0.2183908]
[0.79310345]
[0.]
[0.96551724]
[0.40229885]
[0.71264368]
[0.18390805]
[0.62068966]
[0.20689655]
[0.64367816]
[0.56321839]
[0.89655172]
[0.31034483]
[0.74712644]
[0.2183908]
[0.7816092]
[0.57471264]
[0.86206897]
[0.2183908]
[0.95402299]
[0.01149425]
[0.91954023]
[0.24137931]
[0.91954023]
[0.5862069]
[0.70114943]
[0.48275862]
[0.6091954]
[0.26436782]
[1.]
[0.37931034]
[0.98850575]
[0.34482759]
[0.7816092]
[0.49425287]
[0.73563218]
[0.49425287]
[0.8045977]
[0.1954023]
[0.59770115]
[0.06896552]
[0.70114943]
[0.51724138]
[0.93103448]
[0.55172414]
[0.72413793]
[0.52873563]
[0.83908046]
[0.50574713]
[0.59770115]
[0.05747126]
[0.93103448]
[0.18390805]
[0.95402299]
[0.01149425]
[0.7816092]
[0.31034483]
[0.83908046]
[0.03448276]
[0.63218391]
[0.50574713]
[0.67816092]
[0.36781609]
[0.71264368]
[0.48275862]
[0.82758621]
[0.45977011]
[0.90804598]
[0.50574713]
[0.91954023]
[0.1954023]
[0.59770115]
[0.34482759]
[0.96551724]
[0.03448276]
[0.65517241]
[0.3908046]
[0.977011

[0.95402299]
[0.25287356]
[0.72413793]
[0.17241379]
[0.71264368]
[0.32183908]
[0.97701149]
[0.48275862]
[0.88505747]
[0.48275862]
[0.98850575]
[0.14942529]
[0.94252874]
[0.54022989]
[0.75862069]
[0.09195402]
[0.63218391]
[0.11494253]
[0.90804598]
[0.27586207]
[0.96551724]
[0.12643678]
[0.90804598]
[0.35632184]
[0.79310345]
[0.09195402]
[0.72413793]
[0.05747126]
[0.88505747]
[0.14942529]
[0.89655172]
[0.3908046]
[0.70114943]
[0.04597701]
[0.89655172]
[0.04597701]
[0.63218391]
[0.17241379]
[0.66666667]
[0.54022989]
[0.85057471]
[0.48275862]
[0.75862069]
[0.26436782]
[0.85057471]
[0.3908046]
[0.83908046]
[0.36781609]
[0.95402299]
[0.42528736]
[0.82758621]
[0.17241379]
[0.65517241]
[0.20689655]
[0.86206897]
[0.56321839]
[0.97701149]
[0.45977011]
[0.87356322]
[0.16091954]
[0.97701149]
[0.12643678]
[0.90804598]
[0.4137931]
[0.88505747]
[0.17241379]
[0.81609195]
[0.05747126]
[0.64367816]
[0.52873563]
[0.90804598]
[0.2183908]
[0.89655172]
[0.08045977]
[0.91954023]
[0.25287356]
[0.66666667]
[0.

In [11]:
scaled_train_samples.shape

(2100, 1)

In [14]:
import tensorflow.keras
from tensorflow.keras import backend as k
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Activation
from tensorflow.python.keras.layers.core import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy


In [15]:
model = Sequential ([
    Dense(16, input_shape =(1,), activation = 'relu'),
    Dense(32, activation = 'relu'), 
    Dense(2, activation = 'softmax')
    
])

In [16]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 16)                32        
_________________________________________________________________
dense_1 (Dense)              (None, 32)                544       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 66        
Total params: 642
Trainable params: 642
Non-trainable params: 0
_________________________________________________________________


In [17]:
model.compile(Adam(lr = .0001), loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

In [18]:
model.fit(scaled_train_samples, train_labels, batch_size=10, epochs=20, verbose = 2)

#batch size is how many items do we want our model to look at a time rather than 1 at a time
#epochs is how may training runs do we want


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 2100 samples
Epoch 1/20
2100/2100 - 3s - loss: 0.6953 - accuracy: 0.5662
Epoch 2/20
2100/2100 - 0s - loss: 0.6552 - accuracy: 0.7324
Epoch 3/20
2100/2100 - 0s - loss: 0.6211 - accuracy: 0.7957
Epoch 4/20
2100/2100 - 1s - loss: 0.5832 - accuracy: 0.8505
Epoch 5/20
2100/2100 - 0s - loss: 0.5443 - accuracy: 0.8795
Epoch 6/20
2100/2100 - 0s - loss: 0.5053 - accuracy: 0.9014
Epoch 7/20
2100/2100 - 1s - loss: 0.4650 - accuracy: 0.9148
Epoch 8/20
2100/2100 - 1s - loss: 0.4242 - accuracy: 0.9310
Epoch 9/20
2100/2100 - 1s - loss: 0.3846 - accuracy: 0.9376
Epoch 10/20
2100/2100 - 0s - loss: 0.3475 - accuracy: 0.9529
Epoch 11/20
2100/2100 - 1s - loss: 0.3138 - accuracy: 0.9514
Epoch 12/20
2100/2100 - 0s - loss: 0.2840 - accuracy: 0.9629
Epoch 13/20
2100/2100 - 0s - loss: 0.2574 - accuracy: 0.9652
Epoch 14/20
2100/2100 - 0s - loss: 0.2326 - accuracy: 0.9676
Epoch 15/20
2100/2100 - 0s - loss: 0.21

<tensorflow.python.keras.callbacks.History at 0x2af3093cba8>