# Using a Multilayer 

We will now apply our knowledge of different layers to real data with using a multilayer neural network on the Low Birthweight dataset.

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.framework import ops

In [2]:
BATCH_SIZE = 100

In [63]:
def create_weight(shape, std=1, name='weight'):
    return tf.Variable(tf.random_normal(shape=shape, mean=0, stddev=std), name=name)

def create_bias(shape, std=1, name='bias'):
    return tf.Variable(tf.random_normal(shape=shape, stddev=std), name=name)

def fully_connected(inputs, weights, biases, name='fully_connected'):
    full = tf.add(tf.matmul(inputs, weights), biases)
    return tf.nn.relu(full, name=name)

### Downloading data

In [4]:
DATA_URL = 'https://www.umass.edu/statdata/statdata/data/lowbwt.dat'
DATA_FILE = 'remote_data/birthdata.txt'

In [5]:
%mkdir remote_data

mkdir: remote_data: File exists


In [6]:
!wget {DATA_URL} -O {DATA_FILE}

--2017-04-24 11:58:24--  https://www.umass.edu/statdata/statdata/data/lowbwt.dat
Resolving www.umass.edu... 128.119.8.148
Connecting to www.umass.edu|128.119.8.148|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 15053 (15K) [text/plain]
Saving to: ‘remote_data/birthdata.txt’


2017-04-24 11:58:25 (137 MB/s) - ‘remote_data/birthdata.txt’ saved [15053/15053]



### Loading data

In [7]:
widths = [4, 7, 7, 7, 8, 9, 7, 6, 6, 7, 8]
COLS_OF_INTEREST = ['AGE', 'LWT', 'RACE', 'SMOKE', 'PTL', 'HT', 'UI', 'FTV', 'BWT']

df = pd.read_fwf(DATA_FILE, widths=widths, skiprows=5, skipfooter=1, skip_blank_lines=True, usecols=COLS_OF_INTEREST)
df = df.drop(0)

In [8]:
print(df.shape)
df.head(3)

(189, 9)


Unnamed: 0,AGE,LWT,RACE,SMOKE,PTL,HT,UI,FTV,BWT
1,19.0,182.0,2.0,0.0,0.0,0.0,1.0,0.0,2523.0
2,33.0,155.0,3.0,0.0,0.0,0.0,0.0,3.0,2551.0
3,20.0,105.0,1.0,1.0,0.0,0.0,0.0,1.0,2557.0


### Normalizing data

In [9]:
df.fillna(df.mean(), inplace=True)
df.head(3)

Unnamed: 0,AGE,LWT,RACE,SMOKE,PTL,HT,UI,FTV,BWT
1,19.0,182.0,2.0,0.0,0.0,0.0,1.0,0.0,2523.0
2,33.0,155.0,3.0,0.0,0.0,0.0,0.0,3.0,2551.0
3,20.0,105.0,1.0,1.0,0.0,0.0,0.0,1.0,2557.0


### Splitting training and test sets

In [10]:
seed = 4
tf.set_random_seed(seed)
np.random.seed(seed)

In [11]:
N_SAMPLES = df.shape[0]
N_TRAIN = int(N_SAMPLES * 0.8)

print(N_SAMPLES)
print(N_TRAIN)

189
151


In [12]:
idx = np.arange(N_SAMPLES)
np.random.shuffle(idx)

idx_train = idx[: N_TRAIN]
idx_test = idx[N_TRAIN :]

df_train = df.iloc[idx_train]
df_test = df.iloc[idx_test]

In [13]:
print(df_train.shape)
df_train.head(3)

(151, 9)


Unnamed: 0,AGE,LWT,RACE,SMOKE,PTL,HT,UI,FTV,BWT
34,25.0,155.0,1.0,0.0,0.0,0.0,0.0,1.0,2977.0
83,30.0,110.0,3.0,0.0,0.0,0.0,0.0,0.0,3475.0
167,21.0,100.0,3.0,0.0,1.0,0.0,0.0,4.0,2301.0


In [14]:
print(df_test.shape)
df_test.head(3)

(38, 9)


Unnamed: 0,AGE,LWT,RACE,SMOKE,PTL,HT,UI,FTV,BWT
96,29.0,135.0,1.0,0.0,0.0,0.0,0.0,1.0,3651.0
99,19.0,147.0,1.0,1.0,0.0,0.0,0.0,0.0,3651.0
116,28.0,134.0,3.0,0.0,0.0,0.0,0.0,1.0,3941.0


### Splitting inputs and labels

In [15]:
COLS_INPUT = ['AGE', 'LWT', 'RACE', 'SMOKE', 'PTL', 'HT', 'UI', 'FTV']
COL_LABEL = ['BWT']

inputs_train = df_train[COLS_INPUT]
labels_train = df_train[COL_LABEL]
inputs_test = df_test[COLS_INPUT]
labels_test = df_test[COL_LABEL]

print(inputs_train.shape)
print(labels_train.shape)
print(inputs_test.shape)
print(labels_test.shape)

(151, 8)
(151, 1)
(38, 8)
(38, 1)


In [86]:
a = pd.DataFrame([1, 2, 3], columns=['Value'])
a

Unnamed: 0,Value
0,1
1,2
2,3


In [87]:
b = tf.constant([[1, 2, 3]])
b

<tf.Tensor 'Const_2:0' shape=(1, 3) dtype=int32>

In [88]:
ad = tf.placeholder(shape=[3, 1], dtype=tf.int32)
c = tf.matmul(ad, b)
c

<tf.Tensor 'MatMul_4:0' shape=(3, 3) dtype=int32>

In [89]:
sess.run(c, feed_dict={ad: a})

array([[1, 2, 3],
       [2, 4, 6],
       [3, 6, 9]], dtype=int32)

In [74]:
a = tf.matmul(X, w1)
a

<tf.Tensor 'MatMul:0' shape=(?, 25) dtype=float32>

In [75]:
sess.run(a, feed_dict={X: inputs_train})

array([[ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       ..., 
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan]], dtype=float32)

In [72]:
sess.run(w1)

array([[  8.98104286e+00,  -1.75578213e+00,   4.41987801e+00,
         -2.94943862e-02,   8.16234016e+00,   5.47929192e+00,
          1.11572790e+01,   4.76468474e-01,  -4.43702841e+00,
          4.07331020e-01,  -4.41468000e-01,  -3.21676040e+00,
         -2.85538530e+00,   2.47448659e+00,   2.76009750e+00,
          6.66295576e+00,  -1.39069881e+01,  -1.02546463e+01,
          1.12508574e+01,   1.24828711e+01,  -5.54175377e+00,
         -3.28265905e+00,   5.75678587e+00,   2.12327156e+01,
          4.32058525e+00],
       [  4.37598050e-01,  -1.25312948e+01,   8.13111687e+00,
          1.03819036e+01,   1.24020443e+01,  -3.05788374e+00,
          1.77261887e+01,   1.97146244e+01,   5.21626902e+00,
         -2.37826014e+00,   9.97630692e+00,   2.59630489e+00,
         -7.66364050e+00,  -1.26710300e+01,  -2.64937305e+00,
         -1.39346943e+01,  -8.33961391e+00,  -5.79609013e+00,
          6.62745953e-01,  -2.26647520e+00,   3.85621572e+00,
         -8.62159348e+00,   1.03137932e+01,

In [59]:
inputs_train

Unnamed: 0,AGE,LWT,RACE,SMOKE,PTL,HT,UI,FTV
34,25.0,155.0,1.0,0.0,0.0,0.0,0.0,1.0
83,30.0,110.0,3.0,0.0,0.0,0.0,0.0,0.0
167,21.0,100.0,3.0,0.0,1.0,0.0,0.0,4.0
142,19.0,91.0,1.0,1.0,2.0,0.0,1.0,0.0
48,19.0,132.0,3.0,0.0,0.0,0.0,0.0,0.0
12,19.0,150.0,3.0,0.0,0.0,0.0,0.0,1.0
2,33.0,155.0,3.0,0.0,0.0,0.0,0.0,3.0
129,24.0,116.0,1.0,0.0,0.0,0.0,0.0,1.0
120,16.0,95.0,3.0,0.0,0.0,0.0,0.0,1.0
113,29.0,130.0,1.0,1.0,0.0,0.0,0.0,2.0


### Resetting graph and creating session

In [64]:
ops.reset_default_graph()
sess = tf.Session()

### Initializing input placeholders

In [65]:
X = tf.placeholder(shape=[None, 8], dtype=tf.float32, name='X')
y = tf.placeholder(shape=[None, 1], dtype=tf.float32, name='y')

print(X)
print(y)

Tensor("X:0", shape=(?, 8), dtype=float32)
Tensor("y:0", shape=(?, 1), dtype=float32)


### Layer 1 (25 hidden nodes)

In [66]:
with tf.name_scope('layer_1'):
    w1 = create_weight(shape=[8, 25], std=10.0, name='w1')
    b1 = create_bias(shape=[25], std=10.0, name='b1')
    layer1 = fully_connected(inputs=X, weights=w1, biases=b1, name='layer1')
    
print(w1)
print(b1)
print(layer1)

Tensor("layer_1/w1/read:0", shape=(8, 25), dtype=float32)
Tensor("layer_1/b1/read:0", shape=(25,), dtype=float32)
Tensor("layer_1/layer1:0", shape=(?, 25), dtype=float32)


### Layer 2 (10 hidden nodes)

In [67]:
with tf.name_scope('layer_2'):
    w2 = create_weight(shape=[25, 10], std=10.0, name='w2')
    b2 = create_bias(shape=[10], std=10.0, name='b2')
    layer2 = fully_connected(inputs=layer1, weights=w2, biases=b2, name='layer2')
    
print(w2)
print(b2)
print(layer2)

Tensor("layer_2/w2/read:0", shape=(25, 10), dtype=float32)
Tensor("layer_2/b2/read:0", shape=(10,), dtype=float32)
Tensor("layer_2/layer2:0", shape=(?, 10), dtype=float32)


### Layer 3 (3 hidden nodes)

In [68]:
with tf.name_scope('layer_3'):
    w3 = create_weight(shape=[10, 3], std=10.0, name='w3')
    b3 = create_bias(shape=[3], std=10.0, name='b3')
    layer3 = fully_connected(inputs=layer2, weights=w3, biases=b3, name='layer3')
    
print(w3)
print(b3)
print(layer3)

Tensor("layer_3/w3/read:0", shape=(10, 3), dtype=float32)
Tensor("layer_3/b3/read:0", shape=(3,), dtype=float32)
Tensor("layer_3/layer3:0", shape=(?, 3), dtype=float32)


### Output layer

In [69]:
with tf.name_scope('output'):
    w4 = create_weight(shape=[3, 1], std=10.0, name='w4')
    b4 = create_bias(shape=[1], std=10.0, name='b4')
    output = fully_connected(inputs=layer3, weights=w4, biases=b4, name='output')
    loss = tf.reduce_mean(tf.abs(output - y), name='loss')
    
print(w4)
print(b4)
print(output)
print(loss)

Tensor("output/w4/read:0", shape=(3, 1), dtype=float32)
Tensor("output/b4/read:0", shape=(1,), dtype=float32)
Tensor("output/output:0", shape=(?, 1), dtype=float32)
Tensor("output/loss:0", shape=(), dtype=float32)


### Loss and training step

In [70]:
optimizer = tf.train.AdamOptimizer(0.000005)
train = optimizer.minimize(loss)

### Initializing variables

In [71]:
init = tf.global_variables_initializer()
sess.run(init)

### Training

In [73]:
losses_train = []
losses_test = []

feed_dict_test = {X: inputs_test, y: labels_test}

for i in range(200):
    idx_rand = np.random.choice(idx_train, size=BATCH_SIZE, replace=False)
#     print(idx_rand[: 6])
#     idx_rand = idx_train[: 6]
    inputs_rand = inputs_train.loc[idx_rand]
    labels_rand = labels_train.loc[idx_rand]
    feed_dict_train = {X: inputs_rand, y: labels_rand}
    sess.run(train, feed_dict=feed_dict_train)
    
    loss_train = sess.run(loss, feed_dict=feed_dict_train)
    losses_train.append(loss_train)
    
    loss_test = sess.run(loss, feed_dict=feed_dict_test)
    losses_test.append(loss_test)
    
    if i == 0 or (i + 1) % 20 == 0:
        print('#{} - loss: {}'.format(i + 1, loss_train))

#1 - loss: nan
#20 - loss: nan
#40 - loss: nan
#60 - loss: nan
#80 - loss: nan
#100 - loss: nan
#120 - loss: nan
#140 - loss: nan
#160 - loss: nan
#180 - loss: nan
#200 - loss: nan
