In [2]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

import numpy as np
import json

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from google.cloud import bigquery
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder

In [3]:
# Display the installed TensorFlow version so readers know which release
# the outputs in this notebook were produced with.
tf.__version__

'2.1.0'

In [4]:
# Fetch a sample of birth records (year > 2000, capped at 20,000 rows) from
# the natality table and load it into a pandas DataFrame.
# NOTE(review): the query has a LIMIT but no ORDER BY, so the sampled rows
# are presumably not guaranteed to be the same between runs — confirm.
query = """
SELECT
 weight_pounds,
 is_male,
 mother_age,
 plurality,
 gestation_weeks
FROM
 publicdata.samples.natality
WHERE year > 2000
LIMIT 20000
"""

# The client is created without explicit arguments.
bq_client = bigquery.Client()
query_job = bq_client.query(query)
df = query_job.to_dataframe()

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks
0,8.187968,False,44,1,38.0
1,6.481591,False,15,1,37.0
2,6.188376,True,14,1,40.0
3,8.000575,False,15,1,41.0
4,6.437498,True,44,1,37.0


In [5]:
# Summary statistics for the numeric columns. Compare the `count` row
# across columns to spot missing values before cleaning.
df.describe()

Unnamed: 0,weight_pounds,mother_age,plurality,gestation_weeks
count,19985.0,20000.0,20000.0,19778.0
mean,7.293696,27.4079,1.0326,38.707503
std,1.318461,6.203744,0.185038,2.52386
min,0.562179,13.0,1.0,17.0
25%,6.624891,22.0,1.0,38.0
50%,7.374463,27.0,1.0,39.0
75%,8.124034,32.0,1.0,40.0
max,12.588395,50.0,4.0,47.0


In [7]:
# Check how the rows are distributed across the values of `is_male`.
df['is_male'].value_counts()

True     10183
False     9817
Name: is_male, dtype: int64

In [8]:
# Discard rows with any missing value, then put the remaining rows in a
# seeded random order (random_state=2).
df = shuffle(df.dropna(), random_state=2)

In [9]:
# The regression target is the birth weight column.
labels = df['weight_pounds']

# Features are every remaining column; `is_male` is cast to int so all
# model inputs are numeric.
data = (
    df.drop(columns=['weight_pounds'])
      .assign(is_male=lambda features: features['is_male'].astype(int))
)

In [10]:
# Preview the feature frame; `is_male` was cast to int in the previous cell.
data.head()

Unnamed: 0,is_male,mother_age,plurality,gestation_weeks
17667,1,35,1,39.0
1511,0,18,1,39.0
7606,1,25,1,43.0
9397,1,26,1,40.0
17492,1,35,1,36.0


## Build and train a TF model

In [11]:
# Split features and labels into train and test sets with scikit-learn.
# No test_size is given, so the library's default split proportion applies.
# random_state pins the partition so a Restart & Run All reproduces the same
# train/test rows; the value matches the seed used for the shuffle above.
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, random_state=2
)

In [12]:
# Build the model (it is compiled and trained in the following cells).
#
# Seed TensorFlow's global RNG before the layers are created so that the
# initial weights are the same on every Restart & Run All.
# NOTE(review): full run-to-run determinism of training may also depend on
# batch shuffling and hardware — confirm if exact reproducibility matters.
tf.random.set_seed(2)

# A small fully connected regressor: one input per feature column, two ReLU
# hidden layers, and a single output unit with no activation that predicts
# the label value.
model = Sequential([
    Dense(64, activation='relu', input_shape=(x_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)]
)

In [13]:
# Configure training: RMSprop with its default settings, mean squared error
# as the loss, and both MAE and MSE reported as metrics.
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=['mae', 'mse'],
)

<bound method Network.summary of <tensorflow.python.keras.engine.sequential.Sequential object at 0x7faf9c271ad0>>

In [14]:
# Print the layer-by-layer architecture with output shapes and parameter
# counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                320       
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 2,433
Trainable params: 2,433
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Train for 10 epochs, holding out 15% of the training rows for validation.
# The returned History object is kept so the per-epoch loss and metrics can
# be inspected later through `history.history`, rather than being discarded
# and shown only as an opaque object repr.
history = model.fit(x_train, y_train, epochs=10, validation_split=0.15)

Train on 12600 samples, validate on 2224 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7faf34b49290>

In [16]:
# Generate predictions on test examples
num_examples = 10
predictions = model.predict(x_test[:num_examples])

In [17]:
# Print each prediction next to the true label for the same test row.
for row in range(num_examples):
    predicted_weight = predictions[row][0]  # model output has one value per row
    actual_weight = y_test.iloc[row]
    print('Predicted val: ', predicted_weight)
    print('Actual val: ', actual_weight)

Predicted val:  7.51305
Actual val:  9.105091420599999
Predicted val:  7.269867
Actual val:  6.75055446244
Predicted val:  5.629475
Actual val:  7.7492485093
Predicted val:  6.92276
Actual val:  6.9996768185
Predicted val:  7.2285867
Actual val:  6.97542596968
Predicted val:  8.062556
Actual val:  7.31273323054
Predicted val:  7.501497
Actual val:  7.43839671988
Predicted val:  8.027619
Actual val:  6.75055446244
Predicted val:  7.503191
Actual val:  7.1760466281
Predicted val:  8.47601
Actual val:  8.56275425608
