**Importing necessary modules**

In [23]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

import numpy as np
import json

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from google.cloud import bigquery


**Installing What-If tool**

In [None]:
!pip install witwidget

In [25]:
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder

In [26]:
# Authenticate this Colab session; the print below only runs if
# authenticate_user() returned without raising.
from google.colab import auth
auth.authenticate_user()
print('Authenticated')

Authenticated


In [27]:
# Google Cloud project identifier handed to the BigQuery client.
project_id = 'unicef-aldeav'

In [28]:
# BigQuery client bound to the project configured above; used to run the query.
client = bigquery.Client(project=project_id)

**Retrieving natality dataset using BigQuery**

In [29]:
# Query text: the label (weight_pounds) plus four candidate features for births
# recorded after the year 2000, capped at 10,000 rows.
# NOTE(review): there is no ORDER BY, so the rows returned under LIMIT may differ
# between runs — confirm whether a stable sample is required.
sql = """
SELECT
  weight_pounds,
  is_male,
  mother_age,
  plurality,
  gestation_weeks
FROM
  publicdata.samples.natality
WHERE year > 2000
LIMIT 10000
"""

# Submit the query, then materialise the result as a pandas DataFrame.
query_job = client.query(sql)
df = query_job.to_dataframe()

In [30]:
# Peek at the first rows to confirm the query returned the expected columns.
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks
0,6.68662,True,18,1,43.0
1,9.360828,True,32,1,41.0
2,8.437091,False,30,1,39.0
3,6.124442,False,24,1,40.0
4,7.12534,False,26,1,41.0


In [31]:
# Summary statistics per numeric column; a `count` lower than the number of rows
# indicates missing values in that column.
df.describe()

Unnamed: 0,weight_pounds,mother_age,plurality,gestation_weeks
count,9989.0,10000.0,10000.0,9890.0
mean,7.297602,27.2989,1.0344,38.699798
std,1.291685,6.165838,0.192926,2.539957
min,0.612885,12.0,1.0,17.0
25%,6.624891,22.0,1.0,38.0
50%,7.374463,27.0,1.0,39.0
75%,8.124034,32.0,1.0,40.0
max,12.257702,50.0,3.0,47.0


In [32]:
# Check how evenly the two values of is_male are represented in the sample.
df['is_male'].value_counts()

True     5150
False    4850
Name: is_male, dtype: int64

**Preparing the data**

In [33]:
# Discard every row containing a missing value, then shuffle the remaining rows
# with a fixed seed so the row order is repeatable.
df = shuffle(df.dropna(), random_state=2)

In [34]:
# The regression target is the birth weight column.
labels = df['weight_pounds']

# Features are every remaining column, with the boolean is_male cast to integers
# so that all model inputs are numeric.
data = (
    df.drop(columns=['weight_pounds'])
      .assign(is_male=lambda frame: frame['is_male'].astype(int))
)

In [35]:
# Split features and labels into train and test partitions (test size is left
# at the scikit-learn default). A fixed random_state makes the split — and so
# every metric and prediction computed below — reproducible after a kernel
# restart; the value matches the seed used for shuffling.
x, y = data, labels
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2)

**Building and training model**

In [36]:
# Seed TensorFlow before any layer is created so that weight initialisation (and
# other TensorFlow-level randomness) is repeatable on Restart & Run All. The value
# matches the random_state used when shuffling the data.
# NOTE(review): bit-for-bit identical training can still depend on hardware and
# op-level determinism.
tf.random.set_seed(2)

# Small fully connected regressor: one input per feature column, two ReLU hidden
# layers, and a single linear output unit for the predicted weight.
model = Sequential([
    Dense(64, activation='relu', input_shape=(len(x_train.columns),)),
    Dense(32, activation='relu'),
    Dense(1)
])

In [37]:
# Optimise mean squared error with RMSprop (default settings), and report both
# mean absolute error and mean squared error during training.
optimizer = tf.keras.optimizers.RMSprop()
loss_fn = tf.keras.losses.MeanSquaredError()
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['mae', 'mse'])

In [38]:
# Print each layer's output shape and parameter count as a sanity check.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 64)                320       
_________________________________________________________________
dense_4 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 33        
Total params: 2,433
Trainable params: 2,433
Non-trainable params: 0
_________________________________________________________________


In [39]:
# Train for 10 epochs, holding out 10% of the training rows for validation.
# The returned History object is kept so the per-epoch metrics stay available
# afterwards (history.history); assigning it also stops the cell from ending
# in a bare object repr.
history = model.fit(x_train, y_train, epochs=10, validation_split=0.1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fe63feeb250>

In [40]:
# Predict on the first few held-out rows and show the shape of the result.
num_examples = 10
sample_features = x_test.iloc[:num_examples]
predictions = model.predict(sample_features)
predictions.shape

(10, 1)

In [41]:
# Show each prediction next to the true label for the same test row.
for row in range(num_examples):
  predicted_weight = predictions[row][0]
  actual_weight = y_test.iloc[row]
  print('Predicted val: ', predicted_weight)
  print('Actual val: ', actual_weight)
  print()

Predicted val:  7.591659
Actual val:  7.36784879604

Predicted val:  8.056238
Actual val:  8.18796841068

Predicted val:  7.3322444
Actual val:  7.81318256528

Predicted val:  7.3100014
Actual val:  7.31273323054

Predicted val:  7.7091746
Actual val:  6.0627122049999995

Predicted val:  7.688924
Actual val:  6.71968974576

Predicted val:  7.167837
Actual val:  7.464852191319999

Predicted val:  7.6010714
Actual val:  5.74965579296

Predicted val:  7.540803
Actual val:  7.31273323054

Predicted val:  7.7856293
Actual val:  6.56316153974



**Using What-If tool for visualisation**

In [42]:
# Put the test features and their true labels side by side (label column last)
# so the What-If Tool receives both in a single table.
wit_data = pd.concat(objs=[x_test, y_test], axis='columns')

In [43]:
def custom_predict(examples_to_infer):
  """Prediction callback registered with the What-If Tool.

  Args:
    examples_to_infer: Batch of examples, passed unchanged to ``model.predict``.

  Returns:
    Whatever ``model.predict`` returns for the batch.
  """
  return model.predict(examples_to_infer)

In [None]:
# Hand the What-If Tool the first 500 rows of the combined test table as plain
# lists, plus the matching column names (features first, target last).
wit_examples = wit_data[:500].values.tolist()
wit_feature_names = data.columns.tolist() + ['weight_pounds']

# Configure the tool as a regression problem scored by custom_predict, with
# weight_pounds as the ground-truth column.
config_builder = (
    WitConfigBuilder(wit_examples, wit_feature_names)
    .set_custom_predict_fn(custom_predict)
    .set_target_feature('weight_pounds')
    .set_model_type('regression')
)
WitWidget(config_builder, height=800)