In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

import numpy as np
import json

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from google.cloud import bigquery
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder

In [2]:
# Select the regression target (weight_pounds) and four candidate features
# from the public natality sample table, restricted to births after 2000 and
# capped at 10,000 rows.
# NOTE(review): there is no ORDER BY, so LIMIT presumably does not guarantee
# the same rows on every run — confirm if exact reproducibility matters.
query="""
SELECT
  weight_pounds,
  is_male,
  mother_age,
  
  plurality,
  gestation_weeks
FROM
  publicdata.samples.natality
WHERE year > 2000
LIMIT 10000
"""
# Run the query through a BigQuery client built with no explicit arguments
# and load the result into a pandas DataFrame.
df = bigquery.Client().query(query).to_dataframe()
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks
0,8.81849,False,17,1,42.0
1,8.141671,False,29,1,38.0
2,5.948072,True,38,1,38.0
3,8.838332,True,27,1,39.0
4,9.259415,True,28,1,38.0


In [3]:
# Summary statistics for the numeric columns; a `count` below the row total
# flags a column that contains missing values.
df.describe()

Unnamed: 0,weight_pounds,mother_age,plurality,gestation_weeks
count,9991.0,10000.0,10000.0,9881.0
mean,7.279259,27.3323,1.0332,38.704888
std,1.317603,6.238122,0.188948,2.602659
min,0.661387,12.0,1.0,19.0
25%,6.624891,22.0,1.0,38.0
50%,7.374463,27.0,1.0,39.0
75%,8.124034,32.0,1.0,40.0
max,12.808857,53.0,3.0,47.0


In [4]:
# Count how many records fall into each is_male class.
df['is_male'].value_counts()

True     5158
False    4842
Name: is_male, dtype: int64

In [5]:
# Discard every row that has a missing value, then reorder the survivors
# with a fixed seed so the shuffle is repeatable.
df = shuffle(df.dropna(), random_state=2)

In [6]:
# Target column: birth weight in pounds.
labels = df['weight_pounds']
# Feature frame: every column except the target, with the is_male flag
# re-encoded as an integer.
data = (
    df.drop(columns=['weight_pounds'])
      .assign(is_male=lambda frame: frame['is_male'].astype(int))
)

In [7]:
# Preview the prepared feature frame (is_male is now an integer column).
data.head()

Unnamed: 0,is_male,mother_age,plurality,gestation_weeks
757,1,23,1,39.0
6544,1,19,1,38.0
3190,1,28,1,40.0
2064,0,32,1,40.0
4066,0,17,1,40.0


In [8]:
# Alias the feature frame and target series under the conventional x / y names.
x, y = data, labels
# Hold out a test partition using train_test_split's default split sizes.
# The seed is fixed (the same value as the seeded shuffle of df) so that
# Restart & Run All yields the same train/test rows instead of a different
# random partition on every run.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2)

In [9]:
# Small fully connected regressor, assembled layer by layer.
model = Sequential()
# First hidden layer; its input width is the number of feature columns.
model.add(Dense(64, activation='relu', input_shape=(x_train.shape[1],)))
# Second hidden layer.
model.add(Dense(32, activation='relu'))
# Single-unit output layer with no activation argument (predicted weight).
model.add(Dense(1))

2021-10-26 23:21:45.083696: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2299995000 Hz
2021-10-26 23:21:45.086364: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x556b939b1d60 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-10-26 23:21:45.086413: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2021-10-26 23:21:45.088968: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [10]:
# Optimise mean squared error with RMSprop (constructed with its defaults)
# and report both MAE and MSE during training.
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=['mae', 'mse'],
)

In [11]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                320       
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 2,433
Trainable params: 2,433
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Train for 10 epochs, holding out 10% of the training rows for validation.
# The call's return value (a History object) is displayed as the cell output.
model.fit(x_train, y_train, epochs=10, validation_split=0.1)

Train on 6665 samples, validate on 741 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f69f7c27310>

In [13]:
# How many held-out rows to spot-check.
num_examples = 10
# Predict on the first `num_examples` rows of the test features.
predictions = model.predict(x_test.head(num_examples))

In [14]:
# Print each prediction next to the true label for the same test row.
# Iterating over the predictions actually returned (rather than
# range(num_examples)) avoids an IndexError when the test set holds fewer
# than num_examples rows.
for i, prediction in enumerate(predictions):
    # Each prediction row holds one value: the model's single output unit.
    print('Predicted val: ', prediction[0])
    print('Actual val: ',y_test.iloc[i])
    print()

Predicted val:  6.9608936
Actual val:  7.12534030784

Predicted val:  6.8394136
Actual val:  7.84404728196

Predicted val:  6.6438894
Actual val:  7.87491199864

Predicted val:  6.8740435
Actual val:  7.75145313192

Predicted val:  6.2766953
Actual val:  6.37576861704

Predicted val:  6.3135467
Actual val:  8.437090766739999

Predicted val:  6.3600235
Actual val:  5.3131405142

Predicted val:  6.3519273
Actual val:  6.10019078954

Predicted val:  6.9518137
Actual val:  6.96440285658

Predicted val:  6.360024
Actual val:  5.87311465968



In [15]:
# Re-attach the true labels to the test features (aligned on the shared row
# index) so each What-If Tool example carries its ground-truth weight.
wit_data = x_test.join(y_test)

In [16]:
def custom_predict(examples_to_infer):
    """Prediction hook registered with the What-If Tool.

    Passes the examples straight to the notebook's global ``model`` and
    returns whatever ``model.predict`` produces, unmodified.

    Args:
        examples_to_infer: The batch of examples handed over by the caller.
            NOTE(review): the exact layout is decided by the What-If Tool —
            presumably one feature list per example; confirm.

    Returns:
        The output of ``model.predict`` for ``examples_to_infer``.
    """
    return model.predict(examples_to_infer)

In [17]:
# Give the What-If Tool the first 500 test rows as plain lists, along with
# the column names in matching order (features first, then the label).
config_builder = WitConfigBuilder(
    wit_data[:500].values.tolist(),
    data.columns.tolist() + ['weight_pounds'],
)
# Route inference through the local Keras model.
config_builder = config_builder.set_custom_predict_fn(custom_predict)
# Mark which column holds the ground truth and declare the task type.
config_builder = config_builder.set_target_feature('weight_pounds')
config_builder = config_builder.set_model_type('regression')
# Render the interactive widget as the cell output.
WitWidget(config_builder, height=800)

WitWidget(config={'model_type': 'regression', 'label_vocab': [], 'feature_names': ['is_male', 'mother_age', 'p…