In [1]:
import pandas as pd
import xgboost as xgb
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from google.cloud import bigquery

In [2]:
# SQL for a 10,000-row sample of births after the year 2000, keeping the
# target (weight_pounds) and the four predictor columns.
query = """
SELECT
  weight_pounds,
  is_male,
  mother_age,
  plurality,
  gestation_weeks
FROM
  publicdata.samples.natality
WHERE year > 2000
LIMIT 10000
"""
# Run the query through a BigQuery client built with no explicit arguments
# and load the full result set into a pandas DataFrame.
bq_client = bigquery.Client()
df = bq_client.query(query).to_dataframe()
df.head()


Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks
0,6.68662,True,18,1,43.0
1,9.360828,True,32,1,41.0
2,8.437091,False,30,1,39.0
3,6.124442,False,24,1,40.0
4,7.12534,False,26,1,41.0


In [3]:
# Summary statistics for the numeric columns. A count below the 10,000 rows
# fetched means that column contains missing values.
df.describe()

Unnamed: 0,weight_pounds,mother_age,plurality,gestation_weeks
count,9989.0,10000.0,10000.0,9890.0
mean,7.297602,27.2989,1.0344,38.699798
std,1.291685,6.165838,0.192926,2.539957
min,0.612885,12.0,1.0,17.0
25%,6.624891,22.0,1.0,38.0
50%,7.374463,27.0,1.0,39.0
75%,8.124034,32.0,1.0,40.0
max,12.257702,50.0,3.0,47.0


In [4]:
# Check how evenly the two values of is_male are represented in the sample.
df['is_male'].value_counts()

True     5150
False    4850
Name: is_male, dtype: int64

In [5]:
# Discard every row that has a missing value, then shuffle the remaining rows
# with a fixed seed so the resulting order is repeatable.
df = shuffle(df.dropna(), random_state=2)

In [8]:
# Separate the predictors from the regression target (weight_pounds).
data = df.drop('weight_pounds', axis=1)
labels = df['weight_pounds']

In [9]:
# Preview the feature columns (the target column has been removed).
data.head()

Unnamed: 0,is_male,mother_age,plurality,gestation_weeks
39,True,32,1,41.0
6132,False,28,1,30.0
5986,False,44,1,38.0
7682,False,34,1,38.0
4910,True,31,1,40.0


In [10]:
# Encode the boolean is_male flag as an integer (True -> 1, False -> 0).
data = data.assign(is_male=data['is_male'].astype(int))

In [11]:
# Preview again to confirm is_male now holds 0/1 integers.
data.head()

Unnamed: 0,is_male,mother_age,plurality,gestation_weeks
39,1,32,1,41.0
6132,0,28,1,30.0
5986,0,44,1,38.0
7682,0,34,1,38.0
4910,1,31,1,40.0


In [12]:
# Hold out 25% of the rows for evaluation (the library default, made explicit).
# The split is seeded so that a fresh kernel run produces the same train/test
# partition, and therefore the same fitted model and predictions. The seed
# value matches the one already used for shuffling.
x, y = data, labels
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=2
)

In [13]:
# Gradient-boosted regressor with a squared-error objective; every other
# hyperparameter is left at the library default.
model = xgb.XGBRegressor(objective='reg:squarederror')

In [14]:
# Train the regressor on the training split.
model.fit(x_train, y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=4, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [15]:
# Predict weight_pounds for the held-out test rows.
y_pred = model.predict(x_test)

In [16]:
# Spot-check the model by printing predictions next to the true values.
# At most 20 pairs are shown; the bound is clamped to the number of
# predictions so a small test split cannot raise an IndexError.
n_preview = min(20, len(y_pred))
for i in range(n_preview):
    print('Predicted weight: ', y_pred[i])
    # y_test keeps the original (shuffled) index labels, so it is read by
    # position to stay aligned with y_pred.
    print('Actual weight: ', y_test.iloc[i])
    print()

Predicted weight:  7.7516375
Actual weight:  8.12623897732

Predicted weight:  7.654772
Actual weight:  8.62448368944

Predicted weight:  7.9154887
Actual weight:  8.24969784404

Predicted weight:  7.4484067
Actual weight:  6.66457418026

Predicted weight:  3.6419094
Actual weight:  4.7509617461

Predicted weight:  8.1031885
Actual weight:  7.62578964258

Predicted weight:  7.3584156
Actual weight:  5.8135898489399995

Predicted weight:  8.22723
Actual weight:  6.6359140862

Predicted weight:  7.826521
Actual weight:  9.75104584826

Predicted weight:  7.581806
Actual weight:  6.56316153974

Predicted weight:  7.3078
Actual weight:  6.3118345610599995

Predicted weight:  7.830519
Actual weight:  8.437090766739999

Predicted weight:  8.021765
Actual weight:  7.28848238172

Predicted weight:  7.6027865
Actual weight:  9.12493302418

Predicted weight:  6.9428096
Actual weight:  7.1870697412

Predicted weight:  8.237577
Actual weight:  7.31273323054

Predicted weight:  7.8663554
Actual weig

In [17]:
# Save the trained model to model.bst in the working directory; this is the
# file that is copied to the storage bucket further down.
model.save_model('model.bst')

In [19]:
# Print the project ID that gcloud is currently configured to use.
!gcloud config list project --format "value(core.project)"

phonic-obelisk-325801


In [18]:
# Deployment settings -- replace these with your own GCP project, model, and
# version names before running the cells that follow.
GCP_PROJECT = 'phonic-obelisk-325801'
MODEL_NAME = 'baby_weight'
VERSION_NAME = 'v1'
# The bucket is named after the project, so it is derived from GCP_PROJECT
# instead of repeating the ID.
MODEL_BUCKET = f'gs://{GCP_PROJECT}'


In [20]:
# Create the Cloud Storage bucket named by MODEL_BUCKET.
!gsutil mb $MODEL_BUCKET

Creating gs://phonic-obelisk-325801/...


In [21]:
# Upload the saved model file to the bucket.
!gsutil cp ./model.bst $MODEL_BUCKET

Copying file://./model.bst [Content-Type=application/octet-stream]...
/ [1 files][326.7 KiB/326.7 KiB]                                                
Operation completed over 1 objects/326.7 KiB.                                    


In [22]:
# Register a model resource named MODEL_NAME in the us-central1 region.
!gcloud ai-platform models create $MODEL_NAME --region=us-central1

Using endpoint [https://us-central1-ml.googleapis.com/]
Created ai platform model [projects/phonic-obelisk-325801/models/baby_weight].


In [23]:
# Create version VERSION_NAME of the model, using the XGBoost framework and
# loading the model artifact from MODEL_BUCKET (runtime 2.5, Python 3.7,
# us-central1).
!gcloud ai-platform versions create $VERSION_NAME \
--model=$MODEL_NAME \
--framework='XGBOOST' \
--runtime-version=2.5 \
--origin=$MODEL_BUCKET \
--python-version=3.7 \
--project=$GCP_PROJECT \
--region=us-central1

Using endpoint [https://us-central1-ml.googleapis.com/]
Creating version (this might take a few minutes)......done.                    


In [24]:
%%writefile predictions.json
[0.0, 33.0, 1.0, 27.0]
[1.0, 26.0, 1.0, 40.0]

Writing predictions.json


In [25]:
# Send the instances in predictions.json to the deployed model version. The
# command's output is captured into `prediction`, and `.s` is used to print
# it as a single string.
prediction = !gcloud ai-platform predict --model=$MODEL_NAME --json-instances=predictions.json --version=$VERSION_NAME
print(prediction.s)

Using endpoint [https://us-central1-ml.googleapis.com/] [1.3198626041412354, 7.960344314575195]
