## Google BigData Showcase
BigQuery ML Tutorial 03 - 출생 체중 예측하기  
Edited by 김하제  
https://cloud.google.com/bigquery/docs/bigqueryml-natality?hl=ko

### BigQuery 라이브러리 호출

In [1]:
# Import the BigQuery client library used by the cells below.
from google.cloud import bigquery
# Create the client with no explicit arguments.
# NOTE(review): presumably project and credentials are picked up from the
# notebook's environment — confirm before running elsewhere.
client = bigquery.Client()

### 데이터셋 설정 "bqml_tutorial"

In [2]:
# Describe the "bqml_tutorial" dataset inside the client's default project.
# DatasetReference is used instead of the deprecated client.dataset() helper.
dataset = bigquery.Dataset(bigquery.DatasetReference(client.project, 'bqml_tutorial'))
# Set the dataset location explicitly so the model is stored in the 'US' location.
dataset.location = 'US'
# Create the dataset so the CREATE MODEL cell below has somewhere to write.
# exists_ok=True makes this cell safe to re-run: if the dataset already
# exists, the existing dataset is returned instead of raising a conflict error.
dataset = client.create_dataset(dataset, exists_ok=True)

### Jupyter에서 BigQuery SQL을 사용하기 위한 magic 명령어 클라이언트 라이브러리 호출

In [3]:
# Load the IPython extension from google.cloud.bigquery; the query cells below
# rely on its %%bigquery cell magic.
%load_ext google.cloud.bigquery

### 1. BigQuery Public Data 데이터 불러오기

In [4]:
%%bigquery
-- Preview up to 15 natality records that have a recorded birth weight.
-- RAND() is evaluated per row, so only a small random fraction of rows passes
-- the filter and the rows returned differ from run to run.
SELECT *
FROM `bigquery-public-data.samples.natality`
WHERE weight_pounds IS NOT NULL
    AND RAND() < 0.001
LIMIT 15

Unnamed: 0,source_year,year,month,day,wday,state,is_male,child_race,weight_pounds,plurality,...,alcohol_use,drinks_per_week,weight_gain_pounds,born_alive_alive,born_alive_dead,born_dead,ever_born,father_race,father_age,record_weight
0,1989,1989,7,,2.0,CA,True,1,8.062305,1.0,...,,,99.0,7,0,0,8,1,42,1
1,1971,1971,5,25.0,,GA,True,1,5.937049,2.0,...,,,,7,0,0,8,1,25,2
2,1975,1975,8,25.0,,IL,True,1,10.937133,1.0,...,,,,6,0,1,8,1,39,1
3,1969,1969,2,11.0,,OH,False,1,7.687519,,...,,,,5,0,2,8,1,41,2
4,1981,1981,7,29.0,,OR,False,1,8.501025,1.0,...,,,,5,0,2,8,1,34,1
5,1989,1989,2,,2.0,UT,True,1,8.245289,1.0,...,False,,3.0,10,0,0,11,1,40,1
6,1996,1996,4,,6.0,UT,False,9,5.37487,2.0,...,False,,17.0,7,0,0,8,1,35,1
7,1979,1979,7,19.0,,VA,True,1,7.156205,1.0,...,,,,7,0,1,9,1,44,1
8,1984,1984,11,12.0,,WV,False,1,9.186662,1.0,...,,,,7,0,0,8,1,41,1
9,1986,1986,3,6.0,,CA,False,1,6.937947,1.0,...,,,,3,0,5,9,1,27,1


### 2. 모델 만들기

In [5]:
%%bigquery
#standardSQL
-- Train (or retrain, replacing any existing model of the same name) a linear
-- regression model whose label column is weight_pounds; the other selected
-- columns are the model inputs.
CREATE OR REPLACE MODEL `bqml_tutorial.natality_model`
OPTIONS (
    model_type = 'linear_reg',
    input_label_cols = ['weight_pounds']
) AS
SELECT
    weight_pounds,
    is_male,
    gestation_weeks,
    mother_age,
    -- NOTE(review): presumably cast to STRING so the race code is handled as a
    -- category rather than a numeric value — confirm against the BigQuery ML docs.
    CAST(mother_race AS STRING) AS mother_race
FROM `bigquery-public-data.samples.natality`
WHERE weight_pounds IS NOT NULL
    -- Random sample of rows with a label; each run trains on a different sample.
    AND RAND() < 0.001

### 3. 학습된 모델 평가하기

In [6]:
%%bigquery
#standardSQL
-- Evaluate the trained model on every natality row that has a recorded birth
-- weight. The input query supplies the label (weight_pounds) together with the
-- same columns the model was trained on.
SELECT *
FROM ML.EVALUATE(
    MODEL `bqml_tutorial.natality_model`,
    (
        SELECT
            weight_pounds,
            is_male,
            gestation_weeks,
            mother_age,
            CAST(mother_race AS STRING) AS mother_race
        FROM `bigquery-public-data.samples.natality`
        WHERE weight_pounds IS NOT NULL
    )
)

Unnamed: 0,mean_absolute_error,mean_squared_error,mean_squared_log_error,median_absolute_error,r2_score,explained_variance
0,0.956495,1.675661,0.034254,0.737922,0.046491,0.046493


### 4. 평가된 모델로 예측하기

In [7]:
%%bigquery
#standardSQL
-- Predict birth weight for the natality rows whose state is "WY".
-- Only the model's output column is returned; the input query passes the
-- feature columns without the label.
SELECT predicted_weight_pounds
FROM ML.PREDICT(
    MODEL `bqml_tutorial.natality_model`,
    (
        SELECT
            is_male,
            gestation_weeks,
            mother_age,
            CAST(mother_race AS STRING) AS mother_race
        FROM `bigquery-public-data.samples.natality`
        WHERE state = "WY"
    )
)

Unnamed: 0,predicted_weight_pounds
0,8.099989
1,7.564458
2,7.305245
3,8.148934
4,7.928028
5,7.566584
6,7.606009
7,7.728316
8,7.598505
9,7.498899
