In [None]:
# GCP authentication is required beforehand; how to authenticate depends on the environment.
# On Colab:
#   Run the following in a cell:
#     from google.colab import auth
#     auth.authenticate_user()
#     %env GCLOUD_PROJECT=<your GCP project ID>
# On a local environment such as a PC:
#   First time only: install the gcloud CLI from https://cloud.google.com/sdk/docs/install-sdk and run `gcloud init`
# Load the IPython extension so that the %%bigquery cell magic used by the cells below is available.
%load_ext google.cloud.bigquery

# 10章 数値
## 10-1 数値型への変換
### Q: さまざまな数値型への変換
#### Awesome

In [None]:
%%bigquery
-- Cast the same two integer literals to each numeric type, then divide v1 by v2
-- within each type so the results can be compared side by side.
with
literals as (
    select
        40000 as v1,
        3 as v2
),

typed as (
    select
        -- (1)-1 cast to int64
        cast(v1 as int64) as v1_int64,
        cast(v2 as int64) as v2_int64,
        -- (1)-2 cast to float64
        cast(v1 as float64) as v1_float64,
        cast(v2 as float64) as v2_float64,
        -- (1)-3 cast to numeric
        cast(v1 as numeric) as v1_numeric,
        cast(v2 as numeric) as v2_numeric,
        -- (1)-4 cast to bignumeric
        cast(v1 as bignumeric) as v1_bignumeric,
        cast(v2 as bignumeric) as v2_bignumeric
    from literals
)

select
    typed.*,
    -- (2)-1 int64 / int64 (BigQuery's `/` returns FLOAT64 for INT64 operands,
    --       not an integer quotient)
    v1_int64 / v2_int64 as result_int64,
    -- (2)-2 float64 / float64
    v1_float64 / v2_float64 as result_float64,
    -- (2)-3 numeric / numeric
    v1_numeric / v2_numeric as result_numeric,
    -- (2)-4 bignumeric / bignumeric
    v1_bignumeric / v2_bignumeric as result_bignumeric
from typed

## 10-2 数値の欠損処理
### Q: `thickness`が欠損しているレコードの削除
#### Awesome

In [None]:
%%bigquery
-- Listwise deletion: keep only the rows whose thickness is present.
select
    *
from
    example.production_missing_num
where
    thickness is not null

### Q: 欠損している`thickness`を定数で補完
#### Awesome


In [None]:
%%bigquery
-- Replace a missing thickness with the constant 1; other columns pass through.
select
    type,
    length,
    ifnull(thickness, 1) as thickness,  -- NULL -> 1, otherwise unchanged
    fault_flg
from
    example.production_missing_num

### Q: 欠損しているthicknessを平均値で補完
#### Awesome

In [None]:
%%bigquery
-- Replace a missing thickness with the mean thickness computed over the whole
-- table (empty window = every row); other columns pass through.
select
    type,
    length,
    ifnull(
        thickness,
        avg(thickness) over ()
    ) as thickness,
    fault_flg
from
    example.production_missing_num

## 10-3 数値の外れ値除去
### Q: `thickness`の外れ値を四分位数ベースの外れ値検出で除去
#### Awesome

In [None]:
%%bigquery
-- Remove thickness outliers with the interquartile-range rule:
-- keep rows with Q1 - 1.5 * IQR <= thickness <= Q3 + 1.5 * IQR, where IQR = Q3 - Q1.
with
quartiles as (
    -- (1) Attach the table-wide first and third quartiles to every row.
    select
        *,
        percentile_cont(thickness, 0.25) over () as q1,
        percentile_cont(thickness, 0.75) over () as q3
    from example.production
)

-- (2) Filter on the fences, then drop the helper quartile columns.
select * except (q1, q3)
from quartiles
where
    thickness >= q1 - 1.5 * (q3 - q1)
    and thickness <= q3 + 1.5 * (q3 - q1)

## 10-4 数値変換
### Q: 予約の合計金額の標準化
#### Awesome

In [None]:
%%bigquery
-- Standardize total_price to a z-score: (value - mean) / standard deviation,
-- with the mean and standard deviation taken over every row of the table.
-- The standardized value replaces the original total_price column.
select
    * except (total_price),
    -- safe_divide returns NULL instead of aborting the whole query with a
    -- division-by-zero error when every total_price is identical
    -- (standard deviation of 0).
    safe_divide(
        total_price - avg(total_price) over (),
        stddev(total_price) over ()
    ) as total_price
from example.reservation

### Q: 予約の合計金額の対数変換
#### Awesome

In [None]:
%%bigquery
-- Replace total_price with its natural logarithm.
-- NOTE(review): the logarithm is undefined for values <= 0; this assumes
-- total_price is always positive — confirm against the data.
select
    * except (total_price),
    ln(total_price) as total_price
from
    example.reservation

## 10-5 数値のカテゴリ化
### Q: 顧客の年齢のカテゴリ化
#### Awesome

In [None]:
%%bigquery
-- Bucket age into ten-year bands: age_cat is floor(age / 10) as an integer
-- (e.g. 0 for ages 0-9, 1 for 10-19, 2 for 20-29, ...).
select
    *,
    cast(
        floor(age / 10) as int64
    ) as age_cat
from
    example.customer