### 設置客戶端

In [None]:
!pip install google-cloud-bigquery

In [3]:
import os
from google.cloud import bigquery

#SET GCP 金鑰
os.environ['GOOGLE_APPLICATION_CREDENTIALS']=r"/content/tibame-gad253-14-bigquery-83c8469952e8.json"

#初始化BigQuery客戶端
client = bigquery.Client()
print("Client creating using default project: {}".format(client.project))

Client creating using default project: tibame-gad253-14-bigquery


### 設定資料集變數

In [4]:
#設定資料集
dataset_id = 'tibame_gad253_14_dataset_python'
us_dataset_id = 'tibame_gad253_14_dataset_us'
eu_dataset_id = 'tibame_gad253_14_dataset_eu'

### 建立資料集

In [None]:
# 定義資料集參數
dataset_ref = client.dataset(dataset_id)

# 設定資料集的描述和其他選項
dataset = bigquery.Dataset(dataset_ref)
dataset.description = 'This is a new dataset created from Python.'
dataset.location = 'US'  # 設定資料集的地區位置

# 建立資料集
dataset = client.create_dataset(dataset, timeout=30)  # 可設定超時時間
print(f"dataset: {dataset.dataset_id} ,created")

### 建立資料表

In [6]:
table_id = 'age_table'
schema = [
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("age", "INTEGER"),
]

# 定義表格參數
table_ref = client.dataset(dataset_id).table(table_id)
table = bigquery.Table(table_ref, schema=schema)

# 建立資料表
table = client.create_table(table)
print(f"table: {table.table_id} ,created")

Conflict: 409 POST https://bigquery.googleapis.com/bigquery/v2/projects/tibame-gad253-14-bigquery/datasets/tibame_gad253_14_dataset_python/tables?prettyPrint=false: Already Exists: Table tibame-gad253-14-bigquery:tibame_gad253_14_dataset_python.age_table

### 寫入資料

In [None]:
# 定義資料
rows_to_insert = [
    {"name": "Alice", "age": 25},
    {"name": "Bob", "age": 35}
]

# 插入資料
errors = client.insert_rows_json(f"{dataset_id}.{table_id}", rows_to_insert)

if errors == []:
    print("data insert successfully")
else:
    print(f"error: {errors}")

data insert successfully


### 查詢資料

In [7]:
# 定義查詢語句
query = f"""
    SELECT name, age
    FROM `{dataset_id}.{table_id}`
    WHERE age > 30
"""

# 執行查詢
query_job = client.query(query)

# 獲取結果
results = query_job.result()

# 印出結果
for row in results:
    print(f"name: {row.name}, age: {row.age}")

name: Bob, age: 35


### 建立線性回歸模型

#### 建立企鵝體重預測模型

In [None]:
# 定義模型建立SQL程式
query = f"""
    CREATE OR REPLACE MODEL `{us_dataset_id}.penguins_model`
OPTIONS
  (model_type='linear_reg',
  input_label_cols=['body_mass_g'],
  enable_global_explain = TRUE) AS
SELECT
  *
FROM
  `bigquery-public-data.ml_datasets.penguins`
WHERE
  body_mass_g IS NOT NULL;
"""

# 執行查詢
query_job = client.query(query)

# 獲取結果
results = query_job.result()
print("Model Created!")

Model Created!


#### 評估模型

In [8]:
# 定義模型評估SQL程式
query = f"""
   SELECT
  *
FROM
  ML.EVALUATE(MODEL `{us_dataset_id}.penguins_model`,
    (
    SELECT
      *
    FROM
      `bigquery-public-data.ml_datasets.penguins`
    WHERE
      body_mass_g IS NOT NULL));
"""

# 執行查詢
query_job = client.query(query)

# 獲取結果
results = query_job.result()

In [9]:
# 取得 BigQuery 回傳的欄位名稱
field_names = results.schema

# 印出評估結果
for row in results:
    for field, value in zip(field_names, row):
        print(f"{field.name}: {value}")

mean_absolute_error: 227.01223667447215
mean_squared_error: 81838.15989216763
mean_squared_log_error: 0.005070447373501307
median_absolute_error: 173.08081641661465
r2_score: 0.8723772534253442
explained_variance: 0.8723772534253443


#### 預測線性回歸模型

In [10]:
# 定義模型預測SQL程式
query = f"""
   SELECT
  predicted_body_mass_g
FROM
  ML.PREDICT(MODEL `{us_dataset_id}.penguins_model`,
    (
    SELECT
      *
    FROM
      `bigquery-public-data.ml_datasets.penguins`
    WHERE
      island = 'Biscoe'));
"""

# 執行查詢
query_job = client.query(query)

# 獲取結果
results = query_job.result()

In [11]:
# 將每筆 row 解成 tuple（row.values() 是一整筆，row[i] 是值）
rows_data = [tuple(row.values()) for row in results]

# 取得欄位名稱
columns = [field.name for field in results.schema]

# 建立 DataFrame
import pandas as pd
df = pd.DataFrame(rows_data, columns=columns)

# 顯示前幾筆資料
print(df.head())

   predicted_body_mass_g
0            4681.782896
1            3875.224470
2            3303.096891
3            3976.529009
4            3457.923587


#### 列出全局解釋結果

In [17]:
# 定義模型全局解釋結果SQL程式
query = f"""
   SELECT
  *
FROM
  ML.GLOBAL_EXPLAIN(MODEL `{us_dataset_id}.penguins_model`)
"""

# 執行查詢
query_job = client.query(query)

# 獲取結果
results = query_job.result()

In [18]:
# 將每筆 row 解成 tuple（row.values() 是一整筆，row[i] 是值）
rows_data = [tuple(row.values()) for row in results]

# 取得欄位名稱
columns = [field.name for field in results.schema]

# 建立 DataFrame
import pandas as pd
df = pd.DataFrame(rows_data, columns=columns)

# 顯示前幾筆資料
print(df.head())

             feature  attribution
0                sex  4198.011155
1             island  3247.750335
2            species   572.285375
3  flipper_length_mm   193.612051
4    culmen_depth_mm   117.084944
