In [1]:
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
# override=True asks python-dotenv to let values from the file replace
# variables that are already set. The result is bound to `_` so the cell
# does not echo it as output.
_ = load_dotenv(override=True)

In [2]:
# Sanity check: show the credentials path that the environment points to.
os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")

'../../04-data-pipeline-with-apache-airflow/cred/sa.json'

In [3]:
from google.cloud import bigquery

# Create the client object used to talk to BigQuery.
# NOTE(review): no credentials or project are passed explicitly; presumably
# they are resolved from GOOGLE_APPLICATION_CREDENTIALS checked above - confirm.
client = bigquery.Client()

In [4]:
# Project and dataset used to build fully qualified table names
# (`project.dataset.table`) in the cells below.
PROJECT_ID = "dataaibootcamp-443306"
# Change this to your own dataset name before running the notebook.
DATASET_ID = "dataai_werockstar_007"

In [5]:
# Read from BigQuery: the ten names with the highest summed `number`
# for state 'TX' in the public usa_names table.
query = """
    SELECT name, SUM(number) as total_people
    FROM `bigquery-public-data.usa_names.usa_1910_2013`
    WHERE state = 'TX'
    GROUP BY name
    ORDER BY total_people DESC
    LIMIT 10
"""
query_job = client.query(query)

# Wait for the job to finish, then print one line per returned row.
for row in query_job.result():
    print(f"ชื่อ: {row.name}, จำนวนคน: {row.total_people:,}")

ชื่อ: James, จำนวนคน: 272,793
ชื่อ: John, จำนวนคน: 235,139
ชื่อ: Michael, จำนวนคน: 225,320
ชื่อ: Robert, จำนวนคน: 220,399
ชื่อ: David, จำนวนคน: 219,028
ชื่อ: Mary, จำนวนคน: 209,893
ชื่อ: William, จำนวนคน: 173,092
ชื่อ: Jose, จำนวนคน: 157,362
ชื่อ: Christopher, จำนวนคน: 144,196
ชื่อ: Maria, จำนวนคน: 131,056


In [6]:
# Preview the table created from the uploaded CSV file
# (set DATASET_ID to your own dataset first).
# Always keep a LIMIT on exploratory SELECT * queries.
query = f"""
    SELECT *
    FROM `{PROJECT_ID}.{DATASET_ID}.house_price`
    LIMIT 10
"""
query_job = client.query(query)

# Wait for the job to finish and print each Row as-is.
for row in query_job.result():
    print(row)

Row((19, 111400, 1700, 2, 2, 1, True, 'East'), {'Home': 0, 'Price': 1, 'SqFt': 2, 'Bedrooms': 3, 'Bathrooms': 4, 'Offers': 5, 'Brick': 6, 'Neighborhood': 7})
Row((41, 106600, 1560, 2, 2, 1, False, 'East'), {'Home': 0, 'Price': 1, 'SqFt': 2, 'Bedrooms': 3, 'Bathrooms': 4, 'Offers': 5, 'Brick': 6, 'Neighborhood': 7})
Row((66, 111100, 1450, 2, 2, 1, True, 'North'), {'Home': 0, 'Price': 1, 'SqFt': 2, 'Bedrooms': 3, 'Bathrooms': 4, 'Offers': 5, 'Brick': 6, 'Neighborhood': 7})
Row((83, 164800, 2050, 2, 2, 1, True, 'West'), {'Home': 0, 'Price': 1, 'SqFt': 2, 'Bedrooms': 3, 'Bathrooms': 4, 'Offers': 5, 'Brick': 6, 'Neighborhood': 7})
Row((96, 152500, 1970, 2, 2, 1, True, 'West'), {'Home': 0, 'Price': 1, 'SqFt': 2, 'Bedrooms': 3, 'Bathrooms': 4, 'Offers': 5, 'Brick': 6, 'Neighborhood': 7})
Row((98, 126800, 2000, 2, 2, 1, True, 'East'), {'Home': 0, 'Price': 1, 'SqFt': 2, 'Bedrooms': 3, 'Bathrooms': 4, 'Offers': 5, 'Brick': 6, 'Neighborhood': 7})
Row((111, 111600, 1710, 2, 2, 1, False, 'North'), 

In [7]:
# Create the mock_customer table in BigQuery with two REQUIRED columns.
schema = [
    bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
]

# Fully qualified id: project.dataset.table
table_id = f"{PROJECT_ID}.{DATASET_ID}.mock_customer"

table = bigquery.Table(table_id, schema=schema)
# exists_ok=True keeps this cell re-runnable: without it, a second run
# (e.g. Restart Kernel -> Run All) raises google.api_core.exceptions.Conflict
# because the table already exists. When the table is already there, the
# existing table is returned and its schema is left untouched.
table = client.create_table(table, exists_ok=True)
print(f"สร้างตาราง {table.table_id}")

สร้างตาราง mock_customer


In [8]:
# Example: insert rows into the table, each row given as a plain dict.
rows_to_insert = [
    {"full_name": "สมชาย ใจดี", "age": 25},
    {"full_name": "สมหญิง รักเรียน", "age": 30},
]

# An empty result from insert_rows_json is treated as success;
# anything else is printed as the error details.
errors = client.insert_rows_json(table_id, rows_to_insert)
if errors:
    print("เกิดข้อผิดพลาด:", errors)
else:
    print("เพิ่มข้อมูลสำเร็จ")

เพิ่มข้อมูลสำเร็จ


In [9]:
# Read the rows back to confirm the insert landed in mock_customer.
query = f"""
    SELECT *
    FROM `{PROJECT_ID}.{DATASET_ID}.mock_customer`
    LIMIT 10
"""
query_job = client.query(query)

# Wait for the job to finish and print each Row as-is.
for row in query_job.result():
    print(row)

Row(('สมชาย ใจดี', 25), {'full_name': 0, 'age': 1})
Row(('สมหญิง รักเรียน', 30), {'full_name': 0, 'age': 1})


# Using pandas with Google BigQuery (pandas-gbq)

In [10]:
import pandas as pd
import pandas_gbq

In [11]:
# Build the records to upload, then wrap them in a DataFrame
# (one column per dict key, one row per record).
rows_to_insert = [
    {"full_name": full_name, "age": age}
    for full_name, age in (("Adam", 70), ("Eva", 75))
]

df = pd.DataFrame(data=rows_to_insert)
df


Unnamed: 0,full_name,age
0,Adam,70
1,Eva,75


In [12]:
# Upload the DataFrame into the mock_customer table.
# if_exists="append" adds the rows to the existing table, so re-running
# this cell uploads the same rows again.
pandas_gbq.to_gbq(
    dataframe=df,
    destination_table=f"{DATASET_ID}.mock_customer",
    project_id=PROJECT_ID,
    if_exists="append",
)

In [13]:
# Read mock_customer back through pandas-gbq, straight into a DataFrame.
query = f"""
    SELECT *
    FROM `{PROJECT_ID}.{DATASET_ID}.mock_customer`
    LIMIT 10
"""

# Note: this rebinds `df`, replacing the frame built for the upload above.
df = pandas_gbq.read_gbq(
    query,
    project_id=PROJECT_ID,
)
df

  record_batch = self.to_arrow(


Unnamed: 0,full_name,age
0,Adam,70
1,Eva,75
2,สมชาย ใจดี,25
3,สมหญิง รักเรียน,30
