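-- Data Warehouse
-- Names used in this script:
--   project_id                        = dataex-486105
--   gcp_bucket                        = dex_bk  (NOTE: the external-table URI in this script uses `dez_bk`; confirm which spelling is correct)
--   external table                    = yellow_taxi
--   regular (non-partitioned) table   = yellow_taxi_n
--   partitioned and clustered table   = yellow_taxi_partitioned_clustered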
-- Define an external table over the Parquet files stored in the GCS bucket.
-- NOTE(review): the notes at the top of this file list the bucket as `dex_bk`,
-- while this URI points at `dez_bk` -- confirm which one is correct.
CREATE OR REPLACE EXTERNAL TABLE `dataex-486105.ny_taxi.yellow_taxi`
OPTIONS (
    format = 'PARQUET',
    uris = ['gs://dez_bk/*.parquet']
);

-- Materialize the external data into a regular (non-partitioned) table.
-- SELECT * is intentional here: the whole table is copied as-is.
CREATE OR REPLACE TABLE `dataex-486105.ny_taxi.yellow_taxi_n`
AS
SELECT *
FROM `dataex-486105.ny_taxi.yellow_taxi`;

-- Total number of rows in the external table.
SELECT COUNT(*) AS records
FROM `dataex-486105.ny_taxi.yellow_taxi`;

-- Number of distinct PULocationID values, read through the external table.
SELECT COUNT(DISTINCT PULocationID)
FROM `dataex-486105.ny_taxi.yellow_taxi`;
-- Number of distinct PULocationID values, read from the regular
-- (non-partitioned) table.
SELECT
    COUNT(DISTINCT trips.PULocationID)
FROM
    `dataex-486105.ny_taxi.yellow_taxi_n` AS trips;
-- Read the PULocationID and DOLocationID columns for every row of the
-- regular (non-partitioned) table.
SELECT
    trips.PULocationID,
    trips.DOLocationID
FROM
    `dataex-486105.ny_taxi.yellow_taxi_n` AS trips;
-- Number of rows in the external table whose fare_amount is exactly 0.
SELECT COUNT(*) AS fare_zero
FROM `dataex-486105.ny_taxi.yellow_taxi`
WHERE fare_amount = 0;

-- Partition by tpep_dropoff_datetime (at DATE granularity) and cluster on VendorID.
-- SELECT * is intentional here: the whole table is copied as-is.
CREATE OR REPLACE TABLE `dataex-486105.ny_taxi.yellow_taxi_partitioned_clustered`
PARTITION BY DATE(tpep_dropoff_datetime)
CLUSTER BY VendorID
AS
SELECT *
FROM `dataex-486105.ny_taxi.yellow_taxi`;
-- Distinct VendorID values for drop-off dates 2024-03-01 through 2024-03-15
-- (both bounds inclusive), read from the regular (non-partitioned) table.
-- Query cost recorded by the author: 310.24 MB
SELECT DISTINCT VendorID
FROM `dataex-486105.ny_taxi.yellow_taxi_n`
WHERE DATE(tpep_dropoff_datetime) BETWEEN DATE '2024-03-01' AND DATE '2024-03-15';
-- Distinct VendorID values for drop-off dates 2024-03-01 through 2024-03-15
-- (both bounds inclusive), read from the partitioned and clustered table.
-- Query cost recorded by the author: 26.84 MB
SELECT DISTINCT VendorID
FROM `dataex-486105.ny_taxi.yellow_taxi_partitioned_clustered`
WHERE DATE(tpep_dropoff_datetime) BETWEEN DATE '2024-03-01' AND DATE '2024-03-15';
-- Total number of rows in the partitioned and clustered table.
-- This query costs 0 bytes because BigQuery does not need to scan the table
-- to return its number of rows.
SELECT COUNT(*)
FROM `dataex-486105.ny_taxi.yellow_taxi_partitioned_clustered`;