--Queries to create external table and create a table in BQ using Yellow Taxi Trip Records
CREATE OR REPLACE EXTERNAL TABLE zoomcamp.yellow_taxi_external
OPTIONS (
format = 'PARQUET',
uris = [
'gs://kestra-zoomcamp-janio-demo/yellow_tripdata_2024-*.parquet'
]
);
CREATE OR REPLACE TABLE zoomcamp.yellow_taxi
AS
SELECT *
FROM zoomcamp.yellow_taxi_external;
--Question 02
SELECT
COUNT(DISTINCT PULocationID) AS distinct_pu_locations
FROM zoomcamp.yellow_taxi_external;
SELECT
COUNT(DISTINCT PULocationID) AS distinct_pu_locations
FROM zoomcamp.yellow_taxi;
--Question 03
SELECT
PULocationID
FROM zoomcamp.yellow_taxi;
SELECT
PULocationID,
DOLocationID
FROM zoomcamp.yellow_taxi;
--Question 04
SELECT count(*)
FROM zoomcamp.yellow_taxi
WHERE fare_amount = 0;
--Question 05
CREATE OR REPLACE TABLE zoomcamp.yellow_taxi_optimized
PARTITION BY DATE(tpep_dropoff_datetime)
CLUSTER BY VendorID AS
SELECT *
FROM zoomcamp.yellow_taxi;
--Question 06
SELECT distinct(VendorID)
FROM zoomcamp.yellow_taxi
WHERE tpep_dropoff_datetime BETWEEN '2024-03-01' AND '2024-03-15';
SELECT distinct(VendorID)
FROM zoomcamp.yellow_taxi_optimized
WHERE tpep_dropoff_datetime BETWEEN '2024-03-01' AND '2024-03-15';
HOMEWORK 4
--question 4
SELECT
pickup_zone,
SUM(revenue_monthly_total_amount) AS total_revenue_2020
FROM project-764327a3-812c-442b-870.dbt_jmendoncajunior.fct_monthly_zone_revenue
WHERE service_type = 'Green'
AND EXTRACT(YEAR FROM revenue_month) = 2020
GROUP BY pickup_zone
ORDER BY total_revenue_2020 DESC
LIMIT 1;
-- question 5
SELECT
SUM(total_monthly_trips) AS total_trips_oct_2019
FROM project-764327a3-812c-442b-870.dbt_jmendoncajunior.fct_monthly_zone_revenue
WHERE service_type = 'Green'
AND revenue_month = DATE '2019-10-01';
-- question 6
CREATE OR REPLACE EXTERNAL TABLE project-764327a3-812c-442b-870.nytaxi.fhv_tripdata
OPTIONS (
format = 'CSV',
compression = 'GZIP',
skip_leading_rows = 1,
uris = [
'gs://kestra-zoomcamp-janio-demo/fhv_tripdata_2019-*.csv.gz'
]
);
SELECT COUNT(*) FROM project-764327a3-812c-442b-870.dbt_jmendoncajunior.stg_fhv_tripdata
--stg_fhv_tripdata--
with source as ( select * from {{ source('raw', 'fhv_tripdata') }} ),
renamed as ( select -- identifiers cast(dispatching_base_num as string) as dispatching_base_num,
-- location ids (rename to project convention)
{{ safe_cast('pulocationid', 'integer') }} as pickup_location_id,
{{ safe_cast('dolocationid', 'integer') }} as dropoff_location_id,
-- timestamps
cast(pickup_datetime as timestamp) as pickup_datetime,
cast(dropoff_datetime as timestamp) as dropoff_datetime,
-- additional fhv fields
cast(sr_flag as string) as sr_flag,
cast(affiliated_base_number as string) as affiliated_base_number
from source
-- requirement: filter out null dispatching_base_num
where dispatching_base_num is not null
)
select * from renamed