In [0]:
%sql
-- bronze layer: ingest the NYC taxi sample trips into a streaming table.
-- columns are listed explicitly (rather than select *) so that a change to
-- the upstream schema cannot silently change the shape of this table.
-- NOTE(review): taxi_raw_records is defined a second time in a later cell of
-- this notebook (with a data-quality constraint); confirm that only one
-- definition is attached to the pipeline.
create or refresh streaming table taxi_raw_records
as
select
    tpep_pickup_datetime,
    tpep_dropoff_datetime,
    trip_distance,
    fare_amount,
    pickup_zip,
    dropoff_zip
from stream(samples.nyctaxi.trips)

Name,Type
tpep_pickup_datetime,timestamp
tpep_dropoff_datetime,timestamp
trip_distance,double
fare_amount,double
pickup_zip,int
dropoff_zip,int


In [0]:
%sql
-- bronze layer: ingest the NYC taxi sample trips into a streaming table.
-- the `valid` expectation drops any row whose trip_distance is not
-- strictly positive before it is written to the table.
-- columns are listed explicitly (rather than select *) so that a change to
-- the upstream schema cannot silently change the shape of this table.
create or refresh streaming table taxi_raw_records (
    constraint valid expect (trip_distance > 0.0) on violation drop row
)
as
select
    tpep_pickup_datetime,
    tpep_dropoff_datetime,
    trip_distance,
    fare_amount,
    pickup_zip,
    dropoff_zip
from stream(samples.nyctaxi.trips)

Name,Type
tpep_pickup_datetime,timestamp
tpep_dropoff_datetime,timestamp
trip_distance,double
fare_amount,double
pickup_zip,int
dropoff_zip,int


In [0]:
%sql
-- silver layer: data transformation and cleansing.
-- flags suspicious rides costing more than $50 that either
--   * start and end in the same zip code, or
--   * are short trips.
-- reads from the bronze table (not the raw sample) so the trip_distance
-- expectation declared on taxi_raw_records also applies to these rows.
-- NOTE(review): the short-trip cutoff (trip_distance < 5) is taken from the
-- Databricks pipeline tutorial; the original filter had no short-trip branch
-- even though the comment described one -- TODO confirm the intended cutoff.
create or refresh streaming table flagged_rides
as
select
    date_trunc('week', tpep_pickup_datetime) as week,  -- pickup week bucket
    pickup_zip as zip,
    trip_distance,
    fare_amount
from stream(live.taxi_raw_records)
where
    fare_amount > 50
    and (
        pickup_zip = dropoff_zip
        or trip_distance < 5
    );


Name,Type
week,timestamp
zip,int
trip_distance,double
fare_amount,double


In [0]:
%sql
-- gold layer: one row per pickup week with the average fare and the
-- average trip distance, computed over the bronze taxi_raw_records table.
create or refresh materialized view weekly_stats
as
select
    date_trunc('week', tpep_pickup_datetime) as week,
    avg(fare_amount) as avg_amount,
    avg(trip_distance) as avg_distance
from live.taxi_raw_records
group by
    date_trunc('week', tpep_pickup_datetime)  -- same expression as the week column
order by
    week asc

Name,Type
week,timestamp
avg_amount,double
avg_distance,double


In [0]:
%sql
-- gold layer: every flagged ride alongside the (rounded) average fare and
-- average distance of the week it was picked up in.
-- week is taken from flagged_rides, the preserved side of the left join, so
-- it is populated even when no weekly_stats row exists for that week; in
-- that case avga and avgd are null.
create or replace materialized view top_m
as
select
    fr.week,
    round(ws.avg_amount, 2) as avga,
    round(ws.avg_distance, 3) as avgd,
    fr.fare_amount,
    fr.trip_distance,
    fr.zip
from live.flagged_rides as fr
left join live.weekly_stats as ws
    on fr.week = ws.week
-- NOTE(review): without a limit this ordering may not be preserved when the
-- view is queried; consumers that need an order should sort explicitly.
order by fr.fare_amount desc;


Name,Type
week,timestamp
avga,double
avgd,double
fare_amount,double
trip_distance,double
zip,int
