### Trip-related Queries

#### Analyze how much time is spent per ride/trip

In [0]:
%sql
SELECT   
  d.season, d.day_of_week, d.time_of_day, d.is_weekend,  
  f.age_trip,  
  r.is_member,  
  ss.name as start_station,  
  es.name as end_station,  
  f.duration_in_minutes  
FROM  
  gold.fact_trips f  
INNER JOIN  
  gold.dim_date_trips d ON d.date_key = f.date_key  
INNER JOIN  
  gold.dim_riders r ON r.rider_key = f.rider_key  
INNER JOIN  
  gold.dim_stations ss ON ss.station_key = f.start_station_key  
INNER JOIN  
  gold.dim_stations es ON es.station_key = f.end_station_key
LIMIT 1000

season,day_of_week,time_of_day,is_weekend,age_trip,is_member,start_station,end_station,duration_in_minutes
Winter,2,Night,0,29,False,Canal St & Adams St,Desplaines St & Jackson Blvd,3.02
Winter,2,Night,0,36,True,Clinton St & Jackson Blvd,Rush St & Cedar St,21.7
Winter,2,Night,0,25,True,California Ave & Fletcher St,Maplewood Ave & Peterson Ave,40.68
Winter,2,Night,0,36,True,Halsted St & Wrightwood Ave,Broadway & Waveland Ave,7.07
Winter,2,Night,0,38,True,California Ave & Cortez St,California Ave & Cortez St,19.42
Winter,2,Night,0,41,True,Wood St & Taylor St (Temp),Racine Ave & 18th St,11.97
Winter,2,Morning,0,31,False,Greenview Ave & Diversey Pkwy,Wood St & Milwaukee Ave,21.92
Winter,2,Morning,0,30,False,Clarendon Ave & Junior Ter,Broadway & Waveland Ave,7.08
Winter,2,Morning,0,19,True,Sedgwick St & Webster Ave,Wells St & Elm St,9.95
Winter,2,Morning,0,53,False,Orleans St & Hubbard St,Wabash Ave & Grand Ave,5.58


#### Date and time factors such as day of week and time of day

In [0]:
%sql
SELECT  
    d.day_of_week,     
    d.time_of_day,     
    AVG(f.duration_in_minutes) as avg_duration  
FROM    
    gold.fact_trips f    
INNER JOIN    
    gold.dim_date_trips d ON d.date_key = f.date_key    
GROUP BY    
    d.day_of_week,     
    d.time_of_day  
ORDER BY  
    d.day_of_week,     
    CASE d.time_of_day   
        WHEN 'Morning' THEN 1   
        WHEN 'Afternoon' THEN 2   
        WHEN 'Evening' THEN 3   
        WHEN 'Night' THEN 4   
        ELSE 5   
    END  


day_of_week,time_of_day,avg_duration
1,Morning,26.295212
1,Afternoon,28.384431
1,Evening,26.778832
1,Night,29.165728
2,Morning,17.161856
2,Afternoon,21.911332
2,Evening,19.673188
2,Night,25.948018
3,Morning,15.101351
3,Afternoon,18.982752


In [0]:
%sql
SELECT  
    d.season,  
    d.time_of_day,     
    d.is_weekend,     
    AVG(f.duration_in_minutes) as avg_duration  
FROM    
    gold.fact_trips f    
INNER JOIN    
    gold.dim_date_trips d ON d.date_key = f.date_key    
GROUP BY    
    d.season,  
    d.time_of_day,  
    d.is_weekend  
ORDER BY  
    CASE d.season   
        WHEN 'Spring' THEN 1   
        WHEN 'Summer' THEN 2   
        WHEN 'Autumn' THEN 3   
        WHEN 'Winter' THEN 4   
        ELSE 5   
    END,  
    CASE d.time_of_day   
        WHEN 'Morning' THEN 1   
        WHEN 'Afternoon' THEN 2   
        WHEN 'Evening' THEN 3   
        WHEN 'Night' THEN 4   
        ELSE 5   
    END,  
    d.is_weekend  


season,time_of_day,is_weekend,avg_duration
Spring,Morning,0,16.828798
Spring,Morning,1,25.719781
Spring,Afternoon,0,22.924248
Spring,Afternoon,1,31.866273
Spring,Evening,0,22.083272
Spring,Evening,1,30.46065
Spring,Night,0,26.280684
Spring,Night,1,33.30516
Summer,Morning,0,17.177329
Summer,Morning,1,25.51993


#### Which station is the starting and / or ending station?

In [0]:
%sql
SELECT     
    ss.name as start_station,     
    es.name as end_station,     
    AVG(f.duration_in_minutes) as avg_duration    
FROM    
    gold.fact_trips f    
INNER JOIN    
    gold.dim_stations ss ON ss.station_key = f.start_station_key    
INNER JOIN    
    gold.dim_stations es ON es.station_key = f.end_station_key    
GROUP BY    
    ss.name,     
    es.name  


start_station,end_station,avg_duration
Broadway & Granville Ave,Clark St & Grace St,24.666033
Sedgwick St & North Ave,Michigan Ave & 18th St,31.4
Wolcott Ave & Polk St,Loomis St & Archer Ave,20.724
Daley Center Plaza,Dearborn St & Van Buren St,11.089395
Morgan St & Polk St,Clinton St & Jackson Blvd,9.497896
Fairbanks St & Superior St,Canal St & Adams St,17.055783
Sedgwick St & Webster Ave,Canal St & Madison St,24.103158
Winchester (Ravenswood) Ave & Balmoral Ave,Clark St & Bryn Mawr Ave,7.670149
Leavitt St & Chicago Ave,Wolcott Ave & Polk St,26.999783
LaSalle St & Illinois St,Clinton St & Washington Blvd,10.797766


#### Age of the rider at time of the ride

In [0]:
%sql
SELECT     
    f.age_trip,     
    AVG(f.duration_in_minutes) as avg_duration    
FROM    
    gold.fact_trips f    
GROUP BY    
    f.age_trip  
ORDER by f.age_trip  


age_trip,avg_duration
14,13.013
15,23.40943
16,21.567564
17,21.946248
18,20.922264
19,22.549064
20,21.531098
21,22.706549
22,22.120647
23,21.602666


In [0]:
%sql
SELECT  
    CASE      
        WHEN f.age_trip BETWEEN 10 AND 19 THEN '10-19'    
        WHEN f.age_trip BETWEEN 20 AND 29 THEN '20-29'    
        WHEN f.age_trip BETWEEN 30 AND 39 THEN '30-39'    
        WHEN f.age_trip BETWEEN 40 AND 49 THEN '40-49'    
        WHEN f.age_trip BETWEEN 50 AND 59 THEN '50-59'    
        WHEN f.age_trip BETWEEN 60 AND 69 THEN '60-69'    
        WHEN f.age_trip BETWEEN 70 AND 79 THEN '70-79'    
        WHEN f.age_trip BETWEEN 80 AND 89 THEN '80-89'    
        ELSE 'Other'    
    END as age_range,    
    AVG(f.duration_in_minutes) as avg_duration      
FROM      
    gold.fact_trips f      
INNER JOIN      
    gold.dim_riders r ON r.rider_key = f.rider_key  
GROUP BY      
    CASE      
        WHEN f.age_trip BETWEEN 10 AND 19 THEN '10-19'    
        WHEN f.age_trip BETWEEN 20 AND 29 THEN '20-29'    
        WHEN f.age_trip BETWEEN 30 AND 39 THEN '30-39'    
        WHEN f.age_trip BETWEEN 40 AND 49 THEN '40-49'    
        WHEN f.age_trip BETWEEN 50 AND 59 THEN '50-59'    
        WHEN f.age_trip BETWEEN 60 AND 69 THEN '60-69'    
        WHEN f.age_trip BETWEEN 70 AND 79 THEN '70-79'    
        WHEN f.age_trip BETWEEN 80 AND 89 THEN '80-89'    
        ELSE 'Other'    
    END  
ORDER BY    
    age_range  


age_range,avg_duration
10-19,21.95564
20-29,21.867833
30-39,21.942706
40-49,21.413517
50-59,21.321178
60-69,21.644246
70-79,19.707701


#### Rider is a member or a casual rider

In [0]:
%sql
SELECT     
    r.is_member,     
    AVG(f.duration_in_minutes) as avg_duration  
FROM    
    gold.fact_trips f    
INNER JOIN    
    gold.dim_riders r ON r.rider_key = f.rider_key    
GROUP BY    
    r.is_member  

is_member,avg_duration
True,21.904424
False,21.323787


### Payment-related Queries

#### Overall spent per year, quarter, month

In [0]:
%sql
SELECT  
    d.year, d.quarter, d.month,  
    SUM(f.amount_dollar) as amount_dollar  
FROM  
    gold.fact_payments f  
INNER JOIN  
    gold.dim_date_payments d ON d.date_key = f.date_key  
GROUP BY  
    d.year, d.quarter, d.month  
ORDER BY  
    d.year, d.quarter, d.month  


year,quarter,month,amount_dollar
2013,1,2,12.9
2013,1,3,817.75
2013,2,4,1672.65
2013,2,5,2716.71
2013,2,6,3775.3
2013,3,7,4760.96
2013,3,8,5834.3
2013,3,9,6672.1
2013,4,10,7886.12
2013,4,11,9195.45


#### Top riders per month

In [0]:
%sql
WITH RiderAmounts AS (    
    SELECT     
        d.year, d.quarter, d.month, r.rider_key,    
        sum(f.amount_dollar) as amount_dollar,    
        ROW_NUMBER() OVER(PARTITION BY d.year, d.quarter, d.month ORDER BY sum(f.amount_dollar) DESC) as rn    
    FROM    
        gold.fact_payments f    
    INNER JOIN    
        gold.dim_date_payments d ON d.date_key = f.date_key    
    INNER JOIN    
        gold.dim_riders r ON r.rider_key = f.rider_key    
    GROUP BY    
        d.year, d.quarter, d.month, r.rider_key    
)    
SELECT     
    year, quarter, month, rider_key, amount_dollar     
FROM     
    RiderAmounts     
WHERE     
    rn = 1    
ORDER BY    
    year, quarter, month  


year,quarter,month,rider_key,amount_dollar
2013,1,2,12687,12.9
2013,1,3,7243,24.92
2013,2,4,22100,23.4
2013,2,5,25353,24.82
2013,2,6,65498,24.75
2013,3,7,73973,24.9
2013,3,8,40202,24.84
2013,3,9,48580,24.98
2013,4,10,17404,24.8
2013,4,11,45576,24.98


#### Money spent per member based on rides/minutes per month

In [0]:
%sql
SELECT  
    ft.rider_key,  
    d.year,  
    d.month,  
    COUNT(1) as number_of_rides,  
    SUM(ft.duration_in_minutes) as total_minutes,  
    SUM(fp.amount_dollar) as money_spent  
FROM  
    gold.fact_trips ft  
INNER JOIN  
    gold.fact_payments fp ON fp.rider_key = ft.rider_key AND date_format(fp.date_key, 'yyyy-MM') = date_format(ft.date_key, 'yyyy-MM')  
INNER JOIN  
    gold.dim_date_payments d ON d.date_key = fp.date_key  
GROUP BY  
    ft.rider_key,  
    d.year,  
    d.month  
ORDER BY  
    d.year,  
    d.month,  
    money_spent DESC  
LIMIT 100  


rider_key,year,month,number_of_rides,total_minutes,money_spent
33666,2021,2,17,156.35,357.0
3564,2021,2,11,314.75,258.06
67815,2021,2,11,98.27,248.71
45395,2021,2,10,104.87,245.3
68942,2021,2,12,239.31,236.88
54174,2021,2,25,431.3,225.0
8673,2021,2,9,1583.35,221.58
5184,2021,2,10,139.1,218.3
63911,2021,2,9,180.23,217.62
26409,2021,2,11,154.36,213.18
