In [18]:
-- Rider type by category overall

select user_type,
count(*) / (cast(sum(count(*)) over() as float)) as percentage
from bluebikes_2019
group by user_type

user_type,percentage
Customer,0.211719392024775
Subscriber,0.7882806079752249


In [19]:
-- Rider type by category by quarter

with q as(

select user_type,
ntile(4) over(order by start_time) as quarter

from bluebikes_2018)

select user_type, quarter,
count(*) as rides,
count(*) / (cast(sum(count(*)) over(partition by quarter) as float)) as percentage

from q

group by quarter, user_type
order by quarter, percentage desc

user_type,quarter,rides,percentage
Subscriber,1,386792,0.8751900658895083
Customer,1,55160,0.1248099341104916
Subscriber,2,339276,0.7676761277242777
Customer,2,102676,0.2323238722757222
Subscriber,3,333140,0.7537939726349754
Customer,3,108811,0.2462060273650246
Subscriber,4,377469,0.8540969473991461
Customer,4,64482,0.1459030526008539


In [58]:
-- Rider type by category by month

with q as(

select user_type,
ntile(12) over(order by start_time) as month

from bluebikes_2018)

select user_type, month,
count(*) as rides,
round(count(*) / sum(count(*)) over(partition by month), 4) * 100 as percentage

from q

group by month, user_type
order by month, percentage desc

user_type,month,rides,percentage
Subscriber,1,140442,95.33
Customer,1,6876,4.67
Subscriber,2,127292,86.41
Customer,2,20026,13.59
Subscriber,3,119059,80.82
Customer,3,28258,19.18
Subscriber,4,116703,79.22
Customer,4,30614,20.78
Subscriber,5,112285,76.22
Customer,5,35032,23.78


In [10]:
-- Rider type by gender by month

with q as(
select
case when user_gender = 1 then 'male' else 'female' end as gender,
ntile(12) over(order by start_time) as month

from bluebikes_2018)

select gender, month,
count(*) as rides,
round(count(*) / sum(count(*)) over(partition by month), 4) * 100 as percentage

from q

group by month, gender
order by month, percentage desc

gender,month,rides,percentage
male,1,107996,73.31
female,1,39322,26.69
male,2,96758,65.68
female,2,50560,34.32
male,3,93375,63.38
female,3,53942,36.62
male,4,91260,61.95
female,4,56057,38.05
male,5,89239,60.58
female,5,58078,39.42


In [32]:
select * from bluebikes_2018
limit 10

bike_id,start_time,end_time,start_station_id,end_station_id,user_type,user_birth_year,user_gender
2749,2018-10-01T00:01:57.186000,2018-10-01T00:06:34.232000,67,189,Subscriber,1996,1
3416,2018-10-01T00:03:22.310000,2018-10-01T00:19:11.505000,45,103,Subscriber,1992,1
3647,2018-10-01T00:05:49.220000,2018-10-01T00:51:18.540000,160,160,Customer,1969,0
3673,2018-10-01T00:06:21.292000,2018-10-01T00:49:45.876000,160,160,Customer,1969,0
3065,2018-10-01T00:06:46.653000,2018-10-01T00:20:22.714000,4,190,Subscriber,1998,2
3301,2018-10-01T00:06:53.431000,2018-10-01T00:17:11.447000,103,233,Subscriber,1996,1
3607,2018-10-01T00:08:20.504000,2018-10-01T00:10:46.318000,74,97,Subscriber,1999,1
3905,2018-10-01T00:08:24.845000,2018-10-01T00:33:18.243000,33,120,Subscriber,1982,1
3434,2018-10-01T00:08:54.932000,2018-10-01T00:24:57.315000,11,70,Customer,1969,0
3395,2018-10-01T00:08:59.323000,2018-10-01T00:28:35.436000,134,106,Customer,1997,1


In [44]:
-- Riders by age group

select 
count(1) as riders,
-- 2018 - trunc(cast(user_birth_year as numeric), 0), # this is for sanity testing
case 
when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '12 years' then 'under_12'
when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '20 years' then 'teenager'
when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '30 years' then '20s'
when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '40 years' then '30s'
when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '50 years' then '40s'
when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '60 years' then '50s'
when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '70 years' then '60s'
when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '80 years' then '70s'
when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '90 years' then '80s'
when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '90 years' then '80s'
else 'older' end as age_group,
round(count(1) / sum(count(1)) over(), 4) * 100 as percentage
from bluebikes_2018
group by age_group
order by percentage desc
-- limit 500

riders,age_group,percentage
699781,20s,39.58
465235,30s,26.32
383585,40s,21.7
122924,50s,6.95
48958,60s,2.77
30925,teenager,1.75
9994,older,0.57
6125,70s,0.35
279,80s,0.02


In [18]:
-- Riders by gender and age group

select count(1) as riders,
       case
           when user_gender = 1 then 'male'
           when user_gender = 2 then 'female'
           else 'unspecified'
       end as gender, -- 2018 - trunc(cast(user_birth_year as numeric), 0), # this is for sanity testing
case
    when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '12 years' then 'under_12'
    when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '20 years' then 'teenager'
    when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '30 years' then '20s'
    when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '40 years' then '30s'
    when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '50 years' then '40s'
    when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '60 years' then '50s'
    when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '70 years' then '60s'
    when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '80 years' then '70s'
    when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '90 years' then '80s'
    when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '90 years' then '80s'
    else 'older'
                                                                                                        end as age_group,
                                                                                                        round(count(1) / sum(count(1)) over(), 4) * 100 as percentage
                                                                                                        -- round(count(1) / sum(count(1)) over(partition by age_group), 4) * 100 as age_group_percent
from bluebikes_2018
group by age_group,
         gender
order by age_group,
         gender -- limit 500

riders,gender,age_group,percentage
198274,female,20s,11.22
497868,male,20s,28.16
3639,unspecified,20s,0.21
111795,female,30s,6.32
349456,male,30s,19.77
3984,unspecified,30s,0.23
35141,female,40s,1.99
139249,male,40s,7.88
209195,unspecified,40s,11.83
32777,female,50s,1.85


In [21]:
-- Riders by gender and age group WORKING!!

select riders,
       gender,
       age_group,
       total_percent,
       round(riders / sum(riders) over(partition by age_group), 4) * 100 as age_group_percent
from
    (select count(1) as riders,
            case
                when user_gender = 1 then 'male'
                when user_gender = 2 then 'female'
                else 'unspecified'
            end as gender, -- 2018 - trunc(cast(user_birth_year as numeric), 0), # this is for sanity testing
 case
     when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '12 years' then 'under_12'
     when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '20 years' then 'teenager'
     when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '30 years' then '20s'
     when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '40 years' then '30s'
     when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '50 years' then '40s'
     when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '60 years' then '50s'
     when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '70 years' then '60s'
     when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '80 years' then '70s'
     when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '90 years' then '80s'
     when (trunc(cast(user_birth_year as numeric), 0) || '-01-01')::timestamp > '2018-01-01'::timestamp - interval '90 years' then '80s'
     else 'older'
 end as age_group,
 round(count(1) / sum(count(1)) over(), 4) * 100 as total_percent
     from bluebikes_2018
     group by age_group,
              gender
     order by age_group,
              gender) big
group by age_group,
         gender,
         riders,
         total_percent
order by age_group,
         gender

riders,gender,age_group,total_percent,age_group_percent
198274,female,20s,11.22,28.33
497868,male,20s,28.16,71.15
3639,unspecified,20s,0.21,0.52
111795,female,30s,6.32,24.03
349456,male,30s,19.77,75.11
3984,unspecified,30s,0.23,0.86
35141,female,40s,1.99,9.16
139249,male,40s,7.88,36.3
209195,unspecified,40s,11.83,54.54
32777,female,50s,1.85,26.66


In [53]:
select * from bluebikes_2018
order by start_time
limit 10

bike_id,start_time,end_time,start_station_id,end_station_id,user_type,user_birth_year,user_gender
643,2018-01-01T00:16:33,2018-01-01T00:23:01,178,107,Subscriber,1992.0,2
1581,2018-01-01T00:42:00,2018-01-01T00:46:25,78,225,Subscriber,1990.0,1
173,2018-01-01T00:42:44,2018-01-01T01:02:11,16,76,Subscriber,1990.0,1
1772,2018-01-01T00:56:50,2018-01-01T01:11:06,69,177,Subscriber,1992.0,1
1183,2018-01-01T01:07:54,2018-01-01T01:16:02,4,46,Subscriber,1993.0,1
1793,2018-01-01T01:15:59,2018-01-01T01:17:21,31,186,Subscriber,1990.0,1
60,2018-01-01T01:35:37,2018-01-01T01:42:35,51,222,Subscriber,1986.0,1
371,2018-01-01T01:35:46,2018-01-01T01:45:11,189,178,Subscriber,1980.0,1
961,2018-01-01T01:45:04,2018-01-01T01:53:40,108,176,Subscriber,1989.0,1
1286,2018-01-01T01:56:58,2018-01-01T02:06:13,185,178,Subscriber,1989.0,1


In [45]:
-- Percentage by gender

select
case when user_gender = 1 then 'male' 
    when user_gender = 2 then 'female'
    else 'unspecified' end as gender,
count(1),
round(count(1) / sum(count(1)) over(), 6) * 100 as percentage
from bluebikes_2018
group by user_gender

gender,count,percentage
unspecified,227677,12.8791
male,1141735,64.5849
female,398394,22.5361
