<a href="https://colab.research.google.com/github/hongwon2/BigQuery-Practice/blob/main/BigQuery%EB%A1%9C_%EB%B6%84%EC%84%9D%ED%95%9C_Hongwon's_Data_%EB%B8%94%EB%A1%9C%EA%B7%B8_%5B2%5D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# @title Setup
from google.colab import auth
from google.cloud import bigquery
from google.colab import data_table

# Authenticate the Colab user before building the client, so that credentials
# are already available when bigquery.Client looks up the environment defaults.
auth.authenticate_user()

project = 'liquid-terra-362502' # Project ID inserted based on the query results selected to explore
location = 'asia-northeast3' # Location inserted based on the query results selected to explore

# Shared client used by every cell below to look up previously executed query jobs.
client = bigquery.Client(project=project, location=location)

# Switch DataFrame cell output to Colab's data-table formatter.
data_table.enable_dataframe_formatter()

## 접속 요일과 시간대 알아보기

In [None]:
# Look up the stored query job for the day-of-week / hour-of-day access
# analysis and echo the SQL text it ran.
job = client.get_job(job_id='bquxjob_3e5f081c_18471802eaa')
print(job.query)

with time_stamp as (
select 
  timestamp_seconds(cast(cast(event_timestamp as INT64)/1000000 as INT64)) + interval '9' hour as event_time_stamp
from `analytics_310464334.events_*`
where _TABLE_SUFFIX between '20221009'and'20221112' and event_name = 'session_start'
order by event_time_stamp
),
access_log_with_dow as (
select 
  event_time_stamp,
  --일요일(0) ~ 토요일(6)
  extract(dayofweek from timestamp(event_time_stamp)) - 1 as dow, 
  --시간을 초로 변환
  cast(substring(cast(event_time_stamp as STRING),12,2) as INT64) * 60 * 60
  + cast(substring(cast(event_time_stamp as STRING),15,2) as INT64) * 60
  + cast(substring(cast(event_time_stamp as STRING),18,2) as INT64)
  as whole_seconds,
  --시간 간격 (1시간(3600초)) 지정
  60 * 60 as interval_seconds
from time_stamp
),
access_log_with_floor_seconds as (
select 
  event_time_stamp,
  dow,
  cast((floor(whole_seconds/interval_seconds) * interval_seconds) as INT64) as floor_seconds
from access_log_with_dow
),
access_log_with_index

In [None]:
# Fetch the same access-time job again (keeps this cell runnable on its own)
# and convert its result rows to a DataFrame; the bare name on the last line
# renders it as the cell output.
job = client.get_job(job_id='bquxjob_3e5f081c_18471802eaa')
results = job.to_dataframe()
results

Unnamed: 0,index_time,sun,mon,tue,wed,thu,fri,sat
0,00:00:00,5,6,8,8,14,6,10
1,01:00:00,2,1,3,7,10,5,4
2,02:00:00,0,0,1,8,5,1,9
3,03:00:00,1,2,2,2,2,0,6
4,04:00:00,0,3,0,1,6,1,1
5,05:00:00,3,1,0,0,0,3,2
6,06:00:00,1,4,1,0,0,1,5
7,07:00:00,0,2,3,2,2,3,3
8,08:00:00,0,3,1,4,8,3,5
9,09:00:00,1,10,7,13,10,13,3


## 페이지 완독률 알아보기

In [None]:
# Look up the stored query job for the page complete-reading-rate analysis
# and echo the SQL text it ran.
job = client.get_job(job_id='bquxjob_1e3e5ea7_18474e71713')
print(job.query)

with 
page_view_cnt as (
  select 
    event_params.value.string_value as page,
    count(event_params.value.string_value) as cnt
  from `analytics_310464334.events_*`,UNNEST(event_params) AS event_params
  where _TABLE_SUFFIX between '20221009'and'20221112' and event_name = 'page_view' and key = 'page_location'
  group by event_params.value.string_value
),
page_scroll_cnt as (
  select
    event_params.value.string_value as page,
    count(event_params.value.string_value) as cnt
  from `analytics_310464334.events_*`,UNNEST(event_params) AS event_params
  where _TABLE_SUFFIX between '20221009'and'20221112' and event_name = 'scroll' and key = 'page_location'
  group by event_params.value.string_value
)
select 
  v.page as page,
  v.cnt as view_cnt,
  p.cnt as scroll_cnt,
  round(p.cnt / v.cnt * 100,2) as complete_reading_rate
from page_view_cnt as v join page_scroll_cnt as p
on v.page = p.page
where v.cnt >= 10
order by complete_reading_rate desc,view_cnt desc


In [None]:
# Fetch the reading-rate job again (keeps this cell runnable on its own) and
# convert its result rows to a DataFrame; the bare name on the last line
# renders it as the cell output.
job = client.get_job(job_id='bquxjob_1e3e5ea7_18474e71713')
results = job.to_dataframe()
results

Unnamed: 0,page,view_cnt,scroll_cnt,complete_reading_rate
0,https://khw742002.tistory.com/59,22,19,86.36
1,https://khw742002.tistory.com/33,19,16,84.21
2,https://khw742002.tistory.com/category/Special...,27,22,81.48
3,https://khw742002.tistory.com/40,10,8,80.0
4,https://khw742002.tistory.com/47,10,8,80.0
5,https://khw742002.tistory.com/55,10,8,80.0
6,https://khw742002.tistory.com/category/Special...,25,19,76.0
7,https://khw742002.tistory.com/category/ETC/AWS...,11,8,72.73
8,https://khw742002.tistory.com/category/Special...,14,10,71.43
9,https://khw742002.tistory.com/27,20,14,70.0


## 성장 지수

In [3]:
# Look up the stored query job that builds the per-user, per-date activity
# flags used for the growth index, and echo the SQL text it ran.
job = client.get_job(job_id='bquxjob_694255ab_1847b3cc130')
print(job.query)

with
unique_action_log as (
select distinct
  user_pseudo_id as user_id,
  substr(cast(timestamp_seconds(cast(cast(event_timestamp as INT64)/1000000 as INT64)) + interval '9' hour as string),1,10) as action_date
from `analytics_310464334.events_*`
where _TABLE_SUFFIX between '20221009'and'20221112'
), mst_calendar AS (
  -- 집계하고 싶은 기간을 캘린더 테이블로 생성
  -- generate_series 등으로 동적 생성도 가능
            SELECT '2022-10-09' AS dt
  UNION ALL SELECT '2022-10-10' AS dt
  UNION ALL SELECT '2022-10-11' AS dt
  UNION ALL SELECT '2022-10-12' AS dt
  UNION ALL SELECT '2022-10-13' AS dt
  UNION ALL SELECT '2022-10-14' AS dt
  UNION ALL SELECT '2022-10-15' AS dt
  UNION ALL SELECT '2022-10-16' AS dt
  UNION ALL SELECT '2022-10-17' AS dt
  UNION ALL SELECT '2022-10-18' AS dt
  UNION ALL SELECT '2022-10-19' AS dt
  UNION ALL SELECT '2022-10-20' AS dt
  UNION ALL SELECT '2022-10-21' AS dt
  UNION ALL SELECT '2022-10-22' AS dt
  UNION ALL SELECT '2022-10-23' AS dt
  UNION ALL SELECT '

In [4]:
# Fetch the per-user activity job again (keeps this cell runnable on its own)
# and convert its result rows to a DataFrame; the bare name on the last line
# renders it as the cell output.
job = client.get_job(job_id='bquxjob_694255ab_1847b3cc130')
results = job.to_dataframe()
results

Unnamed: 0,target_date,user_id,is_new,is_access,was_access
0,2022-10-23,1010569990.1659289160,1,1,
1,2022-10-24,1010569990.1659289160,0,0,1
2,2022-10-25,1010569990.1659289160,0,0,0
3,2022-10-26,1010569990.1659289160,0,0,0
4,2022-10-27,1010569990.1659289160,0,0,0
...,...,...,...,...,...
14230,2022-11-04,863412843.1666649078,0,0,0
14231,2022-11-05,863412843.1666649078,0,0,0
14232,2022-11-06,863412843.1666649078,0,0,0
14233,2022-11-07,863412843.1666649078,0,0,0


In [5]:
# Look up the stored query job that aggregates the daily growth index and
# echo the SQL text it ran.
job = client.get_job(job_id='bquxjob_79d9f2f8_1847b4e44e3')
print(job.query)

with
unique_action_log as (
select distinct
  user_pseudo_id as user_id,
  substr(cast(timestamp_seconds(cast(cast(event_timestamp as INT64)/1000000 as INT64)) + interval '9' hour as string),1,10) as action_date
from `analytics_310464334.events_*`
where _TABLE_SUFFIX between '20221009'and'20221112'
), mst_calendar AS (
  -- 집계하고 싶은 기간을 캘린더 테이블로 생성
  -- generate_series 등으로 동적 생성도 가능
            SELECT '2022-10-09' AS dt
  UNION ALL SELECT '2022-10-10' AS dt
  UNION ALL SELECT '2022-10-11' AS dt
  UNION ALL SELECT '2022-10-12' AS dt
  UNION ALL SELECT '2022-10-13' AS dt
  UNION ALL SELECT '2022-10-14' AS dt
  UNION ALL SELECT '2022-10-15' AS dt
  UNION ALL SELECT '2022-10-16' AS dt
  UNION ALL SELECT '2022-10-17' AS dt
  UNION ALL SELECT '2022-10-18' AS dt
  UNION ALL SELECT '2022-10-19' AS dt
  UNION ALL SELECT '2022-10-20' AS dt
  UNION ALL SELECT '2022-10-21' AS dt
  UNION ALL SELECT '2022-10-22' AS dt
  UNION ALL SELECT '2022-10-23' AS dt
  UNION ALL SELECT '

In [6]:
# Fetch the growth-index job again (keeps this cell runnable on its own) and
# convert its result rows to a DataFrame; the bare name on the last line
# renders it as the cell output.
job = client.get_job(job_id='bquxjob_79d9f2f8_1847b4e44e3')
results = job.to_dataframe()
results

Unnamed: 0,target_date,first_visit,reactivation,deactivation,growth_index
0,2022-10-09,15,0,0,15
1,2022-10-10,14,0,-11,3
2,2022-10-11,21,0,-18,3
3,2022-10-12,31,0,-20,11
4,2022-10-13,34,3,-31,6
5,2022-10-14,22,0,-38,-16
6,2022-10-15,15,0,-20,-5
7,2022-10-16,11,0,-17,-6
8,2022-10-17,38,1,-11,28
9,2022-10-18,33,0,-39,-6
