In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
import json
import csv
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
from google.cloud import bigquery
from sqlalchemy import create_engine
from dotenv import load_dotenv
from scipy import stats
%matplotlib inline

In [2]:
load_dotenv(".env2")

host=os.environ.get('DB_HOST')
port=os.environ.get('DB_PORT')
database=os.environ.get('DB_DATABASE')
user=os.environ.get('DB_USER')
password=os.environ.get('DB_PASSWORD')
project_id = os.environ.get('PROJECT_ID')

In [3]:
conn_string = f"postgresql://{user}:{password}@{host}/{database}"
postgres_engine = create_engine(conn_string)

In [4]:
bigquery_id = project_id

# Initialize the BigQuery client
client = bigquery.Client(project=bigquery_id)

# SQL query to get a sample of the data
query = """
select *
from `bigquery-public-data.google_analytics_sample.ga_sessions_*`
where _table_suffix between '20161201' and '20161231'
"""

# Run the query
query_job = client.query(query)
results = query_job.result()

# Convert the results to a pandas DataFrame
df_original = results.to_dataframe()
df_original

Unnamed: 0,visitorId,visitNumber,visitId,visitStartTime,date,totals,trafficSource,device,geoNetwork,customDimensions,hits,fullVisitorId,userId,clientId,channelGrouping,socialEngagementType
0,,1,1482735558,1482735558,20161225,"{'visits': 1, 'hits': 15, 'pageviews': 12, 'ti...","{'referralPath': None, 'campaign': '(not set)'...","{'browser': 'Chrome', 'browserVersion': 'not a...","{'continent': 'Asia', 'subContinent': 'Souther...","[{'index': 4, 'value': 'APAC'}]","[{'hitNumber': 1, 'time': 0, 'hour': 22, 'minu...",0872037965610022777,,,Organic Search,Not Socially Engaged
1,,1,1482680435,1482680435,20161225,"{'visits': 1, 'hits': 15, 'pageviews': 11, 'ti...","{'referralPath': None, 'campaign': '(not set)'...","{'browser': 'Chrome', 'browserVersion': 'not a...","{'continent': 'Asia', 'subContinent': 'Souther...","[{'index': 4, 'value': 'APAC'}]","[{'hitNumber': 1, 'time': 0, 'hour': 7, 'minut...",3650567674170993427,,,Organic Search,Not Socially Engaged
2,,1,1482679045,1482679045,20161225,"{'visits': 1, 'hits': 15, 'pageviews': 13, 'ti...","{'referralPath': '/', 'campaign': '(not set)',...","{'browser': 'Chrome', 'browserVersion': 'not a...","{'continent': 'Americas', 'subContinent': 'Nor...","[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 7, 'minut...",7595424386639223686,,,Referral,Not Socially Engaged
3,,1,1482725794,1482725794,20161225,"{'visits': 1, 'hits': 15, 'pageviews': 15, 'ti...","{'referralPath': None, 'campaign': '(not set)'...","{'browser': 'Safari', 'browserVersion': 'not a...","{'continent': 'Americas', 'subContinent': 'Nor...","[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 20, 'minu...",9566340125561170915,,,Direct,Not Socially Engaged
4,,1,1482727701,1482727701,20161225,"{'visits': 1, 'hits': 21, 'pageviews': 18, 'ti...","{'referralPath': None, 'campaign': '(not set)'...","{'browser': 'Safari', 'browserVersion': 'not a...","{'continent': 'Americas', 'subContinent': 'Nor...","[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 20, 'minu...",2742591188773845569,,,Organic Search,Not Socially Engaged
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79119,,1,1481204631,1481204631,20161208,"{'visits': 1, 'hits': 1, 'pageviews': 1, 'time...","{'referralPath': None, 'campaign': '(not set)'...","{'browser': 'Chrome', 'browserVersion': 'not a...","{'continent': 'Americas', 'subContinent': 'Nor...","[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 5, 'minut...",5518235530708170012,,,Organic Search,Not Socially Engaged
79120,,1,1481219514,1481219514,20161208,"{'visits': 1, 'hits': 1, 'pageviews': 1, 'time...","{'referralPath': None, 'campaign': '(not set)'...","{'browser': 'Chrome', 'browserVersion': 'not a...","{'continent': 'Asia', 'subContinent': 'Souther...","[{'index': 4, 'value': 'APAC'}]","[{'hitNumber': 1, 'time': 0, 'hour': 9, 'minut...",3133531130777311663,,,Organic Search,Not Socially Engaged
79121,,6,1481174969,1481174969,20161207,"{'visits': 1, 'hits': 110, 'pageviews': 80, 't...","{'referralPath': '/', 'campaign': '(not set)',...","{'browser': 'Chrome', 'browserVersion': 'not a...","{'continent': 'Americas', 'subContinent': 'Nor...","[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 21, 'minu...",9713078881816410558,,,Referral,Not Socially Engaged
79122,,4,1481150112,1481150112,20161207,"{'visits': 1, 'hits': 125, 'pageviews': 92, 't...","{'referralPath': '/', 'campaign': '(not set)',...","{'browser': 'Chrome', 'browserVersion': 'not a...","{'continent': 'Americas', 'subContinent': 'Nor...","[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 14, 'minu...",7036011728115463948,,,Referral,Not Socially Engaged


In [5]:
df = df_original.copy()
df.head()

Unnamed: 0,visitorId,visitNumber,visitId,visitStartTime,date,totals,trafficSource,device,geoNetwork,customDimensions,hits,fullVisitorId,userId,clientId,channelGrouping,socialEngagementType
0,,1,1482735558,1482735558,20161225,"{'visits': 1, 'hits': 15, 'pageviews': 12, 'ti...","{'referralPath': None, 'campaign': '(not set)'...","{'browser': 'Chrome', 'browserVersion': 'not a...","{'continent': 'Asia', 'subContinent': 'Souther...","[{'index': 4, 'value': 'APAC'}]","[{'hitNumber': 1, 'time': 0, 'hour': 22, 'minu...",872037965610022777,,,Organic Search,Not Socially Engaged
1,,1,1482680435,1482680435,20161225,"{'visits': 1, 'hits': 15, 'pageviews': 11, 'ti...","{'referralPath': None, 'campaign': '(not set)'...","{'browser': 'Chrome', 'browserVersion': 'not a...","{'continent': 'Asia', 'subContinent': 'Souther...","[{'index': 4, 'value': 'APAC'}]","[{'hitNumber': 1, 'time': 0, 'hour': 7, 'minut...",3650567674170993427,,,Organic Search,Not Socially Engaged
2,,1,1482679045,1482679045,20161225,"{'visits': 1, 'hits': 15, 'pageviews': 13, 'ti...","{'referralPath': '/', 'campaign': '(not set)',...","{'browser': 'Chrome', 'browserVersion': 'not a...","{'continent': 'Americas', 'subContinent': 'Nor...","[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 7, 'minut...",7595424386639223686,,,Referral,Not Socially Engaged
3,,1,1482725794,1482725794,20161225,"{'visits': 1, 'hits': 15, 'pageviews': 15, 'ti...","{'referralPath': None, 'campaign': '(not set)'...","{'browser': 'Safari', 'browserVersion': 'not a...","{'continent': 'Americas', 'subContinent': 'Nor...","[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 20, 'minu...",9566340125561170915,,,Direct,Not Socially Engaged
4,,1,1482727701,1482727701,20161225,"{'visits': 1, 'hits': 21, 'pageviews': 18, 'ti...","{'referralPath': None, 'campaign': '(not set)'...","{'browser': 'Safari', 'browserVersion': 'not a...","{'continent': 'Americas', 'subContinent': 'Nor...","[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 20, 'minu...",2742591188773845569,,,Organic Search,Not Socially Engaged


In [6]:
normalized_data = []

for i in df.index:    
    df_hits = df['hits'][i]
    df_hits = pd.json_normalize(df_hits)
    df_hits.columns = [f"hits_{col}" for col in df_hits.columns]
    df_hits.insert(0, 'visit_id', df['visitId'][i])  # Insert the 'visitId' column as the first column
    normalized_data.append(df_hits)

pd.set_option('display.max_columns', None)

result_df = pd.concat(normalized_data)
result_df

Unnamed: 0,visit_id,hits_hitNumber,hits_time,hits_hour,hits_minute,hits_isSecure,hits_isInteraction,hits_isEntrance,hits_isExit,hits_referer,hits_transaction,hits_item,hits_contentInfo,hits_eventInfo,hits_product,hits_promotion,hits_promotionActionInfo,hits_refund,hits_experiment,hits_publisher,hits_customVariables,hits_customDimensions,hits_customMetrics,hits_type,hits_latencyTracking,hits_sourcePropertyInfo,hits_dataSource,hits_publisher_infos,hits_page.pagePath,hits_page.hostname,hits_page.pageTitle,hits_page.searchKeyword,hits_page.searchCategory,hits_page.pagePathLevel1,hits_page.pagePathLevel2,hits_page.pagePathLevel3,hits_page.pagePathLevel4,hits_appInfo.name,hits_appInfo.version,hits_appInfo.id,hits_appInfo.installerId,hits_appInfo.appInstallerId,hits_appInfo.appName,hits_appInfo.appVersion,hits_appInfo.appId,hits_appInfo.screenName,hits_appInfo.landingScreenName,hits_appInfo.exitScreenName,hits_appInfo.screenDepth,hits_exceptionInfo.description,hits_exceptionInfo.isFatal,hits_exceptionInfo.exceptions,hits_exceptionInfo.fatalExceptions,hits_eCommerceAction.action_type,hits_eCommerceAction.step,hits_eCommerceAction.option,hits_social.socialInteractionNetwork,hits_social.socialInteractionAction,hits_social.socialInteractions,hits_social.socialInteractionTarget,hits_social.socialNetwork,hits_social.uniqueSocialInteractions,hits_social.hasSocialSourceReferral,hits_social.socialInteractionNetworkAction,hits_contentGroup.contentGroup1,hits_contentGroup.contentGroup2,hits_contentGroup.contentGroup3,hits_contentGroup.contentGroup4,hits_contentGroup.contentGroup5,hits_contentGroup.previousContentGroup1,hits_contentGroup.previousContentGroup2,hits_contentGroup.previousContentGroup3,hits_contentGroup.previousContentGroup4,hits_contentGroup.previousContentGroup5,hits_contentGroup.contentGroupUniqueViews1,hits_contentGroup.contentGroupUniqueViews2,hits_contentGroup.contentGroupUniqueViews3,hits_contentGroup.contentGroupUniqueViews4,hits_contentGroup.contentGroupUniqueViews5,hits_promotionActionInfo.promoIsView,hits_promotionActionInfo.promoIsClick,hits_transaction.transactionId,hits_transaction.transactionRevenue,hits_transaction.transactionTax,hits_transaction.transactionShipping,hits_transaction.affiliation,hits_transaction.currencyCode,hits_transaction.localTransactionRevenue,hits_transaction.localTransactionTax,hits_transaction.localTransactionShipping,hits_transaction.transactionCoupon,hits_item.transactionId,hits_item.productName,hits_item.productCategory,hits_item.productSku,hits_item.itemQuantity,hits_item.itemRevenue,hits_item.currencyCode,hits_item.localItemRevenue,hits_eventInfo.eventCategory,hits_eventInfo.eventAction,hits_eventInfo.eventLabel,hits_eventInfo.eventValue,hits_latencyTracking.pageLoadSample,hits_latencyTracking.pageLoadTime,hits_latencyTracking.pageDownloadTime,hits_latencyTracking.redirectionTime,hits_latencyTracking.speedMetricsSample,hits_latencyTracking.domainLookupTime,hits_latencyTracking.serverConnectionTime,hits_latencyTracking.serverResponseTime,hits_latencyTracking.domLatencyMetricsSample,hits_latencyTracking.domInteractiveTime,hits_latencyTracking.domContentLoadedTime,hits_latencyTracking.userTimingValue,hits_latencyTracking.userTimingSample,hits_latencyTracking.userTimingVariable,hits_latencyTracking.userTimingCategory,hits_latencyTracking.userTimingLabel
0,1482735558,1,0,22,59,,True,True,,https://www.google.co.in/,,,,,[],[],,,[],,[],[],[],PAGE,,,,[],/home,www.googlemerchandisestore.com,Google Online Store,,,/home,,,,,,,,,,,,www.googlemerchandisestore.com/home,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/yourinfo.html,0,,True,,,0,1,,,,,,(not set),,No,:,(not set),(not set),(not set),(not set),(not set),(entrance),(entrance),(entrance),(entrance),(entrance),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1482735558,2,59004,23,0,,True,,,https://www.google.co.in/,,,,,[],[],,,[],,[],[],[],PAGE,,,,[],/home,www.googlemerchandisestore.com,Google Online Store,,,/home,,,,,,,,,,,,www.googlemerchandisestore.com/home,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/yourinfo.html,0,,True,,,0,1,,,,,,(not set),,No,:,(not set),(not set),(not set),(not set),(not set),(not set),(not set),(not set),(not set),(not set),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1482735558,3,75732,23,0,,True,,,,,,,,[],"[{'promoId': 'Apparel Row 1', 'promoName': 'Ap...",,,[],,[],[],[],PAGE,,,,[],/home,shop.googlemerchandisestore.com,Home,,,/home,,,,,,,,,,,,shop.googlemerchandisestore.com/home,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/yourinfo.html,0,,True,,,0,1,,,,,,(not set),,No,:,(not set),(not set),(not set),(not set),(not set),(not set),(not set),(not set),(not set),(not set),,,,,,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1482735558,4,83614,23,0,,True,,,,,,,,"[{'productSKU': 'GGOEGAAX0358', 'v2ProductName...",[],,,[],,[],[],[],PAGE,,,,[],/google+redesign/apparel/men++s/men++s+outerwear,shop.googlemerchandisestore.com,Men's Outerwear | Apparel | Google Merchandise...,,,/google+redesign/,/apparel/,/men++s/,/men++s+outerwear,,,,,,,,,shop.googlemerchandisestore.com/google+redesig...,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/yourinfo.html,0,,True,,,0,1,,,,,,(not set),,No,:,(not set),Apparel,Mens,(not set),(not set),(not set),(not set),(not set),(not set),(not set),,1.0,1.0,,,,,,,,,,USD,,,,,,,,,,,USD,,,,,,,,,,,,,,,,,,,,,
4,1482735558,5,88999,23,0,,True,,,,,,,,"[{'productSKU': 'GGOEGAAX0313', 'v2ProductName...",[],,,[],,[],[],[],EVENT,,,,[],/google+redesign/apparel/men++s/men++s+outerwear,shop.googlemerchandisestore.com,Men's Outerwear | Apparel | Google Merchandise...,,,/google+redesign/,/apparel/,/men++s/,/men++s+outerwear,,,,,,,,,shop.googlemerchandisestore.com/google+redesig...,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/yourinfo.html,0,,True,,,1,1,,,,,,(not set),,No,:,(not set),Apparel,Mens,(not set),(not set),(not set),Apparel,Mens,(not set),(not set),,,,,,,,,,,,,,,,,,,,,,,,,,Enhanced Ecommerce,Quickview Click,Google Tri-blend Hoodie Grey,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148,1481132948,149,2905730,10,37,,True,,,,,,,,"[{'productSKU': 'GGOEGFKQ020399', 'v2ProductNa...",[],,,[],,[],[],[],PAGE,,,,[],/payment.html,shop.googlemerchandisestore.com,Payment Method,,,/payment.html,,,,,,,,,,,,shop.googlemerchandisestore.com/payment.html,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/ordercompleted...,0,,True,,,5,2,Payment,,,,,(not set),,No,:,(not set),(not set),(not set),(not set),(not set),(not set),Bags,(not set),(not set),(not set),,,,,,,,,,,,,USD,,,,,,,,,,,USD,,,,,,,,,,,,,,,,,,,,,
149,1481132948,150,2948131,10,38,,True,,,,,,,,"[{'productSKU': 'GGOEGFKQ020399', 'v2ProductNa...",[],,,[],,[],[],[],PAGE,,,,[],/payment.html,shop.googlemerchandisestore.com,Payment Method,,,/payment.html,,,,,,,,,,,,shop.googlemerchandisestore.com/payment.html,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/ordercompleted...,0,,True,,,5,2,Payment,,,,,(not set),,No,:,(not set),(not set),(not set),(not set),(not set),(not set),Bags,(not set),(not set),(not set),,,,,,,,,,,,,USD,,,,,,,,,,,USD,,,,,,,,,,,,,,,,,,,,,
150,1481132948,151,3029563,10,39,,True,,,,,,,,"[{'productSKU': 'GGOEGFKQ020399', 'v2ProductNa...",[],,,[],,[],[],[],PAGE,,,,[],/revieworder.html,shop.googlemerchandisestore.com,Checkout Review,,,/revieworder.html,,,,,,,,,,,,shop.googlemerchandisestore.com/revieworder.html,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/ordercompleted...,0,,True,,,5,3,Review,,,,,(not set),,No,:,(not set),(not set),(not set),(not set),(not set),(not set),Bags,(not set),(not set),(not set),,,,,,,,,,,,,USD,,,,,,,,,,,USD,,,,,,,,,,,,,,,,,,,,,
151,1481132948,152,3072642,10,40,,True,,,,,,,,"[{'productSKU': 'GGOEGFKQ020399', 'v2ProductNa...",[],,,[],,[],[],[],PAGE,,,,[],/ordercompleted.html,shop.googlemerchandisestore.com,Checkout Confirmation,,,/ordercompleted.html,,,,,,,,,,,,shop.googlemerchandisestore.com/ordercompleted...,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/ordercompleted...,0,,True,,,6,1,,,,,,(not set),,No,:,(not set),(not set),(not set),(not set),(not set),(not set),Bags,(not set),(not set),(not set),,,,,,,,ORD201612072868,1.243850e+09,148770000.0,423240000.0,Google Merchandise Store,USD,1.243850e+09,148770000.0,423240000.0,,ORD201612072868,,,,,,USD,,,,,,,,,,,,,,,,,,,,,


In [106]:
result_df2 = result_df.copy()

normalized_data2 = []

for i in result_df2.index:    
    df_product = result_df2['hits_product'][i]
    df_product = pd.json_normalize(df_product)
    df_product.columns = [f"product_{col}" for col in df_product.columns]
    df_product.insert(0, 'visit_id', df['visitId'][i])  # Insert the 'visitId' column as the first column
    normalized_data2.append(df_product)

df_product = pd.concat(normalized_data2)
df_product

In [96]:
hits_cols = result_df.columns

for cols in hits_cols:
    print(cols)

visit_id
hits_hitNumber
hits_time
hits_hour
hits_minute
hits_isSecure
hits_isInteraction
hits_isEntrance
hits_isExit
hits_referer
hits_transaction
hits_item
hits_contentInfo
hits_eventInfo
hits_product
hits_promotion
hits_promotionActionInfo
hits_refund
hits_experiment
hits_publisher
hits_customVariables
hits_customDimensions
hits_customMetrics
hits_type
hits_latencyTracking
hits_sourcePropertyInfo
hits_dataSource
hits_publisher_infos
hits_page.pagePath
hits_page.hostname
hits_page.pageTitle
hits_page.searchKeyword
hits_page.searchCategory
hits_page.pagePathLevel1
hits_page.pagePathLevel2
hits_page.pagePathLevel3
hits_page.pagePathLevel4
hits_appInfo.name
hits_appInfo.version
hits_appInfo.id
hits_appInfo.installerId
hits_appInfo.appInstallerId
hits_appInfo.appName
hits_appInfo.appVersion
hits_appInfo.appId
hits_appInfo.screenName
hits_appInfo.landingScreenName
hits_appInfo.exitScreenName
hits_appInfo.screenDepth
hits_exceptionInfo.description
hits_exceptionInfo.isFatal
hits_exceptionIn

In [21]:
select_cols = ['visit_id', 'hits_hitNumber', 'hits_type', 'hits_eCommerceAction.action_type', 'hits_time', 'hits_hour', 'hits_minute', 'hits_page.pagePath', 'hits_isEntrance', 
    'hits_isExit', 'hits_appInfo.screenName', 'hits_appInfo.landingScreenName', 'hits_appInfo.exitScreenName']
df_hits = df_hits[select_cols]
df_hits

Unnamed: 0,visit_id,hits_hitNumber,hits_type,hits_eCommerceAction.action_type,hits_time,hits_hour,hits_minute,hits_page.pagePath,hits_isEntrance,hits_isExit,hits_appInfo.screenName,hits_appInfo.landingScreenName,hits_appInfo.exitScreenName
0,1483087697,1,PAGE,0,0,0,48,/home,True,,www.googlemerchandisestore.com/home,www.googlemerchandisestore.com/home,www.googlemerchandisestore.com/home
1,1483087697,2,PAGE,0,7534,0,48,/home,,,shop.googlemerchandisestore.com/home,www.googlemerchandisestore.com/home,www.googlemerchandisestore.com/home
2,1483087697,3,PAGE,0,14983,0,48,/google+redesign/accessories/stickers,,,shop.googlemerchandisestore.com/google+redesig...,www.googlemerchandisestore.com/home,www.googlemerchandisestore.com/home
3,1483087697,4,PAGE,0,38484,0,48,/google+redesign/bags,,,shop.googlemerchandisestore.com/google+redesig...,www.googlemerchandisestore.com/home,www.googlemerchandisestore.com/home
4,1483087697,5,PAGE,0,52012,0,49,/google+redesign/electronics,,,shop.googlemerchandisestore.com/google+redesig...,www.googlemerchandisestore.com/home,www.googlemerchandisestore.com/home
...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,1481598847,83,PAGE,0,1914226,19,46,/basket.html,,,shop.googlemerchandisestore.com/basket.html,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/basket.html
83,1481598847,84,PAGE,0,1948858,19,46,/basket.html,,,shop.googlemerchandisestore.com/basket.html,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/basket.html
84,1481598847,85,EVENT,4,2039666,19,48,/basket.html,,,shop.googlemerchandisestore.com/basket.html,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/basket.html
85,1481598847,86,PAGE,0,2046742,19,48,/basket.html,,,shop.googlemerchandisestore.com/basket.html,www.googlemerchandisestore.com/home,shop.googlemerchandisestore.com/basket.html


In [22]:
df_hits.to_sql("hits", postgres_engine, if_exists='replace', index=False)
print("Completed exporting to sql server")

Completed exporting to sql server


In [29]:
json_cols = ['totals', 'trafficSource', 'device', 'geoNetwork']

for column in json_cols:
    normalized_df = pd.json_normalize(df[column])
    normalized_df.columns = [f"{column}_{col}" for col in normalized_df.columns]
    df = pd.concat([df.drop(columns=column), normalized_df], axis=1)

df.head()

Unnamed: 0,visitorId,visitNumber,visitId,visitStartTime,date,customDimensions,hits,fullVisitorId,userId,clientId,channelGrouping,socialEngagementType,totals_visits,totals_hits,totals_pageviews,totals_timeOnSite,totals_bounces,totals_transactions,totals_transactionRevenue,totals_newVisits,totals_screenviews,totals_uniqueScreenviews,totals_timeOnScreen,totals_totalTransactionRevenue,totals_sessionQualityDim,trafficSource_referralPath,trafficSource_campaign,trafficSource_source,trafficSource_medium,trafficSource_keyword,trafficSource_adContent,trafficSource_isTrueDirect,trafficSource_campaignCode,trafficSource_adwordsClickInfo.campaignId,trafficSource_adwordsClickInfo.adGroupId,trafficSource_adwordsClickInfo.creativeId,trafficSource_adwordsClickInfo.criteriaId,trafficSource_adwordsClickInfo.page,trafficSource_adwordsClickInfo.slot,trafficSource_adwordsClickInfo.criteriaParameters,trafficSource_adwordsClickInfo.gclId,trafficSource_adwordsClickInfo.customerId,trafficSource_adwordsClickInfo.adNetworkType,trafficSource_adwordsClickInfo.targetingCriteria,trafficSource_adwordsClickInfo.isVideoAd,trafficSource_adwordsClickInfo.targetingCriteria.boomUserlistId,device_browser,device_browserVersion,device_browserSize,device_operatingSystem,device_operatingSystemVersion,device_isMobile,device_mobileDeviceBranding,device_mobileDeviceModel,device_mobileInputSelector,device_mobileDeviceInfo,device_mobileDeviceMarketingName,device_flashVersion,device_javaEnabled,device_language,device_screenColors,device_screenResolution,device_deviceCategory,geoNetwork_continent,geoNetwork_subContinent,geoNetwork_country,geoNetwork_region,geoNetwork_metro,geoNetwork_city,geoNetwork_cityId,geoNetwork_networkDomain,geoNetwork_latitude,geoNetwork_longitude,geoNetwork_networkLocation
0,,2,1483087697,1483087697,20161230,"[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 0, 'minut...",7385871189843591760,,,Organic Search,Not Socially Engaged,1,14,13.0,922.0,,,,,,,,,,,(not set),(direct),(none),,,,,,,,,,,not available in demo dataset,,,,,,,Chrome,not available in demo dataset,not available in demo dataset,Macintosh,not available in demo dataset,False,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,,not available in demo dataset,not available in demo dataset,not available in demo dataset,desktop,Americas,Northern America,United States,California,San Francisco-Oakland-San Jose CA,San Francisco,not available in demo dataset,comcastbusiness.net,not available in demo dataset,not available in demo dataset,not available in demo dataset
1,,1,1483086672,1483086672,20161230,[],"[{'hitNumber': 1, 'time': 0, 'hour': 0, 'minut...",7925884217149710796,,,Organic Search,Not Socially Engaged,1,14,11.0,301.0,,,,1.0,,,,,,,(not set),(direct),(none),,,,,,,,,,,not available in demo dataset,,,,,,,Chrome,not available in demo dataset,not available in demo dataset,Windows,not available in demo dataset,False,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,,not available in demo dataset,not available in demo dataset,not available in demo dataset,desktop,Europe,Southern Europe,Macedonia (FYROM),not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,unknown.unknown,not available in demo dataset,not available in demo dataset,not available in demo dataset
2,,2,1483128188,1483128188,20161230,"[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 12, 'minu...",224580975515679767,,,Organic Search,Not Socially Engaged,1,14,9.0,412.0,,,,,,,,,,,(not set),(direct),(none),,,,,,,,,,,not available in demo dataset,,,,,,,Safari,not available in demo dataset,not available in demo dataset,iOS,not available in demo dataset,True,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,,not available in demo dataset,not available in demo dataset,not available in demo dataset,mobile,Americas,Northern America,United States,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,(not set),not available in demo dataset,not available in demo dataset,not available in demo dataset
3,,1,1483126904,1483126904,20161230,"[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 11, 'minu...",1712564087609515628,,,Organic Search,Not Socially Engaged,1,14,10.0,741.0,,,,1.0,,,,,,,(not set),(direct),(none),,,,,,,,,,,not available in demo dataset,,,,,,,Chrome,not available in demo dataset,not available in demo dataset,Android,not available in demo dataset,True,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,,not available in demo dataset,not available in demo dataset,not available in demo dataset,mobile,Americas,Northern America,United States,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,mesh.net,not available in demo dataset,not available in demo dataset,not available in demo dataset
4,,1,1483145960,1483145960,20161230,"[{'index': 4, 'value': 'North America'}]","[{'hitNumber': 1, 'time': 0, 'hour': 16, 'minu...",7139371055947807427,,,Paid Search,Not Socially Engaged,1,17,15.0,320.0,,,,1.0,,,,,,,(not set),(direct),(none),,,,,,,,,1.0,Top,not available in demo dataset,CjwKEAiAqJjDBRCG5KK6hq_juDwSJABRm03hIwoLAVmWAa...,,Google Search,,False,,Chrome,not available in demo dataset,not available in demo dataset,Android,not available in demo dataset,True,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,,not available in demo dataset,not available in demo dataset,not available in demo dataset,tablet,Americas,Northern America,United States,New York,New York NY,New York,not available in demo dataset,verizon.net,not available in demo dataset,not available in demo dataset,not available in demo dataset


In [30]:
pd.set_option('display.max_columns', None)  # None means unlimited columns

In [31]:
drop_cols = ['visitorId', 'customDimensions', 'hits']
df = df.drop(columns = drop_cols)
df.head()

Unnamed: 0,visitNumber,visitId,visitStartTime,date,fullVisitorId,userId,clientId,channelGrouping,socialEngagementType,totals_visits,totals_hits,totals_pageviews,totals_timeOnSite,totals_bounces,totals_transactions,totals_transactionRevenue,totals_newVisits,totals_screenviews,totals_uniqueScreenviews,totals_timeOnScreen,totals_totalTransactionRevenue,totals_sessionQualityDim,trafficSource_referralPath,trafficSource_campaign,trafficSource_source,trafficSource_medium,trafficSource_keyword,trafficSource_adContent,trafficSource_isTrueDirect,trafficSource_campaignCode,trafficSource_adwordsClickInfo.campaignId,trafficSource_adwordsClickInfo.adGroupId,trafficSource_adwordsClickInfo.creativeId,trafficSource_adwordsClickInfo.criteriaId,trafficSource_adwordsClickInfo.page,trafficSource_adwordsClickInfo.slot,trafficSource_adwordsClickInfo.criteriaParameters,trafficSource_adwordsClickInfo.gclId,trafficSource_adwordsClickInfo.customerId,trafficSource_adwordsClickInfo.adNetworkType,trafficSource_adwordsClickInfo.targetingCriteria,trafficSource_adwordsClickInfo.isVideoAd,trafficSource_adwordsClickInfo.targetingCriteria.boomUserlistId,device_browser,device_browserVersion,device_browserSize,device_operatingSystem,device_operatingSystemVersion,device_isMobile,device_mobileDeviceBranding,device_mobileDeviceModel,device_mobileInputSelector,device_mobileDeviceInfo,device_mobileDeviceMarketingName,device_flashVersion,device_javaEnabled,device_language,device_screenColors,device_screenResolution,device_deviceCategory,geoNetwork_continent,geoNetwork_subContinent,geoNetwork_country,geoNetwork_region,geoNetwork_metro,geoNetwork_city,geoNetwork_cityId,geoNetwork_networkDomain,geoNetwork_latitude,geoNetwork_longitude,geoNetwork_networkLocation
0,2,1483087697,1483087697,20161230,7385871189843591760,,,Organic Search,Not Socially Engaged,1,14,13.0,922.0,,,,,,,,,,,(not set),(direct),(none),,,,,,,,,,,not available in demo dataset,,,,,,,Chrome,not available in demo dataset,not available in demo dataset,Macintosh,not available in demo dataset,False,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,,not available in demo dataset,not available in demo dataset,not available in demo dataset,desktop,Americas,Northern America,United States,California,San Francisco-Oakland-San Jose CA,San Francisco,not available in demo dataset,comcastbusiness.net,not available in demo dataset,not available in demo dataset,not available in demo dataset
1,1,1483086672,1483086672,20161230,7925884217149710796,,,Organic Search,Not Socially Engaged,1,14,11.0,301.0,,,,1.0,,,,,,,(not set),(direct),(none),,,,,,,,,,,not available in demo dataset,,,,,,,Chrome,not available in demo dataset,not available in demo dataset,Windows,not available in demo dataset,False,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,,not available in demo dataset,not available in demo dataset,not available in demo dataset,desktop,Europe,Southern Europe,Macedonia (FYROM),not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,unknown.unknown,not available in demo dataset,not available in demo dataset,not available in demo dataset
2,2,1483128188,1483128188,20161230,224580975515679767,,,Organic Search,Not Socially Engaged,1,14,9.0,412.0,,,,,,,,,,,(not set),(direct),(none),,,,,,,,,,,not available in demo dataset,,,,,,,Safari,not available in demo dataset,not available in demo dataset,iOS,not available in demo dataset,True,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,,not available in demo dataset,not available in demo dataset,not available in demo dataset,mobile,Americas,Northern America,United States,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,(not set),not available in demo dataset,not available in demo dataset,not available in demo dataset
3,1,1483126904,1483126904,20161230,1712564087609515628,,,Organic Search,Not Socially Engaged,1,14,10.0,741.0,,,,1.0,,,,,,,(not set),(direct),(none),,,,,,,,,,,not available in demo dataset,,,,,,,Chrome,not available in demo dataset,not available in demo dataset,Android,not available in demo dataset,True,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,,not available in demo dataset,not available in demo dataset,not available in demo dataset,mobile,Americas,Northern America,United States,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,mesh.net,not available in demo dataset,not available in demo dataset,not available in demo dataset
4,1,1483145960,1483145960,20161230,7139371055947807427,,,Paid Search,Not Socially Engaged,1,17,15.0,320.0,,,,1.0,,,,,,,(not set),(direct),(none),,,,,,,,,1.0,Top,not available in demo dataset,CjwKEAiAqJjDBRCG5KK6hq_juDwSJABRm03hIwoLAVmWAa...,,Google Search,,False,,Chrome,not available in demo dataset,not available in demo dataset,Android,not available in demo dataset,True,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,not available in demo dataset,,not available in demo dataset,not available in demo dataset,not available in demo dataset,tablet,Americas,Northern America,United States,New York,New York NY,New York,not available in demo dataset,verizon.net,not available in demo dataset,not available in demo dataset,not available in demo dataset


In [34]:
df_cols = df.columns

for cols in df_cols:
    print(cols)

visitNumber
visitId
visitStartTime
date
fullVisitorId
userId
clientId
channelGrouping
socialEngagementType
totals_visits
totals_hits
totals_pageviews
totals_timeOnSite
totals_bounces
totals_transactions
totals_transactionRevenue
totals_newVisits
totals_screenviews
totals_uniqueScreenviews
totals_timeOnScreen
totals_totalTransactionRevenue
totals_sessionQualityDim
trafficSource_referralPath
trafficSource_campaign
trafficSource_source
trafficSource_medium
trafficSource_keyword
trafficSource_adContent
trafficSource_isTrueDirect
trafficSource_campaignCode
trafficSource_adwordsClickInfo.campaignId
trafficSource_adwordsClickInfo.adGroupId
trafficSource_adwordsClickInfo.creativeId
trafficSource_adwordsClickInfo.criteriaId
trafficSource_adwordsClickInfo.page
trafficSource_adwordsClickInfo.slot
trafficSource_adwordsClickInfo.criteriaParameters
trafficSource_adwordsClickInfo.gclId
trafficSource_adwordsClickInfo.customerId
trafficSource_adwordsClickInfo.adNetworkType
trafficSource_adwordsClickInf

In [56]:
df['totals_uniqueScreenviews'].unique()

array([None], dtype=object)

In [37]:
df_sess = df.copy()
new_cols = {'visitId': 'visit_id', 'fullVisitorId': 'full_visit_id', 'visitNumber': 'visit_number', 'visitStartTime': 'visit_start_time', 'date':'visit_date',
            'channelGrouping': 'channel_group'}
df_sess.rename(columns=new_cols, inplace=True)
select_cols = ['visit_id', 'full_visit_id', 'visit_number', 'visit_start_time', 'visit_date', 'trafficSource_source', 'trafficSource_medium', 'trafficSource_campaign', 
    'trafficSource_adContent', 'trafficSource_isTrueDirect', 'totals_bounces', 'totals_hits', 'totals_newVisits', 'totals_transactionRevenue',
    'totals_transactions', 'device_browser', 'device_deviceCategory', 'device_operatingSystem', 'channel_group']
df_sess = df_sess[select_cols]
df_sess

Unnamed: 0,visit_id,full_visit_id,visit_number,visit_start_time,visit_date,trafficSource_source,trafficSource_medium,trafficSource_campaign,trafficSource_adContent,trafficSource_isTrueDirect,totals_bounces,totals_hits,totals_newVisits,totals_transactionRevenue,totals_transactions,device_browser,device_deviceCategory,device_operatingSystem,channel_group
0,1483087697,7385871189843591760,2,1483087697,20161230,(direct),(none),(not set),,,,14,,,,Chrome,desktop,Macintosh,Organic Search
1,1483086672,7925884217149710796,1,1483086672,20161230,(direct),(none),(not set),,,,14,1.0,,,Chrome,desktop,Windows,Organic Search
2,1483128188,224580975515679767,2,1483128188,20161230,(direct),(none),(not set),,,,14,,,,Safari,mobile,iOS,Organic Search
3,1483126904,1712564087609515628,1,1483126904,20161230,(direct),(none),(not set),,,,14,1.0,,,Chrome,mobile,Android,Organic Search
4,1483145960,7139371055947807427,1,1483145960,20161230,(direct),(none),(not set),,,,17,1.0,,,Chrome,tablet,Android,Paid Search
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79119,1481534615,6439926865693529882,1,1481534615,20161212,(direct),(none),(not set),,,,45,1.0,,,Safari,mobile,iOS,Paid Search
79120,1481532768,4601438455383424117,3,1481532768,20161212,(direct),(none),(not set),,True,,45,,,,Internet Explorer,mobile,Windows,Organic Search
79121,1481566509,2968685844689158816,1,1481566509,20161212,(direct),(none),(not set),,,,56,1.0,,,Chrome,mobile,Android,Organic Search
79122,1481540012,0071895336593296958,1,1481540012,20161212,youtube.com,referral,(not set),,,,74,1.0,,,Chrome,mobile,Android,Social


In [54]:
from datetime import datetime
df_sess['visit_start_time'] = pd.to_datetime(df_sess['visit_start_time'], unit='s')

print(df_sess.dtypes)

visit_id                              Int64
full_visit_id                        object
visit_number                          Int64
visit_start_time              datetime64[s]
visit_date                           object
trafficSource_source                 object
trafficSource_medium                 object
trafficSource_campaign               object
trafficSource_adContent              object
trafficSource_isTrueDirect           object
totals_bounces                      float64
totals_hits                           int64
totals_newVisits                    float64
totals_transactionRevenue           float64
totals_transactions                 float64
device_browser                       object
device_deviceCategory                object
device_operatingSystem               object
channel_group                        object
dtype: object


In [57]:
df_sess.to_sql("session", postgres_engine, if_exists='replace', index=False)
print("Completed exporting to sql server")

Completed exporting to sql server


In [58]:
query = """
select *
from "session"
"""

df = pd.read_sql_query(sql=query, con=postgres_engine)
df

Unnamed: 0,visit_id,full_visit_id,visit_number,visit_start_time,visit_date,trafficSource_source,trafficSource_medium,trafficSource_campaign,trafficSource_adContent,trafficSource_isTrueDirect,totals_bounces,totals_hits,totals_newVisits,totals_transactionRevenue,totals_transactions,device_browser,device_deviceCategory,device_operatingSystem,channel_group
0,1483087697,7385871189843591760,2,2016-12-30 08:48:17,20161230,(direct),(none),(not set),,,,14,,,,Chrome,desktop,Macintosh,Organic Search
1,1483086672,7925884217149710796,1,2016-12-30 08:31:12,20161230,(direct),(none),(not set),,,,14,1.0,,,Chrome,desktop,Windows,Organic Search
2,1483128188,224580975515679767,2,2016-12-30 20:03:08,20161230,(direct),(none),(not set),,,,14,,,,Safari,mobile,iOS,Organic Search
3,1483126904,1712564087609515628,1,2016-12-30 19:41:44,20161230,(direct),(none),(not set),,,,14,1.0,,,Chrome,mobile,Android,Organic Search
4,1483145960,7139371055947807427,1,2016-12-31 00:59:20,20161230,(direct),(none),(not set),,,,17,1.0,,,Chrome,tablet,Android,Paid Search
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79119,1481534615,6439926865693529882,1,2016-12-12 09:23:35,20161212,(direct),(none),(not set),,,,45,1.0,,,Safari,mobile,iOS,Paid Search
79120,1481532768,4601438455383424117,3,2016-12-12 08:52:48,20161212,(direct),(none),(not set),,True,,45,,,,Internet Explorer,mobile,Windows,Organic Search
79121,1481566509,2968685844689158816,1,2016-12-12 18:15:09,20161212,(direct),(none),(not set),,,,56,1.0,,,Chrome,mobile,Android,Organic Search
79122,1481540012,0071895336593296958,1,2016-12-12 10:53:32,20161212,youtube.com,referral,(not set),,,,74,1.0,,,Chrome,mobile,Android,Social


In [86]:
# Daily user count, daily visit number, average daily visit number

query = """
with 
temp_01 as (
	select to_char(date_trunc('day', visit_start_time), 'yyyy-mm-dd') as d_day
    	, count(distinct visit_id) as daily_user_cnt
    	, count(visit_number) as daily_visit_number 
	from session group by to_char(date_trunc('day', visit_start_time), 'yyyy-mm-dd')
)
select * 
	, 1.0*daily_visit_number/daily_user_cnt as avg_daily_visit_number
from temp_01
"""

df = pd.read_sql_query(sql=query, con=postgres_engine)
df.head(10)

Unnamed: 0,d_day,daily_user_cnt,daily_visit_number,avg_daily_visit_number
0,2016-12-01,2968,3045,1.025943
1,2016-12-02,3782,3866,1.02221
2,2016-12-03,3152,3210,1.018401
3,2016-12-04,2919,2980,1.020898
4,2016-12-05,3994,4096,1.025538
5,2016-12-06,3444,3517,1.021196
6,2016-12-07,2730,2786,1.020513
7,2016-12-08,2970,3043,1.024579
8,2016-12-09,2926,2983,1.019481
9,2016-12-10,2224,2266,1.018885


In [88]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(go.Scatter(
    x=df['d_day'],
    y=df['daily_user_cnt'],
    name='daily user count'), secondary_y=False)
fig.add_trace(go.Scatter(
    x=df['d_day'],
    y=df['daily_visit_number'],
    name='daily visit number'), secondary_y=False)
fig.add_trace(go.Scatter(
    x=df['d_day'],
    y=df['avg_daily_visit_number'],
    name='avg daily visit number'), secondary_y=True)



fig.update_yaxes(range=(1.0, 1.2),  secondary_y=True)
fig.update_xaxes(type='category')

fig.show()

In [79]:
# DAU (Daily Active Users)

query = """
select date_trunc('day', visit_start_time)::date as d_day, count(distinct visit_id) as dail_user_cnt
from session 
group by date_trunc('day', visit_start_time)::date
"""

df = pd.read_sql_query(sql=query, con=postgres_engine)
df

Unnamed: 0,d_day,dail_user_cnt
0,2016-12-01,2968
1,2016-12-02,3782
2,2016-12-03,3152
3,2016-12-04,2919
4,2016-12-05,3994
5,2016-12-06,3444
6,2016-12-07,2730
7,2016-12-08,2970
8,2016-12-09,2926
9,2016-12-10,2224


In [90]:
# WAU (Weekly Active Users)

query = """
select date_trunc('week', visit_start_time)::date as week_day, count(distinct visit_id) as user_cnt 
from session 
group by date_trunc('week', visit_start_time)::date;
"""

df = pd.read_sql_query(sql=query, con=postgres_engine)
df

Unnamed: 0,week_day,user_cnt
0,2016-11-28,12821
1,2016-12-05,20373
2,2016-12-12,19516
3,2016-12-19,15252
4,2016-12-26,9627


In [81]:
# MAU (Monthly Active Users)

query = """
select date_trunc('month', visit_start_time)::date as month_day, count(distinct visit_id) as user_cnt 
from session 
group by date_trunc('month', visit_start_time)::date;
"""

df = pd.read_sql_query(sql=query, con=postgres_engine)
df

Unnamed: 0,month_day,user_cnt
0,2016-12-01,77283
1,2017-01-01,306


In [62]:
# Generate DAU by past date, WAU by past 7 days, MAU by past 30 days on a daily basis
# Stickiness Metric (How many users out of the monthly users visit periodically?)

query = """
create table daily_dau as

with temp_00 as (
select generate_series('2016-12-01'::date , '2016-12-31'::date, '1 day'::interval)::date as curr_date
)
select b.curr_date, count(distinct visit_id) as dau
from session a
	cross join temp_00 b
where visit_start_time >= (b.curr_date - interval '1 days') and visit_start_time < b.curr_date
group by b.curr_date
;

create table daily_wau as
with temp_00 as (
select generate_series('2016-12-01'::date , '2016-12-31'::date, '1 day'::interval)::date as curr_date
)
select b.curr_date, count(distinct visit_id) as wau
from session a
	cross join temp_00 b
where visit_start_time >= (b.curr_date - interval '7 days') and visit_start_time < b.curr_date
group by b.curr_date
;

create table daily_mau as
with temp_00 as (
select generate_series('2016-12-01'::date , '2016-12-31'::date, '1 day'::interval)::date as curr_date
)
select b.curr_date, count(distinct visit_id) as mau
from session a
	cross join temp_00 b
where visit_start_time >= (b.curr_date - interval '30 days') and visit_start_time < b.curr_date
group by b.curr_date
;

create table daily_acquisitions as
select a.curr_date, a.dau, b.wau, c.mau
from daily_dau a
	join daily_wau b on a.curr_date = b.curr_date
	join daily_mau c on a.curr_date = c.curr_date
;

select *, round(100.0 * dau/mau, 2) as stickieness
	, round(avg(100.0 * dau/mau) over(), 2) as avg_stickieness
from daily_acquisitions
where curr_date between to_date('2016-12-01', 'yyyy-mm-dd') and to_date('2016-12-31', 'yyyy-mm-dd')
"""

df = pd.read_sql_query(sql=query, con=postgres_engine)
df

Unnamed: 0,curr_date,dau,wau,mau,stickieness,avg_stickieness
0,2016-12-02,2968,2968,2968,100.0,12.63
1,2016-12-03,3782,6750,6750,56.03,12.63
2,2016-12-04,3152,9902,9902,31.83,12.63
3,2016-12-05,2919,12821,12821,22.77,12.63
4,2016-12-06,3994,16815,16815,23.75,12.63
5,2016-12-07,3444,20259,20259,17.0,12.63
6,2016-12-08,2730,22989,22989,11.88,12.63
7,2016-12-09,2970,22991,25959,11.44,12.63
8,2016-12-10,2926,22135,28885,10.13,12.63
9,2016-12-11,2224,21207,31109,7.15,12.63


In [63]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = go.Figure()

fig.add_trace(go.Scatter(x=df['curr_date'], y=df['stickieness'], name='daily stickieness'))
fig.add_trace(go.Scatter(x=df['curr_date'], y=df['avg_stickieness'],  name='avg stickieness'))
fig.show()

In [11]:
query = """
with temp_01 as (
	select visit_id, date_trunc('week', visit_start_time)::date as week, count(*) as weekly_user_cnt  
	from session  		
	group by visit_id, date_trunc('week', visit_start_time)::date 
), 
temp_02 as ( 
	select week
		,case when weekly_user_cnt = 1 then '0_only_first_session'
		      when weekly_user_cnt between 2 and 3 then '2_between_3'
		      when weekly_user_cnt between 4 and 8 then '4_between_8'
		      when weekly_user_cnt between 9 and 14 then '9_between_14'
		      when weekly_user_cnt between 15 and 25 then '15_between_25'
		      when weekly_user_cnt >= 26 then 'over_26' end as range
		, count(*) as user_cnt 
	from temp_01 
	group by week, 
			 case when weekly_user_cnt = 1 then '0_only_first_session'
			      when weekly_user_cnt between 2 and 3 then '2_between_3'
			      when weekly_user_cnt between 4 and 8 then '4_between_8'
			      when weekly_user_cnt between 9 and 14 then '9_between_14'
			      when weekly_user_cnt between 15 and 25 then '15_between_25'
			      when weekly_user_cnt >= 26 then 'over_26' end
)
select week, 
	sum(case when range='0_only_first_session' then user_cnt else 0 end) as "0_only_first_session"
	,sum(case when range='2_between_3' then user_cnt else 0 end) as "2_between_3"
	,sum(case when range='4_between_8' then user_cnt else 0 end) as "4_between_8"
	,sum(case when range='9_between_14' then user_cnt else 0 end) as "9_between_14"
	,sum(case when range='15_between_25' then user_cnt else 0 end) as "15_between_25"
	,sum(case when range='over_26' then user_cnt else 0 end) as "over_26"
from temp_02 
group by week order by 1
"""
df = pd.read_sql_query(sql=query, con=postgres_engine)
df.head(10)

Unnamed: 0,week,0_only_first_session,2_between_3,4_between_8,9_between_14,15_between_25,over_26
0,2016-11-28,12547.0,274.0,0.0,0.0,0.0,0.0
1,2016-12-05,19944.0,427.0,2.0,0.0,0.0,0.0
2,2016-12-12,19090.0,425.0,1.0,0.0,0.0,0.0
3,2016-12-19,14994.0,258.0,0.0,0.0,0.0,0.0
4,2016-12-26,9513.0,114.0,0.0,0.0,0.0,0.0
