In [1]:
import pandas as pd
import pandas_gbq
import numpy as np
import seaborn as sns
import statsmodels.api as sm
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Per-user, per-event summary of activity around the first premium promo.
#
# Query outline (BigQuery standard SQL):
#   initial_dataset     - whitelisted events with event_timestamp >= 2020-11-01.
#   premium_promo_shown - first 'sv__WelcomeCarouselDialog' timestamp per user_pseudo_id.
#   purchases           - attributes of each user's first purchase event
#                         ('in_app_purchase' or 'PurchaseNormal'), relabelled
#                         as 'in_app_purchase'.
#   relevant_purchases  - first purchases that happened more than 30 minutes
#                         after the promo was first shown.
#   relevant_events     - non-purchase events within 7 days after the promo,
#                         plus the relevant purchases inside the same window.
#   final select        - count / first / last timestamp per (user, event).
#
# Changes relative to the earlier version of this query:
#   * last_value(user_id) now has an explicit full-partition frame. With only
#     an ORDER BY, the default frame ends at the current row, so last_value
#     returned the current row's user_id and `select distinct` could emit
#     several "first purchase" rows for one user.
#   * The non-purchase branch of relevant_events excludes 'PurchaseNormal' as
#     well as 'in_app_purchase', matching the purchase definition used in the
#     purchases CTE; purchases are meant to enter only via relevant_purchases.
query = """with initial_dataset as (
  select
    event_name,
    optional_label,
    event_timestamp,
    event_date,
    user_id,
    user_pseudo_id,
    ga_session_id,
    user_history_event_number,
    session_event_number,
    platform,
    device_language,
    country
  from
    analytics_151430920.firebase_facts_events as eve
  where
    eve.event_timestamp >= '2020-11-01'
  and
    eve.event_name in ('sv__OnBoardingTutorialView','sv__WelcomeCarouselDialog','sv__NewPremiumAct','TryToBuyNewPAct','TryToBuyNewPF','ClickOnWholeView','StartListening','sv__StoryDetails','StartNStory','StartBekids','FabClickedPremium','PremiumBarClickedSD','PremiumBarClickedMain','AAPageA','ProPageA','GoldPageA','AAPageF','ProPageF','GoldPageF','EnterFcMore','sv__GlossaryF','LibraryClicked','sv__Libraries','PlayPrevParagraphFromButton','PlayNextParagraphFromButton','in_app_purchase','PurchaseNormal')
),
premium_promo_shown as (
  select distinct
    user_pseudo_id,
    first_value(event_timestamp) over (partition by user_pseudo_id order by event_timestamp) as first_premium_promo_shown_timestamp
  from
    initial_dataset
  where
    event_name = 'sv__WelcomeCarouselDialog'
),
purchases as (
  select distinct
    'in_app_purchase' as event_name,
    first_value(event_timestamp) over (partition by user_pseudo_id order by event_timestamp) as first_purchase_timestamp,
    first_value(event_date) over (partition by user_pseudo_id order by event_timestamp) as first_purchase_date,
    first_value(optional_label) over (partition by user_pseudo_id order by event_timestamp) as optional_label,
    last_value(user_id) over (partition by user_pseudo_id order by event_timestamp rows between unbounded preceding and unbounded following) as user_id,
    user_pseudo_id,
    first_value(ga_session_id) over (partition by user_pseudo_id order by event_timestamp) as ga_session_id,
    first_value(user_history_event_number) over (partition by user_pseudo_id order by event_timestamp) as user_history_event_number,
    first_value(session_event_number) over (partition by user_pseudo_id order by event_timestamp) as session_event_number,
    first_value(platform) over (partition by user_pseudo_id order by event_timestamp) as platform,
    first_value(device_language) over (partition by user_pseudo_id order by event_timestamp) as device_language,
    first_value(country) over (partition by user_pseudo_id order by event_timestamp) as country
  from
    initial_dataset
  where
    event_name in ('in_app_purchase','PurchaseNormal')
),
relevant_purchases as (
  select
    pu.event_name,
    pu.first_purchase_timestamp,
    pu.first_purchase_date,
    ps.first_premium_promo_shown_timestamp,
    pu.optional_label,
    pu.user_id,
    pu.user_pseudo_id,
    pu.ga_session_id,
    pu.user_history_event_number,
    pu.session_event_number,
    pu.platform,
    pu.device_language,
    pu.country
  from
    purchases as pu
  join
    premium_promo_shown as ps on ps.user_pseudo_id = pu.user_pseudo_id
  where
    pu.first_purchase_timestamp > timestamp_add(ps.first_premium_promo_shown_timestamp, interval 30 minute)
),
relevant_events as (
  select
    ini.event_name,
    ini.event_timestamp,
    ini.event_date,
    ini.optional_label,
    ini.user_id,
    ini.user_pseudo_id,
    ini.ga_session_id,
    ini.user_history_event_number,
    ini.session_event_number,
    ini.platform,
    ini.device_language,
    ini.country
  from
    initial_dataset as ini
  left join
    premium_promo_shown as ps on ps.user_pseudo_id = ini.user_pseudo_id
  where
    ini.event_name not in ('in_app_purchase','PurchaseNormal')
  and
    ini.event_timestamp between ps.first_premium_promo_shown_timestamp and timestamp_add(ps.first_premium_promo_shown_timestamp, interval 7 day)
  union all
  select
    rep.event_name,
    rep.first_purchase_timestamp as event_timestamp,
    rep.first_purchase_date as event_date,
    rep.optional_label,
    rep.user_id,
    rep.user_pseudo_id,
    rep.ga_session_id,
    rep.user_history_event_number,
    rep.session_event_number,
    rep.platform,
    rep.device_language,
    rep.country
  from
    relevant_purchases as rep
  where
    rep.first_purchase_timestamp between rep.first_premium_promo_shown_timestamp and timestamp_add(rep.first_premium_promo_shown_timestamp, interval 7 day)
)
select
  user_pseudo_id,
  event_name,
  count(event_timestamp) as occurrences_count,
  min(event_timestamp) as first_event_timestamp,
  max(event_timestamp) as last_event_timestamp
from
  relevant_events
group by 1,2;"""
# Runs the query against the 'beelinguapp' project and loads the result into a DataFrame.
df = pandas_gbq.read_gbq(query, project_id='beelinguapp')

In [81]:
# Second variant of the promo/purchase summary query. It differs from the
# first variant only in the event whitelist (page-view events carry the
# 'sv__' prefix, some names appear in shortened form) and in folding every
# name starting with 'sv__OnBoardingTutorialVi' into 'sv__OnBoardingTutorialView'.
#
# Query outline (BigQuery standard SQL):
#   initial_dataset     - whitelisted events with event_timestamp >= 2020-11-01.
#   premium_promo_shown - first 'sv__WelcomeCarouselDialog' timestamp per user_pseudo_id.
#   purchases           - attributes of each user's first purchase event
#                         ('in_app_purchase' or 'PurchaseNormal'), relabelled
#                         as 'in_app_purchase'.
#   relevant_purchases  - first purchases more than 30 minutes after the promo.
#   relevant_events     - non-purchase events within 7 days after the promo,
#                         plus the relevant purchases inside the same window.
#   final select        - count / first / last timestamp per (user, event).
#
# Changes relative to the earlier version of this query:
#   * last_value(user_id) now has an explicit full-partition frame; with only
#     an ORDER BY the default frame ends at the current row, so `select
#     distinct` could emit several "first purchase" rows for one user.
#   * The non-purchase branch of relevant_events excludes 'PurchaseNormal' as
#     well as 'in_app_purchase', matching the purchases CTE.
#   * 'sv__GoldPageA' was added next to 'sv_GoldPageA'. Every sibling page
#     event uses a double underscore, so the single-underscore spelling looks
#     like a typo - NOTE(review): confirm the real event name and drop the
#     spelling that does not exist.
query = """with initial_dataset as (
  select
    case 
      when event_name like 'sv__OnBoardingTutorialVi%' then 'sv__OnBoardingTutorialView'
      else event_name
    end as event_name,
    optional_label,
    event_timestamp,
    event_date,
    user_id,
    user_pseudo_id,
    ga_session_id,
    user_history_event_number,
    session_event_number,
    platform,
    device_language,
    country
  from
    analytics_151430920.firebase_facts_events as eve
  where
    eve.event_timestamp >= '2020-11-01'
  and
    eve.event_name in ('sv__OnBoardingTutorialVi','sv__WelcomeCarouselDialog','sv__NewPremiumAct','TryToBuyNewPAct','TryToBuyNewPF','ClickOnWholeView','StartListening','sv__StoryDetails','StartNStory','StartBekids','FabClickedPremium','PremiumBarClickedSD','PremiumBarClickedMain','sv__AAPageA','sv__ProPageA','sv_GoldPageA','sv__GoldPageA','sv__AAPageF','sv__ProPageF','sv__GoldPageF','EnterFcMore','sv__GlossaryF','LibraryClicked','sv__Libraries','PlayPrevParagraph','PlayNextParagraphFromBut','sv__OnBoardingTutorialVie',"in_app_purchase","PurchaseNormal")
),
premium_promo_shown as (
  select distinct
    user_pseudo_id,
    first_value(event_timestamp) over (partition by user_pseudo_id order by event_timestamp) as first_premium_promo_shown_timestamp
  from
    initial_dataset
  where
    event_name = 'sv__WelcomeCarouselDialog'
),
purchases as (
  select distinct
    'in_app_purchase' as event_name,
    first_value(event_timestamp) over (partition by user_pseudo_id order by event_timestamp) as first_purchase_timestamp,
    first_value(event_date) over (partition by user_pseudo_id order by event_timestamp) as first_purchase_date,
    first_value(optional_label) over (partition by user_pseudo_id order by event_timestamp) as optional_label,
    last_value(user_id) over (partition by user_pseudo_id order by event_timestamp rows between unbounded preceding and unbounded following) as user_id,
    user_pseudo_id,
    first_value(ga_session_id) over (partition by user_pseudo_id order by event_timestamp) as ga_session_id,
    first_value(user_history_event_number) over (partition by user_pseudo_id order by event_timestamp) as user_history_event_number,
    first_value(session_event_number) over (partition by user_pseudo_id order by event_timestamp) as session_event_number,
    first_value(platform) over (partition by user_pseudo_id order by event_timestamp) as platform,
    first_value(device_language) over (partition by user_pseudo_id order by event_timestamp) as device_language,
    first_value(country) over (partition by user_pseudo_id order by event_timestamp) as country
  from
    initial_dataset
  where
    event_name in ('in_app_purchase','PurchaseNormal')
),
relevant_purchases as (
  select
    pu.event_name,
    pu.first_purchase_timestamp,
    pu.first_purchase_date,
    ps.first_premium_promo_shown_timestamp,
    pu.optional_label,
    pu.user_id,
    pu.user_pseudo_id,
    pu.ga_session_id,
    pu.user_history_event_number,
    pu.session_event_number,
    pu.platform,
    pu.device_language,
    pu.country
  from
    purchases as pu
  join
    premium_promo_shown as ps on ps.user_pseudo_id = pu.user_pseudo_id
  where
    pu.first_purchase_timestamp > timestamp_add(ps.first_premium_promo_shown_timestamp, interval 30 minute)
),
relevant_events as (
  select
    ini.event_name,
    ini.event_timestamp,
    ini.event_date,
    ini.optional_label,
    ini.user_id,
    ini.user_pseudo_id,
    ini.ga_session_id,
    ini.user_history_event_number,
    ini.session_event_number,
    ini.platform,
    ini.device_language,
    ini.country
  from
    initial_dataset as ini
  left join
    premium_promo_shown as ps on ps.user_pseudo_id = ini.user_pseudo_id
  where
    ini.event_name not in ('in_app_purchase','PurchaseNormal')
  and
    ini.event_timestamp between ps.first_premium_promo_shown_timestamp and timestamp_add(ps.first_premium_promo_shown_timestamp, interval 7 day)
  union all
  select
    rep.event_name,
    rep.first_purchase_timestamp as event_timestamp,
    rep.first_purchase_date as event_date,
    rep.optional_label,
    rep.user_id,
    rep.user_pseudo_id,
    rep.ga_session_id,
    rep.user_history_event_number,
    rep.session_event_number,
    rep.platform,
    rep.device_language,
    rep.country
  from
    relevant_purchases as rep
  where
    rep.first_purchase_timestamp between rep.first_premium_promo_shown_timestamp and timestamp_add(rep.first_premium_promo_shown_timestamp, interval 7 day)
)
select
  user_pseudo_id,
  event_name,
  count(event_timestamp) as occurrences_count,
  min(event_timestamp) as first_event_timestamp,
  max(event_timestamp) as last_event_timestamp
from
  relevant_events
group by 1,2;"""
# Runs the query against the 'beelinguapp' project and loads the result into a DataFrame.
df2 = pandas_gbq.read_gbq(query, project_id='beelinguapp')

In [None]:
# Snapshot the second query's result to disk; the row index is written as the
# first (unnamed) CSV column.
df2.to_csv("second_query.csv", index=True)

In [84]:
# Number of distinct users in the second query's result.
df2["user_pseudo_id"].nunique()

59628

In [83]:
# Rows of the second query's result that describe a purchase.
df2.loc[df2["event_name"] == "in_app_purchase"]

Unnamed: 0,user_pseudo_id,event_name,occurrences_count,first_event_timestamp,last_event_timestamp
100,2097f09dfd14dcfb44a424343ce254cf,in_app_purchase,1,2020-11-06 15:11:16.406000+00:00,2020-11-06 15:11:16.406000+00:00
199,e4a8071d250bd9d2e8a6c0dd98228928,in_app_purchase,1,2020-11-27 18:46:31.134000+00:00,2020-11-27 18:46:31.134000+00:00
200,9bd5e4cf5be1e4072d27c671a73c693a,in_app_purchase,1,2020-11-21 23:20:28.867000+00:00,2020-11-21 23:20:28.867000+00:00
384,9d63b45e83fc7300e564eb9d12b6be7e,in_app_purchase,1,2020-12-02 15:46:02.262000+00:00,2020-12-02 15:46:02.262000+00:00
385,d59e6f54e906069b8634bbb9eada0e69,in_app_purchase,1,2020-11-22 14:24:57.813000+00:00,2020-11-22 14:24:57.813000+00:00
477,b064d3daecf3cb7767616465cc2d8df7,in_app_purchase,1,2020-12-01 23:37:16.759000+00:00,2020-12-01 23:37:16.759000+00:00
478,fcaa2d100e944d8a8461cbc922e657db,in_app_purchase,1,2020-11-17 23:28:48.446000+00:00,2020-11-17 23:28:48.446000+00:00
576,bcc733d2fc30ee68b6358825d6c73d58,in_app_purchase,1,2020-11-21 13:23:59.476000+00:00,2020-11-21 13:23:59.476000+00:00
665,d5a65c207fcd05209ea248b8277c4ae1,in_app_purchase,1,2020-12-03 18:25:44.857000+00:00,2020-12-03 18:25:44.857000+00:00
666,d8d039ff29d649b54abde4faaccca595,in_app_purchase,1,2020-11-08 01:46:45.085000+00:00,2020-11-08 01:46:45.085000+00:00


In [13]:
# Reload the first query's result from its CSV snapshot; the first CSV column
# becomes the row index.
# NOTE(review): no visible cell writes "first_query.csv" - presumably it was
# saved the same way as "second_query.csv"; confirm before a fresh run.
first_query_csv = "first_query.csv"
df = pd.read_csv(first_query_csv, index_col=0)

In [14]:
# Preview the first five rows of the reloaded summary.
df.head(5)

Unnamed: 0,user_pseudo_id,event_name,occurrences_count,first_event_timestamp,last_event_timestamp
0,5d0bf23c4d314d6dd54446fe1da2a275,TryToBuyNewPF,1,2020-11-27 10:07:46.803015+00:00,2020-11-27 10:07:46.803015+00:00
1,d9c82813e1d53bfa7f056f9258bc7144,TryToBuyNewPF,6,2020-11-22 18:57:37.797000+00:00,2020-11-27 18:11:58.344024+00:00
2,1744eaa784da30ef774663b49fe9a051,EnterFcMore,1,2020-11-27 02:24:51.477000+00:00,2020-11-27 02:24:51.477000+00:00
3,3c33ac7d0736acc0141260a319a2b434,EnterFcMore,1,2020-11-28 18:55:12.668000+00:00,2020-11-28 18:55:12.668000+00:00
4,adaf7c7783fb0e5f8c2a8337bff3bad5,TryToBuyNewPF,1,2020-11-28 18:38:26.389028+00:00,2020-11-28 18:38:26.389028+00:00


In [18]:
# Number of distinct users in the first query's result.
df["user_pseudo_id"].nunique()

59628

In [71]:
# Number of summary rows per event name, most frequent first.
event_names = df["event_name"]
event_names.value_counts()

sv__WelcomeCarouselDialog    59628
sv__Libraries                55143
sv__StoryDetails             45163
ClickOnWholeView             44735
StartListening               39521
StartNStory                  34345
LibraryClicked               18947
sv__NewPremiumAct            15360
PremiumBarClickedMain         8566
sv__GlossaryF                 4306
PremiumBarClickedSD           3745
StartBekids                   3707
FabClickedPremium             2208
TryToBuyNewPAct               1859
EnterFcMore                   1614
TryToBuyNewPF                  663
in_app_purchase                 58
Name: event_name, dtype: int64

In [51]:
# Reshape to one row per user with one occurrence-count column per event;
# user/event combinations that never occurred are filled with 0.
#
# `values` is given explicitly (as a list, so the columns stay two-level:
# ("occurrences_count", <event_name>)). Without it pivot_table tries to
# aggregate every remaining column, including the timestamp columns, and only
# works when pandas silently discards the ones it cannot average.
# Counts are summed rather than averaged; the two agree as long as there is a
# single row per (user, event) - presumably guaranteed by the source query's
# group by, TODO confirm for the CSV snapshot.
df_log = (
    df.pivot_table(
        index=["user_pseudo_id"],
        columns=["event_name"],
        values=["occurrences_count"],
        aggfunc="sum",
        fill_value=0,
    )
    .reset_index()
)

In [52]:
# Flatten the two-level pivot columns into single strings of the form
# "<value>_<event_name>"; the user id column, whose second level is empty,
# becomes "user_pseudo_id_".
# The MultiIndex guard makes the cell safe to re-run: joining already-flat
# names would insert "_" between every character of each name.
if isinstance(df_log.columns, pd.MultiIndex):
    df_log.columns = ["_".join(levels) for levels in df_log.columns]

In [53]:
# Preview the first five rows of the per-user feature table.
df_log.head(5)

Unnamed: 0,user_pseudo_id_,occurrences_count_ClickOnWholeView,occurrences_count_EnterFcMore,occurrences_count_FabClickedPremium,occurrences_count_LibraryClicked,occurrences_count_PremiumBarClickedMain,occurrences_count_PremiumBarClickedSD,occurrences_count_StartBekids,occurrences_count_StartListening,occurrences_count_StartNStory,occurrences_count_TryToBuyNewPAct,occurrences_count_TryToBuyNewPF,occurrences_count_in_app_purchase,occurrences_count_sv__GlossaryF,occurrences_count_sv__Libraries,occurrences_count_sv__NewPremiumAct,occurrences_count_sv__StoryDetails,occurrences_count_sv__WelcomeCarouselDialog
0,0000892c8eff256f940b90c82d8c6d23,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1
1,0000bf6a4004cd40d8f190bee72aa98e,3,0,0,0,0,0,1,3,2,0,0,0,0,3,0,5,1
2,0000d2213ba1e2c0d3d6221e8f092888,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,2
3,0002b6303b89915e39a76130ca7f2730,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1
4,00039a05ee528224e4fb0c122e34828a,2,0,0,1,0,0,0,2,2,0,0,0,0,4,0,3,1


In [73]:
# Logistic regression of "user purchased" on two event-count features.
#
# The purchase column holds an event count, while a logit model needs a 0/1
# outcome. Binarise it so that a user with more than one purchase row cannot
# break the fit; for users with 0 or 1 purchases the target is unchanged.
purchase_counts = df_log['occurrences_count_in_app_purchase']
y = (purchase_counts > 0).astype(int)

X = df_log[["occurrences_count_sv__WelcomeCarouselDialog","occurrences_count_sv__Libraries"]]
# statsmodels does not add an intercept on its own.
X = sm.add_constant(X)

model = sm.Logit(y, X).fit()
model.summary()

Optimization terminated successfully.
         Current function value: 0.006664
         Iterations 12


0,1,2,3
Dep. Variable:,occurrences_count_in_app_purchase,No. Observations:,59628.0
Model:,Logit,Df Residuals:,59625.0
Method:,MLE,Df Model:,2.0
Date:,"Tue, 08 Dec 2020",Pseudo R-squ.:,0.1366
Time:,20:37:08,Log-Likelihood:,-397.37
converged:,True,LL-Null:,-460.23
Covariance Type:,nonrobust,LLR p-value:,5.04e-28

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-7.3385,0.451,-16.288,0.000,-8.222,-6.455
occurrences_count_sv__WelcomeCarouselDialog,-0.1311,0.408,-0.322,0.748,-0.930,0.668
occurrences_count_sv__Libraries,0.0509,0.004,11.554,0.000,0.042,0.060
