In [196]:
import pandas as pd
import numpy as np
import json

## DATA

In [197]:
# Raw inputs: one row per session, and one row per session with a sign-up record.
session_data_raw = pd.read_csv(filepath_or_buffer="data/Session_data.csv")
signup_data_raw = pd.read_csv(filepath_or_buffer="data/Signup_data.csv")

# Decode the JSON file as UTF-8 explicitly: open() otherwise falls back to the
# platform's locale encoding, which is not UTF-8 on every system.
with open("data/ltv.json", "r", encoding="utf-8") as json_file:
    ltv_data = json.load(json_file)

### Initial look at data

In [198]:
# Preview the first ten session rows.
session_data_raw.head(10)

Unnamed: 0,Session ID,User ID,Market,Sign-up,Experiment ID,Variant ID
0,7BIO49O640BIASIBALZJ,004YOVNZC1NW68QLXU,Norway,False,Experiment032,Control
1,WLD3L4N54AUGPRUHUJG9,007TH913EDSVIDR248,Finland,False,Experiment037,Variant 1
2,3ALVA866B3LL9RNDNLCY,00B1YBLAAG6DEIFCOH,Denmark,False,,
3,H638ZR9TECATB7QRTP1O,00L73RO9N9YQW4TLFG,Denmark,True,Experiment037,Variant 1
4,WNKHTTGTYZVBPJO2KT07,00QYV1X2MN5MBBKTZ9,Norway,False,Experiment037,Control
5,KLHKJ2O7AF207MRNU549,00RDZ0X9037G65ARSC,Sweden,False,Experiment037,Variant 2
6,JYBY694NAPC8FYM7HU7D,00U5F18PAZLFL54IZM,Sweden,False,Experiment037,Variant 1
7,X9EO00FDBDUO888MMY56,00V1ZT63REHRX5LOC8,Norway,True,Experiment037,Variant 2
8,CA10G00GLY79NUJG2M5G,00VRKG3QBA3K69CEJQ,Norway,False,Experiment037,Variant 1
9,CHIRLT84GOA0I4ECW13I,00X9DQBSH6ADZSURQ2,Sweden,False,Experiment037,Variant 1


In [199]:
# Preview the first ten sign-up rows.
signup_data_raw.head(10)

Unnamed: 0,Session ID,Sign-up
0,0HYQ2HO0GC2XCTOY3NGQ,advanced
1,7X5AHWLLOSZ2VUT2DG11,advanced
2,I0ZS1QTW56E7KC6KPR3J,advanced
3,S2G1I1F2QPW4CUZ9VLMK,advanced
4,BK1RMVLBGZZXLPWI5OME,advanced
5,G90BYI0AWJYCFFBRWBLV,advanced
6,R46RPPGNY5VP2PLQ0YMH,advanced
7,M6EN0WNDSD3S44IMNJ4W,advanced
8,BP4FUZS1HNXZ45BYBA9L,advanced
9,WL2O2UONPOD0W2IGDEEK,advanced


In [200]:
# Show the mapping loaded from data/ltv.json (keyed by subscription name;
# "ltv" presumably stands for lifetime value - confirm with the data owner).
ltv_data

{'dropout': 0, 'free': 3120, 'basic': 5280, 'advanced': 21800}

### Data Transformations

In [201]:
# Keep only the sessions that belong to the experiment under analysis;
# rows without an experiment (missing 'Experiment ID') are excluded as well.
is_target_experiment = session_data_raw['Experiment ID'].eq("Experiment037")
session_data = session_data_raw.loc[is_target_experiment]

In [202]:
# Left-join the sign-up records onto the experiment sessions so that sessions
# without a matching sign-up record are kept (their subscription is missing).
all_data = pd.merge(
    left=session_data,
    right=signup_data_raw,
    how='left',
    on='Session ID',
    suffixes=('_session', '_signup'),
)

# Both inputs have a 'Sign-up' column: the session one is a boolean flag, the
# sign-up one holds the subscription name. Give each an unambiguous name.
all_data = all_data.rename(
    columns={'Sign-up_session': 'Sign-up', 'Sign-up_signup': 'Subscription'}
)

# Assign the filled column back instead of calling fillna(inplace=True) on the
# selected column: the in-place form acts on a temporary object, warns on recent
# pandas versions and has no effect once Copy-on-Write is enabled.
all_data['Subscription'] = all_data['Subscription'].fillna("dropout")

# Spelling variants of each market mapped to one canonical name.
market_mappings = {
    "Denmark": "Denmark",
    "Sweden": "Sweden",
    "Finland": "Finland",
    "Norway": "Norway",
    "Suomi": "Finland",
    "Sverige": "Sweden",
    "swe": "Sweden",
    "danmark": "Denmark",
    "denmark": "Denmark"
}

# One dict-based replace normalises every variant in a single pass; values
# that are not keys of the mapping are left unchanged.
all_data['Market'] = all_data['Market'].replace(market_mappings)

# Unique sessions per (market, variant, sign-up flag, subscription), largest first.
grouped_data = (
    all_data
    .groupby(by=['Market', 'Variant ID', 'Sign-up', 'Subscription'])
    .agg(session_count=('Session ID', 'nunique'))
    .sort_values(by='session_count', ascending=False)
    .reset_index()
)

grouped_data

Unnamed: 0,Market,Variant ID,Sign-up,Subscription,session_count
0,Denmark,Control,False,dropout,2288
1,Denmark,Variant 2,False,dropout,2216
2,Denmark,Variant 1,False,dropout,2185
3,Sweden,Variant 2,False,dropout,2051
4,Sweden,Control,False,dropout,2045
5,Sweden,Variant 1,False,dropout,1992
6,Finland,Control,False,dropout,1569
7,Finland,Variant 2,False,dropout,1411
8,Finland,Variant 1,False,dropout,1401
9,Norway,Control,False,dropout,1300


### Data Exploration

In [203]:
# Number of unique sessions in each experiment variant, largest group first.
sessions_per_variant = all_data.groupby(by='Variant ID').agg(
    session_count=('Session ID', 'nunique')
)
sessions_per_variant.sort_values(by='session_count', ascending=False).reset_index()

Unnamed: 0,Variant ID,session_count
0,Control,8434
1,Variant 2,8050
2,Variant 1,7988


### Data Analysis

In [204]:
# Markets included in the analysis; remove entries to narrow the scope.
markets = ['Finland', 'Norway', 'Sweden', 'Denmark']

### Signups and dropouts!

In [205]:
def _sessions_by_variant(frame, count_column):
    """Sum 'session_count' per 'Variant ID'.

    Parameters:
        frame: rows of the grouped session table to aggregate.
        count_column: name given to the summed column in the result.

    Returns:
        A DataFrame with columns 'Variant ID' and `count_column`, sorted by
        `count_column` in descending order, with a fresh integer index.
    """
    return (
        frame
        .groupby(by='Variant ID')
        .agg(**{count_column: ('session_count', 'sum')})
        .sort_values(by=count_column, ascending=False)
        .reset_index()
    )


# Apply the market filter once; every aggregate below shares it.
market_sessions = grouped_data[grouped_data['Market'].isin(markets)]

# Total sessions broken into Variants
total_sessions = _sessions_by_variant(market_sessions, 'total_sessions')

# Total dropouts broken into Variants.
# Explicit comparisons (rather than `~`) keep this correct even if the
# 'Sign-up' column is not a pure boolean dtype.
dropouts = _sessions_by_variant(
    market_sessions[market_sessions['Sign-up'] == False], 'dropout_count'
)

# Total signups broken into Variants
signups = _sessions_by_variant(
    market_sessions[market_sessions['Sign-up'] == True], 'signup_count'
)

# Left-merge from the totals so a variant with no signups (or no dropouts)
# stays in the table with a count of 0 instead of being dropped by an inner join.
signups_all = (
    total_sessions
    .merge(signups, how='left', on='Variant ID')
    .merge(dropouts, how='left', on='Variant ID')
)
count_columns = ['signup_count', 'dropout_count']
signups_all[count_columns] = signups_all[count_columns].fillna(0).astype('int64')

# Rates are percentages of all sessions in the variant.
signups_all['signup_rate'] = (signups_all['signup_count'] * 100) / signups_all['total_sessions']
signups_all['dropout_rate'] = (signups_all['dropout_count'] * 100) / signups_all['total_sessions']
signups_all.sort_values(by='dropout_rate')

Unnamed: 0,Variant ID,total_sessions,signup_count,dropout_count,signup_rate,dropout_rate
2,Variant 1,7988,1180,6808,14.772158,85.227842
1,Variant 2,8050,1179,6871,14.645963,85.354037
0,Control,8434,1232,7202,14.607541,85.392459


### Subscriptions

In [206]:
# Subscription values used to filter the grouped sessions below.
subscription_type = ['free', 'basic', 'advanced']

In [213]:
# Rows for the selected markets whose subscription is one of the selected types.
in_selected_markets = grouped_data['Market'].isin(markets)
has_selected_subscription = grouped_data['Subscription'].isin(subscription_type)
signed_up_sessions = grouped_data.loc[in_selected_markets & has_selected_subscription]

signed_up_sessions

Unnamed: 0,Market,Variant ID,Sign-up,Subscription,session_count
12,Denmark,Variant 1,True,free,256
13,Sweden,Control,True,free,254
14,Sweden,Variant 1,True,free,244
15,Denmark,Control,True,free,241
16,Denmark,Variant 2,True,free,238
17,Sweden,Variant 2,True,free,238
18,Finland,Control,True,free,166
19,Norway,Control,True,free,158
20,Finland,Variant 2,True,free,158
21,Finland,Variant 1,True,free,154
