In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from plotly import graph_objects as go

# E-Commerce Conversion Funnel

In [2]:
import os
import re

In [3]:
def extract_file_name(path):
    name_pattern = r"(.*)_page_table\.csv$"
    return re.findall(name_pattern, path)[0]

In [4]:
name_pattern = r"(.*)\_page\_table\.csv$"
re.findall(name_pattern, "payment_confirmation_table.csv")

[]

In [9]:
extract_file_name("payment_confirmation_table.csv")

IndexError: list index out of range

In [5]:
files = {}
for file in os.listdir("data/"):
    
    files[] = 

SyntaxError: invalid syntax (<ipython-input-5-c4fac51aa8f4>, line 4)

In [6]:
home_page = pd.read_csv("data/home_page_table.csv")
search_page = pd.read_csv("data/search_page_table.csv")
payment_page = pd.read_csv("data/payment_page_table.csv")
payment_confirmation = pd.read_csv("data/payment_confirmation_table.csv")
user = pd.read_csv("data/user_table.csv")

In [7]:
home_page.head()

Unnamed: 0,user_id,page
0,313593,home_page
1,468315,home_page
2,264005,home_page
3,290784,home_page
4,639104,home_page


In [8]:
user.head()

Unnamed: 0,user_id,date,device,sex
0,450007,2015-02-28,Desktop,Female
1,756838,2015-01-13,Desktop,Male
2,568983,2015-04-09,Desktop,Male
3,190794,2015-02-18,Desktop,Female
4,537909,2015-01-15,Desktop,Male


In [9]:
user.dtypes

user_id     int64
date       object
device     object
sex        object
dtype: object

In [11]:
user["date"] = pd.to_datetime(user["date"])

## Data prep

For modelling a funnel, we need a dataset that looks like:

| user_id | date | device | sex | home_page | search_page | payment_page | payment_confirmation |
|---------|------|--------|-----|-----------|-------------|--------------|----------------------|
| 13661 | ... | ... | ... | 1 | 1 | 0 | 0 |


In [12]:
df = (user
      .merge(home_page, how="inner", on="user_id")
      .rename(columns={"page": "home_page"})
      .merge(search_page, how="left", on="user_id")
      .rename(columns={"page": "search_page"})
      .merge(payment_page, how="left", on="user_id")
      .rename(columns={"page": "payment_page"})
      .merge(payment_confirmation, how="left", on="user_id")
      .rename(columns={"page": "payment_confirmation"})).fillna(0)
 

In [13]:
df

Unnamed: 0,user_id,date,device,sex,home_page,search_page,payment_page,payment_confirmation
0,450007,2015-02-28,Desktop,Female,home_page,0,0,0
1,756838,2015-01-13,Desktop,Male,home_page,0,0,0
2,568983,2015-04-09,Desktop,Male,home_page,search_page,0,0
3,190794,2015-02-18,Desktop,Female,home_page,search_page,0,0
4,537909,2015-01-15,Desktop,Male,home_page,0,0,0
...,...,...,...,...,...,...,...,...
90395,307667,2015-03-30,Desktop,Female,home_page,0,0,0
90396,642989,2015-02-08,Desktop,Female,home_page,search_page,0,0
90397,659645,2015-04-13,Desktop,Male,home_page,search_page,0,0
90398,359779,2015-03-23,Desktop,Male,home_page,0,0,0


In [14]:
df[["home_page", "search_page", "payment_page", "payment_confirmation"]] = np.where(df[["home_page", "search_page", "payment_page", "payment_confirmation"]] == 0, 0, 1)

In [15]:
df.head()

Unnamed: 0,user_id,date,device,sex,home_page,search_page,payment_page,payment_confirmation
0,450007,2015-02-28,Desktop,Female,1,0,0,0
1,756838,2015-01-13,Desktop,Male,1,0,0,0
2,568983,2015-04-09,Desktop,Male,1,1,0,0
3,190794,2015-02-18,Desktop,Female,1,1,0,0
4,537909,2015-01-15,Desktop,Male,1,0,0,0


# Funnel

In [17]:
df[["home_page", "search_page", "payment_page", "payment_confirmation"]].sum()

home_page               90400
search_page             45200
payment_page             6030
payment_confirmation      452
dtype: int64

In [18]:
pages = ["home_page", "search_page", "payment_page", "payment_confirmation"]

In [23]:
fig = go.Figure(go.Funnel(y=pages,
                          x=df[pages].sum().values,
                          textinfo="value+percent previous"))
fig.show()

## Split by device

In [27]:
df_by_device = df.groupby("device", as_index=False).agg({"home_page": "sum",
                                                         "search_page": "sum",
                                                         "payment_page": "sum",
                                                         "payment_confirmation": "sum"})

In [28]:
df_by_device

Unnamed: 0,device,home_page,search_page,payment_page,payment_confirmation
0,Desktop,60200,30100,3010,150
1,Mobile,30200,15100,3020,302


In [48]:
fig = go.Figure()

for device in ["Desktop", "Mobile"]:
    fig.add_trace(go.Funnel(name=device,
                            y=pages,
                            x=df_by_device.loc[df_by_device["device"] == device, pages].sum().values,
                            textinfo="value+percent previous"))
    
fig.show()

# Visits over time

In [40]:
df.dtypes

user_id                          int64
date                    datetime64[ns]
device                          object
sex                             object
home_page                        int64
search_page                      int64
payment_page                     int64
payment_confirmation             int64
dtype: object

### Visitors on home_page over time

In [41]:
fig = px.line((df
               .groupby("date")
               .agg({"home_page": "sum"})))

fig.show()

### Visitors for all pages

In [45]:
(df
 .groupby("date", as_index=False)
 .agg({"home_page": "sum",
       "search_page": "sum",
       "payment_page": "sum",
       "payment_confirmation": "sum"}))

Unnamed: 0,date,home_page,search_page,payment_page,payment_confirmation
0,2015-01-01,712,436,76,6
1,2015-01-02,721,447,84,5
2,2015-01-03,760,422,64,7
3,2015-01-04,713,447,76,7
4,2015-01-05,754,462,88,11
...,...,...,...,...,...
115,2015-04-26,792,307,25,1
116,2015-04-27,779,311,26,1
117,2015-04-28,736,304,25,1
118,2015-04-29,713,271,10,0


In [44]:
(df
 .groupby("date", as_index=False)
 .agg({"home_page": "sum",
       "search_page": "sum",
       "payment_page": "sum",
       "payment_confirmation": "sum"})
 .melt(id_vars="date", value_vars=pages))

Unnamed: 0,date,variable,value
0,2015-01-01,home_page,712
1,2015-01-02,home_page,721
2,2015-01-03,home_page,760
3,2015-01-04,home_page,713
4,2015-01-05,home_page,754
...,...,...,...
475,2015-04-26,payment_confirmation,1
476,2015-04-27,payment_confirmation,1
477,2015-04-28,payment_confirmation,1
478,2015-04-29,payment_confirmation,0


In [47]:
fig = px.line((df
               .groupby("date", as_index=False)
               .agg({"home_page": "sum",
                     "search_page": "sum",
                     "payment_page": "sum",
                     "payment_confirmation": "sum"})
               .melt(id_vars="date", value_vars=pages)),
              x="date",
              y="value",
              color="variable")

fig.show()