## Drop-off Analysis

In [83]:
import os
from pathlib import Path

import pandas as pd

### Data Loading & Overview

In [84]:
# Folder that holds the Toy Store CSV exports. Set the TOY_STORE_DATA_DIR
# environment variable to run the notebook on another machine; when it is not
# set, the original hard-coded local folder is used.
DATA_DIR = Path(os.environ.get(
    "TOY_STORE_DATA_DIR",
    r"D:\Data Analysis Learning\Portfolio Projects\toy-store-ecommerce-analysis\Toy Store E-Commerce Database",
))

# Page-view log: pageview id, timestamp, owning session id and visited URL.
pageviews = pd.read_csv(DATA_DIR / "website_pageviews.csv")

In [85]:
# Preview the first 100 page views after ordering the log by session id.
pageviews.sort_values(by='website_session_id').head(n=100)

Unnamed: 0,website_pageview_id,created_at,website_session_id,pageview_url
0,1,2012-03-19 08:04:16,1,/home
1,2,2012-03-19 08:16:49,2,/home
2,3,2012-03-19 08:26:55,3,/home
3,4,2012-03-19 08:37:33,4,/home
4,5,2012-03-19 09:00:55,5,/home
...,...,...,...,...
96,97,2012-03-19 14:14:07,54,/products
98,99,2012-03-19 14:18:16,54,/the-original-mr-fuzzy
95,96,2012-03-19 14:12:09,54,/home
97,98,2012-03-19 14:15:08,55,/home


### Drop-off Rate Calculation

#### Where do customers drop off in the purchase funnel?

In [86]:

# Exit page of every session: the URL of its last page view.
# A bare groupby(...).last() returns whichever row happens to come last in the
# frame, so the page views are first put in website_pageview_id order (stable
# sort, so ties keep their file order). This assumes pageview ids are assigned
# chronologically, which matches the preview of the data - TODO confirm.
last_page = (
    pageviews
    .sort_values('website_pageview_id', kind='stable')
    .groupby('website_session_id')['pageview_url']
    .last()
    .to_frame()
)

In [87]:
# Count how many sessions ended on each URL, then copy the URL (which is the
# index of the counts) into an ordinary column as well.
drop_off = last_page['pageview_url'].value_counts().to_frame()
drop_off = drop_off.assign(pageview_url=drop_off.index)

In [88]:
# Replace the URL index with a plain 0..n-1 positional index.
drop_off.index = pd.RangeIndex(len(drop_off))

In [89]:
# Put the URL first and its exit count second.
drop_off = drop_off.loc[:, ['pageview_url', 'count']]


In [90]:
# DataFrame.rename returns a new frame; without assigning it back the rename
# was only visible in this cell's output and `drop_off` kept the generic
# 'count' column name.
drop_off = drop_off.rename(columns={'count': 'drop_off_count'})
drop_off

Unnamed: 0,pageview_url,drop_off_count
0,/the-original-mr-fuzzy,92568
1,/lander-2,59249
2,/home,57346
3,/products,51017
4,/lander-3,39733
5,/thank-you-for-your-order,32313
6,/cart,30469
7,/lander-1,25330
8,/lander-5,25131
9,/billing-2,17748


#### How many times is each page in the funnel visited?

In [91]:
# Number of page-view rows (non-null session ids) recorded for each URL; the
# URL, which is the index of the counts, is also copied into a regular column.
page_enterance = pageviews.groupby('pageview_url')['website_session_id'].count().to_frame()
page_enterance = page_enterance.assign(pageview_url=page_enterance.index)

In [92]:
# Swap the URL index for a positional 0..n-1 index and show the URL column first.
page_enterance.index = pd.RangeIndex(len(page_enterance))
page_enterance = page_enterance.loc[:, ['pageview_url', 'website_session_id']]

In [93]:
# Display the per-URL page-view counts; the 'website_session_id' column holds
# the count of page views, not a session id.
page_enterance

Unnamed: 0,pageview_url,website_session_id
0,/billing,3617
1,/billing-2,48441
2,/cart,94953
3,/home,137576
4,/lander-1,47574
5,/lander-2,131170
6,/lander-3,79000
7,/lander-4,9385
8,/lander-5,68166
9,/products,261231


#### Merging the obtained results into a single dataframe

In [94]:
# Combine visits and exits per URL. Columns are renamed by name rather than by
# position, so a change in column order cannot silently mislabel them; the
# mapping covers the exit-count column whether it is still called 'count' or
# has already been renamed to 'drop_off_count' (rename ignores missing keys).
drop_off_rate = (
    page_enterance
    .merge(drop_off, on='pageview_url', how='left')
    .rename(columns={
        'website_session_id': 'page_enterance_count',
        'count': 'drop_off_count',
    })
)

# With a left join, a URL on which no session ever ended has no match and gets
# NaN; treat that as zero exits and keep the counts as integers.
drop_off_rate['drop_off_count'] = drop_off_rate['drop_off_count'].fillna(0).astype('int64')
drop_off_rate

Unnamed: 0,pageview_url,page_enterance_count,drop_off_count
0,/billing,3617,1997
1,/billing-2,48441,17748
2,/cart,94953,30469
3,/home,137576,57346
4,/lander-1,47574,25330
5,/lander-2,131170,59249
6,/lander-3,79000,39733
7,/lander-4,9385,4851
8,/lander-5,68166,25131
9,/products,261231,51017


#### Calculating drop-off rates per page visit and as a share of all sessions

In [95]:
# drop_off_rate: exits on a page as a percentage of that page's views.
drop_off_rate['drop_off_rate'] = (
    drop_off_rate['drop_off_count'].mul(100).div(drop_off_rate['page_enterance_count'])
)
# drop_off_rate_of_total: exits on a page as a percentage of all exits.
# NOTE(review): sessions ending on /thank-you-for-your-order are included in
# these exits even though they completed an order - confirm this is intended.
drop_off_rate['drop_off_rate_of_total'] = (
    drop_off_rate['drop_off_count'].mul(100).div(drop_off_rate['drop_off_count'].sum())
)
# Keep two decimals for display.
drop_off_rate[['drop_off_rate', 'drop_off_rate_of_total']] = (
    drop_off_rate[['drop_off_rate', 'drop_off_rate_of_total']].round(2)
)


In [96]:
# Rank pages by their share of all exits, largest first.
drop_off_rate.sort_values('drop_off_rate_of_total', ascending=False)

Unnamed: 0,pageview_url,page_enterance_count,drop_off_count,drop_off_rate,drop_off_rate_of_total
15,/the-original-mr-fuzzy,162525,92568,56.96,19.58
5,/lander-2,131170,59249,45.17,12.53
3,/home,137576,57346,41.68,12.13
9,/products,261231,51017,19.53,10.79
6,/lander-3,79000,39733,50.29,8.4
11,/thank-you-for-your-order,32313,32313,100.0,6.83
2,/cart,94953,30469,32.09,6.44
4,/lander-1,47574,25330,53.24,5.36
8,/lander-5,68166,25131,36.87,5.31
1,/billing-2,48441,17748,36.64,3.75


## Converted 🆚 Non-converted Sessions to Orders Behavior Comparison

#### Data Loading

In [97]:
# Folder that holds the Toy Store CSV exports. Set the TOY_STORE_DATA_DIR
# environment variable to run the notebook on another machine; when it is not
# set, the original hard-coded local folder is used.
DATA_DIR = Path(os.environ.get(
    "TOY_STORE_DATA_DIR",
    r"d:\Data Analysis Learning\Portfolio Projects\toy-store-ecommerce-analysis\Toy Store E-Commerce Database",
))

orders = pd.read_csv(DATA_DIR / "orders.csv")

sessions = pd.read_csv(DATA_DIR / "website_sessions.csv")

#### converted_sessions table construction

In [98]:
# Sessions that produced an order: start from the orders and attach the
# matching session row. Columns present in both tables get the suffix "_o"
# (orders side) or "_w" (sessions side); only session-side attributes are kept.
converted_sessions = orders.merge(
    sessions,
    on='website_session_id',
    how='left',
    suffixes=("_o", "_w"),
)
converted_sessions = converted_sessions.loc[:, [
    'website_session_id', 'user_id_w', 'created_at_w', 'is_repeat_session',
    'utm_source', 'utm_campaign', 'utm_content', 'device_type',
]]

In [99]:
# Attach every page view of each converted session (one output row per page view).
converted_sessions = converted_sessions.merge(
    pageviews,
    on='website_session_id',
    how='left',
    suffixes=("_c", "_p"),
)

In [100]:
# Flag every row as belonging to a converted session and fix the column order.
converted_sessions = converted_sessions.assign(is_converted_session=1).loc[:, [
    'website_session_id', 'user_id_w',
    'is_converted_session', 'created_at_w', 'is_repeat_session',
    'utm_source', 'utm_campaign', 'utm_content', 'device_type',
    'website_pageview_id', 'created_at', 'pageview_url',
]]


In [101]:
# Sanity check: list the columns of the converted-sessions table.
converted_sessions.columns

Index(['website_session_id', 'user_id_w', 'is_converted_session',
       'created_at_w', 'is_repeat_session', 'utm_source', 'utm_campaign',
       'utm_content', 'device_type', 'website_pageview_id', 'created_at',
       'pageview_url'],
      dtype='str')

#### non_converted_sessions table construction

In [102]:
# Sessions without any order: anti-join that keeps only the rows of `sessions`
# whose website_session_id has no match in `orders`.
# NOTE(review): how='right_anti' is only accepted by recent pandas releases -
# confirm the minimum pandas version this notebook targets.
non_converted_sessions = orders.merge(
    sessions,
    on='website_session_id',
    how='right_anti',
    suffixes=("_n", "_w"),
)
non_converted_sessions = non_converted_sessions.loc[:, [
    'website_session_id', 'user_id_w', 'created_at_w', 'is_repeat_session',
    'utm_source', 'utm_campaign', 'utm_content', 'device_type',
]]

In [103]:
# Attach every page view of each non-converted session (one output row per page view).
non_converted_sessions = non_converted_sessions.merge(
    pageviews,
    on='website_session_id',
    how='left',
    suffixes=("_n", "_p"),
)

In [104]:
# Display the non-converted sessions joined with their page views
# (one row per page view; the session attributes repeat on each row).
non_converted_sessions

Unnamed: 0,website_session_id,user_id_w,created_at_w,is_repeat_session,utm_source,utm_campaign,utm_content,device_type,website_pageview_id,created_at,pageview_url
0,1,1,2012-03-19 08:04:16,0,gsearch,nonbrand,g_ad_1,mobile,1,2012-03-19 08:04:16,/home
1,2,2,2012-03-19 08:16:49,0,gsearch,nonbrand,g_ad_1,desktop,2,2012-03-19 08:16:49,/home
2,3,3,2012-03-19 08:26:55,0,gsearch,nonbrand,g_ad_1,desktop,3,2012-03-19 08:26:55,/home
3,4,4,2012-03-19 08:37:33,0,gsearch,nonbrand,g_ad_1,desktop,4,2012-03-19 08:37:33,/home
4,5,5,2012-03-19 09:00:55,0,gsearch,nonbrand,g_ad_1,mobile,5,2012-03-19 09:00:55,/home
...,...,...,...,...,...,...,...,...,...,...,...
961928,472869,394316,2015-03-19 07:55:40,0,gsearch,nonbrand,g_ad_1,mobile,1188116,2015-03-19 07:55:40,/lander-3
961929,472870,394317,2015-03-19 07:56:29,0,gsearch,nonbrand,g_ad_1,desktop,1188118,2015-03-19 07:56:29,/lander-5
961930,472870,394317,2015-03-19 07:56:29,0,gsearch,nonbrand,g_ad_1,desktop,1188119,2015-03-19 07:57:22,/products
961931,472870,394317,2015-03-19 07:56:29,0,gsearch,nonbrand,g_ad_1,desktop,1188121,2015-03-19 07:58:13,/the-original-mr-fuzzy


#### Converted vs Non-converted sessions behavior comparison

**Count Comparison**

In [116]:
# Number of distinct sessions that ended in an order (missing ids are not counted).
converted_sessions['website_session_id'].nunique(dropna=True)

32313

In [117]:
# Number of distinct sessions that did not end in an order (missing ids are not counted).
non_converted_sessions['website_session_id'].nunique(dropna=True)

440558

**Session Repetition**

In [None]:
# Share (%) of converted sessions that are first-time (0) vs repeat (1):
# distinct sessions per is_repeat_session value over all distinct converted sessions.
sr = (
    converted_sessions
    .groupby('is_repeat_session')['website_session_id']
    .nunique()
    .mul(100)
    .div(converted_sessions['website_session_id'].nunique())
    .round(2)
)
sr

is_repeat_session
0    80.97
1    19.03
Name: website_session_id, dtype: float64

In [None]:
# Share (%) of non-converted sessions that are first-time (0) vs repeat (1):
# distinct sessions per is_repeat_session value over all distinct non-converted sessions.
nsr = (
    non_converted_sessions
    .groupby('is_repeat_session')['website_session_id']
    .nunique()
    .mul(100)
    .div(non_converted_sessions['website_session_id'].nunique())
    .round(2)
)
nsr

is_repeat_session
0    83.57
1    16.43
Name: website_session_id, dtype: float64

**Average Session Duration Comparison**

In [215]:
# Average duration of converted sessions.
# Per session: timestamp of its last page-view row (in frame order) minus the
# session start timestamp, both parsed at one-second resolution.
import datetime as dt
sad = converted_sessions.groupby('website_session_id').agg(
    created_at_w=('created_at_w', 'first'),
    created_at=('created_at', 'last'),
)
sad1 = sad['created_at_w'].astype('datetime64[s]')
sad2 = sad['created_at'].astype('datetime64[s]')
sad = sad2.sub(sad1)
sad.mean()

# roughly 15 minutes

Timedelta('0 days 00:14:57')

In [216]:
# Average duration of non-converted sessions.
# Per session: timestamp of its last page-view row (in frame order) minus the
# session start timestamp, both parsed at one-second resolution.
nsad = non_converted_sessions.groupby('website_session_id').agg(
    created_at_w=('created_at_w', 'first'),
    created_at=('created_at', 'last'),
)
nsad1 = nsad['created_at_w'].astype('datetime64[s]')
nsad2 = nsad['created_at'].astype('datetime64[s]')
nsad = nsad2.sub(nsad1)
nsad.mean()

# roughly 3 minutes - plausible, since these sessions end before any order

Timedelta('0 days 00:03:08')

**Average Number of Pages Visited Comparison**

In [217]:
# Page views per converted session, then their mean as a whole number.
# NOTE(review): int() drops the fractional part of the mean instead of rounding.
spv = (
    converted_sessions
    .groupby('website_session_id')['website_pageview_id']
    .count()
)
int(spv.mean())

7

In [None]:
# Page views per non-converted session, then their mean as a whole number.
# NOTE(review): int() drops the fractional part of the mean instead of rounding.
nspv = (
    non_converted_sessions
    .groupby('website_session_id')['website_pageview_id']
    .count()
)
int(nspv.mean())

# two pages - plausible as well for sessions that never reach an order

2

**Mobile 🆚 Desktop**

In [224]:
# Device split (%) of converted sessions: distinct sessions per device_type
# over all distinct converted sessions.
smd = (
    converted_sessions
    .groupby('device_type')['website_session_id']
    .nunique()
    .mul(100)
    .div(converted_sessions['website_session_id'].nunique())
    .round(2)
)
smd

device_type
desktop    86.05
mobile     13.95
Name: website_session_id, dtype: float64

In [225]:
# Device split (%) of non-converted sessions: distinct sessions per device_type
# over all distinct non-converted sessions.
nsmd = (
    non_converted_sessions
    .groupby('device_type')['website_session_id']
    .nunique()
    .mul(100)
    .div(non_converted_sessions['website_session_id'].nunique())
    .round(2)
)
nsmd

device_type
desktop    67.92
mobile     32.08
Name: website_session_id, dtype: float64