# Importing necessary libraries

In [2]:
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Fetching the dataset

In [6]:
# Location of the dataset. Set the ECOMMERCE_DATA_PATH environment variable to
# point at your own copy of the CSV; when it is unset, the original author's
# path is used so existing runs keep working.
DATA_PATH = os.environ.get(
    "ECOMMERCE_DATA_PATH",
    r"C:\Users\Dhar\Downloads\ecommerce_customer_data.csv",
)

df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,User_ID,Gender,Age,Location,Device_Type,Product_Browsing_Time,Total_Pages_Viewed,Items_Added_to_Cart,Total_Purchases
0,1,Female,23,Ahmedabad,Mobile,60,30,1,0
1,2,Male,25,Kolkata,Tablet,30,38,9,4
2,3,Male,32,Bangalore,Desktop,37,13,5,0
3,4,Male,35,Delhi,Mobile,7,20,10,3
4,5,Male,27,Bangalore,Tablet,35,20,8,2


In [7]:
# Dimensions of the loaded frame as (rows, columns)
df.shape

(500, 9)

In [9]:
# Per-column non-null counts and dtypes -- a quick check for missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 9 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   User_ID                500 non-null    int64 
 1   Gender                 500 non-null    object
 2   Age                    500 non-null    int64 
 3   Location               500 non-null    object
 4   Device_Type            500 non-null    object
 5   Product_Browsing_Time  500 non-null    int64 
 6   Total_Pages_Viewed     500 non-null    int64 
 7   Items_Added_to_Cart    500 non-null    int64 
 8   Total_Purchases        500 non-null    int64 
dtypes: int64(6), object(3)
memory usage: 35.3+ KB


# Descriptive statistics for numeric data

In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
num_sum = df.describe()
num_sum

Unnamed: 0,User_ID,Age,Product_Browsing_Time,Total_Pages_Viewed,Items_Added_to_Cart,Total_Purchases
count,500.0,500.0,500.0,500.0,500.0,500.0
mean,250.5,26.276,30.74,27.182,5.15,2.464
std,144.481833,5.114699,15.934246,13.071596,3.203127,1.740909
min,1.0,18.0,5.0,5.0,0.0,0.0
25%,125.75,22.0,16.0,16.0,2.0,1.0
50%,250.5,26.0,31.0,27.0,5.0,2.0
75%,375.25,31.0,44.0,38.0,8.0,4.0
max,500.0,35.0,60.0,50.0,10.0,5.0


# Descriptive statistics for categorical data

In [15]:
# Summary of the object-dtype columns: count, number of unique values,
# most frequent value (top) and how often it occurs (freq)
cat_sum = df.describe(include = 'object')
cat_sum

Unnamed: 0,Gender,Location,Device_Type
count,500,500,500
unique,2,8,3
top,Male,Kolkata,Mobile
freq,261,71,178


# Age distribution

In [20]:
# Histogram of customer ages
fig = px.histogram(
    data_frame=df,
    x='Age',
    title='Distribution of Age',
)
fig.show()

# Gender distribution

In [18]:
# Number of customers per gender as a two-column frame (Gender, Count)
gender_count = (
    df['Gender']
    .value_counts()
    .rename_axis('Gender')
    .reset_index(name='Count')
)

fig = px.bar(
    data_frame=gender_count,
    x='Gender',
    y='Count',
    title='Gender Distribution',
)
fig.show()

# Analyzing customer behaviour


# Product Browsing Time vs. Total Pages Viewed

In [19]:
# Browsing time against pages viewed, one point per customer.
# trendline='ols' overlays a least-squares fit line (plotly relies on the
# statsmodels package for this).
fig = px.scatter(
    data_frame=df,
    x='Product_Browsing_Time',
    y='Total_Pages_Viewed',
    trendline='ols',
    title='Product Browsing Time vs. Total Pages Viewed',
)
fig.show()

The above scatter plot shows no consistent pattern or strong association between the time spent browsing products and the total number of pages viewed. It indicates that customers are not necessarily exploring more pages if they spend more time on the website, which might be due to various factors such as the website design, content relevance or individual user preferences.

# Average pages viewed by Gender

In [22]:
# Mean number of pages viewed for each gender, as a two-column frame for plotting
gender_grouped = (
    df.groupby('Gender')['Total_Pages_Viewed']
    .mean()
    .reset_index(name='Average_Total_Pages_Viewed')
)

fig = px.bar(
    data_frame=gender_grouped,
    x='Gender',
    y='Average_Total_Pages_Viewed',
    title='Average Total Pages Viewed by Gender',
)
fig.show()

# Average pages viewed by Device type

In [24]:
# Mean number of pages viewed for each device type, as a two-column frame for plotting
device_grouped = (
    df.groupby('Device_Type')['Total_Pages_Viewed']
    .mean()
    .reset_index(name='Average_Total_Pages_Viewed')
)

fig = px.bar(
    data_frame=device_grouped,
    x='Device_Type',
    y='Average_Total_Pages_Viewed',
    title='Average Total Pages Viewed by Devices',
)
fig.show()

# Analyzing Customer Lifetime Value

In [25]:
# CLTV proxy used in this notebook: purchases weighted by pages viewed,
# scaled down by the customer's age.
df['CLTV'] = (df['Total_Purchases'] * df['Total_Pages_Viewed']) / df['Age']

# Bucket customers by CLTV. The lowest bin starts at 0 and is closed on the
# left (include_lowest=True) so that customers with CLTV <= 1 -- including
# everyone with zero purchases -- are labelled 'Low Value'. With a lower edge
# of 1 those rows fell outside every bin, got a NaN segment and silently
# vanished from the counts below.
df['Segment'] = pd.cut(df['CLTV'], bins=[0, 2.5, 5, float('inf')],
                       labels=['Low Value', 'Medium Value', 'High Value'],
                       include_lowest=True)

# Customers per segment, ordered Low -> Medium -> High (category order)
# rather than by descending count, so the chart reads naturally.
segment_count = (
    df['Segment']
    .value_counts()
    .sort_index()
    .rename_axis('Segment')
    .reset_index(name='Count')
)

# Visualization of the customer segments

In [27]:
# Bar chart of how many customers fall into each CLTV segment
fig = (
    px.bar(
        data_frame=segment_count,
        x='Segment',
        y='Count',
        title='Customer Segmentation by CLTV',
    )
    .update_xaxes(title='Segment')
    .update_yaxes(title='Number of Customers')
)
fig.show()

# Analyzing Churn Rate

In [29]:
# A customer is flagged as churned when they have made no purchases at all
df['Churned'] = df['Total_Purchases'].eq(0)

# The mean of a boolean column is the share of True values, i.e. the churn rate
churn_rate = df['Churned'].mean()
churn_rate

0.198

A churn rate of 0.198 means that 19.8% of customers have churned, where churn is defined here as having made zero purchases. That is a significant portion of the customer base, and addressing it is important for maintaining business growth and profitability.