#### Loading Dataset

In [None]:
import pandas as pd

# Location of the raw transactions export, relative to the notebook.
DATA_PATH = "dataset/online_retail.csv"

# Read the CSV into a DataFrame and preview the first rows.
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

#### Handle Null Values and Add New Columns

In [None]:
# Count the missing values in every column.
df.isna().sum()

In [None]:
# Keep only the rows that have a CustomerID.
# The explicit .copy() makes the result an independent DataFrame, so the
# column assignments performed in later cells do not operate on a slice of
# the original frame (which would raise SettingWithCopyWarning).
df = df.loc[df['CustomerID'].notna()].copy()

In [None]:
# Line total for each row: units bought multiplied by the price per unit.
df['TotalPrice'] = df['Quantity'].mul(df['UnitPrice'])

#### Data type conversions

In [None]:
# Parse the InvoiceDate column into pandas datetime values.
df['InvoiceDate'] = df['InvoiceDate'].pipe(pd.to_datetime)

In [None]:
# Preview the first rows after the cleaning and conversion steps above.
df.head()

#### Analysis

In [None]:
# Reference point for recency: one day after the most recent invoice in the data.
last_invoice_date = df['InvoiceDate'].max()
ref_date = last_invoice_date + pd.Timedelta(days=1)

In [None]:
# Build one row per customer with the three RFM measures:
#   Recency   - whole days between ref_date and the customer's latest invoice
#   Frequency - number of distinct invoice numbers
#   Monetary  - sum of TotalPrice over all of the customer's rows
rfm = (
    df.groupby('CustomerID')
    .agg(
        Recency=('InvoiceDate', lambda dates: (ref_date - dates.max()).days),
        Frequency=('InvoiceNo', 'nunique'),
        Monetary=('TotalPrice', 'sum'),
    )
    .reset_index()
)
rfm.head()

In [None]:
# Split customers into five equal-sized bins on each RFM measure.
# Recency labels are reversed (5..1) so that fewer days since the last
# purchase gives a higher score; Frequency and Monetary score higher as
# they grow.
rfm['R_Score'] = pd.qcut(rfm['Recency'], 5, labels=[5, 4, 3, 2, 1])
# Frequency is ranked first (ties broken by order of appearance) so that
# repeated values cannot collapse the quantile edges.
rfm['F_Score'] = pd.qcut(rfm['Frequency'].rank(method='first'), 5, labels=[1, 2, 3, 4, 5])
rfm['M_Score'] = pd.qcut(rfm['Monetary'], 5, labels=[1, 2, 3, 4, 5])

# qcut returns Categorical labels. Convert them to integers before adding so
# the combined score is a plain numeric column rather than depending on how
# pandas reduces categorical data row-wise.
rfm['rfm_score'] = rfm[['R_Score', 'F_Score', 'M_Score']].astype(int).sum(axis=1)
rfm.head()

In [None]:
def segment(rfm):
    """Map a customer's combined RFM score to a segment label.

    Parameters
    ----------
    rfm : mapping or pandas.Series
        A single customer record (one row); only its ``'rfm_score'`` entry
        is read.

    Returns
    -------
    str
        'Champions' for a score of 12 or more, 'Loyal Customers' for 9-11,
        'Potential Customers' for 7-8, 'At Risk' for 4-6, and 'Lost' for
        anything lower.
    """
    score = rfm['rfm_score']
    # Thresholds are checked from highest to lowest, so the first match wins.
    if score >= 12:
        return 'Champions'
    if score >= 9:
        return 'Loyal Customers'
    if score >= 7:
        return 'Potential Customers'
    if score >= 4:
        return 'At Risk'
    return 'Lost'
    
# Label every customer by passing each row of the RFM table to `segment`.
rfm = rfm.assign(Segment=rfm.apply(segment, axis=1))

rfm.head()


In [None]:
# Number of customers in each segment.
# Stored under its own name so the `segment` function defined earlier is not
# overwritten; otherwise re-running the cell that applies it would fail.
segment_counts = rfm.groupby('Segment').agg({'CustomerID': 'count'})
segment_counts

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_theme(style="whitegrid")

# Work on an explicit figure/axes pair instead of pyplot's implicit state.
fig, ax = plt.subplots(figsize=(8, 6))
palette = sns.color_palette("viridis",  n_colors=rfm['Segment'].nunique())

# Bars are ordered from the largest segment to the smallest.
segment_order = rfm['Segment'].value_counts().index
sns.countplot(data=rfm, x='Segment', order=segment_order, hue='Segment', palette=palette, legend=False, ax=ax)

# Write the customer count above each bar. The height is formatted with no
# decimals so the label reads as a whole number (e.g. "1234", not "1234.0").
for bar in ax.patches:
    ax.annotate(f'{bar.get_height():.0f}', (bar.get_x() + bar.get_width() /2., bar.get_height()), ha='center', va='bottom', fontsize=10, fontweight='bold')
ax.set_title("Customer Segments Distribution", fontsize=16, weight='bold')
ax.set_xlabel("Segment", fontsize=12, weight='bold')
ax.set_ylabel("No of Customers", fontsize=12, weight='bold')
plt.xticks(rotation=30, ha='right')
fig.tight_layout()
plt.show()