# L-1 Task-2 Customer Segmentation Analysis
## Objective: Segment e-commerce customers using clustering. Analyze behavior and provide marketing recommendations.


## 1) Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plot
import seaborn as sea
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

## 2) Load and Explore Data

In [3]:
# Load the customer dataset and print a quick structural overview.
# The path is relative to the working directory; the recorded run of this
# cell failed with FileNotFoundError, so the CSV must sit next to the notebook.
data = pd.read_csv('ifood_df.csv')
print(data.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the summary.
data.info()
print('Missing values:', data.isnull().sum().sum())

FileNotFoundError: [Errno 2] No such file or directory: 'ifood_df.csv'

## 3) Data Cleaning

In [None]:
# Drop the two Z* columns, then collapse the "married" and "together"
# indicators into a single 0/1 partnership flag.
# NOTE(review): confirm these column names match the CSV header exactly.
data = data.drop(columns=['ZCostContact', 'ZRevenue'])
is_married = data['maritalMarried'].eq(1)
is_together = data['maritalTogether'].eq(1)
data['Inrelationship'] = (is_married | is_together).astype(int)

## 4) Descriptive Statistics


In [None]:
# Summary statistics for the dataset, plus the income median.
print(data.describe())
print('Median Income:', data['Income'].median())

# Marital status appears to be stored as 0/1 indicator columns (one per
# status), so the mode of a single indicator only reports whether that one
# flag is mostly 0 or 1. Count the customers flagged in each indicator and
# report the status with the largest count instead.
# NOTE(review): assumes every marital indicator column starts with 'marital'.
marital_columns = [col for col in data.columns if str(col).startswith('marital')]
most_common_marital = str(data[marital_columns].sum().idxmax())
print(
    'Most Common Marital Status:',
    most_common_marital[len('marital'):].lstrip('_'),
)

## 5) Feature Exploration & Visualization


In [None]:
# Box plot of total spend.
spend_fig, spend_ax = plot.subplots(figsize=(8, 6))
sea.boxplot(x=data['MntTotal'], ax=spend_ax)
spend_ax.set_title('Boxplot: Total Amount Spent')
plot.show()

# Age histogram with a KDE overlay.
age_fig, age_ax = plot.subplots(figsize=(8, 5))
sea.histplot(data['Age'], bins=20, kde=True, ax=age_ax)
age_ax.set_title('Customer Age Distribution')
plot.show()

# Heatmap of the pairwise correlations between columns.
corr_fig, corr_ax = plot.subplots(figsize=(10, 7))
correlations = data.corr()
sea.heatmap(correlations, cmap='coolwarm', ax=corr_ax)
corr_ax.set_title('Correlation Matrix')
plot.show()

## 6) Data Standardization

In [None]:
# Columns used as clustering inputs.
features = ['Income', 'MntTotal', 'Inrelationship']

# Standardize the selected columns so that no single feature dominates the
# distance computations because of its scale.
feature_frame = data[features]
scaler = StandardScaler()
scaler.fit(feature_frame)
data_scaled = scaler.transform(feature_frame)

## 7) Principal Component Analysis (PCA)


In [None]:
# Reduce the standardized features to two components; the result is used
# as the x/y coordinates of the cluster scatter plot.
n_plot_components = 2
pca = PCA(n_components=n_plot_components)
data_pca = pca.fit_transform(data_scaled)

## 8) Determine Optimal Clusters with the Elbow Method


In [None]:
# Fit K-Means once per candidate k and record the inertia (within-cluster
# sum of squared distances) so the "elbow" can be read off the curve.
candidate_ks = range(2, 8)
inertias = []
for k in candidate_ks:
    # n_init is set explicitly: its default differs between scikit-learn
    # releases (10 vs 'auto'), which changes the inertias and triggers a
    # FutureWarning on some versions.
    km = KMeans(n_clusters=k, n_init=10, random_state=7)
    km.fit(data_scaled)
    inertias.append(km.inertia_)

plot.plot(candidate_ks, inertias, marker='o')
plot.title('Elbow Method For Optimal k')
plot.xlabel('Number of clusters')
plot.ylabel('Inertia')
plot.show()

## 9) Apply K-Means Clustering


In [None]:
# Number of segments; presumably chosen from the elbow plot (TODO confirm).
optimal_k = 4
# n_init is set explicitly because its default differs between scikit-learn
# releases (10 vs 'auto'), which would otherwise make the segmentation
# depend on the installed version.
kmeans = KMeans(n_clusters=optimal_k, n_init=10, random_state=7)
# Cluster ids are arbitrary labels; their numbering carries no ordering.
data['Cluster'] = kmeans.fit_predict(data_scaled)

# Show the segments in the 2-D PCA projection, coloured by cluster id.
plot.figure(figsize=(8, 6))
sea.scatterplot(
    x=data_pca[:, 0],
    y=data_pca[:, 1],
    hue=data['Cluster'],
    palette='Set2',
)
plot.title('Customer Segments (PCA)')
plot.xlabel('PCA1')
plot.ylabel('PCA2')
plot.show()

## 10) Segment Analysis & Insights


In [None]:
# Per-cluster means of the clustering inputs plus age.
summary_columns = [*features, 'Age']
cluster_summary = data.groupby('Cluster')[summary_columns].mean()
print(cluster_summary)

# NOTE(review): the descriptions below are hard-coded text; verify that they
# match the printed cluster_summary, since cluster numbering is arbitrary.
segment_notes = """Cluster 0: High-income, high-value customers – promote premium products & exclusive events.
Cluster 1: Younger singles, low spending – target via discounts or social campaigns.
Cluster 2: In-relationship, moderate spending – family bundles and loyalty programs.
Cluster 3: Older, modest income/value – basic product promotions, retention efforts.
"""
print(segment_notes)

## 11) Conclusion


In [None]:

# Closing one-line takeaway of the analysis.
conclusion_text = "Distinct customer segments identified with actionable marketing insights for business optimization."
print(conclusion_text)