# 📊 Data Analysis and Data Science Task - 3
### Customer Segmentation using K-Means Clustering

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score


In [None]:
# 1. Load synthetic dataset
# A seeded generator makes the synthetic customers identical on every run, so
# the clusters, plots and scores produced further down are reproducible.
rng = np.random.default_rng(42)
n_customers = 200
df = pd.DataFrame({
    "CustomerID": range(1, n_customers + 1),
    # Upper bounds are exclusive: ages 18-69, incomes 15000-99999, scores 1-99.
    "Age": rng.integers(18, 70, n_customers),
    "Annual Income": rng.integers(15000, 100000, n_customers),
    "Spending Score": rng.integers(1, 100, n_customers),
})
df.head()


In [None]:
# 2. Explore data
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
# Summary statistics for the numeric columns.
print(df.describe())
# Count of fully duplicated rows and of missing values per column.
print("Duplicates:", df.duplicated().sum())
print("Missing values:\n", df.isnull().sum())


In [None]:
# 3. Preprocess
# Only the three numeric attributes are clustered; CustomerID is left out.
feature_columns = ["Age", "Annual Income", "Spending Score"]
X = df[feature_columns]
# Fit the scaler on the features, then standardize those same features so
# every column contributes on a comparable scale.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [None]:
# 4. Elbow Method
# Fit K-Means once per candidate cluster count and record the model's inertia
# (within-cluster sum of squares). The point where the curve stops dropping
# sharply is the usual heuristic for choosing the number of clusters.
cluster_counts = range(1, 11)
wcss = []
for n_clusters in cluster_counts:
    # n_init is pinned: scikit-learn's default changed from 10 to "auto" in
    # version 1.4, so leaving it implicit makes the curve version-dependent
    # (and triggers a FutureWarning on 1.2-1.3).
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    kmeans.fit(X_scaled)
    wcss.append(kmeans.inertia_)

plt.figure(figsize=(8, 5))
# The same range drives both the fits and the x-axis so they cannot drift apart.
plt.plot(cluster_counts, wcss, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('WCSS')
plt.grid()
plt.show()


In [None]:
# 5. Apply K-Means (Assume optimal k = 4)
# k is chosen by hand; adjust it after inspecting the elbow plot.
k = 4
# n_init is pinned because scikit-learn's default changed from 10 to "auto" in
# version 1.4; an explicit value keeps the segmentation stable across versions.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0)
# Fit on the standardized features and store each customer's cluster label.
df['Cluster'] = kmeans.fit_predict(X_scaled)


In [None]:
# 6. PCA for 2D visualization
# Project the standardized features onto their first two principal components
# so the cluster assignments can be drawn on a flat scatter plot.
pca = PCA(n_components=2)
pca_components = pca.fit_transform(X_scaled)
# Transposing gives one row per component, which unpacks into the two columns.
df["PCA1"], df["PCA2"] = pca_components.T

plt.figure(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x="PCA1",
    y="PCA2",
    hue="Cluster",
    palette="Set2",
)
plt.title("Customer Segments via PCA")
plt.show()


In [None]:
# 7. Silhouette Score
# Score the stored cluster labels against the standardized features that the
# clustering was fitted on.
score = silhouette_score(X=X_scaled, labels=df["Cluster"])
print("Silhouette Score:", score)


In [None]:
# 8. Insights
# Cluster ids are arbitrary labels assigned by K-Means, so print each cluster's
# mean feature values and size first. The narrative below is fixed text and
# must be checked against this table before it is relied upon.
cluster_profile = (
    df.groupby("Cluster")[["Age", "Annual Income", "Spending Score"]]
    .mean()
    .round(1)
)
# Both sides are indexed by cluster id, so the sizes align row by row.
cluster_profile["Customers"] = df.groupby("Cluster").size()
print("Cluster profile (mean feature values and size):")
print(cluster_profile)

print("""
Cluster Insights:
- Cluster 0: Likely high-income and high-spending customers (Premium segment).
- Cluster 1: Young or low-income, low-spending (Budget-conscious).
- Cluster 2: Balanced group, middle-aged average spenders.
- Cluster 3: Potential churn or low-interaction segment.

Recommendations:
- Target Cluster 0 for premium offers and loyalty programs.
- Design discounts for Cluster 1 to increase engagement.
- Upsell opportunities exist in Cluster 2.
- Monitor Cluster 3 for retention strategies.
""")
