In [None]:
!pip install numpy pandas matplotlib seaborn scikit-learn


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


In [None]:
# Limit OpenMP to one thread for this process.
# NOTE(review): numpy and scikit-learn are already imported by the time this
# cell runs; confirm the setting still takes effect, or move it above the imports.
os.environ.update({"OMP_NUM_THREADS": "1"})


In [None]:
# Read the customer records from the CSV file in the working directory.
csv_path = "Mall_Customers.csv"
df = pd.read_csv(csv_path)

# Preview the top rows as the cell output.
df.head()


In [None]:
# DataFrame.info() writes its report itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
df.info()  # column dtypes and non-null counts

# Number of missing values per column
print(df.isnull().sum())


In [None]:
# Bar chart counting the customers under each Gender value.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x="Gender", palette="coolwarm", ax=ax)
ax.set_title("Gender Distribution")
plt.show()


In [None]:
# Annual income against spending score, coloured by Gender.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x="Annual Income (k$)",
    y="Spending Score (1-100)",
    hue="Gender",
    palette="coolwarm",
    ax=ax,
)
ax.set_title("Spending Score vs Annual Income")
plt.show()


In [None]:
# Columns used as clustering features.
feature_cols = ["Age", "Annual Income (k$)", "Spending Score (1-100)"]
X = df[feature_cols]


In [None]:
# Fit the scaler on the selected features, then apply it to the same data.
# The fitted scaler is kept in `scaler`; the scaled array goes to `X_scaled`.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [None]:
# Elbow method: fit K-Means for every k in 1..10 and record each fit's inertia.
K_range = range(1, 11)
inertia = []

for k in K_range:
    # fit() returns the estimator itself, so the fitted model is bound directly.
    kmeans = KMeans(n_clusters=k, init='k-means++', random_state=42, n_init='auto').fit(X_scaled)
    inertia.append(kmeans.inertia_)

# Plot inertia against the number of clusters.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(K_range, inertia, marker="o", linestyle="--")
ax.set_xlabel("Number of Clusters")
ax.set_ylabel("Inertia")
ax.set_title("Elbow Method to Determine Optimal K")
plt.show()


In [None]:
# Number of clusters chosen from the elbow plot (example: K=5)
optimal_k = 5

# Fit K-Means on the scaled features and store each row's label on the frame.
kmeans = KMeans(n_clusters=optimal_k, init='k-means++', random_state=42, n_init='auto')
kmeans.fit(X_scaled)
df["Cluster"] = kmeans.labels_

# Number of customers assigned to each cluster
df["Cluster"].value_counts()


In [None]:
# Income against spending score, coloured by the K-Means cluster label.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x="Annual Income (k$)",
    y="Spending Score (1-100)",
    hue="Cluster",
    palette="viridis",
    s=100,
    ax=ax,
)
ax.set(
    title="Customer Segments Based on Income and Spending Score",
    xlabel="Annual Income (k$)",
    ylabel="Spending Score (1-100)",
)
plt.show()


In [None]:
from mpl_toolkits.mplot3d import Axes3D

# Project the scaled features onto three principal components.
pca = PCA(n_components=3)
X_pca = pca.fit_transform(X_scaled)

# One coordinate vector per component (the rows of the transposed projection).
pc1, pc2, pc3 = X_pca.T

# 3D scatter of the components, coloured by the K-Means cluster label.
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection="3d")
ax.scatter(pc1, pc2, pc3, c=df["Cluster"], cmap="viridis", s=50)
ax.set_title("3D View of Customer Segments")
ax.set_xlabel("PCA 1")
ax.set_ylabel("PCA 2")
ax.set_zlabel("PCA 3")
plt.show()


In [None]:
# Libraries needed for hierarchical clustering
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering

# Ward linkage over the scaled features, drawn as a dendrogram.
ward_links = linkage(X_scaled, method="ward")
fig, ax = plt.subplots(figsize=(10, 6))
dendrogram(ward_links)  # draws on the current axes created just above
ax.set_title("Dendrogram for Hierarchical Clustering")
ax.set_xlabel("Customers")
ax.set_ylabel("Distance")
plt.show()

# Agglomerative clustering into five groups; labels are stored on the frame.
h_cluster = AgglomerativeClustering(n_clusters=5, linkage="ward")
h_cluster.fit(X_scaled)
df["H_Cluster"] = h_cluster.labels_

# Income against spending score, coloured by the hierarchical cluster label.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x="Annual Income (k$)",
    y="Spending Score (1-100)",
    hue="H_Cluster",
    palette="viridis",
    s=100,
    ax=ax,
)
ax.set(
    title="Hierarchical Customer Segments",
    xlabel="Annual Income (k$)",
    ylabel="Spending Score (1-100)",
)
plt.show()


In [None]:
# Per-cluster profile: mean of each clustering feature.
# Selecting the feature columns explicitly keeps other numeric columns, such as
# the H_Cluster label (whose average is not meaningful), out of the summary.
df.groupby("Cluster")[["Age", "Annual Income (k$)", "Spending Score (1-100)"]].mean()


In [None]:
# Write the frame (with the cluster label columns added above) to CSV, omitting the index.
df.to_csv(path_or_buf="Mall_Customers_Clustered.csv", index=False)


In [None]:
pip install streamlit pandas scikit-learn joblib


In [None]:
%%writefile app.py

import streamlit as st
import pandas as pd
import joblib

# Load trained model
model = joblib.load("customer_segmentation_model.pkl")

st.title("Customer Segmentation Predictor")

# Input fields
income = st.number_input("Annual Income (k$)", min_value=0, max_value=200, value=50)
spending = st.number_input("Spending Score (1-100)", min_value=0, max_value=100, value=50)

# Prediction
if st.button("Predict Segment"):
    df = pd.DataFrame([[income, spending]], columns=["Annual Income (k$)", "Spending Score (1-100)"])
    cluster = model.predict(df)
    st.success(f"Predicted Customer Segment: {cluster[0]}")


In [None]:
import pandas as pd
import joblib
from sklearn.cluster import KMeans

# NOTE: this cell rebinds `df`, `X` and `kmeans`, replacing the objects built
# earlier in the notebook (including the Cluster / H_Cluster columns on `df`).

# Load dataset
df = pd.read_csv("Mall_Customers.csv")

# Select features for clustering: the same two columns app.py passes to predict().
# The model is fitted on the raw (unscaled) values.
X = df[['Annual Income (k$)', 'Spending Score (1-100)']]

# Train KMeans model. `n_init` is set explicitly, consistent with the other
# KMeans fits in this notebook, so the result does not depend on the
# version-specific default (and no FutureWarning is emitted).
kmeans = KMeans(n_clusters=5, init='k-means++', random_state=42, n_init='auto')
kmeans.fit(X)

# Save the trained model
joblib.dump(kmeans, "customer_segmentation_model.pkl")

print("Model saved successfully as customer_segmentation_model.pkl!")


In [None]:
import os

# Resolve the saved model's file name to an absolute path, then load it back.
MODEL_FILE = "customer_segmentation_model.pkl"
model_path = os.path.abspath(MODEL_FILE)
model = joblib.load(model_path)


In [None]:
# To start the app, run this from a command prompt: streamlit run app.py