In [1]:
!pip install scikit-learn==1.8.0



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

In [3]:
# Read the house-price CSV into a DataFrame.
DATA_PATH = "/content/indian_house_price_dataset.csv"
df = pd.read_csv(DATA_PATH)

In [4]:
# Show the first five rows as a quick sanity check of the loaded columns.
print(df.head(n=5))

   House_Size_sqft  BHK  Location_Score         Price
0       824.507123  2.0        2.943484  2.573379e+06
1       729.260355  2.0        1.887308  2.825979e+06
2       847.153281  2.0        1.665056  2.529316e+06
3       978.454478  1.0        4.909015  2.524355e+06
4       714.876994  2.0        2.079356  2.078805e+06


In [5]:
# Drop every row that contains at least one missing value.
df = df.dropna(axis=0, how="any")

In [6]:
# Remove outliers: a row is dropped when ANY numeric column lies outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] for that column.
# NOTE: the quartiles are computed from the current `df`, so re-running this
# cell filters the already-filtered frame again.
numeric_cols = ["House_Size_sqft", "BHK", "Location_Score", "Price"]

Q1 = df[numeric_cols].quantile(0.25)
Q3 = df[numeric_cols].quantile(0.75)
IQR = Q3 - Q1

lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR

# Per-row flag: True when at least one column falls outside its fences.
is_outlier = (
    (df[numeric_cols] < lower_fence) | (df[numeric_cols] > upper_fence)
).any(axis=1)

# .copy() makes the filtered result an independent frame, so adding columns to
# `df` later does not trigger pandas' SettingWithCopyWarning.
df = df[~is_outlier].copy()

print("After cleaning:", df.shape)

After cleaning: (1753, 4)


In [7]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

features = ["House_Size_sqft", "BHK", "Location_Score", "Price"]

# Standardise the features: learn the per-column statistics, then apply them.
scaler = StandardScaler()
scaler.fit(df[features])
scaled_data = scaler.transform(df[features])

# Fit a seeded 4-cluster K-Means model and store each row's assigned label.
kmeans = KMeans(n_clusters=4, random_state=42)
kmeans.fit(scaled_data)
df["Cluster"] = kmeans.labels_

# Number of rows assigned to each cluster.
cluster_sizes = df["Cluster"].value_counts()
print(cluster_sizes)

Cluster
0    461
3    439
2    427
1    426
Name: count, dtype: int64


In [8]:
# Map the centroids from standardised units back to the original feature units.
centers = scaler.inverse_transform(kmeans.cluster_centers_)
centers_df = pd.DataFrame(centers, columns=features)

# Row i of cluster_centers_ is the centroid of label i. Take the count from the
# fitted model instead of hard-coding it, so changing n_clusters cannot cause a
# length mismatch here.
centers_df["Cluster"] = range(kmeans.n_clusters)

# Order the centroids from the cheapest to the most expensive cluster.
centers_sorted = centers_df.sort_values("Price").reset_index(drop=True)

print(centers_sorted)

   House_Size_sqft       BHK  Location_Score         Price  Cluster
0       830.907534  1.549763        3.715571  3.190087e+06        2
1      1225.393274  2.392704        6.290235  6.007411e+06        0
2      2180.717413  3.564920        8.020522  1.490924e+07        3
3      3790.588220  5.000000        9.019600  3.480729e+07        1


In [9]:
import pickle

# Persist the fitted scaler and K-Means model, one pickle file each.
# NOTE(review): only these two objects are written; the cluster-id -> segment
# name mapping is not saved, so a consumer of the files must rebuild it.
for filename, fitted_obj in (("scaler.pkl", scaler), ("kmeans.pkl", kmeans)):
    with open(filename, "wb") as f:
        pickle.dump(fitted_obj, f)

print("✅ Models saved as pkl")

‚úÖ Models saved as pkl


In [10]:
# Segment names listed from the cheapest to the most expensive cluster; they
# line up with the rows of `centers_sorted`, which is sorted by Price.
segment_names = [
    "Budget Segment",
    "Mid-Range Segment",
    "Premium Segment",
    "Luxury Segment",
]

# Key: K-Means cluster id found at each price rank. Value: the segment name.
segment_map = {
    centers_sorted.loc[rank, "Cluster"]: name
    for rank, name in enumerate(segment_names)
}

print(segment_map)

{np.int64(2): 'Budget Segment', np.int64(0): 'Mid-Range Segment', np.int64(3): 'Premium Segment', np.int64(1): 'Luxury Segment'}


In [11]:
import gradio as gr

def predict_cluster(size, bhk, location, price_cr):
    """Return the predicted market-segment label for a single house.

    Args:
        size: House size in square feet.
        bhk: BHK value of the house.
        location: Location score.
        price_cr: Price in crores of rupees; converted to rupees before scaling.

    Returns:
        A string of the form "Predicted Market Segment: <segment name>".
    """
    # Convert crores → rupees (1 crore = 10,000,000).
    price = price_cr * 10000000

    # The scaler was fitted on a DataFrame, so pass a DataFrame carrying the
    # same column names. This avoids scikit-learn's "X does not have valid
    # feature names" warning and lets it check the column order.
    input_data = pd.DataFrame(
        [[size, bhk, location, price]],
        columns=scaler.feature_names_in_,
    )

    # Scale input
    input_scaled = scaler.transform(input_data)

    # Predict cluster
    cluster = kmeans.predict(input_scaled)[0]

    return f"Predicted Market Segment: {segment_map[cluster]}"


# Gradio UI: four sliders, passed positionally to predict_cluster in this order
# (size, bhk, location, price in crores); the result is shown as plain text.
interface = gr.Interface(
    fn=predict_cluster,
    inputs=[
        gr.Slider(300, 5000, step=50, label="House Size (sqft)"),
        gr.Slider(1, 6, step=1, label="BHK"),
        gr.Slider(1, 10, step=0.5, label="Location Score"),
        gr.Slider(0.2, 5, step=0.1, label="Price (Crores ₹)")
    ],
    outputs="text",
    title="🏠 Indian Housing Market Segment Predictor"
)

# Start the app server for the interface.
interface.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f45e512305a4c45ab7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


