<a href="https://colab.research.google.com/github/Varun-55-gvh/ML_23AG1A66D2/blob/main/house%20prices.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ========================
# House Price Prediction by Area
# ========================

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

# ------------------------
# 1. Sample Dataset
# ------------------------
# Toy listings, written one house per row for readability and then
# transposed into the column -> values mapping the DataFrame is built from.
column_names = ('Area', 'Bedrooms', 'Bathrooms', 'Size (sqft)', 'Price')
listings = [
    ('Downtown', 3, 2, 1500, 450000),
    ('Uptown', 4, 3, 2000, 600000),
    ('Suburb', 2, 1, 1200, 300000),
    ('Downtown', 2, 1, 1300, 350000),
    ('Uptown', 5, 4, 2500, 750000),
    ('Suburb', 3, 2, 1600, 400000),
]
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*listings))
}
df = pd.DataFrame(data)

# ------------------------
# 2. Define Features & Target
# ------------------------
# Predictors: the categorical area label plus the three numeric columns.
# Target: the listing price.
feature_columns = ['Area', 'Bedrooms', 'Bathrooms', 'Size (sqft)']
target_column = 'Price'
X = df[feature_columns]
y = df[target_column]

# ------------------------
# 3. Preprocessing (One-Hot Encode Area)
# ------------------------
# One-hot encode the categorical 'Area' column; all remaining columns are
# forwarded untouched via remainder='passthrough'.
#
# handle_unknown='ignore' encodes an area that was not seen during fit as an
# all-zero vector instead of raising at transform time. With only six rows,
# a different random split can leave an area out of the training fold, and
# the default ('error') would then make predict() fail on the held-out rows.
preprocessor = ColumnTransformer(
    transformers=[
        ('area', OneHotEncoder(handle_unknown='ignore'), ['Area'])
    ],
    remainder='passthrough'
)

# ------------------------
# 4. Build Pipeline
# ------------------------
# Chain the column preprocessing and the linear regressor so that a single
# fit/predict call applies both stages in order.
regressor = LinearRegression()
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('regressor', regressor),
]
model = Pipeline(steps=pipeline_steps)

# ------------------------
# 5. Split Data & Train
# ------------------------
# Request a 20% hold-out for evaluation; the fixed seed makes the split
# reproducible from run to run.
holdout_fraction = 0.2
split_seed = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=holdout_fraction,
    random_state=split_seed,
)

# Fit the whole pipeline (encoding + regression) on the training rows only.
model.fit(X_train, y_train)

# ------------------------
# 6. Evaluate Model
# ------------------------
# Score the fitted pipeline on the held-out rows and report the mean
# squared error between the true and predicted prices.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse:.2f}")

# ------------------------
# 7. Predict Prices for New Houses (one per area)
# ------------------------
# Unseen houses to price: same feature columns as the training frame,
# one row per area.
new_data = pd.DataFrame({
    'Area': ['Downtown', 'Uptown', 'Suburb'],
    'Bedrooms': [3, 5, 2],
    'Bathrooms': [2, 4, 1],
    'Size (sqft)': [1600, 2400, 1150]
})

predicted_prices = model.predict(new_data)
# Round to the nearest whole number before casting: a bare astype(int)
# truncates toward zero, which would report e.g. 419230.9 as 419230 and
# bias every displayed price downward.
new_data['Predicted Price'] = predicted_prices.round().astype(int)

print("\nPredicted Prices for New Areas:")
print(new_data)


Mean Squared Error: 2078365499.15

Predicted Prices for New Areas:
       Area  Bedrooms  Bathrooms  Size (sqft)  Predicted Price
0  Downtown         3          2         1600           419231
1    Uptown         5          4         2400           719231
2    Suburb         2          1         1150           284615
