In [1]:
# 1. Import Libraries
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder


In [2]:
# Step 2: Load Dataset

# Location of the raw housing data, relative to the notebook's working
# directory. Make sure your CSV is in this path.
csv_path = "data/Housing.csv"

# NOTE(review): the feature matrix built later keeps every non-price column
# of df, so a saved index column in the CSV would silently become a feature.
# Confirm the file's columns when changing the data source.
df = pd.read_csv(csv_path)

# Preview the first rows as a sanity check on the parsed columns.
df.head()


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [3]:
# Step 3: Preprocess Data

# Target: the sale price we want to predict.
y = df['price']

# Features: every column of the frame except the target.
X = df.drop(columns='price')

# Columns holding string categories; these must be encoded numerically
# before they can be fed to a linear model.
categorical_features = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
    'furnishingstatus',
]

# One-hot encode the categorical columns. drop='first' omits one indicator
# per feature so the encoded columns are not perfectly collinear.
one_hot = OneHotEncoder(drop='first')

# Apply the encoder to the categorical columns only; all remaining columns
# are forwarded unchanged (remainder='passthrough').
column_transformer = ColumnTransformer(
    transformers=[('encoder', one_hot, categorical_features)],
    remainder='passthrough',
)


In [4]:
# Step 4: Create and Train the Model

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split (and therefore the reported score) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Chain preprocessing and the regressor so that the encoder is fitted on the
# training rows only and the same transformation is reapplied at predict time.
pipeline = Pipeline(steps=[
    ('preprocessor', column_transformer),
    ('model', LinearRegression())
])

# Fit the encoder and the linear model on the training split.
pipeline.fit(X_train, y_train)

# Status message (emoji restored from its mis-encoded form).
print("✅ Model trained successfully.")


✅ Model trained successfully.


In [5]:
# Step 5: Evaluate (optional)

# Pipeline.score delegates to the final estimator; for LinearRegression this
# is the coefficient of determination (R^2) on the held-out test split.
score = pipeline.score(X_test, y_test)

# Report the score to four decimals (emoji restored from its mis-encoded form).
print(f"📊 Model R^2 Score: {score:.4f}")


📊 Model R^2 Score: 0.6529


In [7]:
# Step 6: Save the model

# Single source of truth for the artifact location: the directory to create
# and the confirmation message are both derived from this path, so they
# cannot drift apart.
model_path = "models/house_price_model.pkl"

# Create the target directory if it doesn't exist
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# The artifact is a (fitted pipeline, feature column index) tuple.
# NOTE(review): the column index is presumably stored so a consumer can
# rebuild input frames with the training column layout - confirm against
# the code that loads this file.
# Pickle files can execute arbitrary code when loaded; only load this
# artifact from a trusted location.
with open(model_path, "wb") as f:
    pickle.dump((pipeline, X.columns), f)

# Confirmation message (emoji restored from its mis-encoded form).
print(f"💾 Model saved as {model_path}")


💾 Model saved as models/house_price_model.pkl
