# ***************** House Price Prediction model *****************

# Importing Required Libraries

In [203]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
%matplotlib inline

# Read the dataset from a CSV file into a pandas DataFrame
- Assign the house features (`bhk`, `size`, `location`) from the dataset to `X` and the target (`price`) to `y`

In [204]:
# Load the listings from the CSV file next to the notebook.
housedata = pd.read_csv("House Dataset.csv")

# Predictors: the three columns selected below; target: the 'price' column.
feature_columns = ['bhk', 'size', 'location']
X = housedata[feature_columns]
y = housedata['price']

# Show both objects, one after the other.
print(X, y, sep='\n')


      bhk  size         location
0       3  1500  CHRISTIAN BASTI
1       3  1751       LAL GANESH
2       2   750         BORAGAON
3       2  1100       BAGHARBARI
4       3  1630          BELTOLA
...   ...   ...              ...
3406    2   965          GARCHUK
3407    3  1375          LALMATI
3408    2   860           DISPUR
3409    3  1400       BORMOTORIA
3410    3  1450       GANESHGURI

[3411 rows x 3 columns]
0       45.00
1       78.79
2       31.50
3       50.00
4       65.20
        ...  
3406    43.72
3407    65.00
3408    43.00
3409    60.20
3410    58.10
Name: price, Length: 3411, dtype: float64


# Train-Test Split
- Splitting the dataset into training (80%) and testing (20%) sets.

In [205]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

# Preprocessing Pipeline
- Creating a preprocessing pipeline that scales the numerical features (`bhk`, `size`)
- and one-hot encodes the categorical feature (`location`).

In [206]:
# Column names routed to the numeric and the categorical transformer respectively.
num_features = ["bhk", "size"]
cat_features = ["location"]

In [207]:
# Numeric columns: a single-step pipeline wrapping StandardScaler.
scaling_steps = [('scaler', StandardScaler())]
num_transformer = Pipeline(steps=scaling_steps)

# Categorical columns: one-hot encoding; handle_unknown='ignore' means a
# category not seen during fit does not raise at transform time.
encoding_steps = [('onehot', OneHotEncoder(handle_unknown='ignore'))]
cat_transformer = Pipeline(steps=encoding_steps)

In [208]:
# Each entry is (name, transformer, columns it applies to).
column_specs = [
    ('num', num_transformer, num_features),
    ('cat', cat_transformer, cat_features),
]
preprocessor = ColumnTransformer(transformers=column_specs)

# Linear Regression Model
- Creates a pipeline that applies the preprocessing steps and then fits a linear regression model

In [209]:
# Full model: preprocessing first, then an ordinary least-squares regressor.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
]
model = Pipeline(steps=pipeline_steps)

# Fit and Train the Model 
- Fits the model on the training data

In [210]:
# Fit the preprocessing steps and the regressor on the training split only.
# Left as the cell's last expression so the fitted pipeline is displayed.
model.fit(X_train, y_train)

# Predictions and Model Evaluation
- Predicting the target variable on the test set
- Evaluates the model using the R-squared score

In [211]:
# Predict the target for the held-out test rows.
y_pred = model.predict(X_test)

In [212]:
# Look up the fitted 'regressor' step of the pipeline and show its coefficients.
fitted_regressor = model.named_steps['regressor']
print("Coefficients : ", fitted_regressor.coef_)

Coefficients :  [ 5.64151108e+00  2.72581040e+01 -1.54080681e+01 -4.68737854e+00
 -2.10649901e+01 -4.38380096e+00 -1.58696623e+01 -2.05138697e+01
  3.23408615e+00 -1.03572910e+00  2.47283287e+02  2.24980166e+01
 -1.81442045e+01 -1.09863272e+01 -2.60051506e-01 -1.07198177e+00
 -2.05317641e+01 -2.52656693e+01 -2.14142416e+01 -1.73819001e+01
 -1.32436179e+01 -1.34478476e+01 -3.56169780e+01 -5.67982384e+00
 -3.75822898e+00 -9.81439071e+00  2.03312484e+00  4.14471953e+01
 -1.52622258e+01 -7.03651180e+00 -2.99011999e+01 -2.21800757e+01
 -4.14966700e+00 -1.88691220e+00 -1.10678304e+01 -3.33001239e+01
 -1.03615164e+01  3.94718387e+00 -3.09499634e+01 -6.35374419e+00
  3.35941608e+00 -1.28008899e+01 -1.57632565e+01 -2.33978224e+01
 -7.07945934e-01 -1.52451566e+01 -8.65308855e+00  8.69678752e+02
 -5.16764999e+00 -9.68085745e+00 -9.55011371e+00  1.79865682e+01
 -1.87557251e+01 -8.06592611e+00  5.82856193e+00 -1.34039082e+01
 -1.90044528e+01 -6.64589494e+00 -2.06971504e+01 -1.66753190e+01
 -3.43601

In [213]:
# Coefficient of determination of the test-set predictions.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R-squared score :", r2)

R-squared score : 0.7096124829658774


# New Data for Prediction

In [214]:
# Three hand-written example listings, one row each, using the same column
# names the model was trained on.
example_listings = [
    (3, 1200, 'BELTOLA'),
    (2, 900, 'LAL GANESH'),
    (4, 1600, 'SIX MILE'),
]
new_data = pd.DataFrame(example_listings, columns=['bhk', 'size', 'location'])

# Make Predictions on New Data

In [215]:
# Run the example listings through the fitted pipeline.
new_data_predictions = model.predict(new_data)

# Display Predictions for New Data

In [216]:
# Heading on the first line, predicted values on the next.
print('Predictions for new data:', new_data_predictions, sep='\n')

Predictions for new data:
[70.71210891 39.77839384 92.55285205]


# New User Data for Prediction

In [217]:
# Read one listing from the user. int() raises ValueError on non-numeric input.
bhk = int(input("Enter BHK :"))
size = int(input("Enter Size :"))

# The one-hot encoder is built with handle_unknown='ignore', so a location that
# does not exactly match a training category is silently treated as "no
# location" rather than raising. Trim whitespace and upper-case the text so that
# e.g. " beltola " still matches 'BELTOLA'.
# NOTE(review): the dataset rows shown in this notebook use upper-case location
# names; this assumes the whole column does — confirm against the CSV.
location = input("Enter Location :").strip().upper()

In [218]:
# Wrap the user's answers in a one-row frame with the training column names.
user_row = {'bhk': [bhk], 'size': [size], 'location': [location]}
userdata = pd.DataFrame(user_row)

In [219]:
# Predict for the single user-supplied listing.
userdata_prediction = model.predict(userdata)

In [220]:
# Heading on the first line, predicted value on the next.
print("Prediction for user data", userdata_prediction, sep='\n')

Prediction for user data
[54.59575314]
