## Machine learning model to predict house
## prices using linear regression only
## (Jupyter notebook code)

#### Importing necessary libraries

In [1]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

#### Load the dataset

In [2]:
# Location of the Excel workbook. The default is the original author's local
# download folder; set the HOUSE_PRICE_XLSX environment variable to point at the
# file on another machine instead of editing this cell.
data_path = os.environ.get(
    "HOUSE_PRICE_XLSX",
    r"C:\Users\Yogesh Choudhary\Downloads\HousePricePrediction.xlsx",
)
df = pd.read_excel(data_path)

#### Display basic information about the dataset

In [3]:
# DataFrame.info() writes its summary straight to stdout and returns None, so it
# is called on its own; wrapping it in print() emits an extra "None" line.
df.info()
# First rows, as a quick sanity check of the loaded columns and values.
print(df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2919 entries, 0 to 2918
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Id            2919 non-null   int64  
 1   MSSubClass    2919 non-null   int64  
 2   MSZoning      2915 non-null   object 
 3   LotArea       2919 non-null   int64  
 4   LotConfig     2919 non-null   object 
 5   BldgType      2919 non-null   object 
 6   OverallCond   2919 non-null   int64  
 7   YearBuilt     2919 non-null   int64  
 8   YearRemodAdd  2919 non-null   int64  
 9   Exterior1st   2918 non-null   object 
 10  BsmtFinSF2    2918 non-null   float64
 11  TotalBsmtSF   2918 non-null   float64
 12  SalePrice     1460 non-null   float64
dtypes: float64(3), int64(6), object(4)
memory usage: 296.6+ KB
None
   Id  MSSubClass MSZoning  LotArea LotConfig BldgType  OverallCond  \
0   0          60       RL     8450    Inside     1Fam            5   
1   1          20       RL     9600  

#### Data Preprocessing
#### Handling missing values

In [4]:
# Keep only rows with no missing value in any column. In the info() output
# recorded above, SalePrice has far fewer non-null entries than the other
# columns, so this step mainly removes rows that have no target value.
# Reassign instead of using inplace=True so the frame returned by read_excel is
# not mutated behind the reader's back.
df = df.dropna()

#### Selecting features and target variable

In [5]:
# Columns used as model inputs.
selected_features = [
    'LotArea',
    'OverallCond',
    'YearBuilt',
    'YearRemodAdd',
    'BsmtFinSF2',
    'TotalBsmtSF',
]
# Column the model is trained to predict.
target_column = 'SalePrice'

X = df.loc[:, selected_features]
y = df.loc[:, target_column]

#### Splitting the dataset into training and testing sets

In [6]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### Creating and training the Linear Regression model

In [7]:
# fit() hands back the estimator it was called on, so construction and training
# are chained here.
model = LinearRegression().fit(X_train, y_train)
# Trailing expression so the cell still displays the fitted estimator.
model

LinearRegression()

#### Model evaluation

In [8]:
# 1) Predict on both partitions.
train_predictions = model.predict(X_train)
test_predictions = model.predict(X_test)

# 2) Root-mean-squared error: square root of the mean squared error.
train_mse = mean_squared_error(y_train, train_predictions)
test_mse = mean_squared_error(y_test, test_predictions)
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)

# 3) Report, training score first.
print("Train RMSE:", train_rmse)
print("Test RMSE:", test_rmse)

Train RMSE: 53058.131315409046
Test RMSE: 57148.83762660893


#### Predicting house prices

In [9]:
# One hypothetical house, described with the same columns the model was fitted on.
house = {
    'LotArea': 8000,
    'OverallCond': 5,
    'YearBuilt': 2000,
    'YearRemodAdd': 2010,
    'BsmtFinSF2': 100,
    'TotalBsmtSF': 1200,
}
# A list holding a single record produces a one-row frame.
new_data = pd.DataFrame([house])

# predict() returns one value per input row, hence the one-element array printed.
predicted_price = model.predict(new_data)
print("Predicted Price:", predicted_price)

Predicted Price: [224007.74777095]
