# Predicting Home Prices Using Multivariate Linear Regression

In [31]:
# import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
import math

### Load the dataset

In [4]:
# Path of the CSV that holds the housing records, relative to the notebook's working directory.
DATA_PATH = "home_prices.csv"

# Read the file into a DataFrame and show it as the cell's output.
df = pd.read_csv(DATA_PATH)
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


### Display the initial dataframe

In [34]:
# NOTE(review): the saved output of this cell already shows bedrooms = 4.0 in row 2,
# so it was last executed after the missing value had been filled in. Restart the
# kernel and run all cells in order to see the raw data (with the gap) here.
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,4.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


# Data Preparation

## Handling Missing Values

### Calculate the median number of bedrooms (rounded down to a whole number)

In [37]:
# Median of the bedroom counts (pandas skips missing entries by default),
# rounded down so the fill value is a whole number of bedrooms.
bedrooms_median_raw = df["bedrooms"].median()
median_bedrooms = math.floor(bedrooms_median_raw)
print(f"Median Bedrooms: {median_bedrooms}")

Median Bedrooms: 4


### Fill missing bedroom values with the calculated median

In [12]:
# Replace every missing bedroom count with the median computed earlier and
# write the result back into the same column; after this the frame has no
# gaps left and can be used to train the model.
bedrooms_filled = df["bedrooms"].fillna(median_bedrooms)
df["bedrooms"] = bedrooms_filled
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,4.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


### Display the dataframe after handling missing values

In [38]:
# Caption goes to stdout; the frame itself is the cell's rich output.
caption = "DataFrame after filling missing values:"
print(caption)
df

DataFrame after filling missing values:


Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,4.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


# Train the Multivariate Linear Regression Model

### Feature Selection and fitting the model

In [39]:
# Predictors and target for the regression.
feature_columns = ["area", "bedrooms", "age"]
X = df[feature_columns]
y = df["price"]

# Ordinary least-squares fit of price on the three predictors.
reg = linear_model.LinearRegression()
reg.fit(X, y)

### Display coefficients and intercept

In [40]:
# Fitted parameters: one slope per feature (area, bedrooms, age) plus the intercept.
slopes = reg.coef_
offset = reg.intercept_
print("Coefficients:", slopes)
print("Intercept:", offset)

Coefficients: [  112.06244194 23388.88007794 -3231.71790863]
Intercept: 221323.0018654043


# Model Equation

### The model equation can be represented as:

$$y = m_1 \cdot \text{area} + m_2 \cdot \text{bedrooms} + m_3 \cdot \text{age} + b$$

where:

- $y$ = predicted price
- $m_1, m_2, m_3$ = coefficients for area, bedrooms, and age
- $b$ = intercept


# Predicting Home Price

### Example Prediction
#### Predict the price of a home with area = 3000 sq ft, bedrooms = 3, and age = 40 years

In [41]:
# Query the model with a one-row DataFrame that carries the same column names
# (in the same order) as the training features. The model was fitted on a
# DataFrame, and recent scikit-learn versions warn when predict() is then given
# a bare list ("X does not have valid feature names"); named columns also guard
# against accidentally passing the values in the wrong order.
new_home = pd.DataFrame({"area": [3000], "bedrooms": [3], "age": [40]})

# predict() returns one value per input row; take the single prediction.
# Note: age = 40 lies outside the ages present in the training data (8-30),
# so this prediction is an extrapolation.
predicted_price = reg.predict(new_home)[0]
print(f"Predicted Price for a home with area 3000 sq ft, 3 bedrooms, and age 40: ${predicted_price:.2f}")

Predicted Price for a home with area 3000 sq ft, 3 bedrooms, and age 40: $498408.25




# Verifying the prediction using manual calculation

In [42]:
# Re-derive the prediction by hand from the fitted parameters, using the same
# symbols as the model equation: y = m1 * area + m2 * bedrooms + m3 * age + b.
m1, m2, m3 = reg.coef_
b = reg.intercept_
manual_prediction = (m1 * 3000) + (m2 * 3) + (m3 * 40) + b
print(f"Manual Calculation of Predicted Price: ${manual_prediction:.2f}")


Manual Calculation of Predicted Price: $498408.25


# The model has been trained and is capable of predicting home prices based on area, bedrooms, and age.