# Important Libraries to be imported

In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression # Linear model for predicting a numerical target from input features
from sklearn.tree import DecisionTreeRegressor # Decision-tree model for regression (numerical targets)
from sklearn.ensemble import RandomForestRegressor # Ensemble of decision trees for regression
from sklearn.model_selection import train_test_split # Splits the dataset into training and testing sets
from sklearn.metrics import mean_squared_error, r2_score # mean_squared_error - average squared difference between predicted and actual values
# r2_score - coefficient of determination (NOTE(review): not used in the cells visible here)
from sklearn.preprocessing import LabelEncoder # Transforms categorical text labels into integer codes

# Load Dataset

In [3]:
# Read the housing CSV into a DataFrame and preview the top of the table.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will not run on another machine unless it is changed.
housing_csv_path = "E:\\Jupyter\\Housing.csv"
data = pd.read_csv(housing_csv_path)
data.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [12]:
# Summarise column names, non-null counts and dtypes to spot missing values and text columns.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             545 non-null    int64 
 1   area              545 non-null    int64 
 2   bedrooms          545 non-null    int64 
 3   bathrooms         545 non-null    int64 
 4   stories           545 non-null    int64 
 5   mainroad          545 non-null    object
 6   guestroom         545 non-null    object
 7   basement          545 non-null    object
 8   hotwaterheating   545 non-null    object
 9   airconditioning   545 non-null    object
 10  parking           545 non-null    int64 
 11  prefarea          545 non-null    object
 12  furnishingstatus  545 non-null    object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB


In [18]:
# Replace the furnishing-status text labels with integer codes.
# NOTE(review): LabelEncoder numbers classes in sorted order, so the codes
# imply an ordering between categories — confirm the mapping with
# label_encoder.classes_.
label_encoder = LabelEncoder()
furnishing_codes = label_encoder.fit(data['furnishingstatus']).transform(data['furnishingstatus'])
data['furnishingstatus'] = furnishing_codes

In [16]:
# Binary-encode the yes/no columns as 1/0. Each column stays a single column,
# so this is a binary encoding rather than one-hot encoding.
columns = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']

for col in columns:
    # Treat both the raw 'yes' string and an already-encoded 1 as "yes".
    # A plain `x == 'yes'` test maps encoded 1s to 0 when the cell is run a
    # second time, silently zeroing every column; this form is idempotent.
    # Any other value (including 'no') becomes 0.
    data[col] = data[col].isin(['yes', 1]).astype('int64')

In [40]:
data.tail() # All columns have now been converted to numeric values

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
540,1820000,3000,2,1,1,0,0,0,0,0,2,0,2
541,1767150,2400,3,1,1,0,0,0,0,0,0,0,1
542,1750000,3620,2,1,1,0,0,0,0,0,0,0,2
543,1750000,2910,3,1,1,0,0,0,0,0,0,0,0
544,1750000,3850,3,1,2,0,0,0,0,0,0,0,2


## Define features and target variable

In [20]:
# Separate the value being predicted from the columns used to predict it.
y = data['price']  # Target variable: the price column
X = data.drop(columns='price')  # Features: every remaining column

In [21]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Train Linear Regression Model

In [22]:
# Create the linear regression model and fit it on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
linear_reg = LinearRegression().fit(X_train, y_train)
linear_reg  # display the fitted estimator as the cell output

In [23]:
# Predict prices for the held-out test rows with the fitted linear regression model
linear_reg_predictions = linear_reg.predict(X_test)

## Train Decision Tree Regressor

In [24]:
# Create the decision tree regressor (seeded for repeatable results) and fit it
# on the training split. fit() returns the estimator itself.
decision_tree_reg = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
decision_tree_reg  # display the fitted estimator as the cell output

In [25]:
# Predict prices for the held-out test rows with the fitted decision tree model
decision_tree_predictions = decision_tree_reg.predict(X_test)

## Train Random Forest Regressor

In [26]:
# Create the random forest regressor (seeded for repeatable results) and fit it
# on the training split. fit() returns the estimator itself.
random_forest_reg = RandomForestRegressor(random_state=42).fit(X_train, y_train)
random_forest_reg  # display the fitted estimator as the cell output

In [27]:
# Predict prices for the held-out test rows with the fitted random forest model
random_forest_predictions = random_forest_reg.predict(X_test)

## Evaluate the Models

In [28]:
# Score each model's test-set predictions against the true prices.
# The three results are unpacked in the same order as the prediction arrays.
linear_reg_mse, decision_tree_mse, random_forest_mse = (
    mean_squared_error(y_test, predictions)
    for predictions in (
        linear_reg_predictions,
        decision_tree_predictions,
        random_forest_predictions,
    )
)

In [29]:
# Report the test-set mean squared error of each model, one per line.
print(
    f"Linear Regression Mean Squared Error: {linear_reg_mse}",
    f"Decision Tree Regressor Mean Squared Error: {decision_tree_mse}",
    f"Random Forest Regressor Mean Squared Error: {random_forest_mse}",
    sep="\n",
)

Linear Regression Mean Squared Error: 2240240375247.1094
Decision Tree Regressor Mean Squared Error: 3634242630532.5176
Random Forest Regressor Mean Squared Error: 2469090106408.897


## Predicting Prices using Trained Models

In [30]:
# Predict test-set prices with all three fitted models.
# NOTE(review): these are the same predict(X_test) calls made right after each
# model was fitted, so this cell recomputes identical values.
linear_reg_predictions, decision_tree_predictions, random_forest_predictions = (
    fitted_model.predict(X_test)
    for fitted_model in (linear_reg, decision_tree_reg, random_forest_reg)
)

In [42]:
# Assumed features of the new house, keyed by column name rather than by
# position so the values cannot silently drift out of step with the training
# columns. Yes/no columns use the 1/0 encoding and furnishingstatus uses the
# integer code produced earlier in the notebook.
new_house = {
    'area': 3850,
    'bedrooms': 3,
    'bathrooms': 1,
    'stories': 2,
    'mainroad': 0,
    'guestroom': 0,
    'basement': 0,
    'hotwaterheating': 0,
    'airconditioning': 0,
    'parking': 0,
    'prefarea': 0,
    'furnishingstatus': 2,
}

# Build a one-row frame with the same column names and order as the training
# features; a feature missing from `new_house` raises KeyError here. Keeping
# the names also avoids scikit-learn's "X does not have valid feature names"
# warning that a bare NumPy array triggers on models fitted from a DataFrame.
new_house_features = pd.DataFrame([new_house])[list(X.columns)]

# Predicting price for the new house using Linear Regression model
new_house_price_linear_reg = linear_reg.predict(new_house_features)

print(f"Predicted price for the new house (Linear Regression): {new_house_price_linear_reg[0]}")


Predicted price for the new house (Linear Regression): 3585602.0271815853




In [43]:
# Predict the new house's price with the fitted decision tree model.
# predict() returns one value per input row; [0] selects the single prediction.
new_house_price_decision_tree_reg = decision_tree_reg.predict(new_house_features)
print(f"Predicted price for the new house (decision tree): {new_house_price_decision_tree_reg[0]}")

Predicted price for the new house (decision tree): 2432500.0




In [44]:
# Predict the new house's price with the fitted random forest model.
# predict() returns one value per input row; [0] selects the single prediction.
new_house_price_random_forest_reg = random_forest_reg.predict(new_house_features)
print(f"Predicted price for the new house (random forest): {new_house_price_random_forest_reg[0]}")

Predicted price for the new house (random forest): 2569822.5


