<a href="https://colab.research.google.com/github/dibya-8/Elevate-Labs-/blob/main/House_Price_Analysis_and_Prediction%F0%9F%93%8A%F0%9F%8F%A0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch the two Kaggle datasets through kagglehub and keep the value each
# download call returns in a *_path variable.
# NOTE(review): harishkumardatalab_housing_price_prediction_path is not
# referenced by any later cell visible in this notebook — confirm it is needed.
harishkumardatalab_housing_price_prediction_path = kagglehub.dataset_download('harishkumardatalab/housing-price-prediction')
muzammilbaloch_pricehouse_path = kagglehub.dataset_download('muzammilbaloch/pricehouse')

print('Data source import complete.')


In [None]:
pip install --progress-bar off --upgrade --quiet pycaret[full]

In [None]:
# Quiet install of the base pycaret package through a shell escape.
# NOTE(review): the previous cell already installs pycaret[full]; this looks
# redundant — confirm before removing.
!pip install --quiet pycaret

In [None]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

import json

import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)

from pycaret.regression import *
import ast
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.express as px
import seaborn as sns

<a id="3"></a>
<div style="font-family: 'Courier New', Courier, monospace; font-weight: bold; letter-spacing: 1px; color: #1976D2; font-size: 140%; text-align: left; padding: 10px; background: #BBDEFB; border-bottom: 8px solid #0D47A1;">
    Assessing Data ....
</div>

In [None]:
# Load the housing table from the directory that kagglehub reported for the
# 'muzammilbaloch/pricehouse' dataset instead of a hard-coded /kaggle/input
# path, so the cell also works outside Kaggle (e.g. on Colab).
import os

data = pd.read_csv(os.path.join(muzammilbaloch_pricehouse_path, "Housing.csv"))
# Preview the first five rows.
data.head(5)

In [None]:
# Show how many distinct values each column holds.
data.nunique()

In [None]:
# Print pandas' summary of the DataFrame (columns, non-null counts, dtypes).
data.info()


<a id="3"></a>
<div style="font-family: 'Courier New', Courier, monospace; font-weight: bold; letter-spacing: 1px; color: #1976D2; font-size: 140%; text-align: left; padding: 10px; background: #BBDEFB; border-bottom: 8px solid #0D47A1;">
    <h2 style="text-align: left; font-size: 28px; color: #0F3271;">Setting up PyCaret</h2>
    <p style="font-size: 18px;">Configure PyCaret for the machine learning experiment:</p>
</div>

In [None]:
# Initialise the PyCaret regression experiment on the housing table; the
# object returned by setup() is kept in `s`.
s = setup(data=data,
          target= 'price',  # column the models are trained to predict
          experiment_name='HousePrice',
          session_id=42,  # fixed seed, presumably for reproducible runs — see PyCaret setup docs
          train_size=0.8)  # presumably an 80% train / 20% hold-out split — confirm in PyCaret docs

In [None]:
# Let PyCaret compare its candidate regressors and keep the model it returns
# as the best one.
best_model = compare_models()

In [None]:
# Produce the final model from the selected one. PyCaret documents
# finalize_model as refitting on the entire dataset, hold-out included.
finalized_model = finalize_model(best_model)

<a id="3"></a>
<div style="font-family: 'Courier New', Courier, monospace; font-weight: bold; letter-spacing: 1px; color: #1976D2; font-size: 140%; text-align: left; padding: 10px; background: #BBDEFB; border-bottom: 8px solid #0D47A1;">
    Evaluate the model and make predictions
</div>

In [None]:
# Score the model on the hold-out split. The pre-finalize `best_model` is used
# on purpose: `finalized_model` was refit on the whole dataset (hold-out
# included), so evaluating it here would report misleading in-sample metrics.
predict_model(best_model)

In [None]:
# Draw the feature-importance plot of the finalized model.
plot_model(finalized_model, plot='feature')

Based on the feature importance order provided, let's focus on creating visualizations for the following features in relation to predicting house prices:

1. Bathrooms
2. Air Conditioning
3. Hotwater Heating
4. Prefarea
5. Stories
6. MainRoad
7. Basement
8. Guestroom
9. Parking
10. Furnishing Status

The goal is to visually explore how these features impact house prices, following the feature importances reported by the selected model.

In [None]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Build a 5x2 grid of box plots, one panel per feature, each showing how the
# price distribution varies across that feature's values.

# One entry per panel, in grid order (left to right, top to bottom):
# (column in `data`, marker colour, subplot/trace title, x-axis label).
panel_specs = [
    ('bathrooms', 'blue', 'Bathrooms vs Price', "Number of Bathrooms"),
    ('airconditioning', 'orange', 'Air Conditioning vs Price', "Air Conditioning"),
    ('hotwaterheating', 'green', 'Hot Water Heating vs Price', "Hot Water Heating"),
    ('prefarea', 'red', 'Preferred Area vs Price', "Preferred Area"),
    ('stories', 'purple', 'Stories vs Price', "Number of Stories"),
    ('mainroad', 'cyan', 'Main Road vs Price', "Proximity to Main Road"),
    ('basement', 'gray', 'Basement vs Price', "Basement"),
    ('guestroom', 'pink', 'Guestroom vs Price', "Guestroom"),
    ('parking', 'brown', 'Parking vs Price', "Parking"),
    ('furnishingstatus', 'yellow', 'Furnishing Status vs Price', "Furnishing Status"),
]

# 1-based (row, col) grid position of every panel in the two-column layout.
grid_cells = [(index // 2 + 1, index % 2 + 1) for index in range(len(panel_specs))]

fig = make_subplots(rows=5, cols=2,
                    subplot_titles=tuple(spec[2] for spec in panel_specs),
                    vertical_spacing=0.08,
                    horizontal_spacing=0.1)

# One box trace per feature, placed in its grid cell.
for (row, col), (column, colour, title, x_label) in zip(grid_cells, panel_specs):
    fig.add_trace(
        go.Box(x=data[column], y=data['price'],
               marker=dict(color=colour), name=title),
        row=row, col=col,
    )

# Overall figure size and a centred title; the legend is hidden.
fig.update_layout(height=2000, width=1200,
                  title_text="Comprehensive Visualization of House Prices",
                  title_x=0.5,
                  showlegend=False)

# Label the x-axis of every panel.
for (row, col), (column, colour, title, x_label) in zip(grid_cells, panel_specs):
    fig.update_xaxes(title_text=x_label, row=row, col=col)

# Only the left-hand column gets a y-axis title.
fig.update_yaxes(title_text="Price", col=1)

# Enlarge the subplot titles.
fig.update_annotations(font=dict(size=20))

fig.show()


In [None]:
def _average_increase_pct(frame, column, target='price'):
    """Return the percentage gap between each group's mean and the lowest one.

    The rows of *frame* are grouped by *column*, the mean of *target* is taken
    per group, and every group mean is expressed as a percentage increase over
    the smallest group mean (which therefore maps to 0).
    """
    group_means = frame.groupby(column)[target].mean()
    return ((group_means / group_means.min()) - 1) * 100


# (heading label, column in `data`) for every feature in the report, in the
# order the sections are printed.
increase_reports = [
    ("Number of Bathrooms", 'bathrooms'),
    ("Air Conditioning", 'airconditioning'),
    ("Hot Water Heating", 'hotwaterheating'),
    ("Preferred Area", 'prefarea'),
    ("Stories", 'stories'),
    ("Main Road", 'mainroad'),
    ("Basement", 'basement'),
    ("Guestroom", 'guestroom'),
    ("Parking", 'parking'),
    ("Furnishing Status", 'furnishingstatus'),
]

for position, (label, column) in enumerate(increase_reports):
    # Every heading after the first is preceded by a blank line.
    separator = "" if position == 0 else "\n"
    print(f"{separator}Average Increase Percentage for {label}:")
    increase_by_value = _average_increase_pct(data, column).to_dict()
    for key, value in sorted(increase_by_value.items()):
        print(f"{key}: {value:.0f}%")

<a id="final-findings"></a>

# <div style="font-family: 'Arial', sans-serif; font-weight: bold; letter-spacing: 1px; color: #1976D2; font-size: 150%; text-align: center; padding: 10px; background: #BBDEFB; border-bottom: 8px solid #0D47A1;">Final Findings 📊✨</div>

After evaluating the dataset and making predictions, the following conclusions were drawn:

## <div style="margin-bottom: -30px; font-family: 'Arial', sans-serif; font-weight: bold; letter-spacing: 2px; color: #1976D2; font-size: 100%;">Number of Bathrooms:</div>
- <div style="font-size: 115%;">Houses with more bathrooms experience a substantial price increase.</div>
- <div style="font-size: 115%;">A 4-bathroom house sees a remarkable 191% increase compared to a 1-bathroom house.</div>

## <div style="margin-bottom: -30px; font-family: 'Arial', sans-serif; font-weight: bold; letter-spacing: 2px; color: #1976D2; font-size: 90%;">Air Conditioning:</div>
- <div style="font-size: 115%;">Properties with air conditioning systems witness a noticeable 43% increase in average price.</div>

## <div style="margin-bottom: -30px; font-family: 'Arial', sans-serif; font-weight: bold; letter-spacing: 2px; color: #1976D2; font-size: 100%;">Hot Water Heating:</div>
- <div style="font-size: 115%;">Houses with hot water heating show an 18% increase in average price. </div>

## <div style="margin-bottom: -30px; font-family: 'Arial', sans-serif; font-weight: bold; letter-spacing: 2px; color: #1976D2; font-size: 90%;">Preferred Area:</div>
- <div style="font-size: 115%;">Properties in preferred areas enjoy a significant 33% increase in average price.</div>

## <div style="margin-bottom: -30px; font-family: 'Arial', sans-serif; font-weight: bold; letter-spacing: 2px; color: #1976D2; font-size: 90%;">Number of Stories:</div>
- <div style="font-size: 115%;">A 4-story house commands a substantial 73% increase compared to a single-story house.
</div>

## <div style="margin-bottom: -30px; font-family: 'Arial', sans-serif; font-weight: bold; letter-spacing: 2px; color: #1976D2; font-size: 90%;">Proximity to Main Road:</div>
- <div style="font-size: 115%;">Houses located on main roads see a 47% increase in average price. </div>

## <div style="margin-bottom: -30px; font-family: 'Arial', sans-serif; font-weight: bold; letter-spacing: 2px; color: #1976D2; font-size: 90%;">Basement and Guestroom:</div>
- <div style="font-size: 115%;">Having a basement corresponds to a 16% increase, while a guestroom leads to a 27% increase in average price.</div>

## <div style="margin-bottom: -30px; font-family: 'Arial', sans-serif; font-weight: bold; letter-spacing: 2px; color: #1976D2; font-size: 90%;">Parking:</div>
- <div style="font-size: 115%;">Houses with parking spaces witness significant increases, with a 3-parking space property seeing a 42% increase compared to none.</div>

## <div style="margin-bottom: -30px; font-family: 'Arial', sans-serif; font-weight: bold; letter-spacing: 2px; color: #1976D2; font-size: 90%;">Furnishing Status:</div>
- <div style="font-size: 115%;">Furnished properties command the highest increase, with a 37% rise in average price, followed by semi-furnished properties with a 22% increase compared to unfurnished properties.</div>

<div style="background-color: #E3F2FD; padding: 10px;">
    <p style="font-size: 150%; color: #1976D2; font-weight: bold;"> Thank you for taking the time to explore this notebook! I hope you found the insights valuable and the visualizations engaging. If you have any questions, feedback, or suggestions for improvement, feel free to reach out. Happy coding! 😊</p>
</div>