<a href="https://colab.research.google.com/github/iamharshita25/Final_year_project/blob/main/HousePricePredictonModel(HomeHarbour).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Read the raw listings and immediately discard the identifier and date
# columns, which are not used as predictors
data = pd.read_csv('House Price India.csv').drop(columns=['id', 'Date'])

# 'Price' is the regression target; every remaining column is a feature
y = data['Price']
X = data.drop(columns=['Price'])

# Hold out 20% of the rows for evaluation (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to the held-out split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a seeded random forest on the scaled training features
model = RandomForestRegressor(random_state=42)
model.fit(X_train_scaled, y_train)

# Score the held-out split
y_pred = model.predict(X_test_scaled)
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

# Report the evaluation metrics
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")


Mean Absolute Error: 69077.80083447332
Mean Squared Error: 18322286010.429497
R-squared Score: 0.8699799709873561


In [None]:
import numpy as np

# Example input data (replace these values with the actual new data you have).
# The values are positional: they must be listed in the same order as the
# columns of X_train.
new_data = np.array([[
    3,          # number of bedrooms
    2.0,        # number of bathrooms
    2000,       # living area (in sqft)
    5000,       # lot area (in sqft)
    1.0,        # number of floors
    0,          # waterfront present (0 or 1)
    2,          # number of views
    3,          # condition of the house
    7,          # grade of the house
    1800,       # Area of the house (excluding basement)
    200,        # Area of the basement
    1995,       # Built Year
    0,          # Renovation Year (0 if never renovated)
    122030,     # Postal Code
    52.75,      # Latitude
    -114.50,    # Longitude
    2000,       # living_area_renov
    5000,       # lot_area_renov
    2,          # Number of schools nearby
    65          # Distance from the airport (in km)
]])

# Ensure the new data has the correct number of features
if new_data.shape[1] != X_train.shape[1]:
    raise ValueError(f"Expected {X_train.shape[1]} features, but got {new_data.shape[1]}.")

# The scaler was fitted on a DataFrame, so give it a DataFrame carrying the
# same column labels. Passing the bare ndarray makes scikit-learn warn that
# the input "does not have valid feature names".
new_data_df = pd.DataFrame(new_data, columns=X_train.columns)

# Standardize the new data with the statistics learned from the training split
new_data_scaled = scaler.transform(new_data_df)

# Predict the price (model.predict returns one value per input row)
predicted_price = model.predict(new_data_scaled)

print(f"Predicted Price: {predicted_price[0]}")

Predicted Price: 405343.38




In [None]:
import numpy as np

# Function to predict house price based on user input
def predict_price(user_input):
    """Predict the price of a single house with the fitted scaler and model.

    Args:
        user_input: The feature values for one house. Normally a dict keyed
            by the feature column names in ``X.columns``. Any other value is
            passed to ``pd.DataFrame([user_input], columns=X.columns)``
            unchanged, as before.

    Returns:
        The first (and only) element of ``model.predict`` for this row.

    Raises:
        ValueError: If ``user_input`` is a dict that lacks one or more of the
            columns in ``X.columns``.
    """
    # pandas fills dict keys that are absent from the row with NaN instead of
    # raising, so a missing or misspelled feature name would otherwise reach
    # the model silently. Fail loudly here instead.
    if isinstance(user_input, dict):
        missing = [col for col in X.columns if col not in user_input]
        if missing:
            raise ValueError(f"Missing feature(s) in user input: {missing}")

    # Build a one-row frame whose columns follow the training feature order
    input_df = pd.DataFrame([user_input], columns=X.columns)

    # Preprocess the input data with the already-fitted scaler
    input_scaled = scaler.transform(input_df)

    # Predict the price
    predicted_price = model.predict(input_scaled)

    return predicted_price[0]

# Function to get user input
def get_user_input():
    """Prompt on stdin for each feature value and return them as a dict.

    The questions are asked in the order listed in the table below. Each
    answer is converted with the caster paired with it (``int`` or ``float``),
    so an answer that does not parse raises ``ValueError`` from that caster.

    Returns:
        dict: feature name -> parsed value, in the order the prompts are asked.
    """
    # One row per question: (feature name, caster, prompt text)
    questions = (
        ('number of bedrooms', int, "Number of bedrooms: "),
        ('number of bathrooms', float, "Number of bathrooms: "),
        ('living area', int, "Living area (sqft): "),
        ('lot area', int, "Lot area (sqft): "),
        ('number of floors', float, "Number of floors: "),
        ('waterfront present', int, "Waterfront present (1 for yes, 0 for no): "),
        ('number of views', int, "Number of views: "),
        ('condition of the house', int, "Condition of the house (1-5): "),
        ('grade of the house', int, "Grade of the house (1-13): "),
        ('Area of the house(excluding basement)', int, "Area of the house excluding basement (sqft): "),
        ('Area of the basement', int, "Area of the basement (sqft): "),
        ('Built Year', int, "Built Year: "),
        ('Renovation Year', int, "Renovation Year (0 if never renovated): "),
        ('Postal Code', int, "Postal Code: "),
        ('Lattitude', float, "Latitude: "),
        ('Longitude', float, "Longitude: "),
        ('living_area_renov', int, "Living area after renovation (sqft): "),
        ('lot_area_renov', int, "Lot area after renovation (sqft): "),
        ('Number of schools nearby', int, "Number of schools nearby: "),
        ('Distance from the airport', int, "Distance from the airport (km): "),
    )

    # Ask each question in turn and parse the reply with its caster
    return {feature: cast(input(prompt)) for feature, cast, prompt in questions}

# Interactively collect one house's feature values (one stdin prompt per feature)
user_input_data = get_user_input()

# Run the collected values through predict_price and print the estimate
predicted_price = predict_price(user_input_data)
print(f"Predicted House Price: {predicted_price}")

Number of bedrooms: 4
Number of bathrooms: 5
Living area (sqft): 2000
Lot area (sqft): 1500
Number of floors: 3
Waterfront present (1 for yes, 0 for no): 0
Number of views: 0
Condition of the house (1-5): 4
Grade of the house (1-13): 11
Area of the house excluding basement (sqft): 1500
Area of the basement (sqft): 0
Built Year: 2007
Renovation Year (0 if never renovated): 0
Postal Code: 390019
Latitude: 45
Longitude: 114
Living area after renovation (sqft): 2000
Lot area after renovation (sqft): 1500
Number of schools nearby: 3
Distance from the airport (km): 20
Predicted House Price: 495935.67
