<a href="https://colab.research.google.com/github/dasarihemu/House-Price-Predictor/blob/main/Hprice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder

# Train a linear regression on the house-price CSV, report hold-out error
# metrics, and predict the price of one example house.

# Load dataset
df = pd.read_csv("/content/house-prices.csv")

# Feature selection (modify according to dataset)
categorical_features = ['Brick', 'Neighborhood']  # Columns that need one-hot encoding
X = df[['SqFt', 'Bedrooms', 'Bathrooms', 'Offers', 'Brick', 'Neighborhood']]
y = df['Price']

# sparse_output=False yields a dense array that can be wrapped in a DataFrame;
# handle_unknown='ignore' encodes categories unseen during fit as all zeros
# instead of raising at transform time.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

# Fit the encoder on the categorical columns and one-hot encode them
encoded_features = encoder.fit_transform(X[categorical_features])
encoded_features_df = pd.DataFrame(
    encoded_features,
    columns=encoder.get_feature_names_out(categorical_features),
    index=X.index,  # pd.concat(axis=1) aligns on index labels, so reuse X's labels
)

# Replace the raw categorical columns with their one-hot encoded counterparts.
# The raw string columns must be removed: LinearRegression needs numeric input.
X = pd.concat([X.drop(columns=categorical_features), encoded_features_df], axis=1)

# Splitting the dataset (fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluating the model
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Output results
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"R-squared Score: {r2}")

# Predicting on new data (example input)
new_house = pd.DataFrame([{'SqFt': 1790, 'Bedrooms': 3, 'Bathrooms': 3, 'Offers': 3, 'Brick': 'No', 'Neighborhood': 'East'}])  # Example values as a DataFrame

# Transform the new data with the encoder that was fitted on the training features
encoded_new_house = encoder.transform(new_house[categorical_features])
encoded_new_house_df = pd.DataFrame(
    encoded_new_house,
    columns=encoder.get_feature_names_out(categorical_features),
    index=new_house.index,  # keep row labels aligned for the concat below
)

# Swap the raw categorical columns for the encoded ones, then put the columns
# in the exact order used for training so the result does not depend on the
# key order of the example dict above.
new_house = pd.concat([new_house.drop(columns=categorical_features), encoded_new_house_df], axis=1)
new_house = new_house[X.columns]

predicted_price = model.predict(new_house)
print(f"Predicted House Price: {predicted_price[0]}")


Mean Absolute Error: 8901.290743964839
Mean Squared Error: 114170418.45453785
Root Mean Squared Error: 10685.05584704815
R-squared Score: 0.8062934622232144
Predicted House Price: 105487.43096840294
