In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
# Read the training set from train.csv (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="train.csv")


In [None]:
# Preview the first five rows (the default for head()) as the cell output.
df.head(n=5)


In [None]:
# Count missing values per column; isna() performs the same check as isnull().
df.isna().sum()

In [None]:
# Keep the three predictor columns plus the target, then drop any row
# that has a missing value in one of them.
data = df.loc[
    :, ['GrLivArea', 'BedroomAbvGr', 'FullBath', 'SalePrice']
].dropna()


In [None]:
# Split the cleaned frame into the target series and the feature matrix.
y = data.loc[:, 'SalePrice']
X = data.loc[:, ['GrLivArea', 'BedroomAbvGr', 'FullBath']]


In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps
# the split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Fit a linear regression on the training split.
# The fit call stays last so its return value is shown as the cell output.
model = LinearRegression()
model.fit(X=X_train, y=y_train)


In [None]:
# Predict on the held-out rows, then report the root-mean-squared error
# and the R^2 score of those predictions.
y_pred = model.predict(X_test)
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
test_r2 = r2_score(y_test, y_pred)
print("RMSE:", test_rmse)
print("R^2 Score:", test_r2)


In [None]:
# Scatter the predictions against the true values for the held-out rows.
plt.scatter(x=y_test, y=y_pred)
plt.title("Actual vs Predicted")
plt.ylabel("Predicted Prices")
plt.xlabel("Actual Prices")
plt.show()


In [None]:
# Mean of the test-set predictions, printed with thousands separators and
# two decimals; the \033[1m ... \033[0m escapes wrap the number in ANSI bold.
# NOTE(review): the message uses the rupee sign — confirm that matches the
# currency of SalePrice in train.csv.
average_price = np.mean(y_pred)
print(f"🏠 The average predicted house price is: ₹\033[1m{average_price:,.2f}\033[0m")


In [None]:
# One-row summary table: the mean prediction (rounded to 2 decimals) next to
# the two test-set metrics, rendered with the notebook's rich display.
summary_row = {
    "Average Predicted Price": round(average_price, 2),
    "RMSE": np.sqrt(mean_squared_error(y_test, y_pred)),
    "R² Score": r2_score(y_test, y_pred),
}
summary = pd.DataFrame([summary_row])
display(summary)
