In [1]:
# Step 1: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np


In [3]:
# Step 2: Read the housing data from the CSV file into a DataFrame
df = pd.read_csv("house_dataset.csv")

# Feature matrix X: the seven predictor columns selected by name.
X = df[
    [
        "Size_sqft",
        "Rooms",
        "Age_years",
        "Distance_km",
        "HasGarage",
        "NearSchool",
        "CrimeIndex",
    ]
]
# Target vector y: the column the regression is fitted to predict.
y = df["Price"]


In [4]:
# Step 3: Hold out part of the data for evaluation
# test_size=0.2 keeps 20% of the rows for testing and leaves 80% for training;
# random_state=42 fixes the seed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [5]:
# Step 4: Instantiate a linear regression and fit it on the training split
model = LinearRegression()
# fit() is left as the last expression so the cell displays the fitted estimator.
model.fit(X=X_train, y=y_train)


LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [6]:
# Step 5: Inspect the fitted parameters
print("Intercept (β₀):", model.intercept_)

# Tabulate one row per feature: the column name alongside its fitted coefficient.
coefficients = pd.DataFrame(
    {"Feature": X.columns, "Coefficient": model.coef_}
)
print(coefficients)


Intercept (β₀): 30391.99005255688
       Feature   Coefficient
0    Size_sqft    112.000387
1        Rooms  15046.970496
2    Age_years  -1199.640871
3  Distance_km   -884.456021
4    HasGarage  10616.687362
5   NearSchool   7495.489764
6   CrimeIndex    -77.739815


In [7]:
# Step 6: Generate predictions for the held-out test features
y_pred = model.predict(X=X_test)


In [13]:
# Step 7: Evaluate model performance on the test split
mae = mean_absolute_error(y_test, y_pred)
# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error (MAE): {mae:,.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:,.2f}")
print(f"R-squared (R²): {r2:.3f}")

# Adjusted R² = 1 - (1 - R²) * (n - 1) / (n - k - 1), where n is the number of
# rows and k the number of feature columns of X_test.
n, k = X_test.shape
residual_dof = n - k - 1
if residual_dof > 0:
    adjust_R2 = 1 - ((1 - r2) * (n - 1) / residual_dof)
else:
    # With n <= k + 1 the denominator is zero or negative: the original
    # expression either raised ZeroDivisionError or produced a meaningless
    # number, so report the statistic as undefined instead.
    adjust_R2 = np.nan
print("adjusted R2 ", adjust_R2)


Mean Absolute Error (MAE): 28,835.01
Root Mean Squared Error (RMSE): 35,918.59
R-squared (R²): 0.750
adjusted R2  0.7412513524901103
