In [9]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Configuration: every tunable value is named here instead of being inlined below.
DATA_PATH = "boston_housing.csv"
TARGET_COLUMN = "MEDV"  # 'MEDV' is target
TEST_SIZE = 0.2
RANDOM_STATE = 42
MEDV_UNIT_USD = 1000  # the target is expressed in 1000s of USD
USD_TO_INR = 83.5  # fixed conversion rate applied to the sample prediction

# Step 1: Load dataset
raw_data = pd.read_csv(DATA_PATH)

# Step 2: Handle missing values (cleaning)
# The untouched frame stays available as `raw_data`; `data` holds only complete rows.
data = raw_data.dropna()  # Alternatively: raw_data.fillna(raw_data.mean(numeric_only=True))
if data.empty:
    # Fail here with a clear message rather than deep inside the split/fit calls.
    raise ValueError(
        f"No rows left in {DATA_PATH!r} after dropping rows with missing values"
    )

# Step 3: Split data into features and target
X = data.drop(columns=TARGET_COLUMN)
y = data[TARGET_COLUMN]

# Step 4: Train-Test Split
# A fixed random_state keeps the split (and therefore the metrics) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Step 5: Train Linear Regression Model
model = LinearRegression()
model.fit(X_train, y_train)

# Step 6: Evaluate on the held-out test split
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Step 7: Output (cleaned and rounded)
rounded_coeffs = [round(float(c), 2) for c in model.coef_]
print("Model Coefficients:", rounded_coeffs)
print("Intercept:", round(float(model.intercept_), 2))
print("Mean Squared Error:", round(mse, 2))
print("R² Score:", round(r2, 2))

# Step 8: Static Sample Prediction (Output in INR only)
# Values are matched to the feature columns purely by position.
# NOTE(review): presumably listed in the same order as the CSV header - confirm.
sample_input = [0.1, 18.0, 2.31, 0, 0.538, 6.575, 65.2, 4.09, 1, 296, 15.3, 396.9, 4.98]
if len(sample_input) != X.shape[1]:
    raise ValueError(
        f"sample_input has {len(sample_input)} values but the model expects "
        f"{X.shape[1]} features: {list(X.columns)}"
    )

# Convert to DataFrame so the sample carries the same column names used in fit()
sample_df = pd.DataFrame([sample_input], columns=X.columns)

# Make prediction
prediction = model.predict(sample_df)[0]  # in 1000s USD
inr_price = prediction * MEDV_UNIT_USD * USD_TO_INR  # Convert to INR

print(f"Predicted House Price: Rs,{inr_price:,.2f}")


Model Coefficients: [-0.11, 0.04, 0.03, 1.98, -17.08, 4.26, -0.02, -1.42, 0.24, -0.01, -0.98, 0.01, -0.39]
Intercept: 33.65
Mean Squared Error: 31.45
R² Score: 0.63
Predicted House Price: Rs,2,476,347.86
