In [6]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Step 2: Load Data
# Both splits are read from the sibling data/ directory; the paths are
# relative, so they resolve against the current working directory.
train_csv = "../data/train.csv"
test_csv = "../data/test.csv"
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

# Show the first rows of the training split as a quick sanity check.
print("Training Data Preview:")
train_preview = train_df.head()
display(train_preview)

# Step 3: Check for null values (optional)
# Per-column count of missing entries in the training split only.
missing_per_column = train_df.isna().sum()
print("\nMissing Values:\n", missing_per_column)

# Step 4: Select Features and Target
# The feature list is defined once and reused for both splits so the train
# and test matrices always have the same columns in the same order, which is
# what the fitted model expects at prediction time.
feature_cols = ['square_feet', 'num_bedrooms', 'num_bathrooms']
target_col = 'price'
X_train = train_df[feature_cols]
y_train = train_df[target_col]
X_test = test_df[feature_cols]

# Step 5: Train Model
# Linear regression with scikit-learn's default settings.
model = LinearRegression()
model.fit(X_train, y_train)

# Step 6: Predict on Test Data
y_pred = model.predict(X_test)

# Step 7: Evaluate on Train Data
# NOTE: these are in-sample metrics. The model is scored on the same rows it
# was fitted to, so they describe goodness of fit on the training data, not
# how well the model generalizes to unseen data.
train_preds = model.predict(X_train)
mse = mean_squared_error(y_train, train_preds)
r2 = r2_score(y_train, train_preds)
print(f"\nModel Evaluation:\nMSE: {mse:.2f} \nR² Score: {r2:.2f}")

# Step 8: Save Predictions
# Start from the test ids and attach the predictions as a second column,
# giving a two-column table: 'id', 'predicted_price'.
submission_df = test_df[['id']].assign(predicted_price=y_pred)

# index=False keeps the DataFrame's row index out of the written CSV.
submission_df.to_csv("../data/predictions.csv", index=False)
print("\n✅ Predictions saved to data/predictions.csv")


Training Data Preview:


Unnamed: 0,id,square_feet,num_bedrooms,num_bathrooms,price
0,1,1200,3,2,250000
1,2,800,2,1,180000
2,3,1500,4,3,320000
3,4,600,1,1,150000
4,5,1100,3,2,240000



Missing Values:
 id               0
square_feet      0
num_bedrooms     0
num_bathrooms    0
price            0
dtype: int64

Model Evaluation:
MSE: 5000000.00 
R² Score: 1.00

✅ Predictions saved to data/predictions.csv
