# Prodigy ML Task 1 – Linear Regression

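**Goal**: Predict `SalePrice` with a linear regression on three features: **GrLivArea**, **BedroomAbvGr**, and **TotalBath** (an engineered feature computed as `FullBath + 0.5 * HalfBath`).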

---

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Use seaborn's 'whitegrid' style for every plot drawn after this point.
sns.set(style='whitegrid')

## 1. Load & Inspect Data

In [None]:
# Read the raw table from the CSV file that sits next to the notebook.
df = pd.read_csv('dataset.csv')
print('Shape:', df.shape)

# Preview only the columns this notebook works with (raw inputs + target).
df.loc[:, ['GrLivArea','BedroomAbvGr','FullBath','HalfBath','SalePrice']].head()

## 2. Feature Engineering

In [None]:
# Combined bathroom count: a full bath counts as 1, a half bath as 0.5.
# The new column is written onto `df` so later cells can reference it.
df['TotalBath'] = df['FullBath'].add(df['HalfBath'].mul(0.5))

# Columns fed to the model, followed by the target and the design matrix.
features = ['GrLivArea', 'BedroomAbvGr', 'TotalBath']
y = df['SalePrice']
X = df.loc[:, features]

## 3. Train-Test Split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical across re-runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts
print(f'Train: {X_train.shape[0]}, Test: {X_test.shape[0]}')

## 4. Train Linear Regression

In [None]:
# Fit ordinary least squares on the training rows (fit() returns the
# estimator itself), then score the held-out rows.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Learned parameters: the intercept plus one coefficient per feature,
# labelled with the feature names for readability.
intercept = model.intercept_
coeffs = pd.Series(data=model.coef_, index=features)
print('Intercept:', intercept)
print('Coefficients:\n', coeffs)

## 5. Evaluation

In [None]:
# Goodness of fit on the held-out set.
r2 = r2_score(y_test, y_pred)

# Error magnitudes in the target's own units: RMSE is the square root of
# the mean squared error, MAE the mean absolute error.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f'R²  : {r2:.4f}')
print(f'RMSE: ${rmse:,.0f}')
print(f'MAE : ${mae:,.0f}')

## 6. Visualizations

In [None]:
# One row of three diagnostic panels, each addressed by a descriptive name.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18,5))
ax_fit, ax_resid, ax_corr = axes

# Panel 1: actual vs predicted prices. The dashed red diagonal marks where
# a perfect prediction would fall, spanning the range of the test targets.
price_span = [y_test.min(), y_test.max()]
sns.scatterplot(x=y_test, y=y_pred, ax=ax_fit)
ax_fit.plot(price_span, price_span, 'r--')
ax_fit.set(
    xlabel='Actual Price',
    ylabel='Predicted Price',
    title='Actual vs Predicted',
)

# Panel 2: residuals (actual minus predicted) against the predictions,
# with a dashed zero line for reference.
residuals = y_test - y_pred
sns.scatterplot(x=y_pred, y=residuals, ax=ax_resid)
ax_resid.axhline(y=0, color='r', linestyle='--')
ax_resid.set(
    xlabel='Predicted Price',
    ylabel='Residual',
    title='Residual Plot',
)

# Panel 3: pairwise correlations among the model features and the target.
corr = df[[*features, 'SalePrice']].corr()
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax_corr)
ax_corr.set_title('Feature Correlations')

# Tidy the spacing, write the figure to disk, then display it.
fig.tight_layout()
fig.savefig('model_plots.png', dpi=300, bbox_inches='tight')
plt.show()

---
**End of Notebook** – running the visualization cell writes `model_plots.png` to the current working directory.