<a href="https://colab.research.google.com/github/barun-kumar-ghosh-95/PRODIGY_ML_01/blob/main/Task1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [3]:
## Load the raw data and keep only the columns used by the model.
# Read the full training table from disk.
df = pd.read_csv("train.csv")

# Name of the column to predict, and the raw predictor columns.
target = 'SalePrice'
features = ['GrLivArea', 'BedroomAbvGr', 'FullBath', 'HalfBath']

# Work on an independent copy so later edits never touch `df`.
house_df = df.loc[:, [*features, target]].copy()
house_df.head()



Unnamed: 0,GrLivArea,BedroomAbvGr,FullBath,HalfBath,SalePrice
0,1710,3,2,1,208500
1,1262,3,2,0,181500
2,1786,3,2,1,223500
3,1717,3,1,0,140000
4,2198,4,2,1,250000


In [4]:
### Handle Missing Data & Feature Engineering
# Rows with any missing value are dropped. The result is rebound to the same
# name rather than using inplace=True, so no existing object is mutated
# behind the reader's back and the steps can be chained.
print(f"Original shape: {house_df.shape}")
house_df = house_df.dropna()
print(f"Shape after dropping NA: {house_df.shape}")

# Create new feature: combine both bath counts into one number, weighting a
# half bath as 0.5 of a full bath. `assign` returns a new frame (and simply
# overwrites TotalBath if this cell is re-run).
house_df = house_df.assign(
    TotalBath=house_df['FullBath'] + 0.5 * house_df['HalfBath']
)
final_features = ['GrLivArea', 'BedroomAbvGr', 'TotalBath']

# Define features (X) and target (y)
X = house_df[final_features]
y = house_df[target]

house_df.head()


Original shape: (1460, 5)
Shape after dropping NA: (1460, 5)


Unnamed: 0,GrLivArea,BedroomAbvGr,FullBath,HalfBath,SalePrice,TotalBath
0,1710,3,2,1,208500,2.5
1,1262,3,2,0,181500,2.0
2,1786,3,2,1,223500,2.5
3,1717,3,1,0,140000,1.0
4,2198,4,2,1,250000,2.5


In [5]:
#### Step 4: Train-Test Split
# Fraction of rows held out for evaluation, and the seed that makes the
# shuffle reproducible between runs.
TEST_SIZE = 0.2
RANDOM_STATE = 42

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# Report how many rows landed in each partition.
print(f"Training samples: {X_train.shape[0]}")
print(f"Testing samples: {X_test.shape[0]}")


Training samples: 1168
Testing samples: 292


In [6]:
##### Train the Linear Regression Model
# `fit` returns the estimator itself, so construction and fitting can be
# written as a single expression.
model = LinearRegression().fit(X_train, y_train)

# Print one fitted coefficient per input feature, in column order.
for feature, coef in zip(final_features, model.coef_):
    print(f"{feature}: {coef:.2f}")


GrLivArea: 100.64
BedroomAbvGr: -26645.53
TotalBath: 27083.21


In [7]:
###### Predictions & Model Evaluation
# Predictions
y_pred = model.predict(X_test)

# Score the held-out predictions. RMSE is the square root of the MSE, which
# puts the error back in the same units as the target.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Emit the whole report with one call; sep="\n" places each part on its own
# line, exactly as separate print calls would.
print(
    "\n--- 📈 Model Performance ---",
    f"RMSE: ${rmse:,.2f}",
    f"R² Score: {r2:.4f}",
    "----------------------------",
    sep="\n",
)



--- 📈 Model Performance ---
RMSE: $53,371.56
R² Score: 0.6286
----------------------------
