<a href="https://colab.research.google.com/github/Vishak05/SDC-GenAI/blob/main/HousePricePrediction_LogisticRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.datasets import fetch_california_housing

# Fetch the California Housing data and work with it as a pandas DataFrame.
data = fetch_california_housing(as_frame=True)
df = data.frame

# Turn the continuous target into a binary label: rows whose MedHouseVal is
# strictly above the dataset median become 1 ('expensive'), all others 0
# ('not expensive').
threshold = df["MedHouseVal"].median()
df["Price_Class"] = df["MedHouseVal"].gt(threshold).astype(int)

# Predictor columns fed to the classifier, and the label column.
features = ["MedInc", "HouseAge", "AveRooms", "AveBedrms"]
X = df[features]
y = df["Price_Class"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training rows only and
# the same fitted scaler is then applied to the held-out rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a logistic-regression classifier (library default settings) on the
# scaled training rows.
model = LogisticRegression().fit(X_train_scaled, y_train)

# Class predictions for the held-out rows.
y_pred = model.predict(X_test_scaled)

# Score the predictions against the true held-out labels.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Report the metrics, one heading per line followed by its value.
print(f"Accuracy: {accuracy * 100:.2f}%")
print(
    "Confusion Matrix:",
    conf_matrix,
    "Classification Report:",
    report,
    sep="\n",
)

# Predict on new house data.
# The scaler was fitted on a DataFrame, so it remembers the training column
# names. Passing a bare ndarray makes scikit-learn emit a
# "X does not have valid feature names" UserWarning and skips the column-name
# check. Building the sample as a DataFrame keyed by `features` keeps the
# values in the same order as the training columns (MedInc, HouseAge,
# AveRooms, AveBedrms) and lets scikit-learn validate the names.
new_house = pd.DataFrame([[3.0, 20, 6, 1]], columns=features)
new_house_scaled = scaler.transform(new_house)

# The model was trained on scaled arrays, so it is given the scaled sample;
# [0] extracts the single predicted class label.
prediction = model.predict(new_house_scaled)[0]
print("Prediction for new house:", "Expensive" if prediction == 1 else "Not Expensive")
