<a href="https://colab.research.google.com/github/abishekraja018/SDC-GENAI/blob/main/DATASETS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ✅ Step 1: Install required libraries
!pip install scikit-learn pandas matplotlib seaborn --quiet

# ✅ Step 2: Import libraries
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

# ✅ Step 3: Load California Housing dataset
# Build a DataFrame from the feature matrix (one column per feature name) and
# append the regression target as the "Price" column.
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(Price=data.target)

# ✅ Step 4: Convert continuous price into categories
# pd.cut intervals are closed on the right, so the classes are:
# 0 = Low (<= 1.5), 1 = Medium (1.5 < price <= 3), 2 = High (> 3)
# The open-ended top edge (np.inf) keeps the bin edges valid whatever the
# maximum price in the data is, and include_lowest=True keeps a price of
# exactly 0 in class 0 instead of yielding NaN (which would make
# astype(int) fail).
df["PriceCategory"] = pd.cut(
    df["Price"],
    bins=[0, 1.5, 3, np.inf],
    labels=[0, 1, 2],
    include_lowest=True,
).astype(int)

# ✅ Step 5: Select features and labels
# Three predictor columns are used; the target is the binned price class.
features = ["MedInc", "AveRooms", "AveOccup"]
X, y = df[features], df["PriceCategory"]

# ✅ Step 6: Split and scale data
# Hold out 20% of the rows for testing (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Fit the scaler on the training rows only, then apply the same
# transformation to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ✅ Step 7: Train logistic regression model
# The `multi_class` argument is deprecated since scikit-learn 1.5 and is
# scheduled for removal. With the lbfgs solver a multinomial model is already
# what gets fitted for a multi-class target, so the argument is simply
# dropped to stay compatible with current and future releases.
model = LogisticRegression(solver='lbfgs', max_iter=300)
model.fit(X_train_scaled, y_train)

# ✅ Step 8: Evaluate
# Predict on the held-out rows, then compute both metrics before printing.
y_pred = model.predict(X_test_scaled)
test_report = classification_report(y_test, y_pred)
test_accuracy = accuracy_score(y_test, y_pred)

print("\n✅ Classification Report:\n")
print(test_report)
print(f"✅ Accuracy: {test_accuracy:.2f}")

# ✅ Step 9: Predict with user input
def _ask_float(prompt):
    """Prompt repeatedly until the user enters a finite number; return it as a float."""
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            # A typo should not abort the whole cell; ask again instead.
            print(f"'{raw}' is not a number, please try again.")
            continue
        if np.isfinite(value):
            return value
        # float() accepts "nan" / "inf", which are not usable feature values.
        print("Please enter a finite number.")


print("\n🏡 Predict house price category based on your inputs:")
income = _ask_float("Enter median income (e.g., 4.5): ")
rooms = _ask_float("Enter average rooms (e.g., 5.0): ")
occupancy = _ask_float("Enter average occupancy (e.g., 3.0): ")

# The scaler was fitted on a DataFrame, so give it a DataFrame with the same
# column names; a bare list triggers scikit-learn's
# "X does not have valid feature names" warning.
user_input = scaler.transform(
    pd.DataFrame([[income, rooms, occupancy]], columns=features)
)
# Convert the NumPy integer to a plain int before using it as a dict key.
prediction = int(model.predict(user_input)[0])

labels = {0: "Low (< $150k)", 1: "Medium ($150k–$300k)", 2: "High (> $300k)"}
print(f"\n💰 Predicted Price Category: {labels[prediction]}")




✅ Classification Report:

              precision    recall  f1-score   support

           0       0.70      0.72      0.71      1516
           1       0.62      0.72      0.67      1861
           2       0.78      0.43      0.55       751

    accuracy                           0.67      4128
   macro avg       0.70      0.62      0.64      4128
weighted avg       0.68      0.67      0.66      4128

✅ Accuracy: 0.67

🏡 Predict house price category based on your inputs:
Enter median income (e.g., 4.5): 4
Enter average rooms (e.g., 5.0): 5
Enter average occupancy (e.g., 3.0): 3

💰 Predicted Price Category: Medium ($150k–$300k)


