<a href="https://colab.research.google.com/github/Vishak05/SDC-GenAI/blob/main/HPP_using_SyntheticData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# End-to-end demo: generate synthetic regression data standing in for housing
# prices, turn the continuous target into a binary high/low label, and fit a
# logistic-regression classifier on it.

# One seed shared by data generation and the train/test split, so that the
# printed accuracy and confusion matrix are the same on every run.
RANDOM_STATE = 42

# Step 1: Generate synthetic data for housing prices
# make_regression draws from a random generator; without random_state each
# run would produce a different dataset and therefore different metrics.
X, y = make_regression(
    n_samples=1000,
    n_features=5,
    noise=0.1,
    random_state=RANDOM_STATE,
)

# Convert to a pandas DataFrame with columns Feature_1 .. Feature_N
df = pd.DataFrame(X, columns=[f'Feature_{i+1}' for i in range(X.shape[1])])
df['Price'] = y

# Step 2: Convert housing prices into a binary classification problem
# Houses strictly above the median price are 'high price' (1); the rest are
# 'low price' (0).
threshold = df['Price'].median()
df['Price_Class'] = (df['Price'] > threshold).astype(int)

# Step 3: Split the data into training and testing sets
# 'Price' is dropped along with the label: the label is derived from it, so
# keeping it as a feature would leak the answer.
X = df.drop(['Price', 'Price_Class'], axis=1)
y = df['Price_Class']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Step 4: Standardize the features
# The scaler is fit on the training split only and then applied to the test
# split, so no test-set statistics influence the transformation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 5: Train a Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 6: Predict on the test set
y_pred = model.predict(X_test)

# Step 7: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Output the results
print(f'Accuracy: {accuracy*100:.2f}%')
print('Confusion Matrix:')
print(conf_matrix)
