<a href="https://colab.research.google.com/github/hannahph4m/bus4-118/blob/main/ML_Basics/housing_price.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
# Data source: https://www.opendatabay.com/data/ai-ml/ea28ed1b-cc61-492c-94d7-24c61e977597?utm_source=chatgpt.com
data = pd.read_csv('housing_with_location.csv')
df = pd.DataFrame(data)
# Features and target
X = df[['square_footage', 'Location']]
y = df['price']
# Get all unique locations from the data
all_locations = df['Location'].unique()
# Preprocessing: One-hot encode the location column
preprocessor = ColumnTransformer(
transformers=[
('location', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), ['Location'])
], remainder='passthrough')
# Create pipeline with preprocessing and model
model = Pipeline(steps=[
('preprocessor', preprocessor),
('regressor', LinearRegression())
])
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
random_state=42)
# Train model
model.fit(X_train, y_train)
# Make prediction for a new house: 2000 sq ft in Downtown
new_house = pd.DataFrame({'square_footage': [2000], 'Location': ['Downtown']})
predicted_price = model.predict(new_house)
print(f"Predicted price for a 2000 sq ft house in Downtown: ${predicted_price[0]:,.2f}")
# Display model coefficients
feature_names = (model.named_steps['preprocessor']
.named_transformers_['location']
.get_feature_names_out(['Location'])).tolist() + ['square_footage']
coefficients = model.named_steps['regressor'].coef_
print("\nModel Coefficients:")
for feature, coef in zip(feature_names, coefficients):
    print(f"{feature}: {coef:.2f}")

Predicted price for a 2000 sq ft house in Downtown: $6,990,217.92

Model Coefficients:
Location_Downtown: -185565.31
Location_Rural: -358686.80
Location_Suburb: 544252.11
square_footage: 129.46
