# Housing Price Prediction with Linear Regression





In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns

# Plot appearance: apply the white-grid style sheet first, then seaborn's
# "notebook" context with a font_scale of 1.2
plt.style.use(['seaborn-v0_8-whitegrid'])
sns.set_context(context="notebook", font_scale=1.2)


In [None]:
# Build a reproducible synthetic dataset of 100 houses.
# The global NumPy seed is fixed, so the three draws below (size, rate,
# noise) must stay in this order to reproduce the same numbers.
np.random.seed(42)
n_samples = 100

# Living area: integers from 1000 up to (but not including) 3000 sq ft
sq_footage = np.random.randint(low=1000, high=3000, size=n_samples)

# Every house gets its own rate of $100-200 per sq ft plus Gaussian noise
# (mean 0, standard deviation 50000), so price grows with size on average
rate_per_sq_ft = np.random.uniform(low=100, high=200, size=n_samples)
price_noise = np.random.normal(loc=0, scale=50000, size=n_samples)
price = sq_footage * rate_per_sq_ft + price_noise

# Collect both columns into a DataFrame
house_data = pd.DataFrame({'square_footage': sq_footage, 'price': price})

# Preview the first five rows
house_data.head()


In [None]:
# Summary statistics for each numeric column (count, mean, std, min,
# quartiles, max); as the last expression in the cell, the result is displayed
house_data.describe()

In [None]:
# Visualize how price varies with size before fitting anything
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=house_data, x='square_footage', y='price', ax=ax)
ax.set_title('House Price vs Square Footage')
ax.set_xlabel('Square Footage')
ax.set_ylabel('Price ($)')
plt.show()


In [None]:
# Model inputs: selecting with a list keeps X a one-column DataFrame
X = house_data.loc[:, ['square_footage']]
# Value to predict: the price column as a Series
y = house_data.loc[:, 'price']


In [None]:
# Hold out a quarter of the rows for evaluation; the fixed random_state
# makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
print(f"Training set size: {len(X_train)} samples")
print(f"Testing set size: {len(X_test)} samples")


In [None]:
# Create the linear regression and fit it on the training split
# (fit returns the estimator itself, so construction and fitting can be chained)
model = LinearRegression().fit(X_train, y_train)
# Leave the fitted estimator as the cell's last expression so it is displayed
model


In [None]:
# Predict prices for the training rows and for the held-out test rows
y_pred_train, y_pred_test = (
    model.predict(features) for features in (X_train, X_test)
)


In [None]:
# Score the fitted model on both splits: RMSE (square root of the mean
# squared error) and the R² coefficient of determination
train_mse = mean_squared_error(y_train, y_pred_train)
test_mse = mean_squared_error(y_test, y_pred_test)
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)
train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)

# Assemble the report and emit it in one go; the leading "\n" on the
# coefficients heading yields a blank separator line
report_lines = [
    "Model Performance:",
    f"Training RMSE: ${train_rmse:.2f}",
    f"Testing RMSE: ${test_rmse:.2f}",
    f"Training R²: {train_r2:.4f}",
    f"Testing R²: {test_r2:.4f}",
    "\nModel Coefficients:",
    f"Intercept: ${model.intercept_:.2f}",
    f"Coefficient (price per sq ft): ${model.coef_[0]:.2f}",
]
print("\n".join(report_lines))


In [None]:
# Plot the data points together with the fitted regression line
plt.figure(figsize=(10, 6))
sns.scatterplot(x='square_footage', y='price', data=house_data, alpha=0.6)

# 100 evenly spaced sizes spanning the observed square-footage range,
# shaped as a single-column 2-D array
x_range = np.linspace(X['square_footage'].min(), X['square_footage'].max(), 100).reshape(-1, 1)
# The model was fitted on a DataFrame, so predict on a DataFrame with the same
# column names; passing the bare array makes scikit-learn warn that X has no
# valid feature names
y_range = model.predict(pd.DataFrame(x_range, columns=X.columns))

# Draw the regression line over the scatter
plt.plot(x_range, y_range, color='red', linewidth=2)
plt.title('Linear Regression: House Price vs Square Footage')
plt.xlabel('Square Footage')
plt.ylabel('Price ($)')
plt.show()


In [None]:
def predict_house_price(square_feet):
    """Predict a house price from its square footage using the fitted model.

    Args:
        square_feet: Size of the home in square feet (a number).

    Returns:
        A message string: the formatted price estimate when ``square_feet``
        lies between 500 and 5000 inclusive, otherwise a notice that the
        input is outside the supported range.

    Note:
        The synthetic training data generated in this notebook only covers
        about 1000-3000 sq ft, so estimates near the ends of the accepted
        range are extrapolations.
    """
    # A chained comparison is False for NaN, so NaN is rejected here instead
    # of being handed to the model.
    if not (500 <= square_feet <= 5000):
        return "Input outside reliable prediction range (500-5000 sq ft)"

    # Predict on a DataFrame that carries the training column name; a bare
    # nested list makes scikit-learn warn about missing feature names.
    features = pd.DataFrame({'square_footage': [square_feet]})
    price = model.predict(features)[0]
    return f"Estimated price for {square_feet} sq ft home: ${price:.2f}"


In [None]:
# Try the prediction helper on sizes from 1000 to 3000 sq ft in 500 sq ft steps
print("Example Predictions:")
for size in range(1000, 3001, 500):
    print(predict_house_price(size))


In [None]:
# Try your own value: edit your_sqft below and re-run this cell.
# Sizes below 500 or above 5000 sq ft return an out-of-range message instead
# of an estimate. The returned string is the cell's last expression, so the
# notebook displays it.
your_sqft = 1800  # square footage to estimate a price for
predict_house_price(your_sqft)