# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Create housing dataset
def create_housing_dataset():
    """Return the ten-house sample as a DataFrame.

    Columns, in order: ``sqft``, ``bedrooms``, ``age``, ``price``.
    Each row is one house; all values are integers.
    """
    column_names = ['sqft', 'bedrooms', 'age', 'price']
    # One tuple per house, in the same order as column_names.
    houses = [
        (1100, 2, 15, 189000),
        (1500, 3, 8, 249000),
        (1800, 3, 12, 279000),
        (1250, 2, 10, 209000),
        (2200, 4, 3, 345000),
        (1400, 3, 18, 239000),
        (3200, 5, 2, 489000),
        (1600, 3, 9, 265000),
        (1900, 4, 5, 299000),
        (2100, 4, 7, 329000),
    ]
    return pd.DataFrame(houses, columns=column_names)

# Build the sample frame; later steps read it through the module-level name `df`
df = create_housing_dataset()

# Echo the raw rows under a heading
print("Housing Dataset:", df, sep="\n")

# Pairwise correlations between every pair of columns, drawn as an annotated heatmap
correlation_matrix = df.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    linewidths=0.5,
)
plt.title('Correlation Matrix of Housing Features')
plt.tight_layout()
plt.show()

# Rank every column by its correlation with price, strongest positive first
print(
    "\nCorrelation with price:",
    correlation_matrix['price'].sort_values(ascending=False),
    sep="\n",
)

# Function to perform and visualize simple linear regression
def linear_regression_feature(feature, new_value=None, data=None):
    """Fit, report and plot a one-variable linear regression of price.

    A ``LinearRegression`` model is fitted with ``feature`` as the only
    predictor and ``price`` as the target. The fitted equation, the R²
    score and one sample prediction are printed, and a scatter plot with
    the regression line is shown.

    Args:
        feature: Name of the predictor column in ``data``.
        new_value: Feature value to predict a price for. When omitted,
            1800 is used for ``'sqft'``, 3 for ``'bedrooms'`` and 10 for
            any other feature.
        data: DataFrame holding ``feature`` and a ``'price'`` column.
            Defaults to the module-level ``df``.

    Returns:
        Tuple ``(model, slope, intercept, r2, predicted_price)``.
        ``r2`` is computed on the same rows the model was fitted on.
    """
    if data is None:
        data = df  # fall back to the module-level housing frame

    if new_value is None:
        # Sample inputs per feature; anything unlisted uses 10 (the age value).
        default_new_values = {'sqft': 1800, 'bedrooms': 3}
        new_value = default_new_values.get(feature, 10)

    # Prepare data: scikit-learn wants a 2-D predictor matrix
    X = data[[feature]].values
    y = data['price'].values

    # Create and train model
    model = LinearRegression()
    model.fit(X, y)

    # Get model parameters
    slope = model.coef_[0]
    intercept = model.intercept_

    # In-sample predictions and goodness of fit
    y_pred = model.predict(X)
    r2 = r2_score(y, y_pred)

    # Prediction for the sample house
    predicted_price = model.predict([[new_value]])[0]

    # Print results
    equation = f'Price = {slope:.2f} × {feature} + {intercept:.2f}'
    print(f"\n=== Linear Regression with {feature} ===")
    print(f"Equation: {equation}")
    print(f"R² score: {r2:.4f}")
    print(f"Predicted price for a house with {feature}={new_value}: ${predicted_price:.2f}")

    # Visualize
    plt.figure(figsize=(10, 6))

    # Plot data points (flattened to 1-D for plotting)
    x = X.ravel()
    plt.scatter(x, y, color='blue', label='Houses')

    # Plot regression line from two endpoints; the span includes new_value so
    # the highlighted point always lies on the drawn line.
    line_lo = min(x.min(), new_value)
    line_hi = max(x.max(), new_value)
    line_y = model.predict([[line_lo], [line_hi]])
    plt.plot([line_lo, line_hi], line_y, color='red', linewidth=2,
             label='Regression line')

    # Plot test point
    plt.scatter(new_value, predicted_price, color='green', s=100,
                marker='*', label=f'New House ({feature}={new_value})')

    # Add equation and R² to plot. Anchor the text by its top edge so the
    # two-line box hangs down from y=0.95 instead of growing past the axes.
    r2_text = f'R² = {r2:.4f}'
    plt.annotate(equation + '\n' + r2_text,
                 xy=(0.05, 0.95),
                 xycoords='axes fraction',
                 verticalalignment='top',
                 bbox=dict(boxstyle="round,pad=0.5", fc="yellow", alpha=0.5))

    # Labels and title
    plt.xlabel(feature, fontsize=12)
    plt.ylabel('House Price ($)', fontsize=12)
    plt.title(f'House Price vs {feature}', fontsize=14)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

    return model, slope, intercept, r2, predicted_price

# Predictor columns, defined once so the loop, the design matrix and the
# printed messages cannot drift apart.
feature_names = ['sqft', 'bedrooms', 'age']

# Perform regression for each feature (one fit and one figure per column)
for feature in feature_names:
    linear_regression_feature(feature)

# Multiple linear regression with all features
X_multi = df[feature_names].values
y_multi = df['price'].values

multi_model = LinearRegression()
multi_model.fit(X_multi, y_multi)

# In-sample predictions and R² (scored on the rows the model was fitted on)
y_multi_pred = multi_model.predict(X_multi)
multi_r2 = r2_score(y_multi, y_multi_pred)

print("\n=== Multiple Linear Regression with all features ===")
coef_text = ", ".join(
    f"{name}={coef:.2f}" for name, coef in zip(feature_names, multi_model.coef_)
)
print(f"Coefficients: {coef_text}")
print(f"Intercept: {multi_model.intercept_:.2f}")
print(f"R² score: {multi_r2:.4f}")

# Predict price for a new house with multiple features
new_house = np.array([[1800, 3, 10]])  # one row, ordered like feature_names
multi_predicted_price = multi_model.predict(new_house)[0]
house_text = ", ".join(
    f"{name}={value}" for name, value in zip(feature_names, new_house[0])
)
print(f"Predicted price for a house with {house_text}: ${multi_predicted_price:.2f}")

# Compare predicted vs actual prices
plt.figure(figsize=(10, 6))
plt.scatter(y_multi, y_multi_pred, color='blue')
# Dashed y = x reference line: points on it are predicted exactly
price_span = [y_multi.min(), y_multi.max()]
plt.plot(price_span, price_span, color='red', linestyle='--')
plt.xlabel('Actual Price', fontsize=12)
plt.ylabel('Predicted Price', fontsize=12)
plt.title('Multiple Linear Regression: Actual vs Predicted Prices', fontsize=14)
plt.grid(True)
plt.tight_layout()
plt.show()