<a href="https://colab.research.google.com/github/farhanasif2003/Projects/blob/main/Simulated_House_Price_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


def generate_custom_dummy_data(num_samples=150, seed=123):
    """Build a reproducible synthetic housing dataset.

    Every feature is drawn independently at random and the price is a fixed
    linear combination of the features plus bounded integer noise. The
    formula is made up for demonstration purposes only.

    Args:
        num_samples: Number of rows (houses) to generate.
        seed: Seed for the private random generator. The default reproduces
            the data obtained by seeding NumPy's global generator with 123.

    Returns:
        A ``pandas.DataFrame`` with the columns ``sqft``, ``location_score``,
        ``age_of_house``, ``near_school``, ``num_balconies`` and ``price``.
    """
    # A private legacy RandomState yields the same stream as
    # ``np.random.seed(seed)`` followed by ``np.random.*`` calls, but does not
    # reseed the process-wide generator behind the caller's back.
    rng = np.random.RandomState(seed)

    # Simulated feature columns (integer upper bounds are exclusive).
    # The draw order below determines the generated values; keep it stable.
    sqft = rng.randint(600, 5000, num_samples)
    location_score = rng.uniform(0, 10, num_samples)  # 0 = worst, 10 = best location
    age_of_house = rng.randint(0, 50, num_samples)    # in years
    near_school = rng.randint(0, 2, num_samples)      # 0 = No, 1 = Yes
    num_balconies = rng.randint(0, 4, num_samples)
    noise = rng.randint(-15000, 15000, num_samples)   # random noise on the price

    # Simulated price formula (purely made-up for this project)
    price = (
        sqft * 120
        + location_score * 10000
        - age_of_house * 500
        + near_school * 20000
        + num_balconies * 7000
        + noise
    )

    return pd.DataFrame({
        "sqft": sqft,
        "location_score": location_score,
        "age_of_house": age_of_house,
        "near_school": near_school,
        "num_balconies": num_balconies,
        "price": price,
    })


def run_model():
    """Train and evaluate a linear regression on the simulated housing data.

    Generates the dummy dataset, holds out 25% of the rows for testing, fits
    a ``LinearRegression`` on the remainder, prints the mean squared error
    and R2 score on the held-out rows, and finally prints the predicted
    price for one hand-written example house.

    Returns:
        None. All results are written to standard output.
    """
    df = generate_custom_dummy_data()
    print("Sample Dummy Data:\n", df.head())

    X = df.drop("price", axis=1)
    y = df["price"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

    model = LinearRegression()
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)

    print("\n Model Evaluation:")
    print("Mean Squared Error:", round(mean_squared_error(y_test, predictions), 2))
    print("R2 Score:", round(r2_score(y_test, predictions), 4))

    # Demo prediction. The model was fitted on a DataFrame, so the demo row is
    # also passed as a DataFrame with the training column names; a bare array
    # makes scikit-learn warn about missing feature names.
    demo_input = pd.DataFrame(
        [[3000, 8.5, 5, 1, 2]],  # [sqft, location_score, age, near_school, balconies]
        columns=X.columns,
    )
    estimated = model.predict(demo_input)[0]
    print(f"\n Predicted Price for test house: ${estimated:,.2f}")


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    run_model()


Sample Dummy Data:
    sqft  location_score  age_of_house  near_school  num_balconies  \
0  4182        5.643590             1            0              1   
1  4054        8.069687            21            0              3   
2  1946        3.943701            28            1              0   
3  4660        7.310730             8            1              0   
4  2193        1.610690            40            1              1   

           price  
0  560551.904292  
1  574933.868414  
2  269953.005395  
3  653894.303584  
4  279101.901443  

 Model Evaluation:
Mean Squared Error: 71078542.4
R2 Score: 0.9971

 Predicted Price for test house: $475,935.81


