# In [None]:
import random
import pandas as pd

# Generate a synthetic house-price dataset and save it to a CSV file.
#
# Every example consists of seven randomly drawn features plus a selling
# price computed as a linear combination of those features with uniform
# integer noise. The result is kept in the module-level DataFrame `data`
# and written to "house_price_dataset.csv" in the current directory.

# Set a random seed for reproducibility
random.seed(42)

# Column names, in the order they appear in the DataFrame and the CSV file
columns = ["Size (sq. ft.)", "Bedrooms", "Bathrooms", "Age (years)",
           "Distance to School (miles)", "Distance to Public Transportation (miles)",
           "Crime Rate", "Selling Price ($)"]

# Collect the examples in a plain list and build the DataFrame once at the
# end. DataFrame.append was deprecated in pandas 1.4 and removed in pandas
# 2.0, and calling it per row copied the whole frame on every iteration.
rows = []

# Generate 1000 random examples
for _ in range(1000):
    # NOTE: the order of the random draws below determines the generated
    # values for a given seed; do not reorder them.
    size = random.randint(1000, 3500)  # Size between 1000 and 3500 sq. ft.
    bedrooms = random.randint(2, 5)   # Number of bedrooms (2 to 5)
    bathrooms = random.uniform(1.0, 4.0)  # Number of bathrooms (1.0 to 4.0)
    age = random.randint(1, 30)        # Age of the house (1 to 30 years)
    school_distance = random.uniform(0.1, 2.0)  # Distance to school (0.1 to 2.0 miles)
    transport_distance = random.uniform(0.1, 1.5)  # Distance to public transport (0.1 to 1.5 miles)
    crime_rate = random.uniform(0.01, 0.05)  # Crime rate (0.01 to 0.05)

    # Generate a hypothetical selling price based on the features (you can adjust this)
    # This is a simple linear combination of features with some random noise.
    price = (
        100000
        + 150 * size
        + 30000 * bedrooms
        + 25000 * bathrooms
        - 500 * age
        - 20000 * school_distance
        - 15000 * transport_distance
        - 50000 * crime_rate
        + random.randint(-20000, 20000)  # uniform integer noise
    )

    rows.append({
        "Size (sq. ft.)": size,
        "Bedrooms": bedrooms,
        "Bathrooms": bathrooms,
        "Age (years)": age,
        "Distance to School (miles)": school_distance,
        "Distance to Public Transportation (miles)": transport_distance,
        "Crime Rate": crime_rate,
        "Selling Price ($)": price,
    })

# Build the DataFrame in one step; `columns` pins the column order.
# Building from a list of dicts lets pandas infer a dtype per column, so the
# integer-valued features (size, bedrooms, age) are stored as integers.
data = pd.DataFrame(rows, columns=columns)

# Save the generated dataset to a CSV file
data.to_csv("house_price_dataset.csv", index=False)
