In [2]:
import pandas as pd
import numpy as np
from faker import Faker
import random

fake = Faker()

# houses table

In [3]:
state_city_dict = {
    'NY': ['New York', 'Buffalo', 'Rochester', 'Yonkers', 'Syracuse'],
    'NJ': ['Newark', 'Jersey City', 'Paterson', 'Elizabeth', 'Edison'],
    'CT': ['Bridgeport', 'New Haven', 'Stamford', 'Hartford', 'Waterbury']
}

def generate_city(state):
    return random.choice(state_city_dict[state])

def generate_zip_code(state):
    if state == 'NY':
        return fake.zipcode_in_state('NY')
    elif state == 'NJ':
        return fake.zipcode_in_state('NJ')
    elif state == 'CT':
        return fake.zipcode_in_state('CT')
    else:
        return fake.zipcode()

In [4]:
# Generate Houses
houses = []
for i in range(1, 1001):
    state = random.choice(['NY', 'NJ', 'CT'])
    city = generate_city(state)
    houses.append({
        'house_id': i,
        'address': fake.address(),
        'city': city,
        'state': state,
        'zip_code': generate_zip_code(state),
        'type': random.choice(['Single Family', 'Condo', 'Townhouse', 'Multi Family']),
        'bedrooms': random.randint(1, 5),
        'bathrooms': random.randint(1, 4),
        'sqft': random.randint(800, 5000),
        'price': round(random.uniform(100000, 2000000), 2),
        'status': random.choice(['Available', 'Rented', 'Sold', 'Pending'])
    })
houses_df = pd.DataFrame(houses)

houses_df['zip_code'] = houses_df['zip_code'].apply(lambda x: x.zfill(5))
houses_df['zip_code'] = houses_df['zip_code'].astype(str)
houses_df['address'] = houses_df['address'].apply(lambda x: x.split(',')[0])

In [5]:
houses_df.head()

Unnamed: 0,house_id,address,city,state,zip_code,type,bedrooms,bathrooms,sqft,price,status
0,1,05629 Johnson Meadow Suite 977\nLake Luistown,Syracuse,NY,13167,Single Family,1,1,905,1354116.86,Available
1,2,41410 Morgan Loaf Apt. 914\nHayesborough,Waterbury,CT,6039,Townhouse,5,2,1303,480734.92,Sold
2,3,271 Evans Island\nDaniellechester,Edison,NJ,8719,Multi Family,2,2,1399,281964.49,Available
3,4,72119 Jill Mount\nPort Christine,Jersey City,NJ,8405,Townhouse,5,4,1119,1828583.38,Pending
4,5,96490 Jennifer Prairie Suite 632\nTracyburgh,Rochester,NY,12342,Single Family,3,1,3017,458006.49,Pending


In [6]:
houses_df.to_csv('/Users/haoranw/Desktop/Summer 2024/houses.csv', index=False)

# house amenities table

In [7]:
amenities_list = [
    {'amenity_name': 'Swimming Pool', 'description': 'A luxurious swimming pool for relaxation and recreation.'},
    {'amenity_name': 'Garage', 'description': 'A spacious garage for secure vehicle parking and storage.'},
    {'amenity_name': 'Garden', 'description': 'A beautiful garden area with plants, flowers, and outdoor seating.'},
    {'amenity_name': 'Fireplace', 'description': 'A cozy fireplace to keep you warm during the winter months.'},
    {'amenity_name': 'Air Conditioning', 'description': 'Central air conditioning to keep the home cool during hot weather.'},
    {'amenity_name': 'Balcony', 'description': 'A private balcony with a view, perfect for enjoying fresh air.'},
    {'amenity_name': 'Basement', 'description': 'A large basement that can be used for storage or additional living space.'},
    {'amenity_name': 'Solar Panels', 'description': 'Eco-friendly solar panels that help reduce energy costs.'},
    {'amenity_name': 'Home Office', 'description': 'A dedicated home office space for remote work or study.'},
    {'amenity_name': 'Gym', 'description': 'A home gym equipped with various exercise machines and weights.'},
    {'amenity_name': 'Security System', 'description': 'An advanced security system to keep your home safe and secure.'},
    {'amenity_name': 'Smart Home Features', 'description': 'Smart home features including automated lighting and thermostats.'},
    {'amenity_name': 'Hardwood Floors', 'description': 'Elegant hardwood floors throughout the home.'},
    {'amenity_name': 'Walk-in Closet', 'description': 'A spacious walk-in closet with plenty of storage for clothing and accessories.'},
    {'amenity_name': 'Laundry Room', 'description': 'A convenient laundry room with washer and dryer.'}
]

# Generate the amenities table
amenities = [{'amenity_id': i+1, 'amenity_name': amenity['amenity_name'], 'description': amenity['description']} for i, amenity in enumerate(amenities_list)]
amenities_df = pd.DataFrame(amenities)

# Function to generate random amenities for a house
def generate_amenities(house_id, amenities_df):
    num_amenities = random.randint(1, 5)  # Each house has 1 to 5 amenities
    selected_amenities = amenities_df.sample(n=num_amenities)
    return [{'house_id': house_id, 'amenity_id': row['amenity_id'], 'amenity_name': row['amenity_name'], 'description': row['description']} for _, row in selected_amenities.iterrows()]

# Generate house amenities
house_amenities = []
for house_id in range(1, 1001):  # Assuming house IDs from 1 to 1000
    house_amenities.extend(generate_amenities(house_id, amenities_df))

house_amenities_df = pd.DataFrame(house_amenities)

In [8]:
house_amenities_df.head()

Unnamed: 0,house_id,amenity_id,amenity_name,description
0,1,14,Walk-in Closet,A spacious walk-in closet with plenty of stora...
1,1,10,Gym,A home gym equipped with various exercise mach...
2,2,12,Smart Home Features,Smart home features including automated lighti...
3,2,11,Security System,An advanced security system to keep your home ...
4,3,14,Walk-in Closet,A spacious walk-in closet with plenty of stora...


In [9]:
house_amenities_df.to_csv('/Users/haoranw/Desktop/Summer 2024/house_amenities.csv', index=False)

# house records table

In [10]:
def generate_house_records(house_id):
    usage_types = ['Sale', 'Commercial', 'Rental']
    start_date = fake.date_this_decade()
    end_date = fake.date_between(start_date=start_date, end_date='today')
    duration = (pd.to_datetime(end_date) - pd.to_datetime(start_date)).days
    return {
        'house_id': house_id,
        'usage_type': random.choice(usage_types),
        'duration': f"{duration} days",
        'start_date': start_date,
        'end_date': end_date
    }

# Generate house records
house_records = []
for house_id in range(1, 1001):  # Assuming house IDs from 1 to 1000
    house_records.append(generate_house_records(house_id))

house_records_df = pd.DataFrame(house_records)

In [11]:
house_records_df.head()

Unnamed: 0,house_id,usage_type,duration,start_date,end_date
0,1,Rental,1213 days,2020-08-07,2023-12-03
1,2,Sale,573 days,2022-01-27,2023-08-23
2,3,Rental,620 days,2021-11-09,2023-07-22
3,4,Rental,368 days,2023-01-28,2024-01-31
4,5,Rental,847 days,2020-12-22,2023-04-18


In [12]:
house_records_df.to_csv('/Users/haoranw/Desktop/Summer 2024/house_records.csv', index=False)