Centralize All Data Pertaining to a Zillow House Listing in One Master File

# Import Packages

In [43]:
date = '2024-10-09'

In [44]:
import os
import yaml
import datetime
import pandas as pd
import numpy as np

# Load the configuration file
with open(r'C:/Users/Dev/Documents/Real Estate Data/config/config.yaml', 'r') as file:
    config = yaml.safe_load(file)

# Directories

In [45]:
# Imported Data Directory
area_dir = config['data']['simplemaps_data']['master']
listing_dir = config['data']['zillow_data']['processed']['listings']['zillow_search_scraper']
census_dir = config['data']['census_data']['master']

# Cleaned Data Directory
master_dir = config['data']['zillow_data']['master']['listings']['zillow_search_scraper']

# Files

In [46]:
# Imported Data 
area = 'simplemaps_master_data.csv'
listings = f'{date}_zillow_house_listings.csv'
census = 'census_master_data.csv'

# Cleaned Data
master_file = f'{date}_zillow_listing_master.csv'

# Import Files

In [47]:
file_path = os.path.join(area_dir, area)
area = pd.read_csv(file_path, low_memory=False)

file_path = os.path.join(listing_dir, listings)
listings = pd.read_csv(file_path, low_memory=False)

file_path = os.path.join(census_dir, census)
census = pd.read_csv(file_path, low_memory=False)

# Add Area Data to listings dataframe

In [48]:
# Assuming both DataFrames have a 'zipcode' column to align on
listings = listings.merge(area[['zipcode', 'metro', 'county']], on='zipcode', how='left')
listings

Unnamed: 0,zipID,city,state,streetAddress,zipcode,latitude,longitude,beds,baths,homeType,...,builderName,newConstructionType,isPremierBuilder,is_newHome,flexFieldText,flexFieldType,url,webURL,metro,county
0,351078673,Huffman,TX,"1647 Plan, Woodland Lakes",77336.0,30.030027,95.112015,3.0,3.0,SINGLE_FAMILY,...,Cyrene Homes,BUILDER_PLAN,True,True,Lake breezes,homeInsight,https://www.zillow.com/community/woodland-lake...,"1647-Plan,-Woodland-Lakes-Huffman-TX-77336.0/3...",Houston-The Woodlands-Sugar Land,Harris
1,351078673,Huffman,TX,"1647 Plan, Woodland Lakes",77336.0,30.030027,95.112015,3.0,3.0,SINGLE_FAMILY,...,Cyrene Homes,BUILDER_PLAN,True,True,Lake breezes,homeInsight,https://www.zillow.com/community/woodland-lake...,"1647-Plan,-Woodland-Lakes-Huffman-TX-77336.0/3...",,Harris
2,28020741,Houston,TX,10407 Royal Oaks Dr,77016.0,29.862747,95.302640,3.0,1.0,SINGLE_FAMILY,...,,,False,,Showcase,showcase,https://www.zillow.com/homedetails/10407-Royal...,10407-Royal-Oaks-Dr-Houston-TX-77016.0/2802074...,Houston-The Woodlands-Sugar Land,Harris
3,28020741,Houston,TX,10407 Royal Oaks Dr,77016.0,29.862747,95.302640,3.0,1.0,SINGLE_FAMILY,...,,,False,,Showcase,showcase,https://www.zillow.com/homedetails/10407-Royal...,10407-Royal-Oaks-Dr-Houston-TX-77016.0/2802074...,,Harris
4,28020741,Houston,TX,10407 Royal Oaks Dr,77016.0,29.862747,95.302640,3.0,1.0,SINGLE_FAMILY,...,,,False,,Showcase,showcase,https://www.zillow.com/homedetails/10407-Royal...,10407-Royal-Oaks-Dr-Houston-TX-77016.0/2802074...,,Harris
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33836,2060332592,Houston,TX,2915 Newington Ln,77047.0,29.620047,95.376785,3.0,2.0,SINGLE_FAMILY,...,,,False,,Modern elegance,homeInsight,https://www.zillow.com/homedetails/2915-Newing...,2915-Newington-Ln-Houston-TX-77047.0/206033259...,,Harris
33837,28094072,Houston,TX,11011 Carlton Dr,77047.0,29.639648,95.366730,2.0,2.0,SINGLE_FAMILY,...,,,False,,"Price cut: $10,000 (Oct 02)",priceCut,https://www.zillow.com/homedetails/11011-Carlt...,11011-Carlton-Dr-Houston-TX-77047.0/28094072_z...,Houston-The Woodlands-Sugar Land,Harris
33838,28094072,Houston,TX,11011 Carlton Dr,77047.0,29.639648,95.366730,2.0,2.0,SINGLE_FAMILY,...,,,False,,"Price cut: $10,000 (Oct 02)",priceCut,https://www.zillow.com/homedetails/11011-Carlt...,11011-Carlton-Dr-Houston-TX-77047.0/28094072_z...,Houston-The Woodlands-Sugar Land,Brazoria
33839,28094072,Houston,TX,11011 Carlton Dr,77047.0,29.639648,95.366730,2.0,2.0,SINGLE_FAMILY,...,,,False,,"Price cut: $10,000 (Oct 02)",priceCut,https://www.zillow.com/homedetails/11011-Carlt...,11011-Carlton-Dr-Houston-TX-77047.0/28094072_z...,,Harris


## Property Specific Calculations

In [49]:
# Calculate Price per Square Foot
# Calculate Price per Square Foot and round to 2 decimal places
listings['pricePerSqft'] = (listings['price'] / listings['sqft']).round(2)

# Calculate Beds to Baths Ratio, round to 1 decimal place
listings['bedToBathRatio'] = (np.where(listings['baths'] != 0, (listings['beds'] / listings['baths']).round(1), np.nan)).round(1)
#listings

# Add Census Data to listings dataframe

In [50]:
# Assuming both DataFrames have a 'zipcode' column to align on
listings = listings.merge(census[['zipcode', 'medianIncome']], on='zipcode', how='left')
#listings

In [51]:
# Individual Monetary Estiamtions

In [52]:
# Assume a 20% Down Payment of Listing Price
listings['downPayment'] = (listings['price']*.20).round(0)

In [53]:
# Reoder columns

In [54]:
column_order = [
    # Location Information
    'zipID',
    'city',
    'state',
    'streetAddress',
    'zipcode',
    'latitude',
    'longitude',
    'metro',
    'county',

    # Property Details
    'homeType',
    'beds',
    'baths',
    'sqft',
    'acreage',
    'newConstructionType',
    'is_newHome',

    # Pricing Information
    'price',
    'priceChange',
    'zestimate',
    'rentZestimate',
    'taxAssessedValue',
    'pricePerSqft',
    'bedToBathRatio',

    # Listing Information
    'listingType',
    'daysOnZillow',
    'statusText',
    'statusType',
    'isFeaturedListing',
    'isShowcaseListing',
    'brokerName',
    'builderName',
    'isPremierBuilder',
    'flexFieldText',
    'flexFieldType',

    # Date Information
    'datePriceChanged',

    # Monetary Estimations
    'downPayment',
    
    # Income Information
    'medianIncome',

]

# Reorder the DataFrame columns
listings = listings[column_order]

In [55]:
listings

Unnamed: 0,zipID,city,state,streetAddress,zipcode,latitude,longitude,metro,county,homeType,...,isFeaturedListing,isShowcaseListing,brokerName,builderName,isPremierBuilder,flexFieldText,flexFieldType,datePriceChanged,downPayment,medianIncome
0,351078673,Huffman,TX,"1647 Plan, Woodland Lakes",77336.0,30.030027,95.112015,Houston-The Woodlands-Sugar Land,Harris,SINGLE_FAMILY,...,False,False,,Cyrene Homes,True,Lake breezes,homeInsight,,53798.0,82105
1,351078673,Huffman,TX,"1647 Plan, Woodland Lakes",77336.0,30.030027,95.112015,,Harris,SINGLE_FAMILY,...,False,False,,Cyrene Homes,True,Lake breezes,homeInsight,,53798.0,82105
2,28020741,Houston,TX,10407 Royal Oaks Dr,77016.0,29.862747,95.302640,Houston-The Woodlands-Sugar Land,Harris,SINGLE_FAMILY,...,False,True,Corcoran Prestige Realty,,False,Showcase,showcase,,24000.0,42186
3,28020741,Houston,TX,10407 Royal Oaks Dr,77016.0,29.862747,95.302640,,Harris,SINGLE_FAMILY,...,False,True,Corcoran Prestige Realty,,False,Showcase,showcase,,24000.0,42186
4,28020741,Houston,TX,10407 Royal Oaks Dr,77016.0,29.862747,95.302640,,Harris,SINGLE_FAMILY,...,False,True,Corcoran Prestige Realty,,False,Showcase,showcase,,24000.0,42186
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33836,2060332592,Houston,TX,2915 Newington Ln,77047.0,29.620047,95.376785,,Harris,SINGLE_FAMILY,...,False,False,Collective Realty Co.,,False,Modern elegance,homeInsight,,69000.0,70666
33837,28094072,Houston,TX,11011 Carlton Dr,77047.0,29.639648,95.366730,Houston-The Woodlands-Sugar Land,Harris,SINGLE_FAMILY,...,False,False,G. K. Coleman & Son Realty,,False,"Price cut: $10,000 (Oct 02)",priceCut,2024-10-02,36000.0,70666
33838,28094072,Houston,TX,11011 Carlton Dr,77047.0,29.639648,95.366730,Houston-The Woodlands-Sugar Land,Brazoria,SINGLE_FAMILY,...,False,False,G. K. Coleman & Son Realty,,False,"Price cut: $10,000 (Oct 02)",priceCut,2024-10-02,36000.0,70666
33839,28094072,Houston,TX,11011 Carlton Dr,77047.0,29.639648,95.366730,,Harris,SINGLE_FAMILY,...,False,False,G. K. Coleman & Son Realty,,False,"Price cut: $10,000 (Oct 02)",priceCut,2024-10-02,36000.0,70666


# Save File

In [56]:
# Define the file path
csv_file_path = os.path.join(master_dir, master_file)
listings.to_csv(csv_file_path, index=False)