# Zillow for a Zipcode


This tool allows users to enter a zipcode, and see the following stats on Zillow:
(1) Mean size in sqft of all properties
(2) Mean no. of bedrooms
(3) Mean price per sqft for houses on sale
(4) Mean price per bedroom for houses on sale
(5) Mean rent
(6) Rent vs buy

In [1]:
## Import beautiful soup to scrape data from Zillow
!pip install beautifulsoup4
import requests
from bs4 import BeautifulSoup



In [2]:
## Import pandas and plotting tools
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
# Make a property dataframe for a zipcode with beautiful soup scraper
# The scraper uses a loop to loop through all pages containing the result
def zip_dataframe(zipcode):
    #zipcode = raw_input(" Type a zipcode ")
    property_list=[]
    count = 1
    url = "https://www.zillow.com/homes/"+zipcode +"_rb/"+ str(count) +'_p/'
    req = requests.get(url)
    soup = BeautifulSoup(req.content) #make a soup for each url
    prop_info = soup.find_all("div",{"class":"zsg-photo-card-caption"})

    while soup.find_all("li", {'class':'zsg-pagination-next'}):
        '''keep running the loop if there is a next bottom
        Save all items in the new page to the property_list'''
        for item in prop_info:
            for i in item.find_all("span"):
                if i.get('class')==['zsg-photo-card-status']:
                    try: 
                        status = i.text
                    except:
                        status = 'NA'               
                if i.get('class')==['zsg-photo-card-price']:
                    try:
                        price = float(i.text.replace("$","").replace("/mo","").replace(",","").replace("+",""))
                    except:
                        price = 0.0
                    #print price
                if i.get('class')==['zsg-photo-card-info']:
                    info = i.text.replace("Studio","0.5 bd")
                    try:
                        bed = float(info.split()[0]) 
                    except:
                        bed = 0.0
                    try:
                        bath = float(info.split()[3])
                    except:
                        bath = 0.0
                    try:
                        size = float(info.split()[6].replace("--","0").replace("sqft","0").replace(",","").replace("+",""))
                    except:
                        size = 0.0
                if i.get('class')==['zsg-photo-card-address']:
                    try:
                        address = i.text
                    except:
                        address = ''
            prop=[status, price, bed, bath, size, address]
            property_list.append(prop)
        
        #Make a new soup if the next page exists    
        count = count + 1
        url = "https://www.zillow.com/homes/"+zipcode +"_rb/"+ str(count) +'_p/'
        req = requests.get(url)
        soup = BeautifulSoup(req.content) #make a soup for each url
        prop_info = soup.find_all("div",{"class":"zsg-photo-card-caption"})
        
    #End the loop for multiple pages of search results, and make a dataframe   
    labels = ['status','price','bed','bath','size','address']
    zipcode_df = pd.DataFrame.from_records(property_list, columns = labels)
    return zipcode_df

#print ' There are ' + str(len(property_list)) + " properties listed in the zipcode of " + zipcode

In [4]:
zip_02420 = zip_dataframe('02420')



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))


In [8]:
zip_02148 = zip_dataframe('02148')

In [9]:
zip_02148.head()

Unnamed: 0,status,price,bed,bath,size,address
0,Apartment For Rent,1400.0,0.5,1.0,500.0,"20 Washington Pl # ID, Malden, MA"
1,For Rent,1400.0,0.0,0.0,0.0,"2 Webster St, Malden, MA"
2,House For Sale,449900.0,3.0,4.0,1846.0,"12 Centennial Cir, Malden, MA"
3,Condo For Rent,1200.0,1.0,1.0,960.0,"260 Main St, Malden, MA"
4,Apartment For Rent,1300.0,1.0,1.0,0.0,"Fellsway E, Malden, MA"


In [10]:
zip_02420.head()

Unnamed: 0,status,price,bed,bath,size,address
0,Apartment For Rent,2200.0,1.0,1.0,800.0,"2 Opi Cir # 2B, Lexington, MA"
1,Apartment For Rent,4500.0,3.0,1.0,0.0,"Mass Ave, Lexington, MA"
2,House For Sale,1475000.0,6.0,5.0,4000.0,"22 Vine St, Lexington, MA"
3,Apartment For Rent,4000.0,3.0,2.0,1727.0,"38 Tyler Rd # 0, Lexington, MA"
4,House For Sale,2698000.0,6.0,8.0,7667.0,"71 Adams St, Lexington, MA"


In [11]:
zip_02420.status.value_counts()

House For Sale               18
Apartment For Rent           15
House For Rent                9
Condo For Rent                2
Make Me Move®                 1
Lot/Land For Sale             1
Pre-Foreclosure               1
Foreclosed                    1
Condo For Sale                1
For Sale by Owner             1
Townhouse For Rent            1
Pre-Foreclosure (Auction)     1
Name: status, dtype: int64

In [12]:
## Make a sub-dataframe for "for_sale" items in the same zipcode
def for_sale(dataframe): #enter the name of a dataframe
    df_for_sale = dataframe[dataframe.status == "House For Sale"]
    return df_for_sale
## Make a sub-dataframe for "for_rent" items in the same zipcode
def for_rent(dataframe): #enter the name of a dataframe
    df_for_rent = dataframe[dataframe.status == "House For Rent"]
    return df_for_rent

In [13]:
for_sale(zip_02148).head()

Unnamed: 0,status,price,bed,bath,size,address
2,House For Sale,449900.0,3.0,4.0,1846.0,"12 Centennial Cir, Malden, MA"
18,House For Sale,379900.0,3.0,2.0,1123.0,"23 Poplar St, Malden, MA"
64,House For Sale,675000.0,5.0,4.0,3081.0,"76 Tea Party Way, Malden, MA"
116,House For Sale,539900.0,5.0,2.0,1981.0,"45 Fairview Ter, Malden, MA"
141,House For Sale,399900.0,3.0,1.0,1339.0,"167 Bainbridge St, Malden, MA"


In [14]:
for_rent(zip_02420).head()

Unnamed: 0,status,price,bed,bath,size,address
14,House For Rent,4000.0,4.0,2.0,1800.0,"6 Rolfe Rd, Lexington, MA"
15,House For Rent,3200.0,5.0,2.5,2800.0,"223 Grove St, Lexington, MA"
16,House For Rent,4400.0,4.0,2.0,1900.0,"5 Lillian Rd, Lexington, MA"
19,House For Rent,2375.0,2.0,1.0,1500.0,"Haskell St, Lexington, MA"
24,House For Rent,4200.0,4.0,2.5,0.0,"99 Lillian Rd, Lexington, MA"


In [15]:
# Function one: mean size of houses on list
def mean_size(dataframe):
    mean_size = dataframe.describe()['size']['mean']
    return str(round(mean_size,2))
mean_size(zip_02148)

'911.61'

In [16]:
# Function two: mean no. of bedrooms for houses on list
def mean_bed(dataframe):
    mean_bed = dataframe.describe()['bed']['mean']
    return str(round(mean_bed,2))
print mean_bed(zip_02148)

1.83


In [17]:
# Function three: mean no. of bathrooms for houses on list
def mean_bath(dataframe):
    mean_bath = dataframe.describe()['bath']['mean']
    return str(round(mean_bath,2))
mean_bath(zip_02148)

'1.26'

In [18]:
# Function four: mean price per square feet [for House for Sale only]
def mean_price_per_sqft_SALE(dataframe):
    mean_price_p_sqft = for_sale(dataframe).describe()['price']['mean']/for_sale(dataframe).describe()['size']['mean']
    return str(round(mean_price_p_sqft,2))
mean_price_per_sqft_SALE(zip_02148)

'265.19'

In [19]:
# Function five: mean price per bedroom [for House for Sale only]
def mean_price_per_bd_SALE(dataframe):
    mean_price_p_bd = for_sale(dataframe).describe()['price']['mean']/for_sale(dataframe).describe()['bed']['mean']
    return str(round(mean_price_p_bd,2))
mean_price_per_bd_SALE(zip_02148)

'136243.88'

In [21]:
# Function six: mean rent 
def mean_rent(dataframe):
    mean_rent = float(for_rent(dataframe).describe()['price']['mean'])
    return str(round(mean_rent,5))
print mean_rent(zip_02148)

2148.57143


In [22]:
# Function seven: rent vs buy ratio
def rent_vs_buy(dataframe):
    mean_price_sale = int(for_sale(dataframe).describe()['price']['mean'])
    mean_price_rent = int(for_rent(dataframe).describe()['price']['mean'])
    rent_vs_buy = float(mean_price_rent)/float(mean_price_sale)
    return str(round(rent_vs_buy,5))
print rent_vs_buy(zip_02148)

0.00441


In [24]:
# Make a new dataframe to compare among zipcodes
zip1 = '02148'
zip2 = '02420'
rowname = ['mean_size','mean_bed','mean_price_per_sqft','mean_price_per_bd','mean_rent','rent_vs_buy']
zip_frame = pd.DataFrame(
                         {zip1: [mean_size(zip_02148),\
                            mean_bed(zip_02148),\
                            mean_price_per_sqft_SALE(zip_02148),\
                            mean_price_per_bd_SALE(zip_02148),\
                            mean_rent(zip_02148),\
                            rent_vs_buy(zip_02148)],\
                          zip2: [mean_size(zip_02420),\
                            mean_bed(zip_02420),\
                            mean_price_per_sqft_SALE(zip_02420),\
                            mean_price_per_bd_SALE(zip_02420),\
                            mean_rent(zip_02420),\
                            rent_vs_buy(zip_02420)]}
                        )
zip_frame.index = rowname
zip_frame

Unnamed: 0,02148,02420
mean_size,911.61,2583.12
mean_bed,1.83,3.63
mean_price_per_sqft,265.19,394.58
mean_price_per_bd,136243.88,373479.59
mean_rent,2148.57143,3850.0
rent_vs_buy,0.00441,0.00189
