In [3]:
import time
import pandas as pd
import zillow_functions as zl
from bs4 import BeautifulSoup

# Build the list of search terms from the zip codes to scrape.
st = zl.zipcodes_list(
    st_items=[
        '32603',
        '32601',
        '32602',
        '32604',
        '32605',
        '32606',
        '32607',
        '32608',
    ]
)

# Start the browser session using the chromedriver binary at this path.
driver = zl.init_driver('/usr/local/bin/chromedriver')

# Open the Zillow homes page ...
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# ... and click the "buy" button.
zl.click_buy_button(driver)

# Create 11 variables from the scraped HTML data.
# These variables will make up the final output dataframe.
#
# NOTE: rows are appended to this frame positionally (as plain lists), so
# the column order here must match the order in which each observation is
# assembled in the scraping loop: ..., sale_type, sqft, state, url, zip.
# The keys are therefore listed in alphabetical order, which yields the same
# column layout whether pandas sorts dict keys (older releases) or keeps
# their insertion order (current releases). Listing 'state' before 'sqft'
# would swap those two fields in every row on insertion-ordered pandas.
df = pd.DataFrame({'address' : [], 
                   'bathrooms' : [], 
                   'bedrooms' : [], 
                   'city' : [], 
                   'days_on_zillow' : [], 
                   'price' : [], 
                   'sale_type' : [], 
                   'sqft' : [], 
                   'state' : [], 
                   'url' : [], 
                   'zip' : []})

# Get total number of search terms.
num_search_terms = len(st)

# Start the scraping: run one Zillow search per search term and append one
# row to df for every home listing found.
for term_number, search_term in enumerate(st, start=1):
    # Enter search term and execute search.
    if zl.enter_search_term(driver, search_term):
        print("Entering search term number " + str(term_number) + 
              " out of " + str(num_search_terms))
    else:
        print("Search term " + str(term_number) + 
              " failed, moving onto next search term\n***")
        continue
    
    # Check to see if any results were returned from the search.
    # If there were none, move onto the next search.
    if zl.results_test(driver):
        print("Search " + str(search_term) + 
              " returned zero results. Moving onto the next search\n***")
        continue
    
    # Pull the html for each page of search results. Zillow caps results at 
    # 20 pages, each page can contain 26 home listings, thus the cap on home 
    # listings per search is 520.
    raw_data = zl.get_html(driver)
    print(str(len(raw_data)) + " pages of listings found")
    
    # Take the extracted HTML and split it up by individual home listings.
    # NOTE(review): the recorded output of this cell also shows
    # "<n> home listings scraped" lines that nothing here prints --
    # presumably emitted inside zl.get_listings; verify.
    listings = zl.get_listings(raw_data)
    
    # For each home listing, extract the 11 variables that will populate that 
    # specific observation within the output dataframe.
    for listing in listings:
        soup = BeautifulSoup(listing, "lxml")
        
        # List that contains number of beds, baths, and total sqft (and 
        # sometimes price as well).
        card_info = zl.get_card_info(soup)
        
        # Key every value by its column name so a value can never land in
        # the wrong column, whatever order df's columns happen to be in.
        new_obs = {
            # Street Address
            'address': zl.get_street_address(soup),
            'bathrooms': zl.get_bathrooms(card_info),
            'bedrooms': zl.get_bedrooms(card_info),
            'city': zl.get_city(soup),
            # Days on the Market/Zillow
            'days_on_zillow': zl.get_days_on_market(soup),
            'price': zl.get_price(soup, card_info),
            # Sale Type (House for Sale, New Construction, Foreclosure, etc.)
            'sale_type': zl.get_sale_type(soup),
            'sqft': zl.get_sqft(card_info),
            'state': zl.get_state(soup),
            # URL for each house listing
            'url': zl.get_url(soup),
            # Zipcode
            'zip': zl.get_zipcode(soup),
        }
    
        # Append new_obs to df as a new observation, laid out in df's own
        # column order. A column with no matching key raises KeyError
        # instead of silently dropping the listing.
        df.loc[len(df.index)] = [new_obs[column] for column in df.columns]

# Close the webdriver connection.
zl.close_connection(driver)

# Write df to CSV, with the columns arranged in their final output order.
columns = ['address', 'city', 'state', 'zip', 'price', 'sqft', 'bedrooms', 
           'bathrooms', 'days_on_zillow', 'sale_type', 'url']
df = df[columns]

# Name the file after the current local date and time. The stamp is taken
# with a single strftime call so the date and time parts always come from
# the same instant (two separate calls can disagree across midnight).
dt = time.strftime("%Y-%m-%d_%H%M%S")
file_name = dt + ".csv"
df.to_csv(file_name, index = False)

Entering search term number 1 out of 8
1 pages of listings found
7 home listings scraped
***
Entering search term number 2 out of 8
3 pages of listings found
73 home listings scraped
***
Entering search term number 3 out of 8
Search 32602 returned zero results. Moving onto the next search
***
Entering search term number 4 out of 8
Search 32604 returned zero results. Moving onto the next search
***
Entering search term number 5 out of 8
7 pages of listings found
171 home listings scraped
***
Entering search term number 6 out of 8
7 pages of listings found
168 home listings scraped
***
Entering search term number 7 out of 8
7 pages of listings found
166 home listings scraped
***
Entering search term number 8 out of 8
14 pages of listings found
344 home listings scraped
***
