In [10]:
import requests, sys
from bs4 import BeautifulSoup
import re
import json
import pandas as pd
import numpy as np
import time
from lxml import html
from tqdm import tqdm
from datetime import datetime

In [15]:
def get_zillow_listings(pid, nIdx, url, file_dir):
    now = datetime.now()  # current date and time
    date_time = now.strftime("%m/%d/%Y, %H:%M:%S")

    out_file_bldg = file_dir + 'zillow_listings_dtl_out_blgds_1.csv'
    out_file_othr = file_dir + 'zillow_listings_dtl_out_othrs_1.csv'

    req_headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Connection': 'close',
        'Host': 'www.zillow.com',
        'Accept': 'text/html, application/xhtml+xml, application/xml; q=0.9, */*; q=0.8',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.8',
        'upgrade-insecure-requests': '1',
        'Cache-Control': 'max-age=0',
        'Cookie': 'AWSALB=update_your_cookie_here',
        'user-agent': 'Mozilla/2.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
    }
    req_headers['Referer'] = url
    # print(response.status_code)

    try:
        response = requests.get(url, headers=req_headers)
        # print(response.status_code)
        resp_code = response.status_code
        resp_text = response.text

        if 'name="robots"' in resp_text:
            print(f"[DEBUG] CAPTCHA")
            sys.exit()

        soup = BeautifulSoup(resp_text, 'html.parser')
        # print(soup.title)

        script = soup.find('script', {'id': '__NEXT_DATA__',
                                      'type': 'application/json'}).text
        dict_1 = json.loads(script)
        sPage = dict_1['page'][1:]  # get rid of the leading "/"

        if sPage == 'building':
            # Get Listing Details for buildings
            get_lstng_dtl_bldng(pid, nIdx, url, out_file_bldg, dict_1, date_time)
        else:
            # Get Listing Details for others
            get_lstng_dtl_other(pid, nIdx, url, out_file_othr, dict_1, date_time)

    except Exception as e:
        print(f"[FAILURE] [{date_time}] PID# {nIdx}: {url} [ERROR] {e}")
        return

    # debug print
    # for k in dctPdtl: print("{0:25} {1}".format(k, dctPdtl[k]))

def get_lstng_dtl_bldng(pid, nIdx, url, out_file, dict_1, date_time):
    dctPdtl = {}
    dctPdtl['pid'] = pid

    dict_2 = dict_1['props']['pageProps']['initialData']['building']

    lstKeys = ['zpid', '__typename', 'buildingType', 'streetAddress', 'latitude', 'longitude', 'city', 'state',
               'zipcode',
               'county', 'buildingName', 'buildingPhoneNumber', 'buildingAttributes', 'amenitySummary', 'homeTypes']

    for l in lstKeys:
        if l in dict_2.keys():
            dctPdtl[l] = dict_2[l]
        else:
            dctPdtl[l] = ''

    lstUnits = dict_2['ungroupedUnits']

    # lstUnitPID, lstUnitNum, lstUnitURL, lstUnitTyp, lstUnitLstTyp = [], [], [], [], []
    lstUnitPID, lstUnitURL = [], []

    for u in lstUnits:
        lstUnitPID.append(u['zpid'])
        # lstUnitNum.append(u['unitNumber'])
        lstUnitURL.append(u['hdpUrl'])
        # lstUnitTyp.append(u['__typename'])
        # lstUnitLstTyp.append(u['listingType'])

    dctPdtl['unit_pid'] = lstUnitPID
    # dctPdtl['unit_num'] = lstUnitNum
    dctPdtl['unit_url'] = lstUnitURL
    # dctPdtl['unit_typ'] = lstUnitTyp
    # dctPdtl['unit_lst_typ'] = lstUnitLstTyp

    row = list(dctPdtl.values())
    df = pd.DataFrame(row).T
    df.to_csv(out_file, mode='a', index=False, header=False)

    print(f"[SUCCESS] [{date_time}] PID# {nIdx}: {url}")


def get_lstng_dtl_other(pid, nIdx, url, out_file, dict_1, date_time):
    # Dictionary to hold all the fields to be extracted for this pid.
    dctPdtl = {}
    dctPdtl['pid'] = pid

    s = dict_1['props']['pageProps']['gdpClientCache']
    dict_2 = json.loads(s)
    key_dict_2 = list(dict_2.keys())[0]
    dict_3 = dict_2[key_dict_2]
    dict_4 = dict_3['property']

    lstKeys = \
        ['zpid', 'homeType', 'streetAddress', 'city', 'state', 'zipcode', 'latitude', 'longitude', 'county', 'country',
         'parcelId', 'bedrooms', 'bathrooms', 'zestimate', 'rentZestimate', 'yearBuilt', 'livingArea',
         'livingAreaValue',
         'livingAreaUnitsShort', 'lotSize', 'lotAreaValue', 'lotAreaUnits', 'currency', 'taxAssessedValue',
         'taxAssessedYear',
         'monthlyHoaFee', 'propertyTaxRate', 'lastSoldPrice', 'dateSoldString', 'parentRegion', 'neighborhoodRegion',
         'building', 'boroughId', 'providerListingID', 'hdpUrl']

    for l in lstKeys:
        if l in dict_4.keys():
            dctPdtl[l] = dict_4[l]
        else:
            dctPdtl[l] = ''

    lstKeys = \
        ['appliances', 'heating', 'cooling', 'fireplaceFeatures', 'fireplaces', 'flooring', 'levels', 'stories',
         'storiesTotal', 'ownershipType', 'parkingCapacity', 'parkingFeatures', 'otherParking', 'roofType', 'rooms',
         'propertyCondition', 'constructionMaterials', 'exteriorFeatures', 'architecturalStyle', 'waterView',
         'waterViewYN', 'windowFeatures', 'hasAdditionalParcels', 'hasPetsAllowed', 'hasRentControl', 'hasHomeWarranty',
         'isNewConstruction', 'hasAssociation', 'hasAttachedGarage', 'hasAttachedProperty', 'hasCooling', 'hasCarport',
         'hasElectricOnProperty', 'hasFireplace', 'hasGarage', 'hasHeating', 'hasLandLease', 'hasOpenParking',
         'hasSpa', 'hasPrivatePool', 'hasView', 'hasWaterfrontView', 'elementarySchool', 'elementarySchoolDistrict']

    dict_5 = dict_4['resoFacts']

    for l in lstKeys:
        if l in dict_5.keys():
            dctPdtl[l] = dict_5[l]
        else:
            dctPdtl[l] = ''

    row = list(dctPdtl.values())
    df = pd.DataFrame(row).T
    df.to_csv(out_file, mode='a', index=False, header=False)

    print(f"[SUCCESS] [{date_time}] PID# {nIdx}: {url}")

In [16]:
# main call

if __name__ == "__main__":

    inp_dir, out_dir = '../data_inp/', '../data_out/'
    inp_file = inp_dir + 'zillow_listings_url_inp_2.csv'
    #out_file = out_dir + 'zillow_listings_dtl_out.csv'
    nSleep=5

    df = pd.read_csv(inp_file)
    dct = df.to_dict('index')
    #lstKeys = df.to_dict('index').keys()

    for nIdx in dct.keys():
        #nURLIdx += 1
        pid = dct[nIdx]['property_id']
        url = dct[nIdx]['url']

        get_zillow_listings(pid, nIdx, url, out_dir)

        # nSleep += 2
        # if nSleep>=60: nSleep=10 #Reset sleep time back to 10seconds
        time.sleep(nSleep)

[SUCCESS] [05/01/2023, 22:41:07] PID# 0: https://www.zillow.com/homedetails/421-Angela-St-Key-West-FL-33040/306089930_zpid/
[SUCCESS] [05/01/2023, 22:41:13] PID# 1: https://www.zillow.com/homedetails/325-E-Gulf-Beach-Dr-Eastpoint-FL-32328/44756541_zpid/
[SUCCESS] [05/01/2023, 22:41:19] PID# 2: https://www.zillow.com/homedetails/1125-Seasons-Blvd-Kissimmee-FL-34746/69319229_zpid/
[SUCCESS] [05/01/2023, 22:41:24] PID# 3: https://www.zillow.com/homedetails/4295-Beachside-Two-Dr-Miramar-Beach-FL-32550/2138231598_zpid/
[SUCCESS] [05/01/2023, 22:41:30] PID# 4: https://www.zillow.com/homedetails/6800-Sunset-Way-APT-404-Saint-Pete-Beach-FL-33706/47287895_zpid/
[SUCCESS] [05/01/2023, 22:41:36] PID# 5: https://www.zillow.com/homedetails/16787-Perdido-Key-Dr-APT-A301-Pensacola-FL-32507/44703963_zpid/
[SUCCESS] [05/01/2023, 22:41:42] PID# 6: https://www.zillow.com/homedetails/3000-Gulf-Dr-APT-1-Holmes-Beach-FL-34217/45790038_zpid/
[SUCCESS] [05/01/2023, 22:41:48] PID# 7: https://www.zillow.com/hom

[FAILURE] [05/01/2023, 22:46:03] PID# 52: https://www.zillow.com/homedetails/200-Henderson-Resort-Way-Destin-FL-32541/245168909_zpid/ [ERROR] [Errno 13] Permission denied: '../data_out/zillow_listings_dtl_out_othrs_1.csv'
[FAILURE] [05/01/2023, 22:46:09] PID# 53: https://www.zillow.com/homedetails/200-Henderson-Resort-Way-Destin-FL-32541/245168909_zpid/ [ERROR] [Errno 13] Permission denied: '../data_out/zillow_listings_dtl_out_othrs_1.csv'
[FAILURE] [05/01/2023, 22:46:14] PID# 54: https://www.zillow.com/homedetails/16551-Perdido-Key-Dr-UNIT-302-Perdido-Key-FL-32507/80764633_zpid/ [ERROR] [Errno 13] Permission denied: '../data_out/zillow_listings_dtl_out_othrs_1.csv'
[FAILURE] [05/01/2023, 22:46:20] PID# 55: https://www.zillow.com/homedetails/9900-Thomas-Dr-UNIT-1331-Panama-City-Beach-FL-32408/87630299_zpid/ [ERROR] [Errno 13] Permission denied: '../data_out/zillow_listings_dtl_out_othrs_1.csv'
[FAILURE] [05/01/2023, 22:46:26] PID# 56: https://www.zillow.com/b/23223-front-beach-rd-panam

[FAILURE] [05/01/2023, 22:51:29] PID# 109: https://www.zillow.com/homedetails/11600-1st-Ave-Marathon-FL-33050/2141552682_zpid/ [ERROR] 'NoneType' object has no attribute 'text'
[SUCCESS] [05/01/2023, 22:51:35] PID# 110: https://www.zillow.com/homedetails/2811-Atlantic-Ave-UNIT-202-Fernandina-Beach-FL-32034/82226886_zpid/
[SUCCESS] [05/01/2023, 22:51:41] PID# 111: https://www.zillow.com/homedetails/1246-Fulgur-St-UNIT-406-Sanibel-FL-33957/45527978_zpid/
[SUCCESS] [05/01/2023, 22:51:47] PID# 112: https://www.zillow.com/homedetails/4816-Cayview-Ave-Orlando-FL-32819/84648371_zpid/
[SUCCESS] [05/01/2023, 22:51:52] PID# 113: https://www.zillow.com/homedetails/5036-Shoreway-Loop-Orlando-FL-32819/84657337_zpid/
[SUCCESS] [05/01/2023, 22:51:58] PID# 114: https://www.zillow.com/homedetails/15625-Front-Beach-Rd-UNIT-1905-Panama-City-Beach-FL-32413/81787128_zpid/
[SUCCESS] [05/01/2023, 22:52:03] PID# 115: https://www.zillow.com/homedetails/111-Sycamore-Ave-Anna-Maria-FL-34216/45784976_zpid/
[SUCCE

[SUCCESS] [05/01/2023, 22:57:15] PID# 169: https://www.zillow.com/homedetails/5830-Midnight-Pass-Rd-74-Sarasota-FL-34242/47519591_zpid/
[SUCCESS] [05/01/2023, 22:57:21] PID# 170: https://www.zillow.com/homedetails/1160-Scenic-Gulf-Dr-A408-Miramar-Beach-FL-32550/55743886_zpid/
[FAILURE] [05/01/2023, 22:57:27] PID# 171: https://www.zillow.com/homedetails/5043-Alta-Vista-Ave-Saint-Augustine-FL-32080/47771495_zpid/ [ERROR] 'NoneType' object has no attribute 'text'
[SUCCESS] [05/01/2023, 22:57:32] PID# 172: https://www.zillow.com/homedetails/2996-Scenic-Highway-98-E-702-Destin-FL-32541/45972751_zpid/
[SUCCESS] [05/01/2023, 22:57:38] PID# 173: https://www.zillow.com/homedetails/356-Rookery-Ct-Marco-Island-FL-34145/43774466_zpid/
[SUCCESS] [05/01/2023, 22:57:44] PID# 174: https://www.zillow.com/homedetails/5075-Tideview-Cir-Orlando-FL-32819/84648005_zpid/
[SUCCESS] [05/01/2023, 22:57:49] PID# 175: https://www.zillow.com/homedetails/4804-Cayview-Ave-308-Orlando-FL-32819/2096177205_zpid/
[SUCCE

[SUCCESS] [05/01/2023, 23:03:01] PID# 230: https://www.zillow.com/homedetails/87-Plimsoll-Way-Santa-Rosa-Beach-FL-32459/121168772_zpid/
[SUCCESS] [05/01/2023, 23:03:06] PID# 231: https://www.zillow.com/homedetails/19-Crystal-Ct-Santa-Rosa-Beach-FL-32459/48121568_zpid/
[SUCCESS] [05/01/2023, 23:03:13] PID# 232: https://www.zillow.com/homedetails/69-Pointe-Dr-Santa-Rosa-Beach-FL-32459/48113092_zpid/
[SUCCESS] [05/01/2023, 23:03:18] PID# 233: https://www.zillow.com/homedetails/631-Nerita-St-5A-Sanibel-FL-33957/2096091183_zpid/
[SUCCESS] [05/01/2023, 23:03:24] PID# 234: https://www.zillow.com/homedetails/627-Nerita-St-UNIT-B-Sanibel-FL-33957/45527853_zpid/
[SUCCESS] [05/01/2023, 23:03:29] PID# 235: https://www.zillow.com/b/631-nerita-st-sanibel-fl-5XwDn9/
[SUCCESS] [05/01/2023, 23:03:35] PID# 236: https://www.zillow.com/homedetails/8247-Estero-Blvd-Fort-Myers-Beach-FL-33931/66146909_zpid/
[SUCCESS] [05/01/2023, 23:03:41] PID# 237: https://www.zillow.com/homedetails/7298-A1a-S-Saint-Augusti

[SUCCESS] [05/01/2023, 23:08:48] PID# 291: https://www.zillow.com/homedetails/1332-N-Fletcher-Ave-Fernandina-Beach-FL-32034/45935394_zpid/
[SUCCESS] [05/01/2023, 23:08:54] PID# 292: https://www.zillow.com/homedetails/8501-Gulf-Blvd-UNIT-15F-Navarre-Beach-FL-32566/90165404_zpid/
[SUCCESS] [05/01/2023, 23:08:59] PID# 293: https://www.zillow.com/homedetails/Westwinds-Dr-Miramar-Beach-FL-32550/251145763_zpid/
[SUCCESS] [05/01/2023, 23:09:05] PID# 294: https://www.zillow.com/homedetails/10513-Front-Beach-Rd-UNIT-306-Panama-City-Beach-FL-32407/42800150_zpid/
[SUCCESS] [05/01/2023, 23:09:11] PID# 295: https://www.zillow.com/homedetails/2800-Estero-Blvd-APT-302-Fort-Myers-Beach-FL-33931/45528699_zpid/
[SUCCESS] [05/01/2023, 23:09:17] PID# 296: https://www.zillow.com/homedetails/9815-Us-Highway-98-W-A1700-Miramar-Beach-FL-32550/48125404_zpid/
[SUCCESS] [05/01/2023, 23:09:22] PID# 297: https://www.zillow.com/homedetails/10811-Front-Beach-Rd-UNIT-407-Panama-City-Beach-FL-32407/70734408_zpid/
[FAI

[SUCCESS] [05/01/2023, 23:14:28] PID# 351: https://www.zillow.com/homedetails/5142-Adelaide-Dr-Kissimmee-FL-34746/131681550_zpid/
[SUCCESS] [05/01/2023, 23:14:34] PID# 352: https://www.zillow.com/homedetails/850-A1a-Beach-Blvd-UNIT-11-Saint-Augustine-FL-32080/47767722_zpid/
[SUCCESS] [05/01/2023, 23:14:40] PID# 353: https://www.zillow.com/homedetails/900-Gulf-Blvd-APT-108-Indian-Rocks-Beach-FL-33785/47056951_zpid/
[SUCCESS] [05/01/2023, 23:14:45] PID# 354: https://www.zillow.com/homedetails/100-Matthew-Blvd-Destin-FL-32541/195124855_zpid/
[SUCCESS] [05/01/2023, 23:14:51] PID# 355: https://www.zillow.com/homedetails/8008-King-Palm-Cir-Kissimmee-FL-34747/53801859_zpid/
[SUCCESS] [05/01/2023, 23:14:56] PID# 356: https://www.zillow.com/homedetails/21014-Front-Beach-Rd-B-Panama-City-Beach-FL-32413/251474657_zpid/
[SUCCESS] [05/01/2023, 23:15:02] PID# 357: https://www.zillow.com/homedetails/1800-Atlantic-Blvd-Key-West-FL-33040/2141499332_zpid/
[SUCCESS] [05/01/2023, 23:15:08] PID# 358: https

[SUCCESS] [05/01/2023, 23:20:15] PID# 412: https://www.zillow.com/homedetails/8300-Estero-Blvd-APT-303-Fort-Myers-Beach-FL-33931/45387976_zpid/
[SUCCESS] [05/01/2023, 23:20:20] PID# 413: https://www.zillow.com/homedetails/7712-Comrow-St-Kissimmee-FL-34747/66191158_zpid/
[SUCCESS] [05/01/2023, 23:20:26] PID# 414: https://www.zillow.com/homedetails/13999-Gulf-Blvd-401-Madeira-Beach-FL-33708/81409708_zpid/
[SUCCESS] [05/01/2023, 23:20:31] PID# 415: https://www.zillow.com/homedetails/381-Santa-Rosa-Blvd-UNIT-C306-Fort-Walton-Beach-FL-32548/45974756_zpid/
[SUCCESS] [05/01/2023, 23:20:38] PID# 416: https://www.zillow.com/homedetails/1048-Highway-98-E-UNIT-1404-Destin-FL-32541/63839861_zpid/
[SUCCESS] [05/01/2023, 23:20:43] PID# 417: https://www.zillow.com/homedetails/15300-Emerald-Coast-Pkwy-UNIT-1404-Destin-FL-32541/52954770_zpid/
