<a href="https://colab.research.google.com/github/grasael/PPCIntern/blob/main/New_Real_Estate_Allegheny_County.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Zillow Property Search
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1afLUQVYLIhv0vDB-GNuYgxfZrJhTzSM5?usp=sharing)

## Overview
| Detail Tag            | Information                                                                                        |
|-----------------------|----------------------------------------------------------------------------------------------------|
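| External References   | Scrapeak Zillow scraper API; USPS address validation API (`usps-api`) |
| Input Datasets        | Zillow sold listings for ZIP code 15213 (Pittsburgh, PA), fetched through Scrapeak |
| Output Datasets       | Street addresses extracted from the listings (`df_extracted`) |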
| Input Data Source     | Pandas DataFrame |
| Output Data Source    | Pandas DataFrame |

Reference: https://www.youtube.com/watch?v=bcZe01LhdFc&t=132s&ab_channel=TechinRealEstate


## <font color="blue">Install Packages</font>

In [None]:
pip install usps-api


Collecting usps-api
  Downloading usps-api-0.5.tar.gz (5.0 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting xmltodict (from usps-api)
  Downloading xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Building wheels for collected packages: usps-api
  Building wheel for usps-api (setup.py) ... [?25l[?25hdone
  Created wheel for usps-api: filename=usps_api-0.5-py3-none-any.whl size=5893 sha256=fc71a6a48544fa8c2b55e36ccded53c4d744962f724d0f81c1367bc62d244476
  Stored in directory: /root/.cache/pip/wheels/bb/99/fd/b92ad0410977916aa50fe09b8d5a0cc8a19be86445dbe07ba0
Successfully built usps-api
Installing collected packages: xmltodict, usps-api
Successfully installed usps-api-0.5 xmltodict-0.13.0


## <font color="blue">Imports</font>

In [None]:
import os
import re
import warnings
from getpass import getpass

import numpy as np
import pandas as pd
import plotly.express as px
import requests
from google.colab import output, drive, files # specific to Google Colab
from usps import USPSApi, Address

# settings
# NOTE(review): this silences every warning for the rest of the notebook,
# including pandas deprecation / FutureWarning messages -- consider narrowing
# the filter to the specific warnings that are known to be noise.
warnings.filterwarnings("ignore")
# never truncate columns when a DataFrame is displayed
pd.set_option("display.max_columns", None)

## <font color="blue">Functions</font>

In [None]:
def get_listings(api_key, listing_url, timeout=60):
    """Fetch the listings behind a Zillow search URL through the Scrapeak API.

    Args:
        api_key: Scrapeak API key, sent as the ``api_key`` query parameter.
        listing_url: Zillow search-results URL, sent as the ``url`` query
            parameter.
        timeout: Seconds to wait for the server before giving up. ``requests``
            has no default timeout, so without one a stalled connection
            would block the notebook indefinitely.

    Returns:
        requests.Response: The raw HTTP response; call ``.json()`` to decode it.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    url = "https://app.scrapeak.com/v1/scrapers/zillow/listing"
    querystring = {"api_key": api_key, "url": listing_url}

    return requests.get(url, params=querystring, timeout=timeout)

def get_property_detail(api_key, zpid, timeout=60):
    """Fetch the detail record of one property through the Scrapeak API.

    Args:
        api_key: Scrapeak API key, sent as the ``api_key`` query parameter.
        zpid: Zillow property id, sent as the ``zpid`` query parameter.
        timeout: Seconds to wait for the server before giving up. ``requests``
            has no default timeout, so without one a stalled connection
            would block the notebook indefinitely.

    Returns:
        requests.Response: The raw HTTP response; call ``.json()`` to decode it.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    url = "https://app.scrapeak.com/v1/scrapers/zillow/property"
    querystring = {"api_key": api_key, "zpid": zpid}

    return requests.get(url, params=querystring, timeout=timeout)

def get_zpid(api_key, street, city, state, zip_code=None, timeout=60):
    """Look up the Zillow property id (zpid) of an address through the Scrapeak API.

    Args:
        api_key: Scrapeak API key, sent as the ``api_key`` query parameter.
        street: Street line of the address.
        city: City name.
        state: State.
        zip_code: Optional ZIP code. When left as ``None`` the parameter is
            omitted from the request (``requests`` drops ``None`` values).
        timeout: Seconds to wait for the server before giving up. ``requests``
            has no default timeout, so without one a stalled connection
            would block the notebook indefinitely.

    Returns:
        requests.Response: The raw HTTP response; call ``.json()`` to decode it.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    url = "https://app.scrapeak.com/v1/scrapers/zillow/zpidByAddress"
    querystring = {
        "api_key": api_key,
        "street": street,
        "city": city,
        "state": state,
        "zip_code": zip_code,
    }

    return requests.get(url, params=querystring, timeout=timeout)

def split_address(full_address):
    """Split a street address into its street line and its secondary unit.

    The address is cut right after the *last* street-suffix abbreviation it
    contains (``St``, ``Ave``, ``Blvd``, ...), matched as a whole,
    case-sensitive word. Using the last occurrence keeps names such as
    "12 St James St Apt 2" intact ("12 St James St" / "Apt 2").

    Args:
        full_address (str): Street address, e.g. ``"128 N Craig St APT 407"``.

    Returns:
        tuple[str, str]: ``(address1, address2)`` where ``address1`` is the
        text up to and including the suffix and ``address2`` is whatever
        follows it (``""`` when nothing follows). When no known suffix is
        present the whole stripped address is returned as ``address1`` so the
        address is never silently dropped.
    """
    street_suffixes = (
        'Ave', 'Blvd', 'Cir', 'Ct', 'Dr', 'Ln', 'Pl', 'Rd',
        'St', 'Sq', 'Ter', 'Trl', 'Pkwy', 'Hwy',
    )
    suffix_pattern = r'\b(?:' + '|'.join(street_suffixes) + r')\b'

    matches = list(re.finditer(suffix_pattern, full_address))
    if not matches:
        # Unknown street type (e.g. "Way"): keep the address instead of losing it.
        return full_address.strip(), ""

    split_at = matches[-1].end()
    address1 = full_address[:split_at].strip()
    # Drop separator punctuation left over from forms like "St, Apt 2" or "St. Apt 2".
    address2 = full_address[split_at:].lstrip(" ,.").strip()

    return address1, address2

## <font color="blue">Locals & Constants</font>

In [None]:
# Read the Scrapeak API key from the environment, or prompt for it, so the
# secret is never stored in the notebook or committed to version control.
# NOTE(review): the key that used to be hardcoded in this cell has been
# published with the notebook and should be rotated.
api_key = os.environ.get("SCRAPEAK_API_KEY") or getpass("Scrapeak API key: ")

## <font color="blue">Data</font>

### <font color="green">1. Retrieving Addresses </font>

In [None]:
# zillow search url
# The path targets sold listings for pittsburgh-pa-15213; the searchQueryState
# query parameter is URL-encoded JSON carrying the map bounds and filter flags.
# NOTE(review): presumably copied from the browser address bar after setting
# the filters on zillow.com -- confirm before editing it by hand.
listing_url = "https://www.zillow.com/pittsburgh-pa-15213/sold/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22usersSearchTerm%22%3A%2215213%22%2C%22mapBounds%22%3A%7B%22west%22%3A-80.01516565124513%2C%22east%22%3A-79.92418512145997%2C%22south%22%3A40.412972189053114%2C%22north%22%3A40.46967273849898%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A63944%2C%22regionType%22%3A7%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22sort%22%3A%7B%22value%22%3A%22globalrelevanceex%22%7D%2C%22fsba%22%3A%7B%22value%22%3Afalse%7D%2C%22fsbo%22%3A%7B%22value%22%3Afalse%7D%2C%22nc%22%3A%7B%22value%22%3Afalse%7D%2C%22fore%22%3A%7B%22value%22%3Afalse%7D%2C%22cmsn%22%3A%7B%22value%22%3Afalse%7D%2C%22auc%22%3A%7B%22value%22%3Afalse%7D%2C%22rs%22%3A%7B%22value%22%3Atrue%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%2C%22schu%22%3A%7B%22value%22%3Afalse%7D%2C%22sche%22%3A%7B%22value%22%3Afalse%7D%2C%22schm%22%3A%7B%22value%22%3Afalse%7D%2C%22schh%22%3A%7B%22value%22%3Afalse%7D%2C%22schp%22%3A%7B%22value%22%3Afalse%7D%2C%22schr%22%3A%7B%22value%22%3Afalse%7D%2C%22schc%22%3A%7B%22value%22%3Afalse%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A13%7D"
# get listings
# Sends the search URL to the Scrapeak listing endpoint; the HTTP response is
# kept so the following cells can decode and inspect it.
listing_response = get_listings(api_key, listing_url)

In [None]:
# list the top-level fields of the decoded response payload
listing_payload = listing_response.json()
listing_payload.keys()

dict_keys(['is_success', 'data', 'message'])

In [None]:
# did the API flag the request as successful?
listing_payload = listing_response.json()
listing_payload["is_success"]

True

In [None]:
# total number of properties reported for the search
category_totals = listing_response.json()["data"]["categoryTotals"]
num_of_properties = category_totals["cat1"]["totalResultCount"]
print(f"Count of properties: {num_of_properties}")

Count of properties: 824


In [None]:
# flatten the map results into a DataFrame, one row per result
map_results = listing_response.json()["data"]["cat1"]["searchResults"]["mapResults"]
df_listings = pd.json_normalize(map_results)
n_rows, n_cols = df_listings.shape
print(f"Number of rows: {n_rows}")
print(f"Number of columns: {n_cols}")
df_listings

Number of rows: 460
Number of columns: 86


Unnamed: 0,zpid,price,priceLabel,beds,baths,area,statusType,statusText,isFavorite,isUserClaimingOwner,isUserConfirmedClaim,imgSrc,hasImage,visited,listingType,shouldShowZestimateAsPrice,detailUrl,pgapt,sgapt,has3DModel,hasVideo,isHomeRec,address,info6String,hasAdditionalAttributions,isFeaturedListing,isShowcaseListing,availabilityDate,timeOnZillow,latLong.latitude,latLong.longitude,variableData.type,variableData.text,hdpData.homeInfo.zpid,hdpData.homeInfo.streetAddress,hdpData.homeInfo.zipcode,hdpData.homeInfo.city,hdpData.homeInfo.state,hdpData.homeInfo.latitude,hdpData.homeInfo.longitude,hdpData.homeInfo.price,hdpData.homeInfo.dateSold,hdpData.homeInfo.bathrooms,hdpData.homeInfo.bedrooms,hdpData.homeInfo.livingArea,hdpData.homeInfo.homeType,hdpData.homeInfo.homeStatus,hdpData.homeInfo.daysOnZillow,hdpData.homeInfo.isFeatured,hdpData.homeInfo.shouldHighlight,hdpData.homeInfo.zestimate,hdpData.homeInfo.rentZestimate,hdpData.homeInfo.isUnmappable,hdpData.homeInfo.isPreforeclosureAuction,hdpData.homeInfo.homeStatusForHDP,hdpData.homeInfo.priceForHDP,hdpData.homeInfo.isNonOwnerOccupied,hdpData.homeInfo.isPremierBuilder,hdpData.homeInfo.isZillowOwned,hdpData.homeInfo.currency,hdpData.homeInfo.country,hdpData.homeInfo.taxAssessedValue,hdpData.homeInfo.lotAreaValue,hdpData.homeInfo.lotAreaUnit,hdpData.homeInfo.isShowcaseListing,buildingId,lotId,minBeds,minBaths,minArea,unitCount,isBuilding,badgeInfo,canSaveBuilding,hdpData.homeInfo.isRentalWithBasePrice,lotAreaString,streetViewMetadataURL,streetViewURL,hdpData.homeInfo.unit,hdpData.homeInfo.videoCount,hdpData.homeInfo.listing_sub_type.is_FSBA,hdpData.homeInfo.datePriceChanged,hdpData.homeInfo.priceReduction,hdpData.homeInfo.priceChange,hdpData.homeInfo.contingentListingType,plid
0,11407883,$1.00M,$1.00M,6.0,4.0,5255.0,SOLD,Sold,False,False,False,https://photos.zillowstatic.com/fp/c61dc6f88e0...,True,False,,False,/homedetails/4305-Bigelow-Blvd-Pittsburgh-PA-1...,RecentlySold,Unknown Listed By,False,False,False,"4305 Bigelow Blvd, Pittsburgh, PA 15213",Lauren Klein,True,False,False,,6.726636e+09,40.447483,-79.954360,RECENTLY_SOLD,Sold 07/10/2023,11407883.0,4305 Bigelow Blvd,15213,Pittsburgh,PA,40.447483,-79.954360,1000000.0,1.688972e+12,4.0,6.0,5255.0,SINGLE_FAMILY,RECENTLY_SOLD,-1.0,False,False,985200.0,6146.0,False,False,RECENTLY_SOLD,1000000.0,True,False,False,USD,USA,471000.0,9448.164,sqft,False,,,,,,,,,,,,,,,,,,,,,
1,11407340,"$675,000",$675K,4.0,5.0,2085.0,SOLD,Sold,False,False,False,https://photos.zillowstatic.com/fp/ad6fe457f55...,True,False,,False,/homedetails/4433-Schenley-Farms-Ter-Pittsburg...,RecentlySold,RecentChange,False,False,False,"4433 Schenley Farms Ter, Pittsburgh, PA 15213",Lisa Wagner,True,False,False,,7.653167e+09,40.451897,-79.954940,RECENTLY_SOLD,Sold 07/07/2023,11407340.0,4433 Schenley Farms Ter,15213,Pittsburgh,PA,40.451897,-79.954940,675000.0,1.688713e+12,5.0,4.0,2085.0,SINGLE_FAMILY,RECENTLY_SOLD,-1.0,False,False,665300.0,4338.0,False,False,RECENTLY_SOLD,675000.0,True,False,False,USD,USA,133400.0,6451.236,sqft,False,,,,,,,,,,,,,,,,,,,,,
2,11419828,"$290,000",$290K,3.0,2.0,1100.0,SOLD,Sold,False,False,False,https://photos.zillowstatic.com/fp/bb8ce132462...,True,False,,False,/homedetails/519-Cato-St-Pittsburgh-PA-15213/1...,RecentlySold,RecentChange,False,False,False,"519 Cato St, Pittsburgh, PA 15213",Anthony Sciulli,True,False,False,,4.126332e+09,40.433193,-79.955530,RECENTLY_SOLD,Sold 07/06/2023,11419828.0,519 Cato St,15213,Pittsburgh,PA,40.433193,-79.955530,290000.0,1.688627e+12,2.0,3.0,1100.0,SINGLE_FAMILY,RECENTLY_SOLD,-1.0,False,False,349200.0,1900.0,False,False,RECENTLY_SOLD,290000.0,True,False,False,USD,USA,76800.0,1912.284,sqft,False,,,,,,,,,,,,,,,,,,,,,
3,11420187,"$170,000",$170K,4.0,2.0,2152.0,SOLD,Sold,False,False,False,https://photos.zillowstatic.com/fp/60c4331a573...,True,False,,False,/homedetails/3204-Ward-St-Pittsburgh-PA-15213/...,RecentlySold,RecentChange,False,False,False,"3204 Ward St, Pittsburgh, PA 15213",Tina Nobers,True,False,False,,3.533543e+09,40.430786,-79.955055,RECENTLY_SOLD,Sold 07/03/2023,11420187.0,3204 Ward St,15213,Pittsburgh,PA,40.430786,-79.955055,170000.0,1.688368e+12,2.0,4.0,2152.0,SINGLE_FAMILY,RECENTLY_SOLD,-1.0,False,False,120600.0,2389.0,False,False,RECENTLY_SOLD,170000.0,True,False,False,USD,USA,113400.0,4199.184,sqft,False,,,,,,,,,,,,,,,,,,,,,
4,,,,,,,SOLD,For Rent,False,,,https://photos.zillowstatic.com/fp/532db3efc7d...,True,,,,/b/5-bayard-rd-pittsburgh-pa-5XrWYn/,,,False,,False,"5 Bayard Rd, Pittsburgh, PA",,False,False,False,,6.546278e+09,40.452713,-79.942530,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,40.452713--79.94253,1.002196e+09,1.0,1.0,709.0,68.0,True,,False,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,11420319,"$335,000",$335K,6.0,3.0,1960.0,SOLD,Sold,False,False,False,https://photos.zillowstatic.com/fp/77ac9fb2878...,True,False,,False,/homedetails/3225-Parkview-Ave-Pittsburgh-PA-1...,RecentlySold,Unknown Listed By,False,False,False,"3225 Parkview Ave, Pittsburgh, PA 15213",,False,False,False,,,40.430157,-79.953440,RECENTLY_SOLD,Sold 07/22/2020,11420319.0,3225 Parkview Ave,15213,Pittsburgh,PA,40.430157,-79.953440,335000.0,1.595401e+12,3.0,6.0,1960.0,SINGLE_FAMILY,RECENTLY_SOLD,-1.0,False,False,425500.0,2749.0,False,False,RECENTLY_SOLD,335000.0,True,False,False,USD,USA,176800.0,2700.000,sqft,False,,,,,,,,,,,,,,,,,,,,,
456,11528845,"$175,000",$175K,1.0,1.0,1090.0,SOLD,Sold,False,False,False,https://photos.zillowstatic.com/fp/c3219e76f98...,True,False,,False,/homedetails/128-N-Craig-St-APT-407-North-Oakl...,RecentlySold,Unknown Listed By,False,False,False,"128 N Craig St APT 407, North Oakland, PA 15213",Maggie Jayson,True,False,False,,1.269576e+10,40.447884,-79.949040,RECENTLY_SOLD,Sold 07/15/2020,11528845.0,128 N Craig St APT 407,15213,North Oakland,PA,40.447884,-79.949040,175000.0,1.594796e+12,1.0,1.0,1090.0,CONDO,RECENTLY_SOLD,-1.0,False,False,199600.0,1311.0,False,False,RECENTLY_SOLD,175000.0,True,False,False,USD,USA,80700.0,,,False,,,,,,,,,,,,,,Apt 407,,,,,,,
457,11528950,"$390,000",$390K,4.0,3.0,1688.0,SOLD,Sold,False,False,False,https://photos.zillowstatic.com/fp/7fe1f46b644...,True,False,,False,/homedetails/12-Ellsworth-Ter-Pittsburgh-PA-15...,RecentlySold,Unknown Listed By,False,False,False,"12 Ellsworth Ter, Pittsburgh, PA 15213",Todd Mercer,True,False,False,,1.269575e+10,40.449577,-79.946590,RECENTLY_SOLD,Sold 07/15/2020,11528950.0,12 Ellsworth Ter,15213,Pittsburgh,PA,40.449577,-79.946590,390000.0,1.594796e+12,3.0,4.0,1688.0,TOWNHOUSE,RECENTLY_SOLD,-1.0,False,False,479700.0,2846.0,False,False,RECENTLY_SOLD,390000.0,True,False,False,USD,USA,188900.0,2308.680,sqft,False,,,,,,,,,,,,,,,,,,,,,
458,164658134,"$390,000",$390K,3.0,2.5,,SOLD,Sold,False,False,False,https://photos.zillowstatic.com/fp/bbe092b816b...,True,False,,False,/homedetails/12-Ellsworth-Ter-Pittsburgh-PA-15...,RecentlySold,Unknown Listed By,False,False,False,"12 Ellsworth Ter, Pittsburgh, PA 15213",,False,False,False,,,40.449905,-79.946910,RECENTLY_SOLD,Sold 07/15/2020,164658134.0,12 Ellsworth Ter,15213,Pittsburgh,PA,40.449905,-79.946910,390000.0,1.594796e+12,2.5,3.0,,TOWNHOUSE,RECENTLY_SOLD,-1.0,False,False,443700.0,3864.0,False,False,RECENTLY_SOLD,390000.0,True,False,False,USD,USA,7200.0,5560.000,sqft,False,,,,,,,,,,,,,,,,,,,,,


In [None]:
# NOTE(review): superseded by the extraction cell below and kept only for reference.
# This draft never dropped the rows without an address: fillna(0) inserts the
# integer 0, while the check further down compares against the string "0".
# df_listings = pd.json_normalize(listing_response.json()["data"]["cat1"]["searchResults"]["mapResults"]).fillna(0)
# print("Number of rows:", len(df_listings))
# print("Number of columns:", len(df_listings.columns))
# extracted_data = []

# for _, item in df_listings.iterrows():
#     address = item["hdpData.homeInfo.streetAddress"]
#     zipcode = item["hdpData.homeInfo.zipcode"]
#     city = item["hdpData.homeInfo.city"]
#     state = item["hdpData.homeInfo.state"]

#     if address != "0":
#         extracted_data.append({
#             "address": address,
#             "zipcode": zipcode,
#             "city": city,
#             "state": state
#         })

# df_extracted = pd.DataFrame(extracted_data)
# df_extracted


Number of rows: 460
Number of columns: 86


Unnamed: 0,address,zipcode,city,state
0,4305 Bigelow Blvd,15213,Pittsburgh,PA
1,4433 Schenley Farms Ter,15213,Pittsburgh,PA
2,519 Cato St,15213,Pittsburgh,PA
3,3204 Ward St,15213,Pittsburgh,PA
4,0,0,0,0
...,...,...,...,...
455,3225 Parkview Ave,15213,Pittsburgh,PA
456,128 N Craig St APT 407,15213,North Oakland,PA
457,12 Ellsworth Ter,15213,Pittsburgh,PA
458,12 Ellsworth Ter,15213,Pittsburgh,PA


In [None]:
# Flatten the map results into a DataFrame, one row per result.
df_listings = pd.json_normalize(listing_response.json()["data"]["cat1"]["searchResults"]["mapResults"])

street_col = "hdpData.homeInfo.streetAddress"
home_info_cols = [
    street_col,
    "hdpData.homeInfo.zipcode",
    "hdpData.homeInfo.city",
    "hdpData.homeInfo.state",
]

# Keep only the results that carry a street address. A boolean mask replaces
# the former `df[col].fillna(0, inplace=True)` sentinel: that chained in-place
# call does not update the frame under pandas Copy-on-Write, and writing an
# integer into a text column is fragile. df_listings itself is left untouched.
listings_with_address = df_listings.loc[df_listings[street_col].notna(), home_info_cols]

extracted_data = []
for address, zipcode, city, state in listings_with_address.itertuples(index=False, name=None):
    address1, address2 = split_address(address)
    extracted_data.append({
        "address1": address1,
        "address2": address2,
        "zipcode": zipcode,
        "city": city,
        "state": state
    })

# Explicit columns keep the frame's layout stable even when nothing was extracted.
df_extracted = pd.DataFrame(
    extracted_data,
    columns=["address1", "address2", "zipcode", "city", "state"],
)

print("Number of rows:", len(df_extracted))
print("Number of columns:", len(df_extracted.columns))
df_extracted

Number of rows: 420
Number of columns: 5


Unnamed: 0,address1,address2,zipcode,city,state
0,4305 Bigelow Blvd,,15213,Pittsburgh,PA
1,4433 Schenley Farms Ter,,15213,Pittsburgh,PA
2,519 Cato St,,15213,Pittsburgh,PA
3,3204 Ward St,,15213,Pittsburgh,PA
4,203 Tennyson Ave,,15213,Pittsburgh,PA
...,...,...,...,...,...
415,3225 Parkview Ave,,15213,Pittsburgh,PA
416,128 N Craig St,APT 407,15213,North Oakland,PA
417,12 Ellsworth Ter,,15213,Pittsburgh,PA
418,12 Ellsworth Ter,,15213,Pittsburgh,PA


In [None]:
# USPS user ID: read from the environment, or prompt for it, so the credential
# is not stored in the notebook.
# NOTE(review): the ID that used to be hardcoded in this cell has been
# published with the notebook and should be replaced.
usps_user_id = os.environ.get("USPS_USER_ID") or getpass("USPS user ID: ")

# Sample address used to try out the USPS address validation call.
address = Address(
    name='current resident',
    address_1='3538 Frazier St',
    city='oakland',
    state='pa',
    zipcode='15213'
)
# NOTE(review): test=True is kept from the original cell -- confirm what it
# switches in usps-api before validating real addresses in bulk.
usps = USPSApi(usps_user_id, test=True)
validation = usps.validate_address(address)
print(validation.result)


{'AddressValidateResponse': {'Address': {'@ID': '0', 'Address1': '-', 'Address2': '3538 FRAZIER ST', 'City': 'OAKLAND', 'State': 'PA', 'Zip5': '15213', 'Zip4': '4402'}}}
