In [1]:
import requests
from parsel import Selector


headers = {
    'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0'
}
response = requests.get('https://www.redfin.com/GA/Atlanta/8-Normandy-Ct-30324/home/24729436', headers=headers)
selector = Selector(response.text)

In [2]:
address = selector.xpath('//span[@data-rf-test-id="abp-homeinfo-homeaddress"]')
r = {
    'locality': address.xpath('.//span/span[@data-rf-test-id="abp-cityStateZip"]/span/text()').get(),
    'postal_code': address.xpath('.//span/span[@data-rf-test-id="abp-cityStateZip"]/span[@class="postal-code"]/text()').get(),
    'region': address.xpath('.//span/span[@data-rf-test-id="abp-cityStateZip"]/span[@class="region"]/text()').get(),
    'street': address.xpath('.//span/span/text()').get(),
}
r

{'locality': 'Atlanta',
 'postal_code': '30324',
 'region': 'GA',
 'street': '8 Normandy Ct '}

In [3]:
# agent name & company
from w3lib.html import remove_tags

agent_basic_details = selector.xpath('//div[@class="agent-info-item"]/div/span')
name = agent_basic_details.xpath('.//a/text()').get()
company = agent_basic_details.xpath('.//span').get()
print(name, remove_tags(company).replace('•', ''), company)

Ronisha Carson   Redfin Corporation  <span> <span class="font-dot">•</span> <!-- -->Redfin Corporation<!-- --> </span>


In [4]:
 # Bath
selector.xpath('//div[@data-rf-test-id="abp-baths"]/div[@class="statsValue"]/text()').get()

'2'

In [5]:
# Beds
selector.xpath('//div[@data-rf-test-id="abp-beds"]/div[@class="statsValue"]/text()').get()

'2'

In [6]:
# Brokerage Compensation
selector.xpath('//span[text()="Buyer\'s Brokerage Compensation"]/following-sibling::span/text()').get()

'3.0%'

In [7]:
# build date
selector.xpath('//span[@data-rf-test-id="abp-yearBuilt"]/span[@class="value"]/text()').get()

'1983'

In [8]:
# description
selector.xpath('//div[@data-rf-test-id="listingRemarks"]/p/span/text()').get()

'This fantastic 2 bedroom, 2 bathroom gem nestled in the gated Walden on Lenox has everything. The fireside family room welcomes you to the home where lots of windows provide openness and natural light. Updated kitchen with lots of cabinets, granite countertops, and stainless appliances. The home also offers a covered balcony, perfect for morning coffee or evening cocktails. Close to everything: the shopping and restaurants of buckhead, Shady Valley Park, Marta Lenox station, and easy access to highways. Excellent community amenities, including a sparkling pool and fitness center. See this one soon, it will not last long. '

In [9]:
# est. mo payment
selector.xpath('//span[text()="Est. Mo. Payment"]/following-sibling::span/text()').get()

'$1,640'

In [10]:
# estimated payment & breakdown
calc = selector.xpath('//div[@data-rf-test-name="mc-summary"]/div')
monthly = calc.xpath('.//p[contains(@class, "title")]/text()').get()
estimated_payment = {'monthly': monthly}
for row in calc.xpath('.//div[contains(@class, "Row")]'):
    key = row.xpath('.//span/div/span/text()').get()
    value = row.xpath('.//span[2]/text()').get()
    estimated_payment[key] = value
print(estimated_payment)

{'monthly': '$1,640 per month', 'Principal and Interest': '$858', 'Property Taxes': '$162', 'HOA Dues': '$495', "Homeowners' Insurance": '$125'}


In [11]:
# home facts
hf = {}
s = selector.xpath('//div[contains(@class, "keyDetails--HomeFacts")]/following-sibling::div')
for kvp in s.xpath('.//div[contains(@class, "keyDetail")]'):
    key = kvp.xpath('.//span/text()').get()
    value = kvp.xpath('.//span[2]/text()').get()
    hf[key] = value
print(hf)

{'Status': 'Pending', 'Time on Redfin': '15 days', 'Property Type': 'Condo/Co-op', 'HOA Dues': '$495/month', 'Year Built': '1983', 'Style': 'Traditional', 'Community': 'Walden on Lenox', 'Lot Size': '1,437 Sq. Ft.', 'MLS#': '6845921'}


In [12]:
# lot size
selector.xpath('//span[@data-rf-test-id="abp-lotSize"]/span[@class="value"]/text()').get()

'1,437 Sq. Ft.'

In [117]:
# offer stats
_os = {}
div_os = selector.xpath('//div[@class="offer-stats"]')
for td in div_os.xpath('.//td'):
    title = td.xpath('.//span/text()').get()
    value = td.xpath('.//span[2]/text()').get()
    _os[title] = value
print(os)

Median List Price $262K
Avg. # Offers 1
Median $ / Sq. Ft. $175
Avg. Down Payment —
Median Sale / List 100.0%
# Sold Homes 1


In [13]:
# price
selector.xpath('//div[@data-rf-test-id="abp-price"]/div/div/span[2]/text()').get()

'259,000'

In [14]:
# price/sqft
selector.xpath('//div[@data-rf-test-id="abp-sqFt"]/span/div/text()').get()

'$179 / Sq. Ft.'

In [15]:
# property details
_pd = {}
pd = selector.xpath('//div[@data-rf-test-id="propertyDetails"]')
content = pd.xpath('.//div[contains(@class, "sectionContentContainer")]')
for sgt, sgc in zip(
    content.xpath('.//div[@class="super-group-title"]'),
    content.xpath('.//div[@class="super-group-content"]')
):
    title = sgt.xpath('.//text()').get()
    _pd[title] = {}
    for ag in sgc.xpath('.//div[@class="amenity-group"]'):
        amenity_group_title = remove_tags(ag.xpath('.//h3').get())
        amenities = [remove_tags(a) for a in ag.xpath('.//li').getall()]
        _pd[title][amenity_group_title] = amenities
        # print(amenities)
print(_pd)

{'Virtual Tour, Parking / Garage, Multi-Unit Information, Homeowners Association': {'Virtual Tour': ['Virtual Tour (External Link)', 'Virtual Tour PP (External Link)'], 'Parking Information': ['Parking Features: Assigned, Parking Lot'], 'Multi-Unit Information': ['# Of Units In Community: 137'], 'Homeowners Association Information': ['Has HOA', 'Association Fee: $495', 'Association Fee Frequency: Monthly', 'Association Fee Includes: Insurance, Maintenance Exterior, Maintenance Grounds, Pest Control, Security, Sewer, Swim/Tennis, Termite, Trash, Water', 'HOA Rent Restrictions']}, 'Interior Features': {'Bedroom Information': ['# of Bedrooms (Main): 2', 'Roomate Floor Plan, Split Bedroom Plan'], 'Bathroom Information': ['# of Baths (Full): 2', '# of Main Baths (Full): 2', 'Master Bath Features: Double Vanity, Shower Only'], 'Fireplace Information': ['# of Fireplaces: 1', 'Gas Starter, Living Room'], 'Heating &amp; Cooling': ['Cooling: Ceiling Fan(s), Central Air', 'Heating: Central, Hot W

In [16]:
# public facts
_pf = {}
div_pr = selector.xpath('//div[@data-rf-test-id="publicRecords"]')
for row in div_pr.xpath('.//div[@class="table-row"]'):
    key = row.xpath('.//span/text()').get()
    value = row.xpath('./div/text()').get()
    _pf[key] = value
print(_pf)

{'Beds': '2', 'Baths': '2', 'Finished Sq. Ft.': '1,450', 'Unfinished Sq. Ft.': '—', 'Total Sq. Ft.': '1,450', 'Stories': '1', 'Lot Size': '—', 'Style': 'Condo/Co-op', 'Year Built': '1983', 'Year Renovated': '—', 'County': 'Fulton County', 'APN': '17 000700070083'}


In [116]:
scores = selector.xpath('//div[@class="walk-score"]')
for name, value in zip(
    scores.xpath('.//div[contains(@class, "walkscore-trademark")]/text()').getall(),
    scores.xpath('.//span[contains(@class, "value")]/text()').getall()
):
    print(name, value)

Walk Score® 26
Transit Score® 44
Bike Score® 18


In [17]:
# Sq. Ft
selector.xpath('//div[@data-rf-test-id="abp-sqFt"]/span/span/text()').get()

'1,450'

In [18]:
# status
selector.xpath('//span[text()="Status"]/following-sibling::span/text()').get()

'Pending'

In [19]:
# time on redfin
selector.xpath('//span[text()="Time on Redfin"]/following-sibling::span/text()').get()

'15 days'