In [17]:
# Dependencies
import os
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [18]:
#Testing
scrape_link = 'https://www.apartments.com/apartments/ca/'

response = requests.get(scrape_link)
soup = BeautifulSoup(response.text, 'html.parser')
result = soup.find_all('div', class_="placardContainer")
result

[<div class="placardContainer" id="placardContainer">
 <ul>
 <li>
 <article class="diamond placard" data-ck="fgm2gzv" data-listingid="y9fg55t" data-url="https://www.apartments.com/homecoming-at-eastvale-eastvale-ca/y9fg55t/">
 <header class="placardHeader">
 <a class="placardTitle js-placardTitle " href="https://www.apartments.com/homecoming-at-eastvale-eastvale-ca/y9fg55t/" title="Homecoming At Eastvale, Eastvale, CA">
 Homecoming At Eastvale</a>
 <img alt="Lewis Management Corp" class="propertyLogo" src="https://images1.apartments.com/i2/8oJ0IbanGmv-y64BtgyDHrIkQjD07arn9GPAfsdd30g/110/lewis-management-corp-logo.png"/>
 <div class="location" title="5464 W Homecoming Cir, Eastvale, CA 91752">5464 W Homecoming Cir, Eastvale, CA 91752</div>
 </header>
 <section class="placardContent">
 <div class="media">
 <div class="placardCarouselImgCount">
 <span class="js-spnImgNum">1</span> / <span class="js-spnImgCount">25</span>
 </div>
 <div class="galleryControl">
 <button class="imageCarouselL

In [19]:
#Set up list to hold response info
link_dict = []
all_property = []

In [20]:
# Loop through x pages of the website with filter of Single House
for scrape_links in [scrape_link.format(page) for page in range(1,2)]:
    res = requests.get(scrape_links)
    new_soup = BeautifulSoup(res.text, 'html.parser')
    new_results = new_soup.find_all('article', class_="placard")
    
    # Loop through returned results
    for result in new_results:
        find_tag = result.find('a')['href']
        link_dict.append(find_tag)

for property_link in link_dict:
    res = requests.get(property_link)
    new_soup = BeautifulSoup(res.text, 'html.parser')
    new_results = new_soup.find_all('div', class_="mainWrapper")
    
    
    for result in new_results:
        try:
            # Identify and return property name of Rental Place
            property_name = result.find('h1', class_="propertyName")
            # Identify and return address information of Rental Place
            clean_property_name = property_name.text.strip().replace("\n", "")
            street_data = result.find('div', class_="propertyAddress")
            street = street_data.text.strip().replace("\n", "").replace("\r", "")
            street_address = street.split(",")
            city = " ".join(street_address[1].split())
            state = " ".join(street_address[2].split())[:2]
            zip_code = " ".join(street_address[2].split())[2:7]
            # Identify and return number of bedroom and price of Rental Place
            bedroom_container = result.find('div', class_="rentRollupContainer")
            all_the_rents = bedroom_container.findAll('span', class_="rentRollup")
            
            bedroom_info = []
            for rent_block in all_the_rents:
                
                bedroom_type = rent_block.find('span', class_="longText").text
                try:
                    price = re.search('\$.* ', rent_block.text)
                    price = price.group(0)[:2]
                except:
                    price = ""
                
                bedroom_info.append({
                    'bedroom_type': bedroom_type,
                    'price': price
                })
            
            rent_obj = {
                "Property_Name_Rent": clean_property_name,
                "City_Rent": city,
                "Type": "Apartment",
                "State_Rent": state,
                "Zip_code": zip_code,
                "Bedroom_Info": bedroom_info,
                
            }
            
            for item in bedroom_info:
                rent_obj[item["bedroom_type"]] = item["price"]
                

        
            all_property.append(rent_obj)
        except AttributeError as e:
            print(e)

# print(all_property)

In [21]:
house_data = pd.DataFrame(all_property)
house_data

Unnamed: 0,1 Bedroom,2 Bedrooms,3 Bedrooms,Bedroom_Info,City_Rent,Property_Name_Rent,State_Rent,Studio,Type,Zip_code
0,"$1,788 –","$1,819 –","$2,191 –","[{'bedroom_type': '1 Bedroom', 'price': '$1,78...",Eastvale,Homecoming At Eastvale,CA,,Apartment,91752
1,"$2,372 –","$3,077 –",,"[{'bedroom_type': 'Studio', 'price': '$1,879 –...",Los Angeles,The Pearl,CA,"$1,879 –",Apartment,90005
2,"$1,450 –","$1,890 –","$2,160 –","[{'bedroom_type': '1 Bedroom', 'price': '$1,45...",Folsom,The Falls at Willow Creek,CA,,Apartment,95630
3,"$2,128 –","$2,986 –",,"[{'bedroom_type': '1 Bedroom', 'price': '$2,12...",Westlake Village,Westcreek Apartments,CA,,Apartment,91362
4,,"$1,085 –",,"[{'bedroom_type': '1 Bedroom', 'price': ''}, {...",Hemet,Vista Gardens Apartments,CA,,Apartment,92543
5,"$1,600 –","$1,930 –",,"[{'bedroom_type': '1 Bedroom', 'price': '$1,60...",Anaheim,The Jackson,CA,,Apartment,92806
6,"$1,118 –","$1,295 –",,"[{'bedroom_type': 'Studio', 'price': ''}, {'be...",Sacramento,Wedgewood Apartments,CA,,Apartment,95831
7,"$2,305 –","$2,650 –",,"[{'bedroom_type': '1 Bedroom', 'price': '$2,30...",Fremont,eaves Fremont,CA,,Apartment,94539
8,"$2,500 –","$3,050 –",,"[{'bedroom_type': 'Studio', 'price': '$1,870 –...",Glendale,Harrison,CA,"$1,870 –",Apartment,91203
9,"$2,390 –","$3,095 –","$3,395 –","[{'bedroom_type': '1 Bedroom', 'price': '$2,39...",San Diego,Torrey Villas Apartment Homes,CA,,Apartment,92130


In [16]:
# Export file as a CSV, without the Pandas index, but with the header
house_data.to_csv("Output/Q2-2018_Apartment_Apartment_Data2.csv", index=False, header=True)