# Aggregates the metadata for apartments in an area

In [1]:
from selenium import webdriver
import pandas as pd
import os

In [2]:
apartment_links_path = os.path.relpath("./data/Seattle, WA_links.csv")
apartment_links = pd.read_csv(apartment_links_path)

## Create dictionaries that will represent information groupings 

In [3]:
vendor_info = {
    "propertyName":[],
    "verified":[],
    "street":[],
    "city":[],
    "state":[],
    "zipCode":[],
    "neighborhood": [],
    "reviewScore":[],
    "reviewCount":[],
    "walkScore":[],
    "transitScore":[]
}
 
aptType_info = {
    "propertyName":[],
    "room":[],
    "costRange": []
}

amenites_info = {
    "propertyName":[],
    "amenitytype":[],
    "amenity":[]
}

listing_info = {
    "propertyName":[],
    "beds": [],
    "baths": [],
    "rent": [],
    "sqft": [],
    "availability": [],
    "deposit": [],
    "unit": [],
    "leaseLength": [],
    "name":[],
    "new": [],
    "applyNow" : []
    
}

## Get vendor Information

In [4]:
def getVendorInformation(vendor_info, driver):
    property_class_name = "propertyName"
    property_field = driver.find_element_by_class_name(property_class_name)
    propertyname = property_field.text
    vendor_info["propertyName"].append(propertyname)
    
    verified_class_name = "costarVerified"
    verified = False
    try:
        verified_field = driver.find_element_by_class_name(verified_class_name)
        verified_field = True
    except: 
        verified = False
    vendor_info["verified"].append(verified)
    
    
    propertyAddress_class_name = "propertyAddress"
    propertyAddress_field = driver.find_element_by_class_name(propertyAddress_class_name)
    
    address = propertyAddress_field.find_elements_by_tag_name("span")
    i = 1
    street=""
    if(address[3].text=='–'):
        street = propertyname
        i = 0
    else:
        street = address[0].text
        
    vendor_info["street"].append(street)
    
    city = address[i].text
    vendor_info["city"].append(city)
    
    state = address[i+1].text
    vendor_info["state"].append(state)
    
    zipcode = address[i+2].text
    vendor_info["zipCode"].append(zipcode)
    
    neighborhood_class_name = "neighborhoodAddress"
    neighborhood_field = driver.find_element_by_class_name(neighborhood_class_name)
    neighborhood_a_tag = neighborhood_field.find_element_by_tag_name("a")
    neighborhood = neighborhood_a_tag.text
    vendor_info['neighborhood'].append(neighborhood)
    

    reviews=0
    reviewCount=0
    
    try:
        rating_class_name = "rating"
        rating_field = driver.find_element_by_class_name(rating_class_name)
        reviews = len(rating_field.find_elements_by_class_name("starFullIcon"))
        
        reviewCountstr = rating_field.find_element_by_class_name("reviewCount").text
        reviewCountstr = reviewCountstr.replace('(',"")
        reviewCountstr = reviewCountstr.replace(')',"")
        reviewCount = int(reviewCountstr.split()[0])
        
    except:
        reviews= 0
        reviewCount= 0
        
    vendor_info["reviewScore"].append(reviews)
    vendor_info["reviewCount"].append(reviewCount)
    
    score_class_name = "score"
    score_class_fields = driver.find_elements_by_class_name(score_class_name)
    
    walkScore = score_class_fields[0].text
    vendor_info["walkScore"].append(walkScore)
    
    transitScore = score_class_fields[1].text
    vendor_info["transitScore"].append(transitScore)    

## Get Apartment type Information

In [5]:
def getAptInformation(propertyName, aptType_info, driver):
    apartmentType_class_name = "rentRollup"
    aptType_field = driver.find_elements_by_class_name(apartmentType_class_name)
    
    current = 1
    for aptType in aptType_field:
        aptType_info['propertyName'].append(propertyName)
        data = aptType.text.split('$')
        if( len(data) == 1):
            data = data[0].split(" C")
            data = "C"+data[1]
        
        costRange = data[1]
        aptType_info["costRange"].append(costRange)
        
        room = data[0]
        aptType_info['room'].append(room)
    
    

## Get Amenities Information

In [6]:
def getAmenityInformation(propertyName, amenites_info, driver):
    section_class_name = "specGroup"
    section_field = driver.find_element_by_class_name(section_class_name)
    
    specList_class_name = "specList"
    specList = section_field.find_elements_by_class_name(specList_class_name)
    
    for spec in specList:
        amenitytype = spec.find_element_by_tag_name("h3").text
        
        try:
            p_field = spec.find_element_by_tag_name("p")
            amenites_info["amenity"].append(p.text)
            amenites_info["propertyName"].append(propertyName)
            amenites_info["amenitytype"].append(amenitytype)
        except:
            time.sleep(.001)
        try:
            ul_field = spec.find_element_by_tag_name("ul")
            li_fields = ul_field.find_elements_by_tag_name("li")
            for li in li_fields:
                amenites_info["amenity"].append(li.text)
                amenites_info["propertyName"].append(propertyName)
                amenites_info["amenitytype"].append(amenitytype)
        except:
            time.sleep(.001)
            
        try:
            h4_field = spec.find_element_by_tag_name("h4")
            amenites_info["amenity"].append(h4.text)
            amenites_info["propertyName"].append(propertyName)
            amenites_info["amenitytype"].append(amenitytype)
        except:
            time.sleep(.001)

## Get listing Information

In [7]:
def getListingInformation(propertyName, listing_info, driver):
    table_class_name = "availabilityTable"
    table_field = driver.find_element_by_class_name(table_class_name)
    
    tablerow_class_name = "rentalGridRow"
    table_rows = table_field.find_elements_by_class_name(tablerow_class_name)
    
    for row in table_rows:
        listing_info["propertyName"].append(propertyName)
        
        bed_class_name = "beds"
        bed_field = row.find_element_by_class_name(bed_class_name)
        bed = bed_field.text
        listing_info["beds"].append(bed)
        
        bath_class_name = "baths"
        bath_field = row.find_element_by_class_name(bath_class_name)
        bath = bath_field.text
        listing_info["baths"].append(bath)
            
        rent_class_name = "rent"
        rent_field = row.find_element_by_class_name(rent_class_name)
        rent = rent_field.text
        listing_info["rent"].append(rent)
        
        deposit=""
        try:
            deposit_class_name = "deposit"
            deposit_field = row.find_element_by_class_name(deposit_class_name)
            deposit = deposit_field.text
        except:
            time.sleep(0.001)
        listing_info["deposit"].append(deposit)
        
        unit=""
        try:
            unit_class_name = "unit"
            unit_field = row.find_element_by_class_name(unit_class_name)
            unit = unit_field.text
        except:
            time.sleep(0.001)
        listing_info["unit"].append(unit)
        
        sqft_class_name = "sqft"
        sqft_field = row.find_element_by_class_name(sqft_class_name)
        sqft = sqft_field.text
        listing_info["sqft"].append(sqft)
        
        leaseLength=""
        try:
            leaseLength_class_name = "leaseLength"
            leaseLength_field = row.find_element_by_class_name(leaseLength_class_name)
            leaseLength = leaseLength_field.text
        except:
            time.sleep(.001)
        listing_info["leaseLength"].append(leaseLength)
        
        name=""
        try:
            name_class_name = "name"
            name_field = row.find_element_by_class_name(name_class_name)
            name = name_field.text
        except:
            time.sleep(.001)
        listing_info["name"].append(name)
        
        available=""
        try:
            available_class_name = "available"
            available_field = row.find_element_by_class_name(available_class_name)
            available = available_field.text
        except:
            time.sleep(.001)
        listing_info["availability"].append(available)
        
        new= ""
        try:
            new_class_name = "new"
            new_field = row.find_element_by_class_name(new_class_name)
            new = new_field.text
        except:
            time.sleep(.001)
        listing_info["new"].append(new)
        
        applyNow=""
        try:
            applyNow_class_name = "applyNow"
            applyNow_field = row.find_element_by_class_name(applyNow_class_name)
            applyNow = applyNow_field.text
        except:
            time.sleep(.001)
        listing_info["applyNow"].append(applyNow)
        

## Create Web Driver

In [8]:
driverpath = os.path.relpath("./drivers/chromedriver.exe") 
driver = webdriver.Chrome(executable_path=driverpath)

## load data from links

In [9]:
import time

i = 0
for link in apartment_links.values:
    print(link[1])
    
    driver.get(link[1])
    time.sleep(1)

    getVendorInformation(vendor_info, driver)

    propertyName = vendor_info["propertyName"][i]

    getAptInformation(propertyName, aptType_info, driver)

    getAmenityInformation(propertyName, amenites_info, driver)

    getListingInformation(propertyName, listing_info, driver)
    i+=1


https://www.apartments.com/luna-seattle-wa/yey9vtr/
https://www.apartments.com/elan-41-seattle-wa/84egbw0/
https://www.apartments.com/jackson-apartments-seattle-wa/vblnwkf/
https://www.apartments.com/circa-green-lake-apartments-seattle-wa/bwmn6x8/
https://www.apartments.com/tower-12-seattle-wa/sx3e5w7/
https://www.apartments.com/common-summit-seattle-wa/1kqlte6/
https://www.apartments.com/the-mill-at-first-hill-seattle-wa/3z5s8mg/
https://www.apartments.com/amli-arc-seattle-wa/1yf8vx5/
https://www.apartments.com/hana-apartments-seattle-wa/8mhszb0/
https://www.apartments.com/the-olivian-seattle-wa/srj9ny6/
https://www.apartments.com/amli-south-lake-union-seattle-wa/wec85fk/
https://www.apartments.com/amli-mark24-seattle-wa/twk16f9/
https://www.apartments.com/mckenzie-seattle-wa/tc4dx7b/
https://www.apartments.com/cityline-seattle-wa/l4knj3g/
https://www.apartments.com/the-kennedy-building-seattle-wa/fd6hspr/
https://www.apartments.com/lightbox-seattle-wa/np3qvbp/
https://www.apartments.

https://www.apartments.com/washington-terrace-senior-affordable-apart-seattle-wa/1t52ye5/
https://www.apartments.com/metro-on-first-seattle-wa/ycyc6kv/
https://www.apartments.com/urbana-seattle-wa/hsfjf32/
https://www.apartments.com/2300-elliott-seattle-wa/4n7xxvh/
https://www.apartments.com/youngstown-flats-seattle-wa/7s2qyhy/
https://www.apartments.com/coppins-well-seattle-wa/fhknn4v/
https://www.apartments.com/john-winthrop-apartments-seattle-wa/c886jld/
https://www.apartments.com/taylor-28-seattle-wa/z14p66x/
https://www.apartments.com/aspira-apartments-seattle-wa/f2c4n0r/
https://www.apartments.com/leva-on-market-seattle-wa/qqn6e1z/
https://www.apartments.com/lighthouse-apartments-seattle-wa/x2d837m/
https://www.apartments.com/adell-apartments-seattle-wa/d14ergl/
https://www.apartments.com/union-bay-seattle-wa/l7flz7e/
https://www.apartments.com/angeline-apartments-seattle-wa/66tvzer/
https://www.apartments.com/the-heights-on-capitol-hill-seattle-wa/xpk1vsq/
https://www.apartments

https://www.apartments.com/carolina-court-seattle-wa/pepz1js/
https://www.apartments.com/northline-apartments-seattle-wa/d2scj5l/
https://www.apartments.com/the-galleria-seattle-wa/qctzxyz/
https://www.apartments.com/addison-on-fourth-seattle-wa/6tfx0gt/
https://www.apartments.com/the-blakely-at-echo-lake-senior-55-seattle-wa/cnl684g/
https://www.apartments.com/zindorf-seattle-wa/cmkvh2e/
https://www.apartments.com/beacon-view-seattle-wa/nffsfmn/
https://www.apartments.com/700-broadway-apartments-seattle-wa/09hhwk1/
https://www.apartments.com/janus-apartments-seattle-wa/fbcqkwr/
https://www.apartments.com/wharfside-pointe-seattle-wa/2pml6tc/
https://www.apartments.com/o2-seattle-wa/ynmvz1v/
https://www.apartments.com/avant-apartments-seattle-wa/ntnpjxq/
https://www.apartments.com/sundodger-apartments-seattle-wa/wqqzmgj/
https://www.apartments.com/thai-binh-apartments-seattle-wa/cp71zlf/
https://www.apartments.com/pike-flats-seattle-wa/zep5k7x/
https://www.apartments.com/1404-boylston-s

https://www.apartments.com/the-mill-at-first-hill-seattle-wa/3z5s8mg/
https://www.apartments.com/lakeview-apartments-seattle-wa/jy0e037/
https://www.apartments.com/the-q-apartments-seattle-wa/r27w5z3/
https://www.apartments.com/mio-apartments-seattle-wa/vwsrwjf/
https://www.apartments.com/nova-seattle-wa/zhfzv0x/
https://www.apartments.com/normandy-apartments-seattle-wa/b353wfb/
https://www.apartments.com/spencer-house-seattle-wa/ylfgwlt/
https://www.apartments.com/star-apartments-seattle-wa/xwmfdlp/
https://www.apartments.com/blue-ridge-apartments-seattle-wa/4thgyql/
https://www.apartments.com/the-portland-building-seattle-wa/scvcpk5/
https://www.apartments.com/altamira-apartments-seattle-wa/xq7ntvq/
https://www.apartments.com/hana-apartments-seattle-wa/8mhszb0/
https://www.apartments.com/paul-revere-seattle-wa/hl8fnz1/
https://www.apartments.com/vermont-inn-seattle-wa/7xt12dz/
https://www.apartments.com/725c-16th-ave-seattle-wa/nt7mqeq/
https://www.apartments.com/2100-3rd-ave-seattle

ValueError: invalid literal for int() with base 10: '-'

## Create dataframes 

In [None]:
print(len(amenites_info["propertyName"]))
print(len(amenites_info["amenitytype"]))
print(len(amenites_info["amenity"]))

In [None]:
area = "Seattle, WA"
path = "./data/"+area

vendor_info_df = pd.DataFrame.from_dict(vendor_info)
vendor_info_df.to_csv(path + "_vendor_info.csv")

In [None]:
aptType_info_df = pd.DataFrame.from_dict(aptType_info)
aptType_info_df.to_csv(path + "_aptType_info.csv")

In [None]:
amenites_info_df = pd.DataFrame.from_dict(amenites_info)
amenites_info_df.to_csv(path + "_amenites_info.csv")

In [None]:
listing_info_df = pd.DataFrame.from_dict(listing_info)
listing_info_df.to_csv(path + "_listing_info.csv")

In [None]:
driver.close()