In [100]:
import requests
from bs4 import BeautifulSoup as soup

# constructs the query for webscraping, main variable is the url string
def scrapeData(city, province):
    header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)AppleWebKit/537.36 (KHTML, like Gecko)Chrome/83.0.4103.97 Safari/537.36',        
          'referer':'https://www.zillow.com/homes/for_rent/Manhattan,-New-York,-NY_rb/?searchQueryState=%7B%22pagination'}
 
    # Enter Zillow URL for the city of your preference
    url = 'https://www.zillow.com/homes/for_sale/' + city + ',-' + province + '_rb/'
    html = requests.get(url=url,headers=header)
    html.status_code
    bsobj = soup(html.content,'lxml')
    return bsobj

# webscraping guide for zillow found here https://www.octoparse.com/blog/how-to-scrape-zillow-data#

In [101]:
#grab home price from soup object
def getPrice(bsobj):
    price_list = [] 

    # loop through the price section of the data and extract

    # the text and store it in the list.
    for price in bsobj.findAll('span',{'data-test':'property-card-price'}):
        price = price.get_text()[2::]
        price_list.append(int(price.replace(',','')))
    
    return price_list

In [102]:
def getAddress(bsobj):
    address = []


    # loop through the address section of the data

    # and extract the text and store it in the list.
    for adr in bsobj.findAll('address',{'data-test':'property-card-addr'}):
        address.append(adr.get_text().strip())
    return address

In [151]:
import re
def getMLS(bsobj):
    mls = []


    # loop through the MLS section of the data

    # and extract the text and store it in the list.
    for adr in bsobj.findAll('div',{'class':'StyledPropertyCardDataArea-c11n-8-73-8__sc-yipmu-0 hTcpwx'}):
        try:
            mls.append(re.findall(r"\w+", adr.get_text().strip())[2])
        except:
            mls.append("Null")
    return mls

In [154]:
def getPropertyFeature(bsobj):
    bed = [] 
    bath = []
    footage = []

    # loop through the features of the home (# of bds, ba, and house size) of the data
    for item in bsobj.findAll('span',{'class':'StyledPropertyCardHomeDetails-c11n-8-73-8__sc-1mlc4v9-0 jlVIIO'}):
        x = re.findall(r"\d+", item.get_text().strip().replace(',',''))
        try:
            bed.append(int(x[0]))
        except:
            bed.append(0)
        
        try:
            bath.append(int(x[1]))
        except:
            bath.append(0)
            
        try:
            footage.append(int(x[2]))
        except:
            footage.append(0)
            
    return bed, bath, footage

In [170]:
import pandas as pd
df = pd.DataFrame(columns = ['MLS','City','Province','Address', 'Price', 'NumberOfBeds', 'NumberOfBaths', 'Footage'] )
#some random locations for us to scrape, if scraping 2 cities in the same province, we should switch to a list of tuples
queryLocations = {"SK": "Saskatoon", 
                  "AB": "Edmonton", 
                  "ON": "Mississauga",
                  "NS": "Halifax",
                  "MB": "Winnipeg",
                  "BC": "Vancouver"}

for province in queryLocations:
    city = queryLocations.get(province)
    bsobj = scrapeData(city, province)
    price = getPrice(bsobj)
    address = getAddress(bsobj)
    mls = getMLS(bsobj)
    bed, bath, footage = getPropertyFeature(bsobj)
    
    #build a dataframe for each query
    tempDF = pd.DataFrame()
    tempDF['MLS'] = mls
    tempDF['City'] = city
    tempDF['Province'] = province
    tempDF['Address'] = address
    tempDF['Price'] = price
    tempDF['NumberOfBeds'] = bed
    tempDF['NumberOfBaths'] = bath
    tempDF['Footage'] = footage
    print(tempDF)
    df = df.append(tempDF)

print(df)

        MLS       City Province                                     Address  \
0  SK913993  Saskatoon       SK     207 Lehrer PLACE, Saskatoon, SK S7R 0L4   
1  SK914061  Saskatoon       SK   3104 Ortona STREET, Saskatoon, SK S7M 3R4   
2  SK914086  Saskatoon       SK    739 Hastings COVE, Saskatoon, SK S7V 0G6   
3  SK914075  Saskatoon       SK   334 Crean CRESCENT, Saskatoon, SK S7J 3X2   
4  SK914082  Saskatoon       SK  2435 Rosewood DRIVE, Saskatoon, SK S7V 0Z3   
5  SK914108  Saskatoon       SK          1 Bow COURT, Saskatoon, SK S7K 1H9   
6  SK914132  Saskatoon       SK    46 Peeling AVENUE, Saskatoon, SK S7M 4K4   
7  SK914049  Saskatoon       SK       831 F AVENUE N, Saskatoon, SK S7L 1W6   
8  SK914096  Saskatoon       SK       739 L AVENUE S, Saskatoon, SK S7M 2H8   

    Price  NumberOfBeds  NumberOfBaths  Footage  
0  669900             3              2     1744  
1  619900             3              3     1470  
2  899900             4              3     1837  
3  419900

  df = df.append(tempDF)


        MLS      City Province  \
0  E4320360  Edmonton       AB   
1  E4320364  Edmonton       AB   
2  E4320358  Edmonton       AB   
3  E4320355  Edmonton       AB   
4  E4320309  Edmonton       AB   
5  E4320357  Edmonton       AB   
6  E4320331  Edmonton       AB   
7  E4320291  Edmonton       AB   
8  E4320295  Edmonton       AB   

                                             Address   Price  NumberOfBeds  \
0             14611 95th St NW, Edmonton, AB T5E 3Y7  575000             6   
1            10465 42nd Ave NW, Edmonton, AB T6J 7C7  429900             2   
2            10346 142nd St NW, Edmonton, AB T5N 2P1  393000             5   
3            5512 145th Ave NW, Edmonton, AB T5A 3R3  185000             3   
4            4611 128th Ave NW, Edmonton, AB T5A 2M7  399900             4   
5  7339 S Terwillegar Dr NW #1418, Edmonton, AB T...  130000             1   
6             1711 109th St NW, Edmonton, AB T6J 5Z8  519900             5   
7          13239 Delwood Rd NW, Edm

  df = df.append(tempDF)


        MLS         City Province  \
0  W5827705  Mississauga       ON   
1      Null  Mississauga       ON   
2  W5827674  Mississauga       ON   
3  W5827547  Mississauga       ON   
4      Null  Mississauga       ON   
5  W5827299  Mississauga       ON   
6  W5826773  Mississauga       ON   
7  40331876  Mississauga       ON   
8  40181595  Mississauga       ON   

                                             Address     Price  NumberOfBeds  \
0      485 Meadows Blvd #12, Mississauga, ON L4Z 1H1    824900             3   
1   2929 Aquitaine Ave #405, Mississauga, ON L5N 2C7    500000             2   
2  6860 Glen Erin Dr UNIT 30, Mississauga, ON L5N...    789900             3   
3          325 Webb Dr #310, Mississauga, ON L5B 3Z9    549999             2   
4         1425 Chriseden Dr, Mississauga, ON L5H 1V3   4125000             4   
5              660 Arbor Rd, Mississauga, ON L5G 2J9    999000             4   
6       2358 Cobbinshaw Cir, Mississauga, ON L5N 2G3   1089000       

  df = df.append(tempDF)


         MLS     City Province                                      Address  \
0  202225990  Halifax       NS      55 Kearney Lake Rd, Halifax, NS B3M 2S6   
1  202215459  Halifax       NS      1160 Rockcliffe St, Halifax, NS B3H 3Y6   
2  202226090  Halifax       NS  1650 Granville St #904, Halifax, NS B3J 0E1   
3  202225003  Halifax       NS          68 Hartlen Ave, Halifax, NS B3R 1R6   
4  202223264  Halifax       NS   1233 Purcells Cove Rd, Halifax, NS B3P 1B3   
5  202212337  Halifax       NS   2947 Purcells Cove Rd, Halifax, NS B3P 2G2   
6  202224206  Halifax       NS        13 McFatridge Rd, Halifax, NS B3N 2R2   
7  202225587  Halifax       NS           29 Gateway Rd, Halifax, NS B3M 1M6   
8  202223702  Halifax       NS       26 Springvale Ave, Halifax, NS B3N 2A4   

     Price  NumberOfBeds  NumberOfBaths  Footage  
0   569900             2              2     1886  
1  7450000             4              5     5550  
2   869000             2              2      963  
3   3

  df = df.append(tempDF)


    MLS      City Province                                Address   Price  \
0  Null  Winnipeg       MB    534 Spence St, Winnipeg, MB R3B 2R7  359900   
1  Null  Winnipeg       MB  137 Burrows Ave, Winnipeg, MB R2W 1Z3  219900   

   NumberOfBeds  NumberOfBaths  Footage  
0             4              2     2114  
1             3              2     1380  


  df = df.append(tempDF)


        MLS       City Province                                       Address  \
0  R2738427  Vancouver       BC  1020 Harwood St #1504, Vancouver, BC V6E 4R1   
1  R2738459  Vancouver       BC  1718 Venables St #404, Vancouver, BC V5L 2H4   
2  R2738462  Vancouver       BC   1330 Burrard St #306, Vancouver, BC V6Z 2B8   
3  R2738122  Vancouver       BC        4515 W 14th Ave, Vancouver, BC V6R 2Y5   
4  R2738271  Vancouver       BC        3678 E 25th Ave, Vancouver, BC V5R 1K5   
5  R2738190  Vancouver       BC          3537 Osler St, Vancouver, BC V6H 2W4   
6  R2738436  Vancouver       BC         621 W 51st Ave, Vancouver, BC V6P 1B9   
7  R2697014  Vancouver       BC        1318 Minto Cres, Vancouver, BC V6H 2J5   
8  R2738458  Vancouver       BC         345 E 64th Ave, Vancouver, BC V5X 2M8   

      Price  NumberOfBeds  NumberOfBaths  Footage  
0   1199000             2              2      940  
1    689000             1              1      703  
2    519000             2        

  df = df.append(tempDF)


In [172]:
df.to_csv('properties.csv')

In [171]:
for i in queryLocations:
    print(i)

SK
AB
ON
NS
MB
BC
