In [6]:
import requests
from bs4 import BeautifulSoup as soup
import re

In [46]:
def scrapeData(city, province):
    url = 'https://www.remax.ca/{}/{}-real-estate'.format(province, city)
    html = requests.get(url=url)
    html.status_code
    bsobj = soup(html.content,'lxml') 
    return bsobj

In [48]:
#grab home price from soup object
def getPrice(bsobj):
    price_list = [] 
    for price in bsobj.findAll('h2',{'class':'listing-card_price__sL9TT'}):
        price = ''.join(re.findall(r"\w+", price.get_text().strip()))
        price_list.append(int(price))
    return price_list

In [49]:
#grab home address from soup object
def getAddress(bsobj):
    price_list = [] 
    for price in bsobj.findAll('div',{'data-cy':'property-address'}):
        price = ' '.join(re.findall(r"[#-,\w]+", price.get_text().strip()))
        price_list.append(price)
    return price_list

In [53]:
#grab home MLS from soup object
def getMLS(bsobj):
    price_list = [] 
    for price in bsobj.findAll('div', {'class': 'listing-card_mlsNumber__6KRDy'},{'data-cy':'property-mls'}):
        price = re.findall(r"\w+", price.get_text().strip())[1]
        price_list.append(price)
    return price_list

In [56]:
#grab number of rooms from soup object
def getPropertyFeature(bsobj):
    bed = [] 
    bath = []
    for price in bsobj.findAll('div', {'class': 'property-details_detailsRow__nilLP'}):
        price = re.findall(r"\d+", price.get_text().strip())
        try:
            bed.append(int(price[0]))
        except:
            bed.append(0)
        
        try:
            bath.append(int(price[1]))
        except:
            bath.append(0)
    return bed,bath

In [57]:
import pandas as pd
df = pd.DataFrame(columns = ['MLS','City','Province','Address', 'Price', 'NumberOfBeds', 'NumberOfBaths'] )
#some random locations for us to scrape, if scraping 2 cities in the same province, we should switch to a list of tuples

queryLocations = [["SK", "Saskatoon"]
                 ,["AB", "Edmonton"]
                 ,["AB", "Calgary"]
                 ,["ON", "Toronto"]
                 ,["ON", "Mississauga"]
                 ,["NS", "Halifax"]
                 ,["BC", "Vancouver"]
                 ,["MB", "Winnipeg"]]

for i in queryLocations:
    province = i[0]
    city = i[1]
    bsobj = scrapeData(city, province)
    price = getPrice(bsobj)
    address = getAddress(bsobj)
    mls = getMLS(bsobj)
    bed, bath = getPropertyFeature(bsobj)
    
    #build a dataframe for each query
    tempDF = pd.DataFrame()
    tempDF['MLS'] = mls
    tempDF['City'] = city
    tempDF['Province'] = province
    tempDF['Address'] = address
    tempDF['Price'] = price
    tempDF['NumberOfBeds'] = bed
    tempDF['NumberOfBaths'] = bath
    print(tempDF)
    df = df.append(tempDF)

print(df)

         MLS       City Province                                    Address  \
0   SK914096  Saskatoon       SK              739 L AVENUE S, Saskatoon, SK   
1   SK914132  Saskatoon       SK           46 Peeling AVENUE, Saskatoon, SK   
2   SK914089  Saskatoon       SK            115 Ash STREET E, Saskatoon, SK   
3   SK914075  Saskatoon       SK          334 Crean CRESCENT, Saskatoon, SK   
4   SK914086  Saskatoon       SK           739 Hastings COVE, Saskatoon, SK   
5   SK914121  Saskatoon       SK             1227 D AVENUE N, Saskatoon, SK   
6   SK914104  Saskatoon       SK         2415 Rosewood DRIVE, Saskatoon, SK   
7   SK914117  Saskatoon       SK         1 927 Heritage VIEW, Saskatoon, SK   
8   SK914082  Saskatoon       SK         2435 Rosewood DRIVE, Saskatoon, SK   
9   SK914134  Saskatoon       SK              323 H AVENUE S, Saskatoon, SK   
10  SK914106  Saskatoon       SK         2427 Rosewood DRIVE, Saskatoon, SK   
11  SK914056  Saskatoon       SK          3409 Orton

  df = df.append(tempDF)


         MLS      City Province                                 Address  \
0   E4319228  Edmonton       AB          14425 110A AV NW, Edmonton, AB   
1   A2008195  Edmonton       AB            7224 81 Avenue, Edmonton, AB   
2   E4318803  Edmonton       AB         9124 141 ST NW NW, Edmonton, AB   
3   E4318797  Edmonton       AB  #7 15710 BEAUMARIS RD NW, Edmonton, AB   
4   E4318817  Edmonton       AB            6843 111 ST NW, Edmonton, AB   
5   E4318811  Edmonton       AB   #602 12303 JASPER AV NW, Edmonton, AB   
6   E4318805  Edmonton       AB           11710 122 ST NW, Edmonton, AB   
7   E4318453  Edmonton       AB     #1502 10303 105 ST NW, Edmonton, AB   
8   E4318462  Edmonton       AB       16 West Point WD NW, Edmonton, AB   
9   E4318292  Edmonton       AB             9220 75 ST NW, Edmonton, AB   
10  E4318289  Edmonton       AB      #120 10407 122 ST NW, Edmonton, AB   
11  E4318264  Edmonton       AB           11134 101 ST NW, Edmonton, AB   
12  E4318270  Edmonton   

  df = df.append(tempDF)


         MLS     City Province                                        Address  \
0   A2009409  Calgary       AB                  253 22 Avenue NW, Calgary, AB   
1   A2008802  Calgary       AB         159 New Brighton Close SE, Calgary, AB   
2   A2003495  Calgary       AB               1029 Fonda Court SE, Calgary, AB   
3   A2005616  Calgary       AB                 1716 42 Street NE, Calgary, AB   
4   A2011129  Calgary       AB             111 Deerpath Court SE, Calgary, AB   
5   A2012027  Calgary       AB         1 1720 kensington Road NW, Calgary, AB   
6   A2011096  Calgary       AB               180 Gordon Drive SW, Calgary, AB   
7   A2011719  Calgary       AB             2 930 Royal Avenue SW, Calgary, AB   
8   A2011914  Calgary       AB  1413 151 Country Village Road NE, Calgary, AB   
9   A2012019  Calgary       AB         27 Hunterhorn Crescent NE, Calgary, AB   
10  A2007664  Calgary       AB        325 Mckenzie Towne Gate SE, Calgary, AB   
11  A2009419  Calgary       

  df = df.append(tempDF)


         MLS     City Province                                        Address  \
0   C5827918  Toronto       ON               #423 1005 KING ST W, Toronto, ON   
1   W5827885  Toronto       ON                 11 SUNNYBRAE CRES, Toronto, ON   
2   C5827921  Toronto       ON                  #714 629 KING ST, Toronto, ON   
3   W5827929  Toronto       ON                 38 VEROBEACH BLVD, Toronto, ON   
4   W5827880  Toronto       ON                  51 BURNFIELD AVE, Toronto, ON   
5   C5827952  Toronto       ON            #ON 2007 HILLCREST AVE, Toronto, ON   
6   W5827886  Toronto       ON           #1012 816 LANSDOWNE AVE, Toronto, ON   
7   E5826980  Toronto       ON  #304 452 SCARBOROUGH GOLFCLUB RD, Toronto, ON   
8   C5827246  Toronto       ON           #PH05 15 FORT YORK BLVD, Toronto, ON   
9   W5827399  Toronto       ON                    33 WICKFORD DR, Toronto, ON   
10  C5827589  Toronto       ON                     137 SEATON ST, Toronto, ON   
11  E5827590  Toronto       

  df = df.append(tempDF)


         MLS         City Province  \
0   W5827909  Mississauga       ON   
1   W5827928  Mississauga       ON   
2   W5827892  Mississauga       ON   
3   W5827911  Mississauga       ON   
4   W5827935  Mississauga       ON   
5   W5827299  Mississauga       ON   
6   W5827400  Mississauga       ON   
7   40349553  Mississauga       ON   
8   W5827557  Mississauga       ON   
9   W5827674  Mississauga       ON   
10  40349719  Mississauga       ON   
11  W5826905  Mississauga       ON   
12  W5826773  Mississauga       ON   
13  W5826834  Mississauga       ON   
14  W5827351  Mississauga       ON   
15  W5827448  Mississauga       ON   
16  H4151093  Mississauga       ON   
17  40349185  Mississauga       ON   
18  W5826874  Mississauga       ON   
19  W5827446  Mississauga       ON   

                                              Address    Price  NumberOfBeds  \
0                 3179 IBBETSON CRES, Mississauga, ON  1100000             4   
1                #2109 1 VALHALLA INN RD,

  df = df.append(tempDF)


Empty DataFrame
Columns: [MLS, City, Province, Address, Price, NumberOfBeds, NumberOfBaths]
Index: []


  df = df.append(tempDF)


         MLS       City Province  \
0   R2738520  Vancouver       BC   
1   R2738552  Vancouver       BC   
2   R2738530  Vancouver       BC   
3   R2738528  Vancouver       BC   
4   R2738554  Vancouver       BC   
5   R2738518  Vancouver       BC   
6   R2738548  Vancouver       BC   
7   R2738556  Vancouver       BC   
8   R2738581  Vancouver       BC   
9   R2738546  Vancouver       BC   
10  R2738543  Vancouver       BC   
11  R2738527  Vancouver       BC   
12  X5826720  Vancouver       BC   
13  R2738231  Vancouver       BC   
14  C8047859  Vancouver       BC   
15  R2738215  Vancouver       BC   
16  R2738301  Vancouver       BC   
17  R2738168  Vancouver       BC   
18  R2738296  Vancouver       BC   
19  R2738293  Vancouver       BC   

                                         Address     Price  NumberOfBeds  \
0        805 1225 RICHARDS STREET, Vancouver, BC    574999             0   
1             208 8070 OAK STREET, Vancouver, BC   1179700             2   
2         1106 

  df = df.append(tempDF)


          MLS      City Province                                Address  \
0   202226988  Winnipeg       MB           13 Hull Avenue, Winnipeg, MB   
1   202227043  Winnipeg       MB     190 Springwater Road, Winnipeg, MB   
2   202227026  Winnipeg       MB    680 Wellington Avenue, Winnipeg, MB   
3   202227027  Winnipeg       MB        475 Besant Street, Winnipeg, MB   
4   202227046  Winnipeg       MB        669 Alfred Avenue, Winnipeg, MB   
5   202224448  Winnipeg       MB          38 Inman Avenue, Winnipeg, MB   
6   202226958  Winnipeg       MB  32 SOUTHLANDS Drive, East St Paul, MB   
7   202226877  Winnipeg       MB     55 Forestgate Avenue, Winnipeg, MB   
8   202226999  Winnipeg       MB        32 Crystal Avenue, Winnipeg, MB   
9   202227009  Winnipeg       MB          206 Yale Avenue, Winnipeg, MB   
10  202225990  Winnipeg       MB          20 Whidden Gate, Winnipeg, MB   
11  202226895  Winnipeg       MB           34 Hood Avenue, Winnipeg, MB   
12  202227006  Winnipeg  

  df = df.append(tempDF)


In [64]:
df =df[(df.NumberOfBeds != 0) & (df.NumberOfBaths != 0)]

In [65]:
df

Unnamed: 0,MLS,City,Province,Address,Price,NumberOfBeds,NumberOfBaths
0,SK914096,Saskatoon,SK,"739 L AVENUE S, Saskatoon, SK",229900,3,2
1,SK914132,Saskatoon,SK,"46 Peeling AVENUE, Saskatoon, SK",329900,4,2
2,SK914089,Saskatoon,SK,"115 Ash STREET E, Saskatoon, SK",259900,3,1
3,SK914075,Saskatoon,SK,"334 Crean CRESCENT, Saskatoon, SK",419900,4,3
4,SK914086,Saskatoon,SK,"739 Hastings COVE, Saskatoon, SK",899900,4,3
...,...,...,...,...,...,...,...
15,202226898,Winnipeg,MB,"251 Cullen Drive, Winnipeg, MB",349900,4,2
16,202226907,Winnipeg,MB,"150 Nordstrom Drive, Winnipeg, MB",499900,3,3
17,202226946,Winnipeg,MB,"789 Sherburn Street, Winnipeg, MB",229900,2,1
18,202225745,Winnipeg,MB,"389 Beverley Street, Winnipeg, MB",74900,3,1


In [66]:
df.to_csv('remax_properties.csv')