# RightMove(UK region)

In [22]:
# -*- coding: utf-8 -*-

import requests
from bs4 import BeautifulSoup
import csv
 
class RightmoveScraper:
    results = []
    def fetch (self,url):
        print("HTTP GET request to URL: %s" %url, end="")
        response = requests.get(url)
        print(" ¦ Status code: %s" %response.status_code)
        return response
    def parse(self,html):
        content = BeautifulSoup(html,"lxml")
        
        titles = [title.text.strip() for title in content.findAll("h2",{"class":"propertyCard-title"})]
        address = [address ["content"] for address in content.findAll("meta",{"itemprop":"streetAddress"})]
        descriptions = [description.text for description in content.findAll("span",{"data-test":"property-description"})]
        prices = [price.text.strip() for price in content.findAll("div", {"class": "propertyCard-priceValue"})]
        dates = [date.text for date in content.findAll("span",{"class": "propertyCard-branchSummary-addedOrReduced"})]
        sellers = [seller.text.split("by")[-1].strip() for seller in content.findAll("span",{"class": "propertyCard-branchSummary-branchName"})]
        
        for index in range(0,len(titles)):
            self.results.append({
                "title": titles[index],
                "address":address[index],
                "descriptions": descriptions[index],
                "price":prices[index],
                "date":dates[index],
                "seller":sellers[index]
                    })
         
    def to_csv(self):
        with open("property_rightmove.csv","w") as csv_file:
            writer=csv.DictWriter(csv_file, fieldnames=self.results[0].keys())
            writer.writeheader()
            
            for row in self.results:
                writer.writerow(row)
            print ('Stored results to "property_rightmove.csv"')
            
    def run(self):
        for page in range(0, 23):
            index = page * 24
            #change url here
            url="https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E87490&minBedrooms=4&propertyTypes=&mustHave=&dontShow=&furnishTypes=&keywords="
            response = self.fetch(url)
            self.parse(response.text)   
            
        self.to_csv()    
        
if __name__=='__main__':
    scraper = RightmoveScraper()
    scraper.run()

HTTP GET request to URL: https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E87490&minBedrooms=4&propertyTypes=&mustHave=&dontShow=&furnishTypes=&keywords= ¦ Status code: 200
HTTP GET request to URL: https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E87490&minBedrooms=4&propertyTypes=&mustHave=&dontShow=&furnishTypes=&keywords= ¦ Status code: 200
HTTP GET request to URL: https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E87490&minBedrooms=4&propertyTypes=&mustHave=&dontShow=&furnishTypes=&keywords= ¦ Status code: 200
HTTP GET request to URL: https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E87490&minBedrooms=4&propertyTypes=&mustHave=&dontShow=&furnishTypes=&keywords= ¦ Status code: 200
HTTP GET request to URL: https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E87490&minBedrooms=4&propertyTypes=&mustHave=&dontShow=&furnishT

# **Rew (US region)**

In [1]:
# Import Libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [14]:
# Locating the number of pages
def find_pages(url):
  import time
  import re
  n_url=url
  page = ''
  while page == '':
    try:
        r = requests.get(n_url)
        c = r.content
        break
    except:
        print("Connection refused by the server..")
        print("Let me sleep for 5 seconds")
        print("ZZzzzz...")
        time.sleep(5)
        print("Was a nice sleep, now let me continue...")
        continue
  
  # Cleaning the URL
  soup = BeautifulSoup(c, "html.parser")
  
  # Search for the link with the info.
  n_pageStr = soup.find_all("a",{"class":"visible-xs"})
  
  pages= (n_pageStr[0].text.replace(" ",""))
  
  pattern = r'[0-1a-z]'
  # Replace all occurrences of character s with an empty string
  mod_string = re.sub(pattern, '', pages )

  return (mod_string)

In [21]:
print(240*"-")
print("Enter the URL")
user_input=str(input())
print(240*"-")
base_url = user_input+"/page/"

# Defining List:
list = []
 
# Interating with each page
for page in range(1, int(find_pages(user_input))):
    
    # Printing the current page of interation
    print(base_url+str(page))
    
    # Getting and Cleaning the page content
    r = requests.get(base_url+str(page))
    c = r.content
    soup = BeautifulSoup(c, "html.parser")
    
    # Assigning Main TAG of the resources
    all = soup.find_all("div",{"class":"displaypanel-wrapper"})
 
    # Interating with each item of the tag:
    for item in all:
        # Defining temp list
        d = {}
        
        # Getting the Price
        d["Price"] = int(item.find("div",{"class":"displaypanel-price"}).text.replace("\n","").replace("$","").replace(",",""))
 
        # Getting the Address
        d["Address"] = item.find_all("a")[0]["title"]

        #getting pincode
        Postal_Code = item.find_all("a")[0]["title"]
        d["Postal Code"]=Postal_Code[-7:]
        
        # Assigning a specific location of the other details
        details = item.find_all("ul",{"class":"inlinelist"})[1].find_all("li")
        for i in range(len(details)):
            
            # Getting the Bedroom
            if 'bd' in details[i].text:
                d["Bedroom"] = details[i].text.replace(" bd","")
                
            # Getting the Bathroom
            elif 'ba' in details[i].text:
                d["Bathroom"] = details[i].text.replace(" ba","")
                
            # Getting the Square Foot
            elif 'sf' in details[i].text:
                d["Square Foot"] = details[i].text.replace(" sf","")
                
            # Getting the Feet
            elif 'ft' in details[i].text:
                d["Feet"] = details[i].text.replace(" ft","")
 
        # Finally, adding the temp. dic (item dic) of each item to the main dic (list)
        list.append(d)

# Transforming the dic into a dataframe
df = pd.DataFrame(list)
 
#droping empty data and resetting index
df.dropna(inplace = True)
df.reset_index(drop=True,inplace = True)

# Saving the data on CSV file
df.to_csv("properties_rew.csv")
        

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Enter the URL
https://www.rew.ca/properties/areas/north-york-toronto-on
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
https://www.rew.ca/properties/areas/north-york-toronto-on/page/1
https://www.rew.ca/properties/areas/north-york-toronto-on/page/2
https://www.rew.ca/properties/areas/north-york-toronto-on/page/3
https://www.rew.ca/properties/areas/north-york-toronto-on/page/4
https://www.rew.ca/properties/areas/north-york-toronto-on/page/5
https://www.rew.ca/properties/areas/north-york-toronto-on/page/6
https://www.rew.ca/properties/areas/north-york-toronto-o