In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Values of the HTML `class` attribute searched for when collecting listing rows.
# NOTE(review): presumably the site's odd/even row classes -- confirm the
# spelling against the live markup.
CLASS_NAMES = ["odTr", "evendTr"]

# Tag names: the element holding one record, the element holding its price,
# and the element type used for every other field of the record.
PARENT_TAG, PRICE_TAG, OTHERS_TAG = "tr", "span", "td"

# Zero-based positions of each field among a record's OTHERS_TAG cells.
PHONE, TIME, STATE, CITY = 0, 3, 4, 5

In [3]:
# Bundle of the selectors above in the shape handed to Scraper as `data_address`:
# row class names, tag names, and the cell position of each field.
address = dict(
    class_names=CLASS_NAMES,
    parent_tag=PARENT_TAG,
    price_tag=PRICE_TAG,
    others_tag=OTHERS_TAG,
    index=dict(
        phone=PHONE,
        time=TIME,
        state=STATE,
        city=CITY,
    ),
)

In [4]:
class Scraper:
    """Scrape a paginated listing into a DataFrame and save it as CSV.

    Each listing row yields one record with the fields ``phone_number``,
    ``price``, ``status``, ``city`` and ``time``. Where to find those fields in
    the HTML is described by the ``data_address`` mapping (see ``__init__``).
    """

    def __init__(self , url , pages , data_address , file_name):
        """Store the scrape configuration and start with an empty result frame.

        Args:
            url: Listing URL containing a ``page=<number>`` query parameter.
            pages: Number of pages to fetch; pages ``1..pages`` are visited.
            data_address: Mapping with the keys ``class_names`` (iterable of
                row class names), ``parent_tag``, ``price_tag``, ``others_tag``
                and ``index`` (mapping with ``phone``, ``time``, ``state`` and
                ``city`` cell positions).
            file_name: Path of the CSV file written by ``save_data``.
        """
        self.url = url
        self.pages = pages
        self.address = data_address
        self.df = pd.DataFrame(columns=["phone_number" , "price" , "status" , "city" , "time"])
        self.file_name = file_name

    @staticmethod
    def extract_single_record(desc, address=None):
        """Turn one listing row element into a record dict.

        Args:
            desc: Row element exposing ``find(tag)`` and ``find_all(tag)``
                whose results have a ``.text`` attribute (e.g. a bs4 ``Tag``).
            address: Optional selector mapping with ``price_tag``,
                ``others_tag`` and ``index``. When omitted, the module-level
                ``PRICE_TAG`` / ``OTHERS_TAG`` / ``PHONE`` / ``TIME`` /
                ``STATE`` / ``CITY`` constants are used, as before.

        Returns:
            dict with the keys ``phone_number``, ``price``, ``status``,
            ``city`` and ``time``; every value is whitespace-stripped text.

        Raises:
            AttributeError: If the row has no price element.
            IndexError: If the row has fewer cells than the positions require.
        """
        if address is None:
            price_tag, others_tag = PRICE_TAG, OTHERS_TAG
            positions = {"phone": PHONE, "time": TIME, "state": STATE, "city": CITY}
        else:
            price_tag = address["price_tag"]
            others_tag = address["others_tag"]
            positions = address["index"]

        price = desc.find(price_tag).text.strip()
        cells = desc.find_all(others_tag)
        return {
            "phone_number": cells[positions["phone"]].text.strip(),
            "price": price,
            # The cell at the "state" position is stored under the "status" column.
            "status": cells[positions["state"]].text.strip(),
            "city": cells[positions["city"]].text.strip(),
            "time": cells[positions["time"]].text.strip(),
        }

    @staticmethod
    def get_page_contents(url, timeout=30):
        """Download ``url`` and return the ``requests`` response.

        Args:
            url: Address to fetch.
            timeout: Seconds to wait for the server before giving up; without
                one, ``requests`` may block indefinitely.

        Raises:
            requests.RequestException: On connection problems, timeouts, or an
                HTTP error status (so error pages are never parsed as data).
        """
        page = requests.get(url, timeout=timeout)
        page.raise_for_status()
        return page

    def scrape_page(self , page):
        """Parse one downloaded page and append its records to ``self.df``.

        Args:
            page: Response-like object whose ``.text`` holds the page HTML.
        """
        soup = BeautifulSoup(page.text, "html.parser")
        parent_tag = self.address["parent_tag"]

        # Rows are gathered class by class, so all rows of the first class
        # name come before those of the next one.
        elements = []
        for name in self.address["class_names"]:
            elements.extend(soup.find_all(parent_tag, class_=name))

        rows = [Scraper.extract_single_record(element, self.address) for element in elements]
        if not rows:
            # Nothing matched on this page; leave the accumulated frame untouched.
            return

        page_df = pd.DataFrame(rows)
        if self.df.empty:
            # Skip concatenating onto the empty starter frame (deprecated in pandas).
            self.df = page_df
        else:
            self.df = pd.concat([self.df , page_df] , axis=0 , ignore_index=True)

    def get_next_page_url(self , page_num):
        """Return ``self.url`` with the value of its ``page=`` parameter replaced.

        Args:
            page_num: Page number to put into the URL.

        Returns:
            The URL string pointing at page ``page_num``.

        Raises:
            ValueError: If ``self.url`` contains no ``page=`` parameter.
        """
        marker = "page="
        position = self.url.find(marker)
        if position == -1:
            raise ValueError(f"URL has no '{marker}' parameter: {self.url!r}")

        start = position + len(marker)
        # Skip over every digit of the existing page number, not just the first.
        end = start
        while end < len(self.url) and self.url[end].isdigit():
            end += 1
        return self.url[:start] + str(page_num) + self.url[end:]

    def save_data(self):
        """Write the accumulated records to ``self.file_name`` as CSV (no index column)."""
        self.df.to_csv(self.file_name , index=False)

    def run(self):
        """Scrape pages ``1..self.pages`` in order, then save the results.

        The CSV is written even when a page fails part-way through, so rows
        collected before the failure are not lost; the error is re-raised.
        """
        try:
            for page_number in range(1,self.pages+1):
                page_url = self.get_next_page_url(page_number)
                page = Scraper.get_page_contents(page_url)
                self.scrape_page(page)
                print(f'page {page_number} scraped')
        finally:
            self.save_data()
    

In [5]:
# Listing URL for the first results page; Scraper.get_next_page_url rewrites
# the value that follows "page=" to address the other pages.
url = (
    "https://www.rond.ir/SearchSim"
    "?page=1&StateId=0&CityId=0&SimOrderBy=Update"
)

In [None]:
# Fetch pages 1..1000 of the listing and write every record to rond1000.csv.
test = Scraper(
    url=url,
    pages=1000,
    data_address=address,
    file_name='rond1000.csv',
)
test.run()