In [1]:
import time
from datetime import datetime

from selenium import webdriver
from selenium.webdriver.common.by import By
#from selenium.webdriver.common.keys import Keys

import pandas as pd
import numpy as np

import warnings
warnings.simplefilter(action='ignore')

In [2]:
# Sleep function 
def sleep(x):
    time.sleep(x)

# Wait for a certain measure of time before throwing an exception
def wait(x):
    driver.implicitly_wait(x)

    
# Find Elements Function
def find_elements_TAPF(T,A,P,F):

    titles = driver.find_elements(By.CLASS_NAME, T)
    address = driver.find_elements(By.CLASS_NAME, A)
    prices = driver.find_elements(By.CLASS_NAME, P)
    features = driver.find_elements(By.CLASS_NAME, F) 

    list_title = [title.text for title in titles]
    list_addres = [addr.text for addr in address]
    list_price = [price.text for price in prices]
    list_features = [feature.text for feature in features]
    return list_title, list_addres, list_price, list_features

In [3]:
print('---------------------- Selenium House-Rotterdam Web-Scraping Project ----------------------')
start=datetime.now()

#  1 - Create Driver
Path = '/Users/macbook/Desktop/projects/Github_Repositories/Portfolio Projects/02 - Web_Scraping_Job_Search/chromedriver'
driver = webdriver.Chrome(Path)

#  2 - Go to Website
link = 'https://www.pararius.com/apartments/rotterdam?ac=1'

driver.get(link)
wait(10)
sleep(2)

# 3 - number of results
total_house_number = driver.find_elements(By.CLASS_NAME, 'search-list-header__count')
list_result = [tt.text for tt in total_house_number]
print(f' Total Number of house = {int(list_result[0])}\n' )


# Number of Page
#num_of_page = driver.find_elements(By.CLASS_NAME, '')

# 4 -  Take Infos from Page
# 4.1 - Headers, Publish_Time ,Company, City
T, A, P, F = 'listing-search-item__title', "listing-search-item__sub-title\\'", 'listing-search-item__price', 'listing-search-item__features'
list_title, list_addres, list_price, list_features = find_elements_TAPF(T,A,P,F)

print(f'{list_title[1]} , {list_addres[1]} , {list_price[1]} , {list_features[1]}\n')

all_list = [list_title, list_addres, list_price, list_features]

for i in all_list:
    print(len(i))

    
# 4.6 - DataFrame df
d = dict(title=np.array(list_title), address=np.array(list_addres), price=np.array(list_price),
         duzenle=np.array(list_features) )
df = pd.DataFrame.from_dict(d, orient='index')
df = df.T
    
# Page number deneme
page_num = driver.find_elements(By.CLASS_NAME, 'pagination')
list_page = [i.text for i in page_num]
a = str(list_page)
page_number = int(a[-10:-8])
print('Total Page number = ', page_number)

num = 1
while num < 3:
    try:
        num+=1

        # 4.7.1 - Go to another page
        'https://www.pararius.com/apartments/rotterdam/page-2'
        link = f'https://www.pararius.com/apartments/rotterdam/page-{page_number}'
        driver.get(link)
        wait(10)
        sleep(1.5)

        # Take infos from page
        list_title, list_addres, list_price, list_features = find_elements_TAPF(T,A,P,F)

        # Data Frame
        d = dict(title=np.array(list_title), address=np.array(list_addres), price=np.array(list_price),
             duzenle=np.array(list_features) )
        df2 = pd.DataFrame.from_dict(d, orient='index')
        df2 = df2.T
    except:
        pass

    # 4.7.4 - Concatenate the DataFrames
    df = pd.concat([df,df2], axis=0, ignore_index=True)
    print(f'Page Number : {num}, DataFrame Shape : {df2.shape}')
    

# Data Cleaning
df.duzenle = df.duzenle.str.replace('\n', '/')
df[['m2', 'rooms', 'furnished']] = df.duzenle.str.split('/', expand=True)
    
# 5.1 - Save Data as csv 
print(f'DataFrame End : {df.shape}')
df.to_csv('rotterdam.csv', index=False)


# 6 - Quit
end =datetime.now() 
print('Code Runned, No Problem')
print(f'Time = {end - start}')
sleep(5)
driver.quit()

df.head()

---------------------- Selenium House-Rotterdam Web-Scraping Project ----------------------
 Total Number of house = 617

Apartment Oostmaaslaan , 3063 AN Rotterdam (Struisenburg) , €1,450 per month , 65 m²
3 rooms
Furnished

32
32
32
32
Total Page number =  21
Page Number : 2, DataFrame Shape : (19, 4)
Page Number : 3, DataFrame Shape : (19, 4)
DataFrame End : (70, 7)
Code Runned, No Problem
Time = 0:00:21.736268


Unnamed: 0,title,address,price,duzenle,m2,rooms,furnished
0,Apartment Stationssingel,3033 HE Rotterdam (Provenierswijk),"€1,595 per month",99 m²/3 rooms/Upholstered,99 m²,3 rooms,Upholstered
1,Apartment Oostmaaslaan,3063 AN Rotterdam (Struisenburg),"€1,450 per month",65 m²/3 rooms/Furnished,65 m²,3 rooms,Furnished
2,Apartment Boompjes,3011 XZ Rotterdam (Stadsdriehoek),"€1,625 per month",59 m²/2 rooms/Furnished,59 m²,2 rooms,Furnished
3,Apartment Hermonlaan 64,3061 BH Rotterdam (Kralingen West),"€2,000 per month",80 m²/3 rooms/Furnished,80 m²,3 rooms,Furnished
4,Apartment Hermonlaan 35,3061 BH Rotterdam (Kralingen West),"€1,495 per month",75 m²/3 rooms/Upholstered,75 m²,3 rooms,Upholstered


In [6]:
df.duplicated().sum()

20

In [7]:
df[df.duplicated()]

Unnamed: 0,title,address,price,duzenle,m2,rooms,furnished
32,Apartment Stationssingel,3033 HE Rotterdam (Provenierswijk),"€1,595 per month",99 m²/3 rooms/Upholstered,99 m²,3 rooms,Upholstered
51,House Burgemeester Meineszlaan,3022 BC Rotterdam (Nieuwe Westen),"€2,800 per month",82 m²/5 rooms,82 m²,5 rooms,
52,Apartment Stationssingel,3033 HE Rotterdam (Provenierswijk),"€1,595 per month",99 m²/3 rooms/Upholstered,99 m²,3 rooms,Upholstered
53,Apartment Avenue Concordia 42 B,3062 LJ Rotterdam (Kralingen Oost),"€1,800 per month",82 m²/3 rooms/Furnished,82 m²,3 rooms,Furnished
54,Apartment Essenburgsingel 115 A,3022 EJ Rotterdam (Nieuwe Westen),"€2,450 per month",145 m²/4 rooms/1925,145 m²,4 rooms,1925
55,Apartment Stadhoudersweg,3039 ED Rotterdam (Blijdorp),"€3,500 per month",170 m²/6 rooms/Upholstered,170 m²,6 rooms,Upholstered
56,Apartment Joost van Geelstraat,3021 VJ Rotterdam (Middelland),"€1,175 per month",70 m²/3 rooms/Upholstered,70 m²,3 rooms,Upholstered
57,Apartment Berberisweg 440 B,3053 PL Rotterdam (Schiebroek),"€1,592 per month",89 m²/3 rooms/Shell,89 m²,3 rooms,Shell
58,Apartment Berberisweg 442 G,3053 PL Rotterdam (Schiebroek),"€1,592 per month",89 m²/3 rooms/Shell,89 m²,3 rooms,Shell
59,Apartment Berberisweg 440 C,3053 PL Rotterdam (Schiebroek),"€1,592 per month",89 m²/3 rooms/Shell,89 m²,3 rooms,Shell
