Importing required libraries and modules

In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By 
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from geopy.geocoders import Nominatim
from geopy import geocoders
import folium
from folium.plugins import MarkerCluster
import time

Setting up chrome web driver

In [2]:
s=Service('/usr/local/bin/chromedriver')
driver=webdriver.Chrome(service=s)
driver.get("https://www.kijiji.ca/h-windsor-area-on/1700220")

Locating the elements using xpath

In [3]:
category = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH,"//button[@data-id='SearchCategory']"))
    )
time.sleep(5)
category.click()

In [4]:
realestate = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "//li[@id='SearchCategorySelector-item-3']"))
    )

realestate.click()  
search=driver.find_element(By.XPATH,"//button[@data-qa-id='header-button-submit']")
search.click()

In [5]:
driver.execute_script("window.scrollTo(0,300)")
rent = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//a[@href='https://www.kijiji.ca/b-for-rent/windsor-area-on/c30349001l1700220?sort=dateDesc']"))
    )
time.sleep(5)
rent.click()
time.sleep(20)

Scraping the required datas and storing them into an empty list

In [6]:
results=[]
addresses=[]
url_datas=[]
titles=driver.find_elements(By.XPATH,"//a[@data-testid='listing-link']")
address=driver.find_elements(By.XPATH,"//p[@data-testid='listing-location']")
prices=driver.find_elements(By.XPATH,"//p[@data-testid='listing-price']")
links=driver.find_elements(By.XPATH,"//a[@data-testid='listing-link'][@href]")

try:
    for i in range(len(titles)):
        data={'Titles': titles[i].text,
              'Address': address[i].text,
              'Prices': prices[i].text,
              'Links': links[i].get_attribute("href")}
        addresses.append(data['Address'])
        results.append(data)
    df=pd.DataFrame(results)

except NoSuchElementException:
    results.append(None)

Storing the list of addresses into a txt file

In [7]:
with open('addresses.txt','w+') as myfile:
    for item in addresses:
        myfile.write(item.replace('Marys',"Mary's") + ', Windsor' + ', Ontario' + '\n')
myfile.close()    

Use Geopy to fetch geocode data

In [8]:
coordinates=[]
geolocator = Nominatim(user_agent="AKG")
with open("addresses.txt",'r') as fp:
    for line in fp:
        location = geolocator.geocode(line)
        if location is not None:
            data3={'Latitude': location.latitude,'Longitude': location.longitude}
            coordinates.append(data3)
        else:
              print(f"Could not find Location for {line}")
fp.close()
coordinates_df=pd.DataFrame(coordinates)

Could not find Location for Belle River, Windsor, Ontario

Could not find Location for LaSalle, Windsor, Ontario

Could not find Location for Belle River, Windsor, Ontario

Could not find Location for Maidstone, Windsor, Ontario



Combining two dataframes

In [9]:
final_df=pd.concat([df,coordinates_df],axis = 1)
final_df

Unnamed: 0,Titles,Address,Prices,Links,Latitude,Longitude
0,Now Leasing! Beautiful One Bedroom Suites Awai...,Windsor Region,"$1,675.00",https://www.kijiji.ca/v-apartments-condos/wind...,43.509077,-79.633537
1,PRIVATE SECURE CLEAN ROOM RENTAL,Windsor,$690.00,https://www.kijiji.ca/v-room-rental-roommate/w...,42.31674,-83.037339
2,Waterfront High-Rise - 2 Bedroom Vacancy,Windsor,"$1,774.00",https://www.kijiji.ca/v-apartments-condos/wind...,42.31674,-83.037339
3,$1000 Move-In Bonus | Open Concept 1 Bedroom S...,Windsor Region,"$1,599.00",https://www.kijiji.ca/v-apartments-condos/wind...,43.509077,-79.633537
4,Detached House for rent in Lakeshore (near Tec...,Belle River,"$3,250.00",https://www.kijiji.ca/v-apartments-condos/wind...,42.31674,-83.037339
5,Cozy 2 Bedroom Basement Rental Unit in South W...,Windsor,"$1,350.00",https://www.kijiji.ca/v-apartments-condos/wind...,42.31674,-83.037339
6,Spacious Newly Renovated Suites! Call Hazelvie...,Windsor,"$1,495.00",https://www.kijiji.ca/v-apartments-condos/wind...,43.509077,-79.633537
7,Now Leasing! Bright Two Bedroom Suites Await Y...,Windsor Region,"$2,160.00",https://www.kijiji.ca/v-apartments-condos/wind...,42.31674,-83.037339
8,"Riverside Tower - 2 Bedroom, 1 Bathroom Apartm...",Windsor,"$1,950.00",https://www.kijiji.ca/v-apartments-condos/wind...,42.31674,-83.037339
9,"Front store for rent on 2739 SEMINOLEWindsor, ...",Windsor,"$1,750.00",https://www.kijiji.ca/v-commercial-office-spac...,42.31674,-83.037339


After obtaining the coordinates, we can plot them on map using Folium

In [10]:
windsor_location=[42.314197, -83.037055]
wmap = folium.Map(windsor_location,zoom_start = 11, width=1500, height=700,control_scale=True)
locations=coordinates_df
locations_list=locations.values.tolist()

mCluster = MarkerCluster(name="Cluster Name").add_to(wmap)

#href='<a href="URL" target="_blank">mytext</a>'
university=folium.Marker(location=[42.304591479548215,-83.06623226439827],popup='University Of Windsor',icon=folium.Icon(color="red")).add_to(mCluster)

for point in range(len(locations_list)):
    folium.Marker(locations_list[point], popup=(f"<a href='{final_df['Links'][point+1]}'' target='_blank'>{final_df['Titles'][point+1]}</a>")).add_to(mCluster)  
wmap.save("index.html")   
wmap