### Web scraping a car listing site with BeautifulSoup and Requests

In [1]:
# import required libraries

from bs4 import BeautifulSoup
import requests
import pandas as pd

### HTTP Request

In [2]:
# Listing page to scrape: available and sell-on-behalf automobiles

website = (
    'https://www.kaiandkaro.com/vehicles'
    '?availability=available'
    '&availability=sell_on_behalf'
    '&model__make__vehicle_type=Automobile'
)


In [3]:
# Send a GET request for the listing page.
# A timeout (in seconds) is passed explicitly: without one, requests will wait
# indefinitely if the server never answers, which would hang the notebook.

response = requests.get(website, timeout=30)

In [4]:
# Check the HTTP status code of the response (200 means the request succeeded)

response.status_code

200

#### Create soup object

In [5]:
# Parse the raw response body into a BeautifulSoup tree using the 'html.parser' backend

soup = BeautifulSoup(markup=response.content, features='html.parser')

In [6]:
# Display the parsed document to inspect the page structure
soup

<!DOCTYPE html>
<html lang="en"><head><meta charset="utf-8"/><meta content="width=device-width" name="viewport"/><meta content="Kai and Karo imports available cars" property="og:title"/><meta content="Kai and Karo car imports. Available vehicles for sale on our portfolio" property="og:description"/><title>Kai &amp; Karo | Car Dealership Kenya | New &amp; Used Cars for Sale</title><meta content="Your Trusted Car Dealership in Nairobi Kenya. We offer an extensive inventory of new and used cars from Top Car Brands. Visit us Today!" name="description"/><meta content="6" name="next-head-count"/><link href="/logo.ico" rel="shortcut icon" type="image/x-icon"/><link href="/logo.ico" rel="shortcut icon"/><meta content="Kai &amp; Karo Car Market place" property="og:site_name"/><meta content="Kai &amp; Karo | Car Dealership Kenya | New &amp; Used Cars for Sale." property="og:title"/><meta content="webapp" property="og:type"/><meta content="Kai and Karo is your one stop Market Place for Brand new 

### Results

In [7]:
# Collect every listing card on the page: <div> elements whose class
# attribute is exactly 'chakra-card css-1ndte01'

results = soup.find_all('div', class_='chakra-card css-1ndte01')

In [10]:
# Check the length of results.
# The expected length is 25; the assertion message reports the actual count
# so a mismatch is easy to diagnose.

assert len(results) == 25, f'expected 25 listings on the page, found {len(results)}'

In [11]:
# Inspect the raw HTML of the first listing card to locate the tags and
# class names that hold the fields we want

results[0]

<div class="chakra-card css-1ndte01"><style data-emotion="css 1hnz6hu">.css-1hnz6hu{position:static;}.css-1hnz6hu::before{content:'';cursor:inherit;display:block;position:absolute;top:0px;left:0px;z-index:0;width:100%;height:100%;}</style><a class="chakra-linkbox__overlay css-1hnz6hu" href="/vehicles/mitsubishi-canter-st56nza"><div style="object-fit:cover;position:relative"><style data-emotion="css kx9vo4 animation-14pkoxc">.css-kx9vo4{--skeleton-start-color:var(--chakra-colors-gray-100);--skeleton-end-color:var(--chakra-colors-gray-400);background:var(--skeleton-start-color);border-color:var(--skeleton-end-color);opacity:0.7;border-radius:var(--chakra-radii-sm);-webkit-animation:0.8s linear infinite alternate animation-14pkoxc;animation:0.8s linear infinite alternate animation-14pkoxc;box-shadow:var(--chakra-shadows-none);-webkit-background-clip:padding-box;background-clip:padding-box;cursor:default;color:var(--chakra-colors-transparent);pointer-events:none;-webkit-user-select:none;-m

### Target the required data

In [12]:
# Required data

# Name
# Transmission
# Engine rating
# Usage
# Year of manufacture (YOM)
# Price

In [13]:
# Name of the first listing: text of the card's first <h2> tag
# (the raw text is returned as-is, including any surrounding whitespace)

results[0].find('h2').get_text()

' Mitsubishi Canter'

In [18]:
# Transmission of the first listing: first <span> with the css-evl6jo class

results[0].find('span', class_='css-evl6jo').get_text()

'Automatic'

In [23]:
# Engine rating of the first listing: the next sibling <span> after the
# transmission span

results[0].find('span', class_='css-evl6jo').find_next_sibling('span').get_text()

'3000 CC'

In [19]:
# Usage of the first listing: first <span> with the css-b03jaa class

results[0].find('span', class_='css-b03jaa').get_text()

'Kenyan Used '

In [24]:
# Year of manufacture of the first listing: the badge <span> whose class
# attribute is exactly 'chakra-badge css-1dub5x4'

results[0].find('span', class_='chakra-badge css-1dub5x4').get_text()

'2013'

In [26]:
# Price of the first listing: first <span> with the chakra-text class.
# The non-breaking space (\xa0) in the text is swapped for a regular space.

results[0].find('span', class_='chakra-text').get_text().replace('\xa0', ' ')

'KES 1,149,999'

### Putting everything together using a for loop

In [30]:
# Create an empty list for each field; every listing appends exactly one
# value to each list so the lists stay aligned row by row.

name = []
transmission = []
engine_rating = []
usage = []
year_of_manufacture = []
price = []

for result in results:
    # find() returns None when a tag is missing, so calling a method on the
    # result raises AttributeError. Only that error is caught (a bare
    # `except:` would also hide KeyboardInterrupt and genuine bugs), and
    # 'n/a' is stored as a placeholder. Text is stripped because the raw
    # values carry stray leading/trailing whitespace.

    # Name
    try:
        name.append(result.find('h2').get_text().strip())
    except AttributeError:
        name.append('n/a')
    # Transmission
    try:
        transmission.append(result.find('span', {'class': 'css-evl6jo'}).get_text().strip())
    except AttributeError:
        transmission.append('n/a')
    # Engine rating (the next sibling <span> after the transmission span)
    try:
        engine_rating.append(
            result.find('span', {'class': 'css-evl6jo'}).find_next_sibling('span').get_text().strip()
        )
    except AttributeError:
        engine_rating.append('n/a')
    # Usage
    try:
        usage.append(result.find('span', {'class': 'css-b03jaa'}).get_text().strip())
    except AttributeError:
        usage.append('n/a')
    # Year of Manufacture
    try:
        year_of_manufacture.append(
            result.find('span', {'class': 'chakra-badge css-1dub5x4'}).get_text().strip()
        )
    except AttributeError:
        year_of_manufacture.append('n/a')
    # Price (non-breaking spaces are replaced with regular spaces)
    try:
        price.append(result.find('span', {'class': 'chakra-text'}).get_text().replace('\xa0', ' ').strip())
    except AttributeError:
        price.append('n/a')

### Create a Pandas Dataframe

In [31]:
# Assemble the per-field lists into one table, one column per field
car_listings = pd.DataFrame(
    data={
        'Name': name,
        'Transmission': transmission,
        'Engine Rating': engine_rating,
        'Usage': usage,
        'Year of Manufacture': year_of_manufacture,
        'Price': price,
    }
)

In [32]:
# View the dataframe built from the first page of listings

car_listings

Unnamed: 0,Name,Transmission,Engine Rating,Usage,Year of Manufacture,Price
0,Mitsubishi Canter,Automatic,3000 CC,Kenyan Used,2013,"KES 1,149,999"
1,Mitsubishi RVR,Automatic,1800 CC,Kenyan Used,2011,"KES 1,399,999"
2,BMW 320i,Automatic,2000 CC,Kenyan Used,2008,"KES 799,999"
3,Subaru Forester,Automatic,2000 CC,Kenyan Used,2016,"KES 2,699,999"
4,Suzuki Baleno,Automatic,1400 CC,Kenyan Used,2019,"KES 1,449,999"
5,Nissan Note,Automatic,1200 CC,Kenyan Used,2015,"KES 829,999"
6,Nissan March,Automatic,1200 CC,Kenyan Used,2017,"KES 829,999"
7,Toyota Landcruiser 200 series (4.6L V8),Automatic,4600 CC,Kenyan Used,2011,"KES 5,499,999"
8,Subaru Levorg,Automatic,2000 CC,Kenyan Used,2014,"KES 1,667,499"
9,Volkswagen Golf Variant 1.4,Automatic,1400 CC,Kenyan Used,2016,"KES 1,749,999"


### Store Output in Excel

In [33]:
# Store the output in an Excel workbook named car_listings.xlsx;
# index=False leaves the DataFrame's row index out of the file

car_listings.to_excel('car_listings.xlsx', index=False)

#### Pagination

In [34]:
# Create an empty list for each field; every listing appends exactly one
# value to each list so the lists stay aligned row by row.
name = []
transmission = []
engine_rating = []
usage = []
year_of_manufacture = []
price = []

LAST_PAGE = 50        # scrape result pages 1..LAST_PAGE
REQUEST_TIMEOUT = 30  # seconds; without it requests can wait indefinitely


def text_or_na(tag):
    """Return the stripped text of a BeautifulSoup tag, or 'n/a' if the tag is None."""
    return tag.get_text().strip() if tag is not None else 'n/a'


# Get listings from the first LAST_PAGE pages
for i in range(1, LAST_PAGE + 1):
    # URL of the i-th results page
    website = f'https://www.kaiandkaro.com/vehicles?availability=available&availability=sell_on_behalf&model__make__vehicle_type=Automobile&page={i}'

    # Send the request; a network failure or non-2xx status skips this page
    # instead of aborting the run and losing what was already collected.
    try:
        response = requests.get(website, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as error:
        print(f'Skipping page {i}: {error}')
        continue

    # Create soup object
    soup = BeautifulSoup(response.content, 'html.parser')

    # Listing cards on this page
    results = soup.find_all('div', {'class': 'chakra-card css-1ndte01'})

    # A page without any cards is treated as the end of the listings, so the
    # remaining pages are not requested needlessly.
    if not results:
        print(f'No listings found on page {i}; stopping.')
        break

    # Loop through results
    for result in results:
        # The engine rating is the next sibling <span> after the transmission
        # span, so it can only be looked up when the transmission span exists.
        transmission_tag = result.find('span', {'class': 'css-evl6jo'})
        engine_tag = (
            transmission_tag.find_next_sibling('span')
            if transmission_tag is not None
            else None
        )

        name.append(text_or_na(result.find('h2')))
        transmission.append(text_or_na(transmission_tag))
        engine_rating.append(text_or_na(engine_tag))
        usage.append(text_or_na(result.find('span', {'class': 'css-b03jaa'})))
        year_of_manufacture.append(
            text_or_na(result.find('span', {'class': 'chakra-badge css-1dub5x4'}))
        )
        # Non-breaking spaces inside the price are replaced with regular spaces
        price.append(
            text_or_na(result.find('span', {'class': 'chakra-text'})).replace('\xa0', ' ')
        )


In [35]:
# Build the table of all scraped listings, one column per field, then display it
car_listings = pd.DataFrame(
    data={
        'Name': name,
        'Transmission': transmission,
        'Engine Rating': engine_rating,
        'Usage': usage,
        'Year of Manufacture': year_of_manufacture,
        'Price': price,
    }
)
car_listings

Unnamed: 0,Name,Transmission,Engine Rating,Usage,Year of Manufacture,Price
0,Mitsubishi Canter,Automatic,3000 CC,Kenyan Used,2013,"KES 1,149,999"
1,Mitsubishi RVR,Automatic,1800 CC,Kenyan Used,2011,"KES 1,399,999"
2,BMW 320i,Automatic,2000 CC,Kenyan Used,2008,"KES 799,999"
3,Subaru Forester,Automatic,2000 CC,Kenyan Used,2016,"KES 2,699,999"
4,Suzuki Baleno,Automatic,1400 CC,Kenyan Used,2019,"KES 1,449,999"
...,...,...,...,...,...,...
1245,Mazda Premacy 2.2L Diesel,Automatic,2200 CC,Kenyan Used,2012,"KES 1,099,999"
1246,Toyota Landcruiser GXR 4.5L V8 / MANUAL,Automatic,4500 CC,Kenyan Used,2016,"KES 8,199,999"
1247,Audi A4 2.0T QUATTRO,Automatic,2000 CC,Kenyan Used,2014,"KES 2,299,999"
1248,Audi Q7 3.0L TDI,Automatic,3000 CC,Kenyan Used,2007,"KES 2,199,999"


In [36]:
# Store the paginated output in an Excel workbook named
# car_listings_pagination.xlsx; index=False leaves the row index out of the file

car_listings.to_excel('car_listings_pagination.xlsx', index=False)